// WebVTTTokenizer.cpp
#include "config.h"
#if ENABLE(VIDEO_TRACK)
#include "WebVTTTokenizer.h"
#include "MarkupTokenizerInlines.h"
namespace WebCore {
// Shorthand wrappers that pass WebVTTTokenizerState as the first argument to
// BEGIN_STATE / ADVANCE_TO, so the state machine below only has to spell out the
// state name. BEGIN_STATE and ADVANCE_TO are not defined in this file; presumably
// they come from MarkupTokenizerInlines.h.
#define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(WebVTTTokenizerState, stateName)
#define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(WebVTTTokenizerState, stateName)
// Constructs a tokenizer whose input-stream preprocessor is bound to this
// object, then puts the tokenizer into its initial configuration via reset().
WebVTTTokenizer::WebVTTTokenizer()
    : m_inputStreamPreprocessor(this)
{
    // All remaining state (current state, token in progress, scratch buffer)
    // is initialised in one place so construction and reset() cannot diverge.
    reset();
}
// Returns true when |vector| and |string| have the same length and the same
// characters. Two empty sequences compare equal without calling equal(), so
// string.impl() is never dereferenced for a zero-length string.
template <typename CharacterType>
inline bool vectorEqualsString(const Vector<CharacterType, 32>& vector, const String& string)
{
    // Short-circuit order matters: length mismatch first, then the empty
    // case, and only then the character-by-character comparison.
    return vector.size() == string.length()
        && (!string.length() || equal(string.impl(), vector.data(), vector.size()));
}
// Returns the tokenizer to its starting configuration: an empty scratch
// buffer, no token in progress, and the state machine parked in DataState.
void WebVTTTokenizer::reset()
{
    // Drop any partially collected escape sequence.
    m_buffer.clear();
    // Forget the token that a previous nextToken() call was filling in.
    m_token = 0;
    // Tokenizing always starts out reading plain character data.
    m_state = WebVTTTokenizerState::DataState;
}
// Runs the WebVTT cue text state machine over |source|, filling in |token|.
//
// source: the input being tokenized; characters are read through
//         m_inputStreamPreprocessor.
// token:  the token to populate. If a previous call left a token in progress,
//         the caller is expected to pass the same token again (or a fresh,
//         uninitialized one); this is asserted below.
//
// The return value is whatever haveBufferedCharacterToken(), emitAndResumeIn()
// or emitEndOfFile() return; those helpers are defined elsewhere (presumably
// true means a complete token is available in |token| -- confirm against the
// header).
//
// State transitions are written with the WEBVTT_BEGIN_STATE / WEBVTT_ADVANCE_TO
// / END_STATE macros. Their definitions are not in this file; presumably
// ADVANCE_TO consumes the current character, reloads |cc| and jumps to the named
// state.
bool WebVTTTokenizer::nextToken(SegmentedString& source, WebVTTToken& token)
{
    // A token that is still being built must be handed back to us unchanged.
    ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Uninitialized);
    m_token = &token;

    // Nothing (more) to read right now: report whether buffered character data
    // already forms a token.
    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
        return haveBufferedCharacterToken();

    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();

    switch (m_state) {
    // Plain character data between tags.
    WEBVTT_BEGIN_STATE(DataState) {
        if (cc == '&') {
            // Start collecting a possible escape; m_buffer always begins with '&'.
            m_buffer.append(static_cast<LChar>(cc));
            WEBVTT_ADVANCE_TO(EscapeState);
        } else if (cc == '<') {
            // With no character data collected yet we can go straight into the
            // tag; otherwise the pending character token is emitted first and
            // tokenizing resumes in TagState.
            if (m_token->type() == WebVTTTokenTypes::Uninitialized
                || vectorEqualsString<UChar>(m_token->characters(), emptyString()))
                WEBVTT_ADVANCE_TO(TagState);
            else
                return emitAndResumeIn(source, WebVTTTokenizerState::TagState);
        } else if (cc == kEndOfFileMarker)
            return emitEndOfFile(source);
        else {
            bufferCharacter(cc);
            WEBVTT_ADVANCE_TO(DataState);
        }
    }
    END_STATE()

    // Inside "&...": m_buffer holds '&' followed by the ASCII alphanumerics
    // seen so far.
    WEBVTT_BEGIN_STATE(EscapeState) {
        if (cc == ';') {
            // The buffer never contains the terminating ';', so the recognised
            // escapes are compared without it. Because the buffer always starts
            // with '&', the names must include that leading '&'.
            if (vectorEqualsString(m_buffer, "&amp"))
                bufferCharacter('&');
            else if (vectorEqualsString(m_buffer, "&lt"))
                bufferCharacter('<');
            else if (vectorEqualsString(m_buffer, "&gt"))
                bufferCharacter('>');
            else {
                // Unknown escape: keep the text verbatim, including the ';'.
                m_buffer.append(static_cast<LChar>(cc));
                m_token->appendToCharacter(m_buffer);
            }
            m_buffer.clear();
            WEBVTT_ADVANCE_TO(DataState);
        } else if (isASCIIAlphanumeric(cc)) {
            m_buffer.append(static_cast<LChar>(cc));
            WEBVTT_ADVANCE_TO(EscapeState);
        } else if (cc == kEndOfFileMarker) {
            // Input ended mid-escape: flush what was collected as literal text.
            m_token->appendToCharacter(m_buffer);
            return emitEndOfFile(source);
        } else {
            // NOTE(review): in this branch a buffer holding only "&" is
            // discarded rather than appended, and |cc| itself is never
            // buffered. That looks lossy for input such as "a & b" -- confirm
            // against the WebVTT cue text tokenizer rules before changing it.
            if (!vectorEqualsString(m_buffer, "&"))
                m_token->appendToCharacter(m_buffer);
            m_buffer.clear();
            WEBVTT_ADVANCE_TO(DataState);
        }
    }
    END_STATE()

    // Just after '<': decide which kind of tag this is from its first character.
    WEBVTT_BEGIN_STATE(TagState) {
        if (isTokenizerWhitespace(cc)) {
            m_token->beginEmptyStartTag();
            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
        } else if (cc == '.') {
            m_token->beginEmptyStartTag();
            WEBVTT_ADVANCE_TO(StartTagClassState);
        } else if (cc == '/') {
            WEBVTT_ADVANCE_TO(EndTagOpenState);
        } else if (WTF::isASCIIDigit(cc)) {
            m_token->beginTimestampTag(cc);
            WEBVTT_ADVANCE_TO(TimestampTagState);
        } else if (cc == '>' || cc == kEndOfFileMarker) {
            m_token->beginEmptyStartTag();
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        } else {
            m_token->beginStartTag(cc);
            WEBVTT_ADVANCE_TO(StartTagState);
        }
    }
    END_STATE()

    // Collecting the name of a start tag.
    WEBVTT_BEGIN_STATE(StartTagState) {
        if (isTokenizerWhitespace(cc))
            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
        else if (cc == '.')
            WEBVTT_ADVANCE_TO(StartTagClassState);
        else if (cc == '>' || cc == kEndOfFileMarker)
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        else {
            m_token->appendToName(cc);
            WEBVTT_ADVANCE_TO(StartTagState);
        }
    }
    END_STATE()

    // Collecting a ".class" component; every terminator first commits the
    // class collected so far via addNewClass().
    WEBVTT_BEGIN_STATE(StartTagClassState) {
        if (isTokenizerWhitespace(cc)) {
            m_token->addNewClass();
            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
        } else if (cc == '.') {
            m_token->addNewClass();
            WEBVTT_ADVANCE_TO(StartTagClassState);
        } else if (cc == '>' || cc == kEndOfFileMarker) {
            m_token->addNewClass();
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        } else {
            m_token->appendToClass(cc);
            WEBVTT_ADVANCE_TO(StartTagClassState);
        }
    }
    END_STATE()

    // Collecting the annotation text that follows whitespace in a start tag;
    // everything up to '>' or end of input belongs to the annotation.
    WEBVTT_BEGIN_STATE(StartTagAnnotationState) {
        if (cc == '>' || cc == kEndOfFileMarker) {
            m_token->addNewAnnotation();
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        }
        m_token->appendToAnnotation(cc);
        WEBVTT_ADVANCE_TO(StartTagAnnotationState);
    }
    END_STATE()

    // Just after "</": the first character either ends the tag immediately
    // (an end tag begun with '\0') or starts the end tag's name.
    WEBVTT_BEGIN_STATE(EndTagOpenState) {
        if (cc == '>' || cc == kEndOfFileMarker) {
            m_token->beginEndTag('\0');
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        }
        m_token->beginEndTag(cc);
        WEBVTT_ADVANCE_TO(EndTagState);
    }
    END_STATE()

    // Collecting the remainder of an end tag's name.
    WEBVTT_BEGIN_STATE(EndTagState) {
        if (cc == '>' || cc == kEndOfFileMarker)
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        m_token->appendToName(cc);
        WEBVTT_ADVANCE_TO(EndTagState);
    }
    END_STATE()

    // Collecting the remainder of a timestamp tag (one that began with a digit).
    WEBVTT_BEGIN_STATE(TimestampTagState) {
        if (cc == '>' || cc == kEndOfFileMarker)
            return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
        m_token->appendToTimestamp(cc);
        WEBVTT_ADVANCE_TO(TimestampTagState);
    }
    END_STATE()
    }

    // Every state either advances or returns, so control should never get here.
    ASSERT_NOT_REACHED();
    return false;
}
}
#endif