/* * Copyright (c) 2005-2007 Henri Sivonen * Copyright (c) 2007-2017 Mozilla Foundation * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla * Foundation, and Opera Software ASA. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE.
*/
/** * An implementation of * https://html.spec.whatwg.org/multipage/syntax.html#tokenization * * This class implements the <code>Locator</code> interface. This is not an * incidental implementation detail: Users of this class are encouraged to make * use of the <code>Locator</code> nature. * * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer * can be configured to treat these conditions as fatal or to coerce the infoset * to something that XML 1.0 allows. * * @version $Id$ * @author hsivonen
*/ publicclass Tokenizer implements Locator, Locator2 {
/** * Magic value for UTF-16 operations.
*/ privatestaticfinalint LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
/** * UTF-16 code unit array containing less than and greater than for emitting * those characters on certain parse errors.
*/ privatestaticfinal @NoLength char[] LT_GT = { '<', '>' };
/** * UTF-16 code unit array containing less than and solidus for emitting * those characters on certain parse errors.
*/ privatestaticfinal @NoLength char[] LT_SOLIDUS = { '<', '/' };
/** * UTF-16 code unit array containing ]] for emitting those characters on * state transitions.
*/ privatestaticfinal @NoLength char[] RSQB_RSQB = { ']', ']' };
/** * Array version of U+FFFD.
*/ privatestaticfinal @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
// [NOCPP[
/** * Array version of space.
*/ privatestaticfinal @NoLength char[] SPACE = { ' ' };
// ]NOCPP]
/** * Array version of line feed.
*/ privatestaticfinal @NoLength char[] LF = { '\n' };
/** * The element whose end tag closes the current CDATA or RCDATA element.
*/ protected ElementName endTagExpectation = null;
privatechar[] endTagExpectationAsArray; // not @Auto!
/** * <code>true</code> if tokenizing an end tag
*/ protectedboolean endTag;
/** * <code>true</code> iff the current element/attribute name contains * a hyphen.
*/ privateboolean containsHyphen;
/** * The current tag token name. One of * 1) null, * 2) non-owning reference to nonInternedTagName * 3) non-owning reference to a pre-interned ElementName
*/ private ElementName tagName = null;
/** * The recycled ElementName instance for the non-pre-interned cases.
*/ private ElementName nonInternedTagName = null;
/** * The current attribute name.
*/ protected AttributeName attributeName = null;
/* * The line number of the current attribute. First set to the line of the * attribute name and if there is a value, set to the line the value * started on.
*/ // CPPONLY: private int attributeLine;
public ErrorHandler getErrorHandler() { returnthis.errorHandler;
}
/** * Gets the errorProfile. * * @param errorProfile
*/ public HashMap getErrorProfile() { returnnull;
}
/**
 * Stores the policy in the <code>commentPolicy</code> field.
 *
 * @param commentPolicy
 *            the commentPolicy to set
 */
public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
    this.commentPolicy = commentPolicy;
}
/** * Sets the contentNonXmlCharPolicy. * * @param contentNonXmlCharPolicy * the contentNonXmlCharPolicy to set
*/ publicvoid setContentNonXmlCharPolicy(
XmlViolationPolicy contentNonXmlCharPolicy) { if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) { thrownew IllegalArgumentException( "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
}
}
/**
 * Stores the policy in the <code>contentSpacePolicy</code> field.
 *
 * @param contentSpacePolicy
 *            the contentSpacePolicy to set
 */
public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
    this.contentSpacePolicy = contentSpacePolicy;
}
/** * Sets the xmlnsPolicy. * * @param xmlnsPolicy * the xmlnsPolicy to set
*/ publicvoid setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { if (xmlnsPolicy == XmlViolationPolicy.FATAL) { thrownew IllegalArgumentException("Can't use FATAL here.");
} this.xmlnsPolicy = xmlnsPolicy;
}
/**
 * Sets the saved tokenizer state and clears the end tag expectation
 * (both the <code>ElementName</code> and its array form are reset to
 * <code>null</code>).
 *
 * @param specialTokenizerState
 *            the tokenizer state to set
 */
public void setState(int specialTokenizerState) {
    // Drop any previous expectation first; the three writes are independent.
    this.endTagExpectationAsArray = null;
    this.endTagExpectation = null;
    this.stateSave = specialTokenizerState;
}
// [NOCPP[
/** * Sets the tokenizer state and the associated element name. This should * only ever used to put the tokenizer into one of the states that have * a special end tag expectation. For use from the tokenizer test harness. * * @param specialTokenizerState * the tokenizer state to set * @param endTagExpectation * the expected end tag for transitioning back to normal
*/ publicvoid setStateAndEndTagExpectation(int specialTokenizerState,
@Local String endTagExpectation) { this.stateSave = specialTokenizerState; if (specialTokenizerState == Tokenizer.DATA) { return;
}
@Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation); this.endTagExpectation = ElementName.elementNameByBuffer(asArray,
asArray.length); assertthis.endTagExpectation != null;
endTagExpectationToArray();
}
// ]NOCPP]
/**
 * Sets the tokenizer state and the associated element name, then derives
 * the array form of the expectation via
 * <code>endTagExpectationToArray()</code>. This should only ever be used
 * to put the tokenizer into one of the states that have a special end tag
 * expectation.
 *
 * @param specialTokenizerState
 *            the tokenizer state to set
 * @param endTagExpectation
 *            the expected end tag for transitioning back to normal
 */
public void setStateAndEndTagExpectation(int specialTokenizerState,
        ElementName endTagExpectation) {
    // Both field writes are independent; the array conversion must come last
    // because it reads this.endTagExpectation.
    this.endTagExpectation = endTagExpectation;
    this.stateSave = specialTokenizerState;
    endTagExpectationToArray();
}
privatevoid endTagExpectationToArray() { switch (endTagExpectation.getGroup()) { case TreeBuilder.TITLE:
endTagExpectationAsArray = TITLE_ARR; return; case TreeBuilder.SCRIPT:
endTagExpectationAsArray = SCRIPT_ARR; return; case TreeBuilder.STYLE:
endTagExpectationAsArray = STYLE_ARR; return; case TreeBuilder.PLAINTEXT:
endTagExpectationAsArray = PLAINTEXT_ARR; return; case TreeBuilder.XMP:
endTagExpectationAsArray = XMP_ARR; return; case TreeBuilder.TEXTAREA:
endTagExpectationAsArray = TEXTAREA_ARR; return; case TreeBuilder.IFRAME:
endTagExpectationAsArray = IFRAME_ARR; return; case TreeBuilder.NOEMBED:
endTagExpectationAsArray = NOEMBED_ARR; return; case TreeBuilder.NOSCRIPT:
endTagExpectationAsArray = NOSCRIPT_ARR; return; case TreeBuilder.NOFRAMES:
endTagExpectationAsArray = NOFRAMES_ARR; return; default: assertfalse: "Bad end tag expectation."; return;
}
}
/**
 * For C++ use only. Overwrites the tokenizer's current line number.
 *
 * @param line
 *            the value to store in the <code>line</code> field
 */
public void setLineNumber(int line) {
    // The assignment below must sit on its own line: when it shares a line
    // with the CPPONLY comment it is commented out and this method does nothing.
    // CPPONLY: this.attributeLine = line; // XXX is this needed?
    this.line = line;
}
/**
 * Prepares <code>strBuf</code> for a fresh use. The buffer is expected to
 * be empty already (asserted); the length is reset regardless.
 */
@Inline private void clearStrBufBeforeUse() {
    assert strBufLen == 0 : "strBufLen not reset after previous use!";
    // no-op in the absence of bugs
    strBufLen = 0;
}
/**
 * Empties <code>strBuf</code> when it is expected to hold exactly one
 * hyphen. Both expectations are asserted before the length is reset.
 */
@Inline private void clearStrBufAfterOneHyphen() {
    assert strBufLen == 1 : "strBufLen length not one!";
    assert strBuf[0] == '-' : "strBuf does not start with a hyphen!";
    strBufLen = 0;
}
/**
 * Appends one code unit at index <code>strBufLen</code> of
 * <code>strBuf</code> and advances <code>strBufLen</code>. The Java
 * version performs no capacity growth here.
 *
 * @param c
 *            the UTF-16 code unit to append
 */
@Inline private void appendStrBuf(char c) {
    // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
    // CPPONLY: if (strBufLen == strBuf.length) {
    // CPPONLY:     if (!EnsureBufferSpace(1)) {
    // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
    // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
    // CPPONLY: }
    strBuf[strBufLen++] = c;
}
/**
 * Converts the first <code>strBufLen</code> code units of
 * <code>strBuf</code> into a String and then calls
 * <code>clearStrBufAfterUse()</code>. Currently only used for error
 * reporting.
 *
 * <p>
 * C++ memory note: The return value must be released.
 *
 * @return the buffer as a string
 */
protected String strBufToString() {
    String result = Portability.newStringFromBuffer(strBuf, 0, strBufLen
        // CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS
    );
    clearStrBufAfterUse();
    return result;
}
/**
 * Converts the buffer into a local name and stores it in
 * <code>doctypeName</code> (nothing is returned), then calls
 * <code>clearStrBufAfterUse()</code>. The stored value is released in
 * emitDoctypeToken().
 */
private void strBufToDoctypeName() {
    doctypeName = Portability.newLocalNameFromBuffer(
            strBuf, strBufLen, interner);
    clearStrBufAfterUse();
}
/**
 * Emits the buffer as character tokens. Does nothing, and does not clear
 * the buffer, when <code>strBufLen</code> is not positive.
 *
 * @throws SAXException
 *             if the token handler threw
 */
private void emitStrBuf() throws SAXException {
    if (strBufLen <= 0) {
        return;
    }
    tokenHandler.characters(strBuf, 0, strBufLen);
    clearStrBufAfterUse();
}
@Inline privatevoid appendSecondHyphenToBogusComment() throws SAXException { // [NOCPP[ switch (commentPolicy) { case ALTER_INFOSET:
appendStrBuf(' '); // CPPONLY: MOZ_FALLTHROUGH; case ALLOW:
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // ]NOCPP]
appendStrBuf('-'); // [NOCPP[ break; case FATAL:
fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); break;
} // ]NOCPP]
}
// [NOCPP[ privatevoid maybeAppendSpaceToBogusComment() throws SAXException { switch (commentPolicy) { case ALTER_INFOSET:
appendStrBuf(' '); // CPPONLY: MOZ_FALLTHROUGH; case ALLOW:
warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); break; case FATAL:
fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); break;
}
}
// ]NOCPP]
@Inline privatevoid adjustDoubleHyphenAndAppendToStrBufAndErr(char c, boolean reportedConsecutiveHyphens) throws SAXException { // [NOCPP[ switch (commentPolicy) { case ALTER_INFOSET:
strBufLen--; // WARNING!!! This expands the worst case of the buffer length // given the length of input!
appendStrBuf(' ');
appendStrBuf('-'); // CPPONLY: MOZ_FALLTHROUGH; case ALLOW: if (!reportedConsecutiveHyphens) {
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
} // ]NOCPP]
appendStrBuf(c); // [NOCPP[ break; case FATAL:
fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); break;
} // ]NOCPP]
}
/**
 * Copies <code>length</code> code units from <code>buffer</code>, starting
 * at <code>offset</code>, to the end of <code>strBuf</code> and advances
 * <code>strBufLen</code>. The new length is computed with
 * <code>Portability.checkedAdd</code>; the Java version performs no
 * capacity growth here.
 *
 * @param buffer
 *            the source of the code units
 * @param offset
 *            index of the first code unit to copy
 * @param length
 *            number of code units to copy
 * @throws SAXException
 *             declared by the signature; presumably raised by
 *             <code>checkedAdd</code> on overflow (not visible here)
 */
private void appendStrBuf(@NoLength char[] buffer, int offset, int length)
        throws SAXException {
    int newLen = Portability.checkedAdd(strBufLen, length);
    // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient.";
    // CPPONLY: if (strBuf.length < newLen) {
    // CPPONLY:     if (!EnsureBufferSpace(length)) {
    // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
    // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
    // CPPONLY: }
    System.arraycopy(buffer, offset, strBuf, strBufLen, length);
    strBufLen = newLen;
}
/**
 * Appends the first <code>charRefBufLen</code> code units of the char
 * reference buffer to the main buffer and then empties the char reference
 * buffer.
 *
 * @throws SAXException
 *             if the underlying <code>appendStrBuf</code> call throws
 */
@Inline private void appendCharRefBufToStrBuf() throws SAXException {
    appendStrBuf(charRefBuf, 0, charRefBufLen);
    charRefBufLen = 0;
}
/** * Emits the current comment token. * * NOTE: The method may set <code>shouldSuspend</code>, so the caller * must have this pattern after the state's <code>transition</code> call: * * <pre> * if (shouldSuspend) { * break stateloop; * } * continue stateloop; * </pre> * * @param pos * TODO * * @throws SAXException
*/ privatevoid emitComment(int provisionalHyphens, int pos) throws SAXException { // CPPONLY: RememberGt(pos); // [NOCPP[ if (wantsComments) { // ]NOCPP]
tokenHandler.comment(strBuf, 0, strBufLen
- provisionalHyphens); // [NOCPP[
} // ]NOCPP]
clearStrBufAfterUse();
cstart = pos + 1;
suspendIfRequestedAfterCurrentNonTextToken();
}
/**
 * Flushes coalesced character tokens: when <code>cstart</code> lies before
 * <code>pos</code>, the code units from <code>cstart</code> up to (but not
 * including) <code>pos</code> are passed to the token handler. Afterwards
 * <code>cstart</code> is set to <code>Integer.MAX_VALUE</code>.
 *
 * @param buf
 *            the buffer holding the pending run
 * @param pos
 *            the exclusive end index of the run
 *
 * @throws SAXException
 *             if the token handler threw
 */
protected void flushChars(@NoLength char[] buf, int pos)
        throws SAXException {
    if (cstart < pos) {
        int runLength = pos - cstart;
        tokenHandler.characters(buf, cstart, runLength);
    }
    cstart = Integer.MAX_VALUE;
}
/**
 * Reports a condition that would make the infoset incompatible with XML
 * 1.0 as fatal. The error handler, if one is set, is notified first; the
 * exception is then always thrown, so this method never returns normally.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             always: the <code>SAXParseException</code> built from
 *             <code>message</code>, or whatever the error handler throws
 */
public void fatal(String message) throws SAXException {
    final SAXParseException fatalException = new SAXParseException(message, this);
    if (errorHandler != null) {
        errorHandler.fatalError(fatalException);
    }
    throw fatalException;
}
/**
 * Reports a Parse Error to the error handler. Does nothing when no error
 * handler is set.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             if the error handler throws
 */
public void err(String message) throws SAXException {
    if (errorHandler != null) {
        errorHandler.error(new SAXParseException(message, this));
    }
}
if (attributes == null) {
attributes = new HtmlAttributes(mappingLangToXmlLang);
}
/* * When the user agent leaves the attribute name state (and before * emitting the tag token, if appropriate), the complete attribute's * name must be compared to the other attributes on the same token; if * there is already an attribute on the token with the exact same name, * then this is a parse error and the new attribute must be dropped, * along with the value that gets associated with it (if any).
*/ if (attributes.contains(attributeName)) {
errDuplicateAttribute();
attributeName = null;
}
}
/**
 * Starts tokenization: runs <code>initializeWithoutStarting()</code>,
 * notifies the token handler via <code>startTokenization(this)</code> and,
 * in the Java build, calls <code>startErrorReporting()</code>.
 *
 * @throws SAXException
 *             if any of the called methods throws
 */
public void start() throws SAXException {
    initializeWithoutStarting();
    tokenHandler.startTokenization(this);
    // CPPONLY: if (mViewSource) {
    // CPPONLY:   line = 1;
    // CPPONLY:   col = -1;
    // CPPONLY:   nextCharOnNewLine = false;
    // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
    // CPPONLY:   line = 0;
    // CPPONLY:   col = 1;
    // CPPONLY:   nextCharOnNewLine = true;
    // CPPONLY: } else {
    // CPPONLY:   line = -1;
    // CPPONLY:   col = -1;
    // CPPONLY:   nextCharOnNewLine = false;
    // CPPONLY: }
    // [NOCPP[
    startErrorReporting();
    // ]NOCPP]
}
publicboolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException { int state = stateSave; int returnState = returnStateSave; char c = '\u0000';
shouldSuspend = false;
lastCR = false;
int start = buffer.getStart(); int end = buffer.getEnd();
// In C++, the caller of tokenizeBuffer needs to do this explicitly. // [NOCPP[
ensureBufferSpace(end - start); // ]NOCPP]
/** * The index of the last <code>char</code> read from <code>buf</code>.
*/ int pos = start - 1;
/** * The index of the first <code>char</code> in <code>buf</code> that is * part of a coalesced run of character tokens or * <code>Integer.MAX_VALUE</code> if there is not a current run being * coalesced.
*/ switch (state) { case DATA: case RCDATA: case SCRIPT_DATA: case PLAINTEXT: case RAWTEXT: case CDATA_SECTION: case SCRIPT_DATA_ESCAPED: case SCRIPT_DATA_ESCAPE_START: case SCRIPT_DATA_ESCAPE_START_DASH: case SCRIPT_DATA_ESCAPED_DASH: case SCRIPT_DATA_ESCAPED_DASH_DASH: case SCRIPT_DATA_DOUBLE_ESCAPE_START: case SCRIPT_DATA_DOUBLE_ESCAPED: case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: case SCRIPT_DATA_DOUBLE_ESCAPE_END:
cstart = start; break; default:
cstart = Integer.MAX_VALUE; break;
}
/** * The number of <code>char</code>s in <code>buf</code> that have * meaning. (The rest of the array is garbage and should not be * examined.)
*/ // CPPONLY: if (mViewSource) { // CPPONLY: mViewSource.SetBuffer(buffer); // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1); // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) { // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } else { // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } // [NOCPP[
pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
end); // ]NOCPP] if (pos == end) { // exiting due to end of buffer
buffer.setStart(pos);
} else {
buffer.setStart(pos + 1);
} return lastCR;
}
// [NOCPP[ privatevoid ensureBufferSpace(int inputLength) throws SAXException { // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB. // Adding to the general worst case instead of only the // TreeBuilder-exposed worst case to avoid re-introducing a bug when // unifying the tokenizer and tree builder buffers in the future. int worstCase = strBufLen + inputLength + charRefBufLen + 2;
tokenHandler.ensureBufferSpace(worstCase); if (commentPolicy == XmlViolationPolicy.ALTER_INFOSET) { // When altering infoset, if the comment contents are consecutive // hyphens, each hyphen generates a space, too. These buffer // contents never get emitted as characters() to the tokenHandler, // which is why this calculation happens after the call to // ensureBufferSpace on tokenHandler.
worstCase *= 2;
} if (strBuf == null) { // Add an arbitrary small value to avoid immediate reallocation // once there are a few characters in the buffer.
strBuf = newchar[worstCase + 128];
} elseif (worstCase > strBuf.length) { // HotSpot reportedly allocates memory with 8-byte accuracy, so // there's no point in trying to do math here to avoid slop. // Maybe we should add some small constant to worstCase here // but not doing that without profiling. In C++ with jemalloc, // the corresponding method should do math to round up here // to avoid slop. char[] newBuf = newchar[Math.max(worstCase, (strBuf.length*5)/4)];
System.arraycopy(strBuf, 0, newBuf, 0, strBufLen);
strBuf = newBuf;
}
} // ]NOCPP]
@SuppressWarnings("unused") privateint stateLoop(int state, char c, int pos, @NoLength char[] buf, boolean reconsume, int returnState, int endPos) throws SAXException { boolean reportedConsecutiveHyphens = false; /* * Idioms used in this code: * * * Consuming the next input character * * To consume the next input character, the code does this: if (++pos == * endPos) { break stateloop; } c = checkChar(buf, pos); * * * Staying in a state * * When there's a state that the tokenizer may stay in over multiple * input characters, the state has a wrapper |for(;;)| loop and staying * in the state continues the loop. * * * Switching to another state * * To switch to another state, the code sets the state variable to the * magic number of the new state. Then it either continues stateloop or * breaks out of the state's own wrapper loop if the target state is * right after the current state in source order. (This is a partial * workaround for Java's lack of goto.) * * * Reconsume support * * The spec sometimes says that an input character is reconsumed in * another state. If a state can ever be entered so that an input * character can be reconsumed in it, the state's code starts with an * |if (reconsume)| that sets reconsume to false and skips over the * normal code for consuming a new character. * * To reconsume the current character in another state, the code sets * |reconsume| to true and then switches to the other state. * * * Emitting character tokens * * This method emits character tokens lazily. Whenever a new range of * character tokens starts, the field cstart must be set to the start * index of the range. The flushChars() method must be called at the end * of a range to flush it. * * * U+0000 handling * * The various states have to handle the replacement of U+0000 with * U+FFFD. However, if U+0000 would be reconsumed in another state, the * replacement doesn't need to happen, because it's handled by the * reconsuming state. 
* * * LF handling * * Every state needs to increment the line number upon LF unless the LF * gets reconsumed by another state which increments the line number. * * * CR handling * * Every state needs to handle CR unless the CR gets reconsumed and is * handled by the reconsuming state. The CR needs to be handled as if it * were and LF, the lastCR field must be set to true and then this * method must return. The IO driver will then swallow the next * character if it is an LF to coalesce CRLF.
*/
stateloop: for (;;) { switch (state) { case DATA:
dataloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case'&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in data state.
*/
flushChars(buf, pos); assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
appendCharRefBuf(c);
setAdditionalAndRememberAmpersandLocation('\u0000');
returnState = state;
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); continue stateloop; case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the tag * open state.
*/
flushChars(buf, pos);
state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break dataloop; case'\u0000':
maybeEmitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the input character as a * character token. * * Stay in the data state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case TAG_OPEN:
tagopenloop: for (;;) { /* * The behavior of this state depends on the content * model flag.
*/ if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * If the content model flag is set to the PCDATA state * Consume the next input character:
*/ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to U+005A * LATIN CAPITAL LETTER Z Create a new start tag * token,
*/
endTag = false; /* * set its tag name to the lowercase version of the * input character (add 0x0020 to the character's * code point),
*/
clearStrBufBeforeUse();
appendStrBuf((char) (c + 0x20));
containsHyphen = false; /* then switch to the tag name state. */
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); /* * (Don't emit the token yet; further details will * be filled in before it is emitted.)
*/ // `break` optimizes; `continue stateloop;` would be valid break tagopenloop;
} elseif (c >= 'a' && c <= 'z') { /* * U+0061 LATIN SMALL LETTER A through to U+007A * LATIN SMALL LETTER Z Create a new start tag * token,
*/
endTag = false; /* * set its tag name to the input character,
*/
clearStrBufBeforeUse();
appendStrBuf(c);
containsHyphen = false; /* then switch to the tag name state. */
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); /* * (Don't emit the token yet; further details will * be filled in before it is emitted.)
*/ // `break` optimizes; `continue stateloop;` would be valid break tagopenloop;
} switch (c) { case'!': /* * U+0021 EXCLAMATION MARK (!) Switch to the * markup declaration open state.
*/
state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos); continue stateloop; case'/': /* * U+002F SOLIDUS (/) Switch to the close tag * open state.
*/
state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos); continue stateloop; case'?': // CPPONLY: if (viewingXmlSource) { // CPPONLY: state = transition(state, // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION, // CPPONLY: reconsume, // CPPONLY: pos); // CPPONLY: continue stateloop; // CPPONLY: } /* * U+003F QUESTION MARK (?) Parse error.
*/
errProcessingInstruction(); /* * Switch to the bogus comment state.
*/
clearStrBufBeforeUse();
appendStrBuf(c);
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errLtGt(); /* * Emit a U+003C LESS-THAN SIGN character token * and a U+003E GREATER-THAN SIGN character * token.
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 2); /* Switch to the data state. */
cstart = pos + 1;
state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; default: /* * Anything else Parse error.
*/
errBadCharAfterLt(c); /* * Emit a U+003C LESS-THAN SIGN character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the data state.
*/
cstart = pos;
reconsume = true;
state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case TAG_NAME:
tagnameloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
strBufToElementNameString();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state.
*/
strBufToElementNameString();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break tagnameloop; case'/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state.
*/
strBufToElementNameString();
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
strBufToElementNameString();
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the current input * character (add 0x0020 to the character's * code point) to the current tag token's * tag name.
*/
c += 0x20;
} elseif (c == '-') {
containsHyphen = true;
} /* * Anything else Append the current input * character to the current tag token's tag * name.
*/
appendStrBuf(c); /* * Stay in the tag name state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BEFORE_ATTRIBUTE_NAME:
beforeattributenameloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before attribute name state.
*/ continue; case'/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state.
*/
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; case'\"': case'\'': case'<': case'=': /* * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS * SIGN (=) Parse error.
*/
errBadCharBeforeAttributeNameOrNull(c); /* * Treat it as per the "anything else" entry * below.
*/ // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Start a new attribute in the * current tag token.
*/ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Set that * attribute's name to the lowercase version * of the current input character (add * 0x0020 to the character's code point)
*/
c += 0x20;
} // CPPONLY: attributeLine = line; /* * Set that attribute's name to the current * input character,
*/
clearStrBufBeforeUse();
appendStrBuf(c); /* * and its value to the empty string.
*/ // Will do later. /* * Switch to the attribute name state.
*/
state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break beforeattributenameloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case ATTRIBUTE_NAME:
attributenameloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
attributeNameComplete();
state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the after attribute name state.
*/
attributeNameComplete();
state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos); continue stateloop; case'/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state.
*/
attributeNameComplete();
addAttributeWithoutValue();
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); continue stateloop; case'=': /* * U+003D EQUALS SIGN (=) Switch to the before * attribute value state.
*/
attributeNameComplete();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break attributenameloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
attributeNameComplete();
addAttributeWithoutValue();
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; case'\"': case'\'': case'<': /* * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE * (') U+003C LESS-THAN SIGN (<) Parse error.
*/
errQuoteOrLtInAttributeNameOrNull(c); /* * Treat it as per the "anything else" entry * below.
*/ // CPPONLY: MOZ_FALLTHROUGH; default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the current input * character (add 0x0020 to the character's * code point) to the current attribute's * name.
*/
c += 0x20;
} /* * Anything else Append the current input * character to the current attribute's name.
*/
appendStrBuf(c); /* * Stay in the attribute name state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BEFORE_ATTRIBUTE_VALUE:
beforeattributevalueloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before attribute value state.
*/ continue; case'"': /* * U+0022 QUOTATION MARK (") Switch to the * attribute value (double-quoted) state.
*/ // CPPONLY: attributeLine = line;
clearStrBufBeforeUse();
state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break beforeattributevalueloop; case'&': /* * U+0026 AMPERSAND (&) Switch to the attribute * value (unquoted) state and reconsume this * input character.
*/ // CPPONLY: attributeLine = line;
clearStrBufBeforeUse();
reconsume = true;
state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
noteUnquotedAttributeValue(); continue stateloop; case'\'': /* * U+0027 APOSTROPHE (') Switch to the attribute * value (single-quoted) state.
*/ // CPPONLY: attributeLine = line;
clearStrBufBeforeUse();
state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errAttributeValueMissing(); /* * Emit the current tag token.
*/
addAttributeWithoutValue();
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; case'<': case'=': case'`': /* * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN * (=) U+0060 GRAVE ACCENT (`)
*/
errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); /* * Treat it as per the "anything else" entry * below.
*/ // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current attribute's value.
*/ // CPPONLY: attributeLine = line;
clearStrBufBeforeUse();
appendStrBuf(c); /* * Switch to the attribute value (unquoted) * state.
*/
state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
noteUnquotedAttributeValue(); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
attributevaluedoublequotedloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'"': /* * U+0022 QUOTATION MARK (") Switch to the after * attribute value (quoted) state.
*/
addAttributeWithValue();
state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break attributevaluedoublequotedloop; case'&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in attribute value state, with the * additional allowed character being U+0022 * QUOTATION MARK (").
*/ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
appendCharRefBuf(c);
setAdditionalAndRememberAmpersandLocation('\"');
returnState = state;
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current attribute's value.
*/
appendStrBuf(c); /* * Stay in the attribute value (double-quoted) * state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case AFTER_ATTRIBUTE_VALUE_QUOTED:
afterattributevaluequotedloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state.
*/
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); continue stateloop; case'/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state.
*/
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break afterattributevaluequotedloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; default: /* * Anything else Parse error.
*/
errNoSpaceBetweenAttributes(); /* * Reconsume the character in the before * attribute name state.
*/
reconsume = true;
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SELF_CLOSING_START_TAG: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'>': /* * U+003E GREATER-THAN SIGN (>) Set the self-closing * flag of the current tag token. Emit the current * tag token.
*/
state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; default: /* Anything else Parse error. */
errSlashNotFollowedByGt(); /* * Reconsume the character in the before attribute * name state.
*/
reconsume = true;
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); continue stateloop;
} // no fallthrough, reordering opportunity case ATTRIBUTE_VALUE_UNQUOTED: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
addAttributeWithValue();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state.
*/
addAttributeWithValue();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); continue stateloop; case'&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in attribute value state, with the * additional allowed character being U+003E * GREATER-THAN SIGN (>)
*/ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
appendCharRefBuf(c);
setAdditionalAndRememberAmpersandLocation('>');
returnState = state;
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
addAttributeWithValue();
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; case'<': case'\"': case'\'': case'=': case'`': /* * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
*/
errUnquotedAttributeValOrNull(c); /* * Treat it as per the "anything else" entry * below.
*/ // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current attribute's value.
*/
appendStrBuf(c); /* * Stay in the attribute value (unquoted) state.
*/ continue;
}
} // no fallthrough, reordering opportunity case AFTER_ATTRIBUTE_NAME: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the after attribute name state.
*/ continue; case'/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state.
*/
addAttributeWithoutValue();
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); continue stateloop; case'=': /* * U+003D EQUALS SIGN (=) Switch to the before * attribute value state.
*/
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
addAttributeWithoutValue();
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; case'\"': case'\'': case'<':
errQuoteOrLtInAttributeNameOrNull(c); /* * Treat it as per the "anything else" entry * below.
*/ // CPPONLY: MOZ_FALLTHROUGH; default:
addAttributeWithoutValue(); /* * Anything else Start a new attribute in the * current tag token.
*/ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Set that * attribute's name to the lowercase version * of the current input character (add * 0x0020 to the character's code point)
*/
c += 0x20;
} /* * Set that attribute's name to the current * input character,
*/
clearStrBufBeforeUse();
appendStrBuf(c); /* * and its value to the empty string.
*/ // Will do later. /* * Switch to the attribute name state.
*/
state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos); continue stateloop;
}
} // no fallthrough, reordering opportunity case MARKUP_DECLARATION_OPEN:
markupdeclarationopenloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * If the next two characters are both U+002D * HYPHEN-MINUS characters (-), consume those two * characters, create a comment token whose data is the * empty string, and switch to the comment start state. * * Otherwise, if the next seven characters are an ASCII * case-insensitive match for the word "DOCTYPE", then * consume those characters and switch to the DOCTYPE * state. * * Otherwise, if the insertion mode is * "in foreign content" and the current node is not an * element in the HTML namespace and the next seven * characters are a case-sensitive match for the string * "[CDATA[" (the five uppercase letters "CDATA" with a * U+005B LEFT SQUARE BRACKET character before and * after), then consume those characters and switch to * the CDATA section state. * * Otherwise, this is a parse error. Switch to the bogus * comment state. The next character that is consumed, * if any, is the first character that will be in the * comment.
*/ switch (c) { case'-':
clearStrBufBeforeUse();
appendStrBuf(c);
state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break markupdeclarationopenloop; case'd': case'D':
clearStrBufBeforeUse();
appendStrBuf(c);
index = 0;
state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos); continue stateloop; case'[': if (tokenHandler.cdataSectionAllowed()) {
clearStrBufBeforeUse();
appendStrBuf(c);
index = 0;
state = transition(state, Tokenizer.CDATA_START, reconsume, pos); continue stateloop;
} // CPPONLY: MOZ_FALLTHROUGH; default:
errBogusComment();
clearStrBufBeforeUse();
reconsume = true;
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case MARKUP_DECLARATION_HYPHEN:
markupdeclarationhyphenloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'-':
clearStrBufAfterOneHyphen();
state = transition(state, Tokenizer.COMMENT_START, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break markupdeclarationhyphenloop; default:
errBogusComment();
reconsume = true;
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_START:
reportedConsecutiveHyphens = false;
commentstartloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Comment start state * * * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment * start dash state.
*/
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errPrematureEndOfComment(); /* Emit the comment token. */
emitComment(0, pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break commentstartloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the input character to * the comment token's data.
*/
appendStrBuf(c); /* * Switch to the comment state.
*/
state = transition(state, Tokenizer.COMMENT, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break commentstartloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT:
commentloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Comment state Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment * end dash state
*/
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break commentloop; case'<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the input character to * the comment token's data.
*/
appendStrBuf(c); /* * Stay in the comment state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_END_DASH:
commentenddashloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Comment end dash state Consume the next input * character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment * end state
*/
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break commentenddashloop; case'<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append a U+002D HYPHEN-MINUS * (-) character and the input character to the * comment token's data.
*/
appendStrBuf(c); /* * Switch to the comment state.
*/
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_END:
commentendloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Comment end state Consume the next input * character:
*/ switch (c) { case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the comment * token.
*/
emitComment(2, pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'-': /* U+002D HYPHEN-MINUS (-) Parse error. */ /* * Append a U+002D HYPHEN-MINUS (-) character to * the comment token's data.
*/
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true; /* * Stay in the comment end state.
*/ continue; case'<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
adjustDoubleHyphenAndAppendToStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case'!':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break commentendloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Append two U+002D HYPHEN-MINUS (-) characters * and the input character to the comment * token's data.
*/
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true; /* * Switch to the comment state.
*/
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_END_BANG: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Comment end bang state * * Consume the next input character:
*/ switch (c) { case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the comment * token.
*/
emitComment(3, pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'-': /* * Append two U+002D HYPHEN-MINUS (-) characters * and a U+0021 EXCLAMATION MARK (!) character * to the comment token's data.
*/
appendStrBuf(c); /* * Switch to the comment end dash state.
*/
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append two U+002D HYPHEN-MINUS * (-) characters, a U+0021 EXCLAMATION MARK (!) * character, and the input character to the * comment token's data. Switch to the comment * state.
*/
appendStrBuf(c); /* * Switch to the comment state.
*/
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop;
}
} // no fallthrough, reordering opportunity case COMMENT_LESSTHAN:
commentlessthanloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'!':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break commentlessthanloop; case'<':
appendStrBuf(c); continue; case'-':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default:
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_LESSTHAN_BANG:
commentlessthanbangloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'-':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break commentlessthanbangloop; case'<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default:
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_LESSTHAN_BANG_DASH: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'-':
appendStrBuf(c);
state = transition(state,
Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH,
reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break; case'<':
appendStrBuf(c);
state = transition(state,
Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT,
reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT,
reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default:
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT,
reconsume, pos); continue stateloop;
} // CPPONLY: MOZ_FALLTHROUGH; case COMMENT_LESSTHAN_BANG_DASH_DASH: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'>':
appendStrBuf(c);
emitComment(3, pos);
state = transition(state, Tokenizer.DATA, reconsume,
pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'-':
errNestedComment();
adjustDoubleHyphenAndAppendToStrBufAndErr(c,
reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
state = transition(state, Tokenizer.COMMENT_END,
reconsume, pos); continue stateloop; case'\r':
c = '\n';
silentCarriageReturn();
errNestedComment();
adjustDoubleHyphenAndAppendToStrBufAndErr(c,
reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
state = transition(state, Tokenizer.COMMENT,
reconsume, pos); break stateloop; case'\n':
silentLineFeed();
errNestedComment();
adjustDoubleHyphenAndAppendToStrBufAndErr(c,
reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
state = transition(state, Tokenizer.COMMENT,
reconsume, pos); continue stateloop; case'!':
errNestedComment();
adjustDoubleHyphenAndAppendToStrBufAndErr(c,
reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
state = transition(state,
Tokenizer.COMMENT_END_BANG, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default:
errNestedComment();
adjustDoubleHyphenAndAppendToStrBufAndErr(c,
reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
state = transition(state, Tokenizer.COMMENT,
reconsume, pos); continue stateloop;
} // no fallthrough, reordering opportunity case COMMENT_START_DASH: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Comment start dash state * * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment end * state
*/
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); continue stateloop; case'>':
errPrematureEndOfComment(); /* Emit the comment token. */
emitComment(1, pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos); continue stateloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Append a U+002D HYPHEN-MINUS character (-) and * the current input character to the comment * token's data.
*/
appendStrBuf(c); /* * Switch to the comment state.
*/
state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop;
} // no fallthrough, reordering opportunity case CDATA_START: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); if (index < 6) { // CDATA_LSQB.length if (c == Tokenizer.CDATA_LSQB[index]) {
appendStrBuf(c);
} else {
errBogusComment();
reconsume = true;
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop;
}
index++; continue;
} else {
clearStrBufAfterUse();
cstart = pos; // start coalescing
reconsume = true;
state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break;
}
} // CPPONLY: MOZ_FALLTHROUGH; case CDATA_SECTION:
cdatasectionloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case']':
flushChars(buf, pos);
state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break cdatasectionloop; case'\u0000':
maybeEmitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case CDATA_RSQB: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case']':
state = transition(state, Tokenizer.CDATA_RSQB_RSQB,
reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break; default:
tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
cstart = pos;
reconsume = true;
state = transition(state, Tokenizer.CDATA_SECTION,
reconsume, pos); continue stateloop;
} // CPPONLY: MOZ_FALLTHROUGH; case CDATA_RSQB_RSQB:
cdatarsqbrsqb: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case']': // Saw a third ]. Emit one ] (logically the // first one) and stay in this state to // remember that the last two characters seen // have been ]].
tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); continue; case'>':
cstart = pos + 1;
state = transition(state, Tokenizer.DATA, reconsume, pos); // Since a CDATA section starts with a less-than sign, it // participates in the suspension-after-current-token // behavior. (The suspension can be requested when the // less-than sign has been seen but we don't yet know the // resulting token type.) Therefore, we need to deal with // a potential request here.
suspendIfRequestedAfterCurrentNonTextToken(); if (shouldSuspend) { break stateloop;
} continue stateloop; default:
tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
cstart = pos;
reconsume = true;
state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); continue stateloop;
}
} // no fallthrough, reordering opportunity case ATTRIBUTE_VALUE_SINGLE_QUOTED:
attributevaluesinglequotedloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'\'': /* * U+0027 APOSTROPHE (') Switch to the after * attribute value (quoted) state.
*/
addAttributeWithValue();
state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos); continue stateloop; case'&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in attribute value state, with the * additional allowed character being U+0027 * APOSTROPHE (').
*/ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
appendCharRefBuf(c);
setAdditionalAndRememberAmpersandLocation('\'');
returnState = state;
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break attributevaluesinglequotedloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current attribute's value.
*/
appendStrBuf(c); /* * Stay in the attribute value (single-quoted) * state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case CONSUME_CHARACTER_REFERENCE: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Unlike the definition in the spec, this state does not * return a value and never requires the caller to * backtrack. This state takes care of emitting characters * or appending to the current attribute value. It also * takes care of that in the case when consuming the * character reference fails.
*/ /* * This section defines how to consume a character * reference. This definition is used when parsing character * references in text and in attributes. * * The behavior depends on the identity of the next * character (the one immediately after the U+0026 AMPERSAND * character):
*/ switch (c) { case' ': case'\t': case'\n': case'\r': // we'll reconsume! case'\u000C': case'<': case'&': case'\u0000': case';':
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop; case'#': /* * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER * SIGN.
*/
appendCharRefBuf('#');
state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos); continue stateloop; default: if (c == additional) {
emitOrAppendCharRefBuf(returnState);
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} if (c >= 'a' && c <= 'z') {
firstCharKey = c - 'a' + 26;
} elseif (c >= 'A' && c <= 'Z') {
firstCharKey = c - 'A';
} else { // No match if (c == ';') {
errNoNamedCharacterMatch();
}
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} // Didn't fail yet
appendCharRefBuf(c);
state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break;
} // CPPONLY: MOZ_FALLTHROUGH; case CHARACTER_REFERENCE_HILO_LOOKUP:
{ if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * The data structure is as follows: * * HILO_ACCEL is a two-dimensional int array whose major * index corresponds to the second character of the * character reference (code point as index) and the * minor index corresponds to the first character of the * character reference (packed so that A-Z runs from 0 * to 25 and a-z runs from 26 to 51). This layout makes * it easier to use the sparseness of the data structure * to omit parts of it: The second dimension of the * table is null when no character reference starts with * the character corresponding to that row. * * The int value HILO_ACCEL (by these indices) is zero * if there exists no character reference starting with * that two-letter prefix. Otherwise, the value is an * int that packs two shorts so that the higher short is * the index of the highest character reference name * with that prefix in NAMES and the lower short * corresponds to the index of the lowest character * reference name with that prefix. (It happens that the * first two character reference names share their * prefix so the packed int cannot be 0 by packing the * two shorts.) * * NAMES is an array of byte arrays where each byte * array encodes the name of a character reference as * ASCII. The names omit the first two letters of the * name. (Since storing the first two letters would be * redundant with the data contained in HILO_ACCEL.) The * entries are lexically sorted. * * For a given index in NAMES, the same index in VALUES * contains the corresponding expansion as an array of * two UTF-16 code units (either the character and * U+0000 or a surrogate pair).
*/ int hilo = 0; if (c <= 'z') {
@Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c]; if (row != null) {
hilo = row[firstCharKey];
}
} if (hilo == 0) { if (c == ';') {
errNoNamedCharacterMatch();
}
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} // Didn't fail yet
appendCharRefBuf(c);
lo = hilo & 0xFFFF;
hi = hilo >> 16;
entCol = -1;
candidate = -1;
charRefBufMark = 0;
state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos); // fallthrough optimizes; `continue stateloop;` would also be valid
} // CPPONLY: MOZ_FALLTHROUGH; case CHARACTER_REFERENCE_TAIL:
outer: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
entCol++; /* * Consume the maximum number of characters possible, * with the consumed characters matching one of the * identifiers in the first column of the named * character references table (in a case-sensitive * manner).
*/
loloop: for (;;) { if (hi < lo) { break outer;
} if (entCol == NamedCharacters.NAMES[lo].length()) {
candidate = lo;
charRefBufMark = charRefBufLen;
lo++;
} elseif (entCol > NamedCharacters.NAMES[lo].length()) { break outer;
} elseif (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
lo++;
} else { break loloop;
}
}
if (c == ';') { // If we see a semicolon, there cannot be a // longer match. Break the loop. However, before // breaking, take the longest match so far as the // candidate, if we are just about to complete a // match. if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
candidate = lo;
charRefBufMark = charRefBufLen;
} break outer;
}
if (candidate == -1) { // reconsume deals with CR, LF or nul if (c == ';') {
errNoNamedCharacterMatch();
}
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} else { // c can't be CR, LF or nul if we got here
@Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate]; if (candidateName.length() == 0
|| candidateName.charAt(candidateName.length() - 1) != ';') { /* * If the last character matched is not a U+003B * SEMICOLON (;), there is a parse error.
*/ if ((returnState & DATA_AND_RCDATA_MASK) != 0) { /* * If the entity is being consumed as part of an * attribute, and the last character matched is * not a U+003B SEMICOLON (;),
*/ char ch; if (charRefBufMark == charRefBufLen) {
ch = c;
} else {
ch = charRefBuf[charRefBufMark];
} if (ch == '=' || (ch >= '0' && ch <= '9')
|| (ch >= 'A' && ch <= 'Z')
|| (ch >= 'a' && ch <= 'z')) { /* * and the next character is either a U+003D * EQUALS SIGN character (=) or in the range * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, * U+0041 LATIN CAPITAL LETTER A to U+005A * LATIN CAPITAL LETTER Z, or U+0061 LATIN * SMALL LETTER A to U+007A LATIN SMALL * LETTER Z, then, for historical reasons, * all the characters that were matched * after the U+0026 AMPERSAND (&) must be * unconsumed, and nothing is returned.
*/ if (c == ';') {
errNoNamedCharacterMatch();
}
appendCharRefBufToStrBuf();
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
}
} if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
errUnescapedAmpersandInterpretedAsCharacterReference();
} else {
errNotSemicolonTerminated();
}
}
/* * Otherwise, return a character token for the character * corresponding to the entity name (as given by the * second column of the named character references * table).
*/ // CPPONLY: completedNamedCharacterReference();
@Const @NoLength char[] val = NamedCharacters.VALUES[candidate]; if ( // [NOCPP[
val.length == 1 // ]NOCPP] // CPPONLY: val[1] == 0
) {
emitOrAppendOne(val, returnState);
} else {
emitOrAppendTwo(val, returnState);
} // this is so complicated! if (charRefBufMark < charRefBufLen) { if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
appendStrBuf(charRefBuf, charRefBufMark,
charRefBufLen - charRefBufMark);
} else {
tokenHandler.characters(charRefBuf, charRefBufMark,
charRefBufLen - charRefBufMark);
}
} // charRefBufLen will be zeroed below!
// Check if we broke out early with c being the last // character that matched as opposed to being the // first one that didn't match. In the case of an // early break, the next run on text should start // *after* the current character and the current // character shouldn't be reconsumed. boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
charRefBufLen = 0; if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = earlyBreak ? pos + 1 : pos;
}
reconsume = !earlyBreak;
state = transition(state, returnState, reconsume, pos); continue stateloop; /* * If the markup contains I'm ¬it; I tell you, the * entity is parsed as "not", as in, I'm ¬it; I tell * you. But if the markup was I'm ∉ I tell you, * the entity would be parsed as "notin;", resulting in * I'm ∉ I tell you.
*/
} // no fallthrough, reordering opportunity case CONSUME_NCR: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
value = 0;
seenDigits = false; /* * The behavior further depends on the character after the * U+0023 NUMBER SIGN:
*/ switch (c) { case'x': case'X':
/* * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL * LETTER X Consume the X. * * Follow the steps below, but using the range of * characters U+0030 DIGIT ZERO through to U+0039 * DIGIT NINE, U+0061 LATIN SMALL LETTER A through * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL * LETTER F (in other words, 0-9, A-F, a-f). * * When it comes to interpreting the number, * interpret it as a hexadecimal number.
*/
appendCharRefBuf(c);
state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos); continue stateloop; default: /* * Anything else Follow the steps below, but using * the range of characters U+0030 DIGIT ZERO through * to U+0039 DIGIT NINE (i.e. just 0-9). * * When it comes to interpreting the number, * interpret it as a decimal number.
*/
reconsume = true;
state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break;
} // CPPONLY: MOZ_FALLTHROUGH; case DECIMAL_NRC_LOOP:
decimalloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume as many characters as match the range of * characters given above.
*/ assert value >= 0: "value must not become negative."; if (c >= '0' && c <= '9') {
seenDigits = true; // Avoid overflow if (value <= 0x10FFFF) {
value *= 10;
value += c - '0';
} continue;
} elseif (c == ';') { if (seenDigits) { if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos + 1;
}
state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break decimalloop;
} else {
errNoDigitsInNCR();
appendCharRefBuf(';');
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos + 1;
}
state = transition(state, returnState, reconsume, pos); continue stateloop;
}
} else { /* * If no characters match the range, then don't * consume any characters (and unconsume the U+0023 * NUMBER SIGN character and, if appropriate, the X * character). This is a parse error; nothing is * returned. * * Otherwise, if the next character is a U+003B * SEMICOLON, consume that too. If it isn't, there * is a parse error.
*/ if (!seenDigits) {
errNoDigitsInNCR();
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} else {
errCharRefLacksSemicolon(); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break decimalloop;
}
}
} // CPPONLY: MOZ_FALLTHROUGH; case HANDLE_NCR_VALUE: // WARNING previous state sets reconsume // We are not going to emit the contents of charRefBuf.
charRefBufLen = 0; // XXX inline this case if the method size can take it
handleNcrValue(returnState);
state = transition(state, returnState, reconsume, pos); continue stateloop; // no fallthrough, reordering opportunity case HEX_NCR_LOOP: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume as many characters as match the range of * characters given above.
*/ assert value >= 0: "value must not become negative."; if (c >= '0' && c <= '9') {
seenDigits = true; // Avoid overflow if (value <= 0x10FFFF) {
value *= 16;
value += c - '0';
} continue;
} elseif (c >= 'A' && c <= 'F') {
seenDigits = true; // Avoid overflow if (value <= 0x10FFFF) {
value *= 16;
value += c - 'A' + 10;
} continue;
} elseif (c >= 'a' && c <= 'f') {
seenDigits = true; // Avoid overflow if (value <= 0x10FFFF) {
value *= 16;
value += c - 'a' + 10;
} continue;
} elseif (c == ';') { if (seenDigits) { if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos + 1;
}
state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); continue stateloop;
} else {
errNoDigitsInNCR();
appendCharRefBuf(';');
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos + 1;
}
state = transition(state, returnState, reconsume, pos); continue stateloop;
}
} else { /* * If no characters match the range, then don't * consume any characters (and unconsume the U+0023 * NUMBER SIGN character and, if appropriate, the X * character). This is a parse error; nothing is * returned. * * Otherwise, if the next character is a U+003B * SEMICOLON, consume that too. If it isn't, there * is a parse error.
*/ if (!seenDigits) {
errNoDigitsInNCR();
emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} else {
errCharRefLacksSemicolon(); if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); continue stateloop;
}
}
} // no fallthrough, reordering opportunity case PLAINTEXT:
plaintextloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case'\u0000':
emitPlaintextReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Stay in the * PLAINTEXT state.
*/ continue;
}
} // no fallthrough, reordering opportunity case CLOSE_TAG_OPEN: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Otherwise, if the content model flag is set to the PCDATA * state, or if the next few characters do match that tag * name, consume the next input character:
*/ switch (c) { case'>': /* U+003E GREATER-THAN SIGN (>) Parse error. */
errLtSlashGt(); /* * Switch to the data state.
*/
cstart = pos + 1;
state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case'\r':
silentCarriageReturn(); /* Anything else Parse error. */
errGarbageAfterLtSlash(); /* * Switch to the bogus comment state.
*/
clearStrBufBeforeUse();
appendStrBuf('\n');
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); /* Anything else Parse error. */
errGarbageAfterLtSlash(); /* * Switch to the bogus comment state.
*/
clearStrBufBeforeUse();
appendStrBuf(c);
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: if (c >= 'A' && c <= 'Z') {
c += 0x20;
} if (c >= 'a' && c <= 'z') { /* * U+0061 LATIN SMALL LETTER A through to U+007A * LATIN SMALL LETTER Z Create a new end tag * token,
*/
endTag = true; /* * set its tag name to the input character,
*/
clearStrBufBeforeUse();
appendStrBuf(c);
containsHyphen = false; /* * then switch to the tag name state. (Don't * emit the token yet; further details will be * filled in before it is emitted.)
*/
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); continue stateloop;
} else { /* Anything else Parse error. */
errGarbageAfterLtSlash(); /* * Switch to the bogus comment state.
*/
clearStrBufBeforeUse();
appendStrBuf(c);
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop;
}
} // no fallthrough, reordering opportunity case RCDATA:
rcdataloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case'&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in RCDATA state.
*/
flushChars(buf, pos); assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
appendCharRefBuf(c);
setAdditionalAndRememberAmpersandLocation('\u0000');
returnState = state;
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); continue stateloop; case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the * RCDATA less-than sign state.
*/
flushChars(buf, pos);
returnState = state;
state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); continue stateloop; case'\u0000':
emitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Emit the current input character as a * character token. Stay in the RCDATA state.
*/ continue;
}
} // no fallthrough, reordering opportunity case RAWTEXT:
rawtextloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the * RAWTEXT less-than sign state.
*/
flushChars(buf, pos);
returnState = state;
state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break rawtextloop; case'\u0000':
emitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Emit the current input character as a * character token. Stay in the RAWTEXT state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case RAWTEXT_RCDATA_LESS_THAN_SIGN:
rawtextrcdatalessthansignloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'/': /* * U+002F SOLIDUS (/) Set the temporary buffer * to the empty string. Switch to the non-data * end tag name state.
*/
index = 0;
clearStrBufBeforeUse();
state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break rawtextrcdatalessthansignloop; default: /* * Otherwise, emit a U+003C LESS-THAN SIGN * character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the return state.
*/
cstart = pos;
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case NON_DATA_END_TAG_NAME: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * ASSERT! when entering this state, set index to 0 and * call clearStrBufBeforeUse(); Let's implement the above * without lookahead. strBuf is the 'temporary buffer'.
*/ if (endTagExpectationAsArray == null) {
tokenHandler.characters(Tokenizer.LT_SOLIDUS,
0, 2);
cstart = pos;
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
} elseif (index < endTagExpectationAsArray.length) { char e = endTagExpectationAsArray[index]; char folded = c; if (c >= 'A' && c <= 'Z') {
folded += 0x20;
} if (folded != e) { // [NOCPP[
errHtml4LtSlashInRcdata(folded); // ]NOCPP]
tokenHandler.characters(Tokenizer.LT_SOLIDUS,
0, 2);
emitStrBuf();
cstart = pos;
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
}
appendStrBuf(c);
index++; continue;
} else {
endTag = true; // XXX replace contentModelElement with different // type
tagName = endTagExpectation; switch (c) { case'\r':
silentCarriageReturn();
clearStrBufAfterUse(); // strBuf not used
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE * FEED (LF) U+000C FORM FEED (FF) U+0020 * SPACE If the current end tag token is an * appropriate end tag token, then switch to * the before attribute name state.
*/
clearStrBufAfterUse(); // strBuf not used
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); continue stateloop; case'/': /* * U+002F SOLIDUS (/) If the current end tag * token is an appropriate end tag token, * then switch to the self-closing start tag * state.
*/
clearStrBufAfterUse(); // strBuf not used
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) If the * current end tag token is an appropriate * end tag token, then emit the current tag * token and switch to the data state.
*/
clearStrBufAfterUse(); // strBuf not used
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; default: /* * Emit a U+003C LESS-THAN SIGN character * token, a U+002F SOLIDUS character token, * a character token for each of the * characters in the temporary buffer (in * the order they were added to the buffer), * and reconsume the current input character * in the return state.
*/ // [NOCPP[
errWarnLtSlashInRcdata(); // ]NOCPP]
tokenHandler.characters(
Tokenizer.LT_SOLIDUS, 0, 2);
emitStrBuf();
cstart = pos; // don't drop the // character
reconsume = true;
state = transition(state, returnState, reconsume, pos); continue stateloop;
}
}
} // no fallthrough, reordering opportunity // BEGIN HOTSPOT WORKAROUND case BOGUS_COMMENT:
boguscommentloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume every character up to and including the first * U+003E GREATER-THAN SIGN character (>) or the end of * the file (EOF), whichever comes first. Emit a comment * token whose data is the concatenation of all the * characters starting from and including the character * that caused the state machine to switch into the * bogus comment state, up to and including the * character immediately before the last consumed * character (i.e. up to the character just before the * U+003E or EOF character). (If the comment was started * by the end of the file (EOF), the token is empty.) * * Switch to the data state. * * If the end of the file was reached, reconsume the EOF * character.
*/ switch (c) { case'>':
emitComment(0, pos);
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'-':
appendStrBuf(c);
state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break boguscommentloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default:
appendStrBuf(c); continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BOGUS_COMMENT_HYPHEN:
boguscommenthyphenloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'>': // [NOCPP[
maybeAppendSpaceToBogusComment(); // ]NOCPP]
emitComment(0, pos);
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'-':
appendSecondHyphenToBogusComment(); continue boguscommenthyphenloop; case'\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); break stateloop; case'\n':
appendStrBufLineFeed();
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default:
appendStrBuf(c);
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop;
}
} // no fallthrough, reordering opportunity case SCRIPT_DATA:
scriptdataloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data less-than sign state.
*/
flushChars(buf, pos);
returnState = state;
state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataloop; case'\u0000':
emitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Stay in the * script data state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_LESS_THAN_SIGN:
scriptdatalessthansignloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'/': /* * U+002F SOLIDUS (/) Set the temporary buffer * to the empty string. Switch to the script * data end tag open state.
*/
index = 0;
clearStrBufBeforeUse();
state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); continue stateloop; case'!':
tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
cstart = pos;
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdatalessthansignloop; default: /* * Otherwise, emit a U+003C LESS-THAN SIGN * character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the script data state.
*/
cstart = pos;
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_ESCAPE_START:
scriptdataescapestartloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escape start dash state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataescapestartloop; default: /* * Anything else Reconsume the current input * character in the script data state.
*/
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_ESCAPE_START_DASH:
scriptdataescapestartdashloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escaped dash dash state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataescapestartdashloop; default: /* * Anything else Reconsume the current input * character in the script data state.
*/
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_ESCAPED_DASH_DASH:
scriptdataescapeddashdashloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Stay in the * script data escaped dash dash state.
*/ continue; case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data escaped less-than sign state.
*/
flushChars(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit a U+003E * GREATER-THAN SIGN character token. Switch to * the script data state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); continue stateloop; case'\u0000':
emitReplacementCharacter(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); break scriptdataescapeddashdashloop; case'\r':
emitCarriageReturn(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data escaped state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataescapeddashdashloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_ESCAPED:
scriptdataescapedloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escaped dash state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataescapedloop; case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data escaped less-than sign state.
*/
flushChars(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); continue stateloop; case'\u0000':
emitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Stay in the * script data escaped state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_ESCAPED_DASH:
scriptdataescapeddashloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escaped dash dash state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos); continue stateloop; case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data escaped less-than sign state.
*/
flushChars(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataescapeddashloop; case'\u0000':
emitReplacementCharacter(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); continue stateloop; case'\r':
emitCarriageReturn(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data escaped state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
scriptdataescapedlessthanloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'/': /* * U+002F SOLIDUS (/) Set the temporary buffer * to the empty string. Switch to the script * data escaped end tag open state.
*/
index = 0;
clearStrBufBeforeUse();
returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); continue stateloop; case'S': case's': /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Emit a U+003C * LESS-THAN SIGN character token and the * current input character as a character token.
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
cstart = pos;
index = 1; /* * Set the temporary buffer to the empty string. * Append the lowercase version of the current * input character (add 0x0020 to the * character's code point) to the temporary * buffer. Switch to the script data double * escape start state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdataescapedlessthanloop; default: /* * Anything else Emit a U+003C LESS-THAN SIGN * character token and reconsume the current * input character in the script data escaped * state.
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
cstart = pos;
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_DOUBLE_ESCAPE_START:
scriptdatadoubleescapestartloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); assert index > 0; if (index < 6) { // SCRIPT_ARR.length char folded = c; if (c >= 'A' && c <= 'Z') {
folded += 0x20;
} if (folded != Tokenizer.SCRIPT_ARR[index]) {
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); continue stateloop;
}
index++; continue;
} switch (c) { case'\r':
emitCarriageReturn(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': case'/': case'>': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN * (>) Emit the current input character as a * character token. If the temporary buffer is * the string "script", then switch to the * script data double escaped state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdatadoubleescapestartloop; default: /* * Anything else Reconsume the current input * character in the script data escaped state.
*/
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_DOUBLE_ESCAPED:
scriptdatadoubleescapedloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data double escaped dash state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdatadoubleescapedloop; case'<': /* * U+003C LESS-THAN SIGN (<) Emit a U+003C * LESS-THAN SIGN character token. Switch to the * script data double escaped less-than sign * state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); continue stateloop; case'\u0000':
emitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Stay in the * script data double escaped state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
scriptdatadoubleescapeddashloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data double escaped dash dash state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdatadoubleescapeddashloop; case'<': /* * U+003C LESS-THAN SIGN (<) Emit a U+003C * LESS-THAN SIGN character token. Switch to the * script data double escaped less-than sign * state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); continue stateloop; case'\u0000':
emitReplacementCharacter(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop; case'\r':
emitCarriageReturn(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data double escaped state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
scriptdatadoubleescapeddashdashloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Stay in the * script data double escaped dash dash state.
*/ continue; case'<': /* * U+003C LESS-THAN SIGN (<) Emit a U+003C * LESS-THAN SIGN character token. Switch to the * script data double escaped less-than sign * state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdatadoubleescapeddashdashloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit a U+003E * GREATER-THAN SIGN character token. Switch to * the script data state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); continue stateloop; case'\u0000':
emitReplacementCharacter(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop; case'\r':
emitCarriageReturn(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data double escaped state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
scriptdatadoubleescapedlessthanloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'/': /* * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS * character token. Set the temporary buffer to * the empty string. Switch to the script data * double escape end state.
*/
index = 0;
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break scriptdatadoubleescapedlessthanloop; default: /* * Anything else Reconsume the current input * character in the script data double escaped * state.
*/
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case SCRIPT_DATA_DOUBLE_ESCAPE_END:
scriptdatadoubleescapeendloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); if (index < 6) { // SCRIPT_ARR.length char folded = c; if (c >= 'A' && c <= 'Z') {
folded += 0x20;
} if (folded != Tokenizer.SCRIPT_ARR[index]) {
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop;
}
index++; continue;
} switch (c) { case'\r':
emitCarriageReturn(buf, pos);
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': case'/': case'>': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN * (>) Emit the current input character as a * character token. If the temporary buffer is * the string "script", then switch to the * script data escaped state.
*/
state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); continue stateloop; default: /* * Reconsume the current input character in the * script data double escaped state.
*/
reconsume = true;
state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); continue stateloop;
}
} // no fallthrough, reordering opportunity case MARKUP_DECLARATION_OCTYPE:
markupdeclarationdoctypeloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); if (index < 6) { // OCTYPE.length char folded = c; if (c >= 'A' && c <= 'Z') {
folded += 0x20;
} if (folded == Tokenizer.OCTYPE[index]) {
appendStrBuf(c);
} else {
errBogusComment();
reconsume = true;
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop;
}
index++; continue;
} else {
reconsume = true;
state = transition(state, Tokenizer.DOCTYPE, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break markupdeclarationdoctypeloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case DOCTYPE:
doctypeloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
}
initDoctypeFields(); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before DOCTYPE name state.
*/
state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypeloop; default: /* * Anything else Parse error.
*/
errMissingSpaceBeforeDoctypeName(); /* * Reconsume the current character in the before * DOCTYPE name state.
*/
reconsume = true;
state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypeloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BEFORE_DOCTYPE_NAME:
beforedoctypenameloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before DOCTYPE name state.
*/ continue; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errNamelessDoctype(); /* * Create a new DOCTYPE token. Set its * force-quirks flag to on.
*/
forceQuirks = true; /* * Emit the token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Create a * new DOCTYPE token. Set the token's name * to the lowercase version of the input * character (add 0x0020 to the character's * code point).
*/
c += 0x20;
} /* Anything else Create a new DOCTYPE token. */ /* * Set the token's name name to the current * input character.
*/
clearStrBufBeforeUse();
appendStrBuf(c); /* * Switch to the DOCTYPE name state.
*/
state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break beforedoctypenameloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case DOCTYPE_NAME:
doctypenameloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
strBufToDoctypeName();
state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the after DOCTYPE name state.
*/
strBufToDoctypeName();
state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypenameloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token.
*/
strBufToDoctypeName();
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the input character (add * 0x0020 to the character's code point) to the * current DOCTYPE token's name.
*/ if (c >= 'A' && c <= 'Z') {
c += 0x0020;
} /* * Anything else Append the current input * character to the current DOCTYPE token's * name.
*/
appendStrBuf(c); /* * Stay in the DOCTYPE name state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case AFTER_DOCTYPE_NAME:
afterdoctypenameloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the after DOCTYPE name state.
*/ continue; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'p': case'P':
index = 0;
state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break afterdoctypenameloop; case's': case'S':
index = 0;
state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos); continue stateloop; default: /* * Otherwise, this is the parse error.
*/
bogusDoctype();
/* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case DOCTYPE_UBLIC:
doctypeublicloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * If the six characters starting from the current input * character are an ASCII case-insensitive match for the * word "PUBLIC", then consume those characters and * switch to the before DOCTYPE public identifier state.
*/ if (index < 5) { // UBLIC.length char folded = c; if (c >= 'A' && c <= 'Z') {
folded += 0x20;
} if (folded != Tokenizer.UBLIC[index]) {
bogusDoctype(); // forceQuirks = true;
reconsume = true;
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
index++; continue;
} else {
reconsume = true;
state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypeublicloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case AFTER_DOCTYPE_PUBLIC_KEYWORD:
afterdoctypepublickeywordloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before DOCTYPE public * identifier state.
*/
state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break afterdoctypepublickeywordloop; case'"': /* * U+0022 QUOTATION MARK (") Parse Error.
*/
errNoSpaceBetweenDoctypePublicKeywordAndQuote(); /* * Set the DOCTYPE token's public identifier to * the empty string (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); continue stateloop; case'\'': /* * U+0027 APOSTROPHE (') Parse Error.
*/
errNoSpaceBetweenDoctypePublicKeywordAndQuote(); /* * Set the DOCTYPE token's public identifier to * the empty string (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); continue stateloop; case'>': /* U+003E GREATER-THAN SIGN (>) Parse error. */
errExpectedPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; default:
bogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
beforedoctypepublicidentifierloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before DOCTYPE public identifier * state.
*/ continue; case'"': /* * U+0022 QUOTATION MARK (") Set the DOCTYPE * token's public identifier to the empty string * (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break beforedoctypepublicidentifierloop; case'\'': /* * U+0027 APOSTROPHE (') Set the DOCTYPE token's * public identifier to the empty string (not * missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); continue stateloop; case'>': /* U+003E GREATER-THAN SIGN (>) Parse error. */
errExpectedPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; default:
bogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
doctypepublicidentifierdoublequotedloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'"': /* * U+0022 QUOTATION MARK (") Switch to the after * DOCTYPE public identifier state.
*/
publicIdentifier = strBufToString();
state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypepublicidentifierdoublequotedloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errGtInPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
publicIdentifier = strBufToString();
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * public identifier.
*/
appendStrBuf(c); /* * Stay in the DOCTYPE public identifier * (double-quoted) state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
afterdoctypepublicidentifierloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the between DOCTYPE public and * system identifiers state.
*/
state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break afterdoctypepublicidentifierloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'"': /* * U+0022 QUOTATION MARK (") Parse error.
*/
errNoSpaceBetweenPublicAndSystemIds(); /* * Set the DOCTYPE token's system identifier to * the empty string (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); continue stateloop; case'\'': /* * U+0027 APOSTROPHE (') Parse error.
*/
errNoSpaceBetweenPublicAndSystemIds(); /* * Set the DOCTYPE token's system identifier to * the empty string (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); continue stateloop; default:
bogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
betweendoctypepublicandsystemidentifiersloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the between DOCTYPE public and system * identifiers state.
*/ continue; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'"': /* * U+0022 QUOTATION MARK (") Set the DOCTYPE * token's system identifier to the empty string * (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break betweendoctypepublicandsystemidentifiersloop; case'\'': /* * U+0027 APOSTROPHE (') Set the DOCTYPE token's * system identifier to the empty string (not * missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); continue stateloop; default:
bogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
doctypesystemidentifierdoublequotedloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'"': /* * U+0022 QUOTATION MARK (") Switch to the after * DOCTYPE system identifier state.
*/
systemIdentifier = strBufToString();
state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypesystemidentifierdoublequotedloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errGtInSystemId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
systemIdentifier = strBufToString();
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * system identifier.
*/
appendStrBuf(c); /* * Stay in the DOCTYPE system identifier * (double-quoted) state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
afterdoctypesystemidentifierloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the after DOCTYPE system identifier state.
*/ continue; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; default: /* * Switch to the bogus DOCTYPE state. (This does * not set the DOCTYPE token's force-quirks flag * to on.)
*/
bogusDoctypeWithoutQuirks();
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break afterdoctypesystemidentifierloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BOGUS_DOCTYPE: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'>': /* * U+003E GREATER-THAN SIGN (>) Emit that * DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Stay in the bogus DOCTYPE * state.
*/ continue;
}
} // no fallthrough, reordering opportunity case DOCTYPE_YSTEM:
doctypeystemloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Otherwise, if the six characters starting from the * current input character are an ASCII case-insensitive * match for the word "SYSTEM", then consume those * characters and switch to the before DOCTYPE system * identifier state.
*/ if (index < 5) { // YSTEM.length char folded = c; if (c >= 'A' && c <= 'Z') {
folded += 0x20;
} if (folded != Tokenizer.YSTEM[index]) {
bogusDoctype();
reconsume = true;
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
index++; continue stateloop;
} else {
reconsume = true;
state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break doctypeystemloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case AFTER_DOCTYPE_SYSTEM_KEYWORD:
afterdoctypesystemkeywordloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before DOCTYPE public * identifier state.
*/
state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break afterdoctypesystemkeywordloop; case'"': /* * U+0022 QUOTATION MARK (") Parse Error.
*/
errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); /* * Set the DOCTYPE token's system identifier to * the empty string (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); continue stateloop; case'\'': /* * U+0027 APOSTROPHE (') Parse Error.
*/
errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); /* * Set the DOCTYPE token's public identifier to * the empty string (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); continue stateloop; case'>': /* U+003E GREATER-THAN SIGN (>) Parse error. */
errExpectedPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; default:
bogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
beforedoctypesystemidentifierloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn(); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before DOCTYPE system identifier * state.
*/ continue; case'"': /* * U+0022 QUOTATION MARK (") Set the DOCTYPE * token's system identifier to the empty string * (not missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); continue stateloop; case'\'': /* * U+0027 APOSTROPHE (') Set the DOCTYPE token's * system identifier to the empty string (not * missing),
*/
clearStrBufBeforeUse(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state.
*/
state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break beforedoctypesystemidentifierloop; case'>': /* U+003E GREATER-THAN SIGN (>) Parse error. */
errExpectedSystemId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; default:
bogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state.
*/
state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\'': /* * U+0027 APOSTROPHE (') Switch to the after * DOCTYPE system identifier state.
*/
systemIdentifier = strBufToString();
state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); continue stateloop; case'>':
errGtInSystemId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
systemIdentifier = strBufToString();
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * system identifier.
*/
appendStrBuf(c); /* * Stay in the DOCTYPE system identifier * (double-quoted) state.
*/ continue;
}
} // no fallthrough, reordering opportunity case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\'': /* * U+0027 APOSTROPHE (') Switch to the after * DOCTYPE public identifier state.
*/
publicIdentifier = strBufToString();
state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); continue stateloop; case'>':
errGtInPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
publicIdentifier = strBufToString();
emitDoctypeToken(pos); /* * Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos); if (shouldSuspend) { break stateloop;
} continue stateloop; case'\r':
appendStrBufCarriageReturn(); break stateloop; case'\n':
appendStrBufLineFeed(); continue; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * public identifier.
*/
appendStrBuf(c); /* * Stay in the DOCTYPE public identifier * (single-quoted) state.
*/ continue;
}
} // no fallthrough, reordering opportunity case PROCESSING_INSTRUCTION:
processinginstructionloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'?':
state = transition(
state,
Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break processinginstructionloop; default: continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case PROCESSING_INSTRUCTION_QUESTION_MARK: if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); switch (c) { case'>':
state = transition(state, Tokenizer.DATA,
reconsume, pos); // Processing instruction syntax goes through these // states only in Gecko's XML View Source--not in HTML // parsing in Java or in Gecko. // Since XML View Source doesn't use the // suspension-after-current-token facility, its extension // to processing-instruction states is strictly unnecessary // at the moment. However, if these states ever were to be // used together with the suspension-after-current-token // facility, these states would need to participate, since // suspension could be requested when only less-than has been // seen and we don't yet know if we end up here. Handling // the currently-unnecessary case in order to avoid leaving // a trap for future modification.
suspendIfRequestedAfterCurrentNonTextToken(); if (shouldSuspend) { break stateloop;
} continue stateloop; default:
state = transition(state,
Tokenizer.PROCESSING_INSTRUCTION,
reconsume, pos); continue stateloop;
} // END HOTSPOT WORKAROUND
}
}
flushChars(buf, pos); /* * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
*/ // Save locals
stateSave = state;
returnStateSave = returnState; return pos;
}
// HOTSPOT WORKAROUND INSERTION POINT
// [NOCPP[
/**
 * Hook through which the tokenizer routes its state changes.
 *
 * This implementation ignores everything except {@code to} and returns it
 * unchanged. The method is {@code protected}, so presumably subclasses
 * override it to observe transitions -- confirm against the subclasses.
 *
 * @param from
 *            the state being left (unused here)
 * @param to
 *            the state being entered
 * @param reconsume
 *            the caller's reconsume flag (unused here)
 * @param pos
 *            the caller's current buffer position (unused here)
 * @return {@code to}
 * @throws SAXException
 *             never thrown by this implementation; declared for overriders
 */
protected int transition(int from, int to, boolean reconsume, int pos)
        throws SAXException {
    return to;
}
// ]NOCPP]
/**
 * Resets the per-doctype fields before a new DOCTYPE token is built.
 *
 * Clears the string buffer, drops the doctype name, releases and nulls any
 * previously held system and public identifiers, and turns the
 * force-quirks flag off.
 */
private void initDoctypeFields() {
    // Discard the characters "DOCTYPE" accumulated as a potential bogus
    // comment into strBuf.
    clearStrBufAfterUse();
    doctypeName = null;
    if (systemIdentifier != null) {
        Portability.releaseString(systemIdentifier);
        systemIdentifier = null;
    }
    if (publicIdentifier != null) {
        Portability.releaseString(publicIdentifier);
        publicIdentifier = null;
    }
    forceQuirks = false;
}
privatevoid handleNcrValue(int returnState) throws SAXException { /* * If one or more characters match the range, then take them all and * interpret the string of characters as a number (either hexadecimal or * decimal as appropriate).
*/ if (value <= 0xFFFF) { if (value >= 0x80 && value <= 0x9f) { /* * If that number is one of the numbers in the first column of * the following table, then this is a parse error.
*/
errNcrInC1Range(); /* * Find the row with that number in the first column, and return * a character token for the Unicode character given in the * second column of that row.
*/
@NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
emitOrAppendOne(val, returnState); // [NOCPP[
} elseif (value == 0xC
&& contentSpacePolicy != XmlViolationPolicy.ALLOW) { if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
emitOrAppendOne(Tokenizer.SPACE, returnState);
} elseif (contentSpacePolicy == XmlViolationPolicy.FATAL) {
fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
} // ]NOCPP]
} elseif (value == 0x0) {
errNcrZero();
emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
} elseif ((value & 0xF800) == 0xD800) {
errNcrSurrogate();
emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
} else { /* * Otherwise, return a character token for the Unicode character * whose code point is that number.
*/ char ch = (char) value; // [NOCPP[ if (value == 0x0D) {
errNcrCr();
} elseif ((value <= 0x0008) || (value == 0x000B)
|| (value >= 0x000E && value <= 0x001F)) {
ch = errNcrControlChar(ch);
} elseif (value >= 0xFDD0 && value <= 0xFDEF) {
errNcrUnassigned();
} elseif ((value & 0xFFFE) == 0xFFFE) {
ch = errNcrNonCharacter(ch);
} elseif (value >= 0x007F && value <= 0x009F) {
errNcrControlChar();
} else {
maybeWarnPrivateUse(ch);
} // ]NOCPP]
bmpChar[0] = ch;
emitOrAppendOne(bmpChar, returnState);
}
} elseif (value <= 0x10FFFF) { // [NOCPP[
maybeWarnPrivateUseAstral(); if ((value & 0xFFFE) == 0xFFFE) {
errAstralNonCharacter(value);
} // ]NOCPP]
astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10));
astralChar[1] = (char) (0xDC00 + (value & 0x3FF));
emitOrAppendTwo(astralChar, returnState);
} else {
errNcrOutOfRange();
emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
}
}
publicvoid eof() throws SAXException { int state = stateSave; int returnState = returnStateSave;
eofloop: for (;;) { switch (state) { case SCRIPT_DATA_LESS_THAN_SIGN: case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: /* * Otherwise, emit a U+003C LESS-THAN SIGN character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in the data * state.
*/ break eofloop; case TAG_OPEN: /* * The behavior of this state depends on the content model * flag.
*/ /* * Anything else Parse error.
*/
errEofAfterLt(); /* * Emit a U+003C LESS-THAN SIGN character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in the data * state.
*/ break eofloop; case RAWTEXT_RCDATA_LESS_THAN_SIGN: /* * Emit a U+003C LESS-THAN SIGN character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in the RCDATA * state.
*/ break eofloop; case NON_DATA_END_TAG_NAME: /* * Emit a U+003C LESS-THAN SIGN character token, a U+002F * SOLIDUS character token,
*/
tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2); /* * a character token for each of the characters in the * temporary buffer (in the order they were added to the * buffer),
*/
emitStrBuf(); /* * and reconsume the current input character in the RCDATA * state.
*/ break eofloop; case CLOSE_TAG_OPEN: /* EOF Parse error. */
errEofAfterLt(); /* * Emit a U+003C LESS-THAN SIGN character token and a U+002F * SOLIDUS character token.
*/
tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case TAG_NAME: /* * EOF Parse error.
*/
errEofInTagName(); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case BEFORE_ATTRIBUTE_NAME: case AFTER_ATTRIBUTE_VALUE_QUOTED: case SELF_CLOSING_START_TAG: /* EOF Parse error. */
errEofWithoutGt(); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case ATTRIBUTE_NAME: /* * EOF Parse error.
*/
errEofInAttributeName(); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case AFTER_ATTRIBUTE_NAME: case BEFORE_ATTRIBUTE_VALUE: /* EOF Parse error. */
errEofWithoutGt(); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case ATTRIBUTE_VALUE_DOUBLE_QUOTED: case ATTRIBUTE_VALUE_SINGLE_QUOTED: case ATTRIBUTE_VALUE_UNQUOTED: /* EOF Parse error. */
errEofInAttributeValue(); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case BOGUS_COMMENT:
emitComment(0, 0); break eofloop; case BOGUS_COMMENT_HYPHEN: // [NOCPP[
maybeAppendSpaceToBogusComment(); // ]NOCPP]
emitComment(0, 0); break eofloop; case MARKUP_DECLARATION_OPEN:
errBogusComment();
emitComment(0, 0); break eofloop; case MARKUP_DECLARATION_HYPHEN:
errBogusComment();
emitComment(0, 0); break eofloop; case MARKUP_DECLARATION_OCTYPE: if (index < 6) {
errBogusComment();
emitComment(0, 0);
} else { /* EOF Parse error. */
errEofInDoctype(); /* * Create a new DOCTYPE token. Set its force-quirks flag * to on.
*/
doctypeName = null; if (systemIdentifier != null) {
Portability.releaseString(systemIdentifier);
systemIdentifier = null;
} if (publicIdentifier != null) {
Portability.releaseString(publicIdentifier);
publicIdentifier = null;
}
forceQuirks = true; /* * Emit the token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop;
} break eofloop; case COMMENT_START: case COMMENT: case COMMENT_LESSTHAN: case COMMENT_LESSTHAN_BANG: /* * EOF Parse error.
*/
errEofInComment(); /* Emit the comment token. */
emitComment(0, 0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case COMMENT_END: case COMMENT_LESSTHAN_BANG_DASH_DASH:
errEofInComment(); /* Emit the comment token. */
emitComment(2, 0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case COMMENT_END_DASH: case COMMENT_START_DASH: case COMMENT_LESSTHAN_BANG_DASH:
errEofInComment(); /* Emit the comment token. */
emitComment(1, 0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case COMMENT_END_BANG:
errEofInComment(); /* Emit the comment token. */
emitComment(3, 0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case DOCTYPE: case BEFORE_DOCTYPE_NAME:
errEofInDoctype(); /* * Create a new DOCTYPE token. Set its force-quirks flag to * on.
*/
forceQuirks = true; /* * Emit the token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case DOCTYPE_NAME:
errEofInDoctype();
strBufToDoctypeName(); /* * Set the DOCTYPE token's force-quirks flag to on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case DOCTYPE_UBLIC: case DOCTYPE_YSTEM: case AFTER_DOCTYPE_NAME: case AFTER_DOCTYPE_PUBLIC_KEYWORD: case AFTER_DOCTYPE_SYSTEM_KEYWORD: case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
errEofInDoctype(); /* * Set the DOCTYPE token's force-quirks flag to on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: /* EOF Parse error. */
errEofInPublicId(); /* * Set the DOCTYPE token's force-quirks flag to on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
publicIdentifier = strBufToString();
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
errEofInDoctype(); /* * Set the DOCTYPE token's force-quirks flag to on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: /* EOF Parse error. */
errEofInSystemId(); /* * Set the DOCTYPE token's force-quirks flag to on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
systemIdentifier = strBufToString();
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
errEofInDoctype(); /* * Set the DOCTYPE token's force-quirks flag to on.
*/
forceQuirks = true; /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case BOGUS_DOCTYPE: /* * Emit that DOCTYPE token.
*/
emitDoctypeToken(0); /* * Reconsume the EOF character in the data state.
*/ break eofloop; case CONSUME_CHARACTER_REFERENCE: /* * Unlike the definition is the spec, this state does not * return a value and never requires the caller to * backtrack. This state takes care of emitting characters * or appending to the current attribute value. It also * takes care of that in the case when consuming the entity * fails.
*/ /* * This section defines how to consume an entity. This * definition is used when parsing entities in text and in * attributes. * * The behavior depends on the identity of the next * character (the one immediately after the U+0026 AMPERSAND * character):
*/
emitOrAppendCharRefBuf(returnState);
state = returnState; continue; case CHARACTER_REFERENCE_HILO_LOOKUP:
emitOrAppendCharRefBuf(returnState);
state = returnState; continue; case CHARACTER_REFERENCE_TAIL:
outer: for (;;) { char c = '\u0000';
entCol++; /* * Consume the maximum number of characters possible, * with the consumed characters matching one of the * identifiers in the first column of the named * character references table (in a case-sensitive * manner).
*/
hiloop: for (;;) { if (hi == -1) { break hiloop;
} if (entCol == NamedCharacters.NAMES[hi].length()) { break hiloop;
} if (entCol > NamedCharacters.NAMES[hi].length()) { break outer;
} elseif (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
hi--;
} else { break hiloop;
}
}
if (candidate == -1) {
emitOrAppendCharRefBuf(returnState);
state = returnState; continue eofloop;
} else {
@Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate]; if (candidateName.length() == 0
|| candidateName.charAt(candidateName.length() - 1) != ';') { /* * If the last character matched is not a U+003B * SEMICOLON (;), there is a parse error.
*/ if ((returnState & DATA_AND_RCDATA_MASK) != 0) { /* * If the entity is being consumed as part of an * attribute, and the last character matched is * not a U+003B SEMICOLON (;),
*/ char ch; if (charRefBufMark == charRefBufLen) {
ch = '\u0000';
} else {
ch = charRefBuf[charRefBufMark];
} if ((ch >= '0' && ch <= '9')
|| (ch >= 'A' && ch <= 'Z')
|| (ch >= 'a' && ch <= 'z')) { /* * and the next character is in the range * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, * U+0041 LATIN CAPITAL LETTER A to U+005A * LATIN CAPITAL LETTER Z, or U+0061 LATIN * SMALL LETTER A to U+007A LATIN SMALL * LETTER Z, then, for historical reasons, * all the characters that were matched * after the U+0026 AMPERSAND (&) must be * unconsumed, and nothing is returned.
*/
appendCharRefBufToStrBuf();
state = returnState; continue eofloop;
}
} if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
errUnescapedAmpersandInterpretedAsCharacterReference();
} else {
errNotSemicolonTerminated();
}
}
/* * Otherwise, return a character token for the character * corresponding to the entity name (as given by the * second column of the named character references * table).
*/
@Const @NoLength char[] val = NamedCharacters.VALUES[candidate]; if ( // [NOCPP[
val.length == 1 // ]NOCPP] // CPPONLY: val[1] == 0
) {
emitOrAppendOne(val, returnState);
} else {
emitOrAppendTwo(val, returnState);
} // this is so complicated! if (charRefBufMark < charRefBufLen) { if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
appendStrBuf(charRefBuf, charRefBufMark,
charRefBufLen - charRefBufMark);
} else {
tokenHandler.characters(charRefBuf, charRefBufMark,
charRefBufLen - charRefBufMark);
}
}
charRefBufLen = 0;
state = returnState; continue eofloop; /* * If the markup contains I'm ¬it; I tell you, the * entity is parsed as "not", as in, I'm ¬it; I tell * you. But if the markup was I'm ∉ I tell you, * the entity would be parsed as "notin;", resulting in * I'm ∉ I tell you.
*/
} case CONSUME_NCR: case DECIMAL_NRC_LOOP: case HEX_NCR_LOOP: /* * If no characters match the range, then don't consume any * characters (and unconsume the U+0023 NUMBER SIGN * character and, if appropriate, the X character). This is * a parse error; nothing is returned. * * Otherwise, if the next character is a U+003B SEMICOLON, * consume that too. If it isn't, there is a parse error.
*/ if (!seenDigits) {
errNoDigitsInNCR();
emitOrAppendCharRefBuf(returnState);
state = returnState; continue;
} else {
errCharRefLacksSemicolon();
} // WARNING previous state sets reconsume
handleNcrValue(returnState);
state = returnState; continue; case CDATA_RSQB:
tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); break eofloop; case CDATA_RSQB_RSQB:
tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2); break eofloop; case DATA: default: break eofloop;
}
} // case DATA: /* * EOF Emit an end-of-file token.
*/
tokenHandler.eof(); return;
}
/**
 * Emits a doctype token.
 *
 * Passes the doctype name, public identifier, system identifier and
 * force-quirks flag to the token handler, then releases and clears the
 * identifier fields.
 *
 * NOTE: The method may set <code>shouldSuspend</code>, so the caller
 * must have this pattern after the state's <code>transition</code> call:
 * <pre>
 * if (shouldSuspend) {
 *     break stateloop;
 * }
 * continue stateloop;
 * </pre>
 *
 * @param pos
 *            buffer position of the end of the token; <code>cstart</code>
 *            is set to <code>pos + 1</code>
 * @throws SAXException
 *             if the token handler throws
 */
private void emitDoctypeToken(int pos) throws SAXException {
    // CPPONLY: RememberGt(pos);
    cstart = pos + 1;
    tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
            forceQuirks);
    // It is OK and sufficient to release these here, since
    // there's no way out of the doctype states than through paths
    // that call this method.
    doctypeName = null;
    Portability.releaseString(publicIdentifier);
    publicIdentifier = null;
    Portability.releaseString(systemIdentifier);
    systemIdentifier = null;
    suspendIfRequestedAfterCurrentNonTextToken();
}
/**
 * If a previous call to <code>suspendAfterCurrentTokenIfNotInText()</code>
 * happened in a non-text context, this method turns that deferred suspension
 * request into an immediately-pending suspension request.
 *
 * Concretely: when <code>suspendAfterCurrentNonTextToken</code> is set, it
 * is cleared and <code>shouldSuspend</code> is set instead. Otherwise
 * nothing changes.
 */
private void suspendIfRequestedAfterCurrentNonTextToken() {
    if (suspendAfterCurrentNonTextToken) {
        suspendAfterCurrentNonTextToken = false;
        shouldSuspend = true;
    }
}
// Making this private until the full Java implementation is done. /** * Request suspension after the current token if the tokenizer is currently * in a non-text state (i.e. it's known that the next token will be a * non-text token). * * Must not be called when <code>tokenizeBuffer()</code> is on the call * stack.
*/
@SuppressWarnings("unused") privatevoid suspendAfterCurrentTokenIfNotInText() { switch (stateSave) { case DATA: case RCDATA: case SCRIPT_DATA: case RAWTEXT: case SCRIPT_DATA_ESCAPED: case PLAINTEXT: case NON_DATA_END_TAG_NAME: // We haven't yet committed to the next // token being a non-text token, though // it could be. case SCRIPT_DATA_LESS_THAN_SIGN: case SCRIPT_DATA_ESCAPE_START: case SCRIPT_DATA_ESCAPE_START_DASH: case SCRIPT_DATA_ESCAPED_DASH: case SCRIPT_DATA_ESCAPED_DASH_DASH: case RAWTEXT_RCDATA_LESS_THAN_SIGN: case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: case SCRIPT_DATA_DOUBLE_ESCAPE_START: case SCRIPT_DATA_DOUBLE_ESCAPED: case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: case SCRIPT_DATA_DOUBLE_ESCAPE_END: return; case TAG_NAME: case BEFORE_ATTRIBUTE_NAME: case ATTRIBUTE_NAME: case AFTER_ATTRIBUTE_NAME: case BEFORE_ATTRIBUTE_VALUE: case AFTER_ATTRIBUTE_VALUE_QUOTED: case BOGUS_COMMENT: case MARKUP_DECLARATION_OPEN: case DOCTYPE: case BEFORE_DOCTYPE_NAME: case DOCTYPE_NAME: case AFTER_DOCTYPE_NAME: case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: case BOGUS_DOCTYPE: case COMMENT_START: case COMMENT_START_DASH: case COMMENT: case COMMENT_END_DASH: case COMMENT_END: case COMMENT_END_BANG: case TAG_OPEN: case CLOSE_TAG_OPEN: case MARKUP_DECLARATION_HYPHEN: case MARKUP_DECLARATION_OCTYPE: case DOCTYPE_UBLIC: case DOCTYPE_YSTEM: case AFTER_DOCTYPE_PUBLIC_KEYWORD: case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: case AFTER_DOCTYPE_SYSTEM_KEYWORD: case SELF_CLOSING_START_TAG: case ATTRIBUTE_VALUE_DOUBLE_QUOTED: case ATTRIBUTE_VALUE_SINGLE_QUOTED: case ATTRIBUTE_VALUE_UNQUOTED: case 
BOGUS_COMMENT_HYPHEN: case COMMENT_LESSTHAN: case COMMENT_LESSTHAN_BANG: case COMMENT_LESSTHAN_BANG_DASH: case COMMENT_LESSTHAN_BANG_DASH_DASH: case CDATA_START: case CDATA_SECTION: case CDATA_RSQB: case CDATA_RSQB_RSQB: case PROCESSING_INSTRUCTION: case PROCESSING_INSTRUCTION_QUESTION_MARK: break; case CONSUME_CHARACTER_REFERENCE: case CONSUME_NCR: case CHARACTER_REFERENCE_TAIL: case HEX_NCR_LOOP: case DECIMAL_NRC_LOOP: case HANDLE_NCR_VALUE: case HANDLE_NCR_VALUE_RECONSUME: case CHARACTER_REFERENCE_HILO_LOOKUP: if (returnStateSave == DATA || returnStateSave == RCDATA) { return;
} break; default: assertfalse : "Incomplete switch"; return;
}
suspendAfterCurrentNonTextToken = true;
}
// Making this private until the full Java implementation is done. /** * Queries if we are about to suspend after the current non-text token due to a request * from <code>suspendAfterCurrentTokenIfNotInText()</code>. * @return <code>true</code> iff <code>suspendAfterCurrentTokenIfNotInText()</code> was * called in a non-text position and the then-current token has not been emitted yet.
*/
@SuppressWarnings("unused") privateboolean suspensionAfterCurrentNonTextTokenPending() { return suspendAfterCurrentNonTextToken;
}
containsHyphen = other.containsHyphen; if (other.tagName == null) {
tagName = null;
} elseif (other.tagName.isInterned()) {
tagName = other.tagName;
} else { // In the C++ case, the atoms in the other tokenizer are from a // different tokenizer-scoped atom table. Therefore, we have to // obtain the correspoding atom from our own atom table.
nonInternedTagName.setNameForNonInterned(other.tagName.getName() // CPPONLY: , other.tagName.isCustom()
);
tagName = nonInternedTagName;
}
// [NOCPP[
attributeName = other.attributeName; // ]NOCPP] // CPPONLY: if (other.attributeName == null) { // CPPONLY: attributeName = null; // CPPONLY: } else if (other.attributeName.isInterned()) { // CPPONLY: attributeName = other.attributeName; // CPPONLY: } else { // CPPONLY: // In the C++ case, the atoms in the other tokenizer are from a // CPPONLY: // different tokenizer-scoped atom table. Therefore, we have to // CPPONLY: // obtain the correspoding atom from our own atom table. // CPPONLY: nonInternedAttributeName.setNameForNonInterned(other.attributeName.getLocal(AttributeName.HTML)); // CPPONLY: attributeName = nonInternedAttributeName; // CPPONLY: }
/**
 * Sets the encodingDeclarationHandler.
 *
 * The reference is stored as given; no null check is performed.
 *
 * @param encodingDeclarationHandler
 *            the encodingDeclarationHandler to set
 */
public void setEncodingDeclarationHandler(
        EncodingDeclarationHandler encodingDeclarationHandler) {
    this.encodingDeclarationHandler = encodingDeclarationHandler;
}
/**
 * Tears down the objects this tokenizer holds in
 * <code>nonInternedTagName</code> and <code>attributes</code>: each is
 * handed to <code>Portability.delete</code> and the field is then nulled.
 *
 * NOTE(review): the CPPONLY lines and the translator marker below look
 * position-sensitive for the Java-to-C++ translator -- keep their order.
 */
void destructor() {
    Portability.delete(nonInternedTagName);
    nonInternedTagName = null;
    // CPPONLY: Portability.delete(nonInternedAttributeName);
    // CPPONLY: nonInternedAttributeName = null;
    // The translator will write refcount tracing stuff here
    Portability.delete(attributes);
    attributes = null;
}
// [NOCPP[
/**
 * Sets an offset to be added to the position reported to
 * <code>TransitionHandler</code>.
 *
 * This implementation is intentionally a no-op: the offset is ignored.
 * Presumably a subclass that reports transitions overrides it -- confirm
 * against the subclasses.
 *
 * @param offset the offset
 */
public void setTransitionBaseOffset(int offset) {
    // Nothing to record here.
}
// ]NOCPP]
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.406 Sekunden
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.