/* * Copyright (c) 2005-2007 Henri Sivonen * Copyright (c) 2007-2017 Mozilla Foundation * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla * Foundation, and Opera Software ASA. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE.
*/
/** * An implementation of * https://html.spec.whatwg.org/multipage/syntax.html#tokenization * * This class implements the <code>Locator</code> interface. This is not an * incidental implementation detail: Users of this class are encouraged to make * use of the <code>Locator</code> nature. * * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer * can be configured to treat these conditions as fatal or to coerce the infoset * to something that XML 1.0 allows. * * @version $Id$ * @author hsivonen
*/ publicclass Tokenizer implements Locator, Locator2 {
/** * Magic value for UTF-16 operations.
*/ privatestaticfinalint LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
/** * UTF-16 code unit array containing less than and greater than for emitting * those characters on certain parse errors.
*/ privatestaticfinal @NoLength char[] LT_GT = { '<', '>' };
/** * UTF-16 code unit array containing less than and solidus for emitting * those characters on certain parse errors.
*/ privatestaticfinal @NoLength char[] LT_SOLIDUS = { '<', '/' };
/** * UTF-16 code unit array containing ]] for emitting those characters on * state transitions.
*/ privatestaticfinal @NoLength char[] RSQB_RSQB = { ']', ']' };
/** * Array version of U+FFFD.
*/ privatestaticfinal @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
// [NOCPP[
/** * Array version of space.
*/ privatestaticfinal @NoLength char[] SPACE = { ' ' };
// ]NOCPP]
/** * Array version of line feed.
*/ privatestaticfinal @NoLength char[] LF = { '\n' };
/** * The element whose end tag closes the current CDATA or RCDATA element.
*/ protected ElementName endTagExpectation = null;
privatechar[] endTagExpectationAsArray; // not @Auto!
/** * <code>true</code> if tokenizing an end tag
*/ protectedboolean endTag;
/** * <code>true</code> iff the current element/attribute name contains * a hyphen.
*/ privateboolean containsHyphen;
/** * The current tag token name. One of * 1) null, * 2) non-owning reference to nonInternedTagName * 3) non-owning reference to a pre-interned ElementName
*/ private ElementName tagName = null;
/** * The recycled ElementName instance for the non-pre-interned cases.
*/ private ElementName nonInternedTagName = null;
/** * The current attribute name.
*/ protected AttributeName attributeName = null;
/* * The line number of the current attribute. First set to the line of the * attribute name and if there is a value, set to the line the value * started on.
*/ // CPPONLY: private int attributeLine;
public ErrorHandler getErrorHandler() { returnthis.errorHandler;
}
/** * Gets the errorProfile. * * @param errorProfile
*/ public HashMap getErrorProfile() { returnnull;
}
/**
 * Sets the commentPolicy, which decides how comment content that XML 1.0
 * cannot represent (consecutive or trailing hyphens) is handled.
 *
 * @param commentPolicy
 *            the commentPolicy to set
 */
public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
    this.commentPolicy = commentPolicy;
}
/** * Sets the contentNonXmlCharPolicy. * * @param contentNonXmlCharPolicy * the contentNonXmlCharPolicy to set
*/ publicvoid setContentNonXmlCharPolicy(
XmlViolationPolicy contentNonXmlCharPolicy) { if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) { thrownew IllegalArgumentException( "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
}
}
/**
 * Sets the contentSpacePolicy.
 *
 * @param contentSpacePolicy
 *            the contentSpacePolicy to set
 */
public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
    this.contentSpacePolicy = contentSpacePolicy;
}
/** * Sets the xmlnsPolicy. * * @param xmlnsPolicy * the xmlnsPolicy to set
*/ publicvoid setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { if (xmlnsPolicy == XmlViolationPolicy.FATAL) { thrownew IllegalArgumentException("Can't use FATAL here.");
} this.xmlnsPolicy = xmlnsPolicy;
}
/**
 * Sets the saved tokenizer state and clears any end tag expectation
 * (both the element name and its character-array form).
 *
 * @param specialTokenizerState
 *            the tokenizer state to set
 */
public void setState(int specialTokenizerState) {
    this.stateSave = specialTokenizerState;
    this.endTagExpectation = null;
    this.endTagExpectationAsArray = null;
}
// [NOCPP[
/** * Sets the tokenizer state and the associated element name. This should * only ever used to put the tokenizer into one of the states that have * a special end tag expectation. For use from the tokenizer test harness. * * @param specialTokenizerState * the tokenizer state to set * @param endTagExpectation * the expected end tag for transitioning back to normal
*/ publicvoid setStateAndEndTagExpectation(int specialTokenizerState,
@Local String endTagExpectation) { this.stateSave = specialTokenizerState; if (specialTokenizerState == Tokenizer.DATA) { return;
}
@Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation); this.endTagExpectation = ElementName.elementNameByBuffer(asArray,
asArray.length); assertthis.endTagExpectation != null;
endTagExpectationToArray();
}
// ]NOCPP]
/**
 * Sets the tokenizer state and the associated element name. This should
 * only ever be used to put the tokenizer into one of the states that have
 * a special end tag expectation.
 *
 * @param specialTokenizerState
 *            the tokenizer state to set
 * @param endTagExpectation
 *            the expected end tag for transitioning back to normal; it is
 *            dereferenced immediately, so it must not be <code>null</code>
 */
public void setStateAndEndTagExpectation(int specialTokenizerState,
        ElementName endTagExpectation) {
    this.stateSave = specialTokenizerState;
    this.endTagExpectation = endTagExpectation;
    endTagExpectationToArray();
}
privatevoid endTagExpectationToArray() { switch (endTagExpectation.getGroup()) { case TreeBuilder.TITLE:
endTagExpectationAsArray = TITLE_ARR; return; case TreeBuilder.SCRIPT:
endTagExpectationAsArray = SCRIPT_ARR; return; case TreeBuilder.STYLE:
endTagExpectationAsArray = STYLE_ARR; return; case TreeBuilder.PLAINTEXT:
endTagExpectationAsArray = PLAINTEXT_ARR; return; case TreeBuilder.XMP:
endTagExpectationAsArray = XMP_ARR; return; case TreeBuilder.TEXTAREA:
endTagExpectationAsArray = TEXTAREA_ARR; return; case TreeBuilder.IFRAME:
endTagExpectationAsArray = IFRAME_ARR; return; case TreeBuilder.NOEMBED:
endTagExpectationAsArray = NOEMBED_ARR; return; case TreeBuilder.NOSCRIPT:
endTagExpectationAsArray = NOSCRIPT_ARR; return; case TreeBuilder.NOFRAMES:
endTagExpectationAsArray = NOFRAMES_ARR; return; default: assertfalse: "Bad end tag expectation."; return;
}
}
/**
 * For C++ use only.
 *
 * @param line
 *            the line number to store in <code>line</code>
 */
public void setLineNumber(int line) {
    // CPPONLY: this.attributeLine = line; // XXX is this needed?
    this.line = line;
}
/**
 * Prepares the string buffer for a new token. The buffer is expected to
 * be empty already; the assignment is only a safety net for builds where
 * assertions are disabled.
 */
@Inline private void clearStrBufBeforeUse() {
    assert strBufLen == 0 : "strBufLen not reset after previous use!";
    strBufLen = 0; // no-op in the absence of bugs
}
/**
 * Empties the string buffer when it is expected to hold exactly one
 * hyphen; both expectations are checked with assertions.
 */
@Inline private void clearStrBufAfterOneHyphen() {
    assert strBufLen == 1 : "strBufLen length not one!";
    assert strBuf[0] == '-' : "strBuf does not start with a hyphen!";
    strBufLen = 0;
}
/**
 * Appends to the buffer.
 *
 * The Java build performs no capacity check here; the buffer is sized up
 * front by <code>ensureBufferSpace</code>.
 *
 * @param c
 *            the UTF-16 code unit to append
 */
@Inline private void appendStrBuf(char c) {
    // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
    // CPPONLY: if (strBufLen == strBuf.length) {
    // CPPONLY:     if (!EnsureBufferSpace(1)) {
    // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
    // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
    // CPPONLY: }
    strBuf[strBufLen++] = c;
}
/**
 * The buffer as a String. Currently only used for error reporting.
 * The buffer is cleared as a side effect.
 *
 * <p>
 * C++ memory note: The return value must be released.
 *
 * @return the buffer as a string
 */
protected String strBufToString() {
    String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
        // CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS
    );
    clearStrBufAfterUse();
    return str;
}
/**
 * Stores the buffer contents as a local name in <code>doctypeName</code>
 * and clears the buffer. The stored value is released in
 * emitDoctypeToken().
 */
private void strBufToDoctypeName() {
    doctypeName = Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner);
    clearStrBufAfterUse();
}
/**
 * Emits the buffer as character tokens and clears it. Does nothing when
 * the buffer is empty.
 *
 * @throws SAXException
 *             if the token handler threw
 */
private void emitStrBuf() throws SAXException {
    if (strBufLen > 0) {
        tokenHandler.characters(strBuf, 0, strBufLen);
        clearStrBufAfterUse();
    }
}
@Inline privatevoid appendSecondHyphenToBogusComment() throws SAXException { // [NOCPP[ switch (commentPolicy) { case ALTER_INFOSET:
appendStrBuf(' '); // CPPONLY: MOZ_FALLTHROUGH; case ALLOW:
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // ]NOCPP]
appendStrBuf('-'); // [NOCPP[ break; case FATAL:
fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); break;
} // ]NOCPP]
}
// [NOCPP[ privatevoid maybeAppendSpaceToBogusComment() throws SAXException { switch (commentPolicy) { case ALTER_INFOSET:
appendStrBuf(' '); // CPPONLY: MOZ_FALLTHROUGH; case ALLOW:
warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); break; case FATAL:
fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); break;
}
}
// ]NOCPP]
@Inline privatevoid adjustDoubleHyphenAndAppendToStrBufAndErr(char c, boolean reportedConsecutiveHyphens) throws SAXException { // [NOCPP[ switch (commentPolicy) { case ALTER_INFOSET:
strBufLen--; // WARNING!!! This expands the worst case of the buffer length // given the length of input!
appendStrBuf(' ');
appendStrBuf('-'); // CPPONLY: MOZ_FALLTHROUGH; case ALLOW: if (!reportedConsecutiveHyphens) {
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
} // ]NOCPP]
appendStrBuf(c); // [NOCPP[ break; case FATAL:
fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); break;
} // ]NOCPP]
}
/**
 * Appends a range of UTF-16 code units to the buffer.
 *
 * The new length is computed with <code>Portability.checkedAdd</code>
 * before anything is copied.
 *
 * @param buffer
 *            the array to copy from
 * @param offset
 *            index of the first code unit to copy
 * @param length
 *            number of code units to copy
 * @throws SAXException
 *             declared by this method; presumably raised by the checked
 *             addition on overflow (confirm against Portability)
 */
private void appendStrBuf(@NoLength char[] buffer, int offset, int length) throws SAXException {
    int newLen = Portability.checkedAdd(strBufLen, length);
    // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient.";
    // CPPONLY: if (strBuf.length < newLen) {
    // CPPONLY:     if (!EnsureBufferSpace(length)) {
    // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
    // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
    // CPPONLY: }
    System.arraycopy(buffer, offset, strBuf, strBufLen, length);
    strBufLen = newLen;
}
/**
 * Append the contents of the char reference buffer to the main one and
 * reset the char reference buffer to empty.
 *
 * @throws SAXException
 *             if the underlying append threw
 */
@Inline private void appendCharRefBufToStrBuf() throws SAXException {
    appendStrBuf(charRefBuf, 0, charRefBufLen);
    charRefBufLen = 0;
}
/** * Emits the current comment token. * * NOTE: The method may set <code>shouldSuspend</code>, so the caller * must have this pattern after the state's <code>transition</code> call: * * <pre> * if (shouldSuspend) { * break stateloop; * } * continue stateloop; * </pre> * * @param pos * TODO * * @throws SAXException
*/ privatevoid emitComment(int provisionalHyphens, int pos) throws SAXException { // CPPONLY: RememberGt(pos); // [NOCPP[ if (wantsComments) { // ]NOCPP]
tokenHandler.comment(strBuf, 0, strBufLen
- provisionalHyphens); // [NOCPP[
} // ]NOCPP]
clearStrBufAfterUse();
cstart = pos + 1;
suspendIfRequestedAfterCurrentNonTextToken();
}
/**
 * Flushes coalesced character tokens.
 *
 * Emits the pending run <code>buf[cstart .. pos)</code>, if it is
 * non-empty, and then marks that no run is in progress by setting
 * <code>cstart</code> to <code>Integer.MAX_VALUE</code>.
 *
 * @param buf
 *            the input buffer holding the pending characters
 * @param pos
 *            index one past the last character of the run
 *
 * @throws SAXException
 *             if the token handler threw
 */
protected void flushChars(@NoLength char[] buf, int pos)
        throws SAXException {
    if (pos > cstart) {
        tokenHandler.characters(buf, cstart, pos - cstart);
    }
    cstart = Integer.MAX_VALUE;
}
/**
 * Reports a condition that would make the infoset incompatible with XML
 * 1.0 as fatal.
 *
 * The error handler, if one is set, is notified first; the exception is
 * then always thrown, so this method never returns normally.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             always; a <code>SAXParseException</code> located at this
 *             tokenizer, unless the error handler throws first
 */
public void fatal(String message) throws SAXException {
    SAXParseException spe = new SAXParseException(message, this);
    if (errorHandler != null) {
        errorHandler.fatalError(spe);
    }
    throw spe;
}
/**
 * Reports a Parse Error. Does nothing when no error handler is set.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             if the error handler threw
 */
public void err(String message) throws SAXException {
    if (errorHandler == null) {
        return;
    }
    SAXParseException spe = new SAXParseException(message, this);
    errorHandler.error(spe);
}
if (attributes == null) {
attributes = new HtmlAttributes(mappingLangToXmlLang);
}
/* * When the user agent leaves the attribute name state (and before * emitting the tag token, if appropriate), the complete attribute's * name must be compared to the other attributes on the same token; if * there is already an attribute on the token with the exact same name, * then this is a parse error and the new attribute must be dropped, * along with the value that gets associated with it (if any).
*/ if (attributes.contains(attributeName)) {
errDuplicateAttribute();
attributeName = null;
}
}
/**
 * Starts a tokenization run: initializes the tokenizer, notifies the
 * token handler and, in the Java build, starts error reporting.
 *
 * @throws SAXException
 *             if the token handler or error reporting start-up threw
 */
public void start() throws SAXException {
    initializeWithoutStarting();
    tokenHandler.startTokenization(this);
    // CPPONLY: if (mViewSource) {
    // CPPONLY:   line = 1;
    // CPPONLY:   col = -1;
    // CPPONLY:   nextCharOnNewLine = false;
    // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
    // CPPONLY:   line = 0;
    // CPPONLY:   col = 1;
    // CPPONLY:   nextCharOnNewLine = true;
    // CPPONLY: } else {
    // CPPONLY:   line = -1;
    // CPPONLY:   col = -1;
    // CPPONLY:   nextCharOnNewLine = false;
    // CPPONLY: }
    // [NOCPP[
    startErrorReporting();
    // ]NOCPP]
}
publicboolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException { int state = stateSave; int returnState = returnStateSave; char c = '\u0000';
shouldSuspend = false;
lastCR = false;
int start = buffer.getStart(); int end = buffer.getEnd();
// In C++, the caller of tokenizeBuffer needs to do this explicitly. // [NOCPP[
ensureBufferSpace(end - start); // ]NOCPP]
/** * The index of the last <code>char</code> read from <code>buf</code>.
*/ int pos = start - 1;
/** * The index of the first <code>char</code> in <code>buf</code> that is * part of a coalesced run of character tokens or * <code>Integer.MAX_VALUE</code> if there is not a current run being * coalesced.
*/ switch (state) { case DATA: case RCDATA: case SCRIPT_DATA: case PLAINTEXT: case RAWTEXT: case CDATA_SECTION: case SCRIPT_DATA_ESCAPED: case SCRIPT_DATA_ESCAPE_START: case SCRIPT_DATA_ESCAPE_START_DASH: case SCRIPT_DATA_ESCAPED_DASH: case SCRIPT_DATA_ESCAPED_DASH_DASH: case SCRIPT_DATA_DOUBLE_ESCAPE_START: case SCRIPT_DATA_DOUBLE_ESCAPED: case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: case SCRIPT_DATA_DOUBLE_ESCAPE_END:
cstart = start; break; default:
cstart = Integer.MAX_VALUE; break;
}
/** * The number of <code>char</code>s in <code>buf</code> that have * meaning. (The rest of the array is garbage and should not be * examined.)
*/ // CPPONLY: if (mViewSource) { // CPPONLY: mViewSource.SetBuffer(buffer); // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1); // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) { // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } else { // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } // [NOCPP[
pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
end); // ]NOCPP] if (pos == end) { // exiting due to end of buffer
buffer.setStart(pos);
} else {
buffer.setStart(pos + 1);
} return lastCR;
}
// [NOCPP[ privatevoid ensureBufferSpace(int inputLength) throws SAXException { // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB. // Adding to the general worst case instead of only the // TreeBuilder-exposed worst case to avoid re-introducing a bug when // unifying the tokenizer and tree builder buffers in the future. int worstCase = strBufLen + inputLength + charRefBufLen + 2;
tokenHandler.ensureBufferSpace(worstCase); if (commentPolicy == XmlViolationPolicy.ALTER_INFOSET) { // When altering infoset, if the comment contents are consecutive // hyphens, each hyphen generates a space, too. These buffer // contents never get emitted as characters() to the tokenHandler, // which is why this calculation happens after the call to // ensureBufferSpace on tokenHandler.
worstCase *= 2;
} if (strBuf == null) { // Add an arbitrary small value to avoid immediate reallocation // once there are a few characters in the buffer.
strBuf = newchar[worstCase + 128];
} elseif (worstCase > strBuf.length) { // HotSpot reportedly allocates memory with 8-byte accuracy, so // there's no point in trying to do math here to avoid slop. // Maybe we should add some small constant to worstCase here // but not doing that without profiling. In C++ with jemalloc, // the corresponding method should do math to round up here // to avoid slop. char[] newBuf = newchar[Math.max(worstCase, (strBuf.length*5)/4)];
System.arraycopy(strBuf, 0, newBuf, 0, strBufLen);
strBuf = newBuf;
}
} // ]NOCPP]
@SuppressWarnings("unused") privateint stateLoop(int state, char c, int pos, @NoLength char[] buf, boolean reconsume, int returnState, int endPos) throws SAXException { boolean reportedConsecutiveHyphens = false; /* * Idioms used in this code: * * * Consuming the next input character * * To consume the next input character, the code does this: if (++pos == * endPos) { break stateloop; } c = checkChar(buf, pos); * * * Staying in a state * * When there's a state that the tokenizer may stay in over multiple * input characters, the state has a wrapper |for(;;)| loop and staying * in the state continues the loop. * * * Switching to another state * * To switch to another state, the code sets the state variable to the * magic number of the new state. Then it either continues stateloop or * breaks out of the state's own wrapper loop if the target state is * right after the current state in source order. (This is a partial * workaround for Java's lack of goto.) * * * Reconsume support * * The spec sometimes says that an input character is reconsumed in * another state. If a state can ever be entered so that an input * character can be reconsumed in it, the state's code starts with an * |if (reconsume)| that sets reconsume to false and skips over the * normal code for consuming a new character. * * To reconsume the current character in another state, the code sets * |reconsume| to true and then switches to the other state. * * * Emitting character tokens * * This method emits character tokens lazily. Whenever a new range of * character tokens starts, the field cstart must be set to the start * index of the range. The flushChars() method must be called at the end * of a range to flush it. * * * U+0000 handling * * The various states have to handle the replacement of U+0000 with * U+FFFD. However, if U+0000 would be reconsumed in another state, the * replacement doesn't need to happen, because it's handled by the * reconsuming state. 
* * * LF handling * * Every state needs to increment the line number upon LF unless the LF * gets reconsumed by another state which increments the line number. * * * CR handling * * Every state needs to handle CR unless the CR gets reconsumed and is * handled by the reconsuming state. The CR needs to be handled as if it * were and LF, the lastCR field must be set to true and then this * method must return. The IO driver will then swallow the next * character if it is an LF to coalesce CRLF.
*/
stateloop: for (;;) { switch (state) { case DATA:
dataloop: for (;;) { if (reconsume) {
reconsume = false;
} else { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos);
} switch (c) { case'&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in data state.
*/
flushChars(buf, pos); assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
appendCharRefBuf(c);
setAdditionalAndRememberAmpersandLocation('\u0000');
returnState = state;
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); continue stateloop; case'<': /* * U+003C LESS-THAN SIGN (<) Switch to the tag * open state.
*/
flushChars(buf, pos);
state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break dataloop; case'\u0000':
maybeEmitReplacementCharacter(buf, pos); continue; case'\r':
emitCarriageReturn(buf, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; default: /* * Anything else Emit the input character as a * character token. * * Stay in the data state.
*/ continue;
}
} // CPPONLY: MOZ_FALLTHROUGH; case TAG_OPEN:
tagopenloop: for (;;) { /* * The behavior of this state depends on the content * model flag.
*/ if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * If the content model flag is set to the PCDATA state * Consume the next input character:
*/ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to U+005A * LATIN CAPITAL LETTER Z Create a new start tag * token,
*/
endTag = false; /* * set its tag name to the lowercase version of the * input character (add 0x0020 to the character's * code point),
*/
clearStrBufBeforeUse();
appendStrBuf((char) (c + 0x20));
containsHyphen = false; /* then switch to the tag name state. */
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); /* * (Don't emit the token yet; further details will * be filled in before it is emitted.)
*/ // `break` optimizes; `continue stateloop;` would be valid break tagopenloop;
} elseif (c >= 'a' && c <= 'z') { /* * U+0061 LATIN SMALL LETTER A through to U+007A * LATIN SMALL LETTER Z Create a new start tag * token,
*/
endTag = false; /* * set its tag name to the input character,
*/
clearStrBufBeforeUse();
appendStrBuf(c);
containsHyphen = false; /* then switch to the tag name state. */
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); /* * (Don't emit the token yet; further details will * be filled in before it is emitted.)
*/ // `break` optimizes; `continue stateloop;` would be valid break tagopenloop;
} switch (c) { case'!': /* * U+0021 EXCLAMATION MARK (!) Switch to the * markup declaration open state.
*/
state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos); continue stateloop; case'/': /* * U+002F SOLIDUS (/) Switch to the close tag * open state.
*/
state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos); continue stateloop; case'?': // CPPONLY: if (viewingXmlSource) { // CPPONLY: state = transition(state, // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION, // CPPONLY: reconsume, // CPPONLY: pos); // CPPONLY: continue stateloop; // CPPONLY: } /* * U+003F QUESTION MARK (?) Parse error.
*/
errProcessingInstruction(); /* * Switch to the bogus comment state.
*/
clearStrBufBeforeUse();
appendStrBuf(c);
state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Parse error.
*/
errLtGt(); /* * Emit a U+003C LESS-THAN SIGN character token * and a U+003E GREATER-THAN SIGN character * token.
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 2); /* Switch to the data state. */
cstart = pos + 1;
state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; default: /* * Anything else Parse error.
*/
errBadCharAfterLt(c); /* * Emit a U+003C LESS-THAN SIGN character token
*/
tokenHandler.characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the data state.
*/
cstart = pos;
reconsume = true;
state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop;
}
} // CPPONLY: MOZ_FALLTHROUGH; case TAG_NAME:
tagnameloop: for (;;) { if (++pos == endPos) { break stateloop;
}
c = checkChar(buf, pos); /* * Consume the next input character:
*/ switch (c) { case'\r':
silentCarriageReturn();
strBufToElementNameString();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); break stateloop; case'\n':
silentLineFeed(); // CPPONLY: MOZ_FALLTHROUGH; case' ': case'\t': case'\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state.
*/
strBufToElementNameString();
state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); // `break` optimizes; `continue stateloop;` would be valid break tagnameloop; case'/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state.
*/
strBufToElementNameString();
state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); continue stateloop; case'>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token.
*/
strBufToElementNameString();
state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); if (shouldSuspend) { break stateloop;
} /* * Switch to the data state.
*/ continue stateloop; case'\u0000':
c = '\uFFFD'; // CPPONLY: MOZ_FALLTHROUGH; default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the current input * character (add 0x0020 to the character's * code point) to the current tag token's * tag name.
*/
c += 0x20;
} elseif (c == '-') {
containsHyphen = true;
} /* * Anything else Append the current input * character to the current tag token's tag * name.
*/
appendStrBuf(c); /* * Stay in the tag name state.
*/ continue;
--> --------------------
--> maximum size reached
--> --------------------
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.25Angebot
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.