|
|
/**
 * The last two code points of every Unicode plane (U+xFFFE and U+xFFFF for
 * planes 0 through 16), i.e. 34 values inserted in ascending order.
 */
const UNDEFINED_CODE_POINTS = new Set();
for (let plane = 0; plane <= 0x10; plane++) {
    const planeStart = plane * 0x10000;
    UNDEFINED_CODE_POINTS.add(planeStart + 0xfffe);
    UNDEFINED_CODE_POINTS.add(planeStart + 0xffff);
}
/** U+FFFD REPLACEMENT CHARACTER as a one-character string. */
const REPLACEMENT_CHARACTER = String.fromCharCode(0xfffd);
/**
 * Named code points used by the tokenizer. The table is two-way: every name
 * maps to its numeric value and every numeric value maps back to its name.
 */
var CODE_POINTS;
(function (table) {
    const namedCodePoints = [
        ['EOF', -1],
        ['NULL', 0],
        ['TABULATION', 9],
        ['CARRIAGE_RETURN', 13],
        ['LINE_FEED', 10],
        ['FORM_FEED', 12],
        ['SPACE', 32],
        ['EXCLAMATION_MARK', 33],
        ['QUOTATION_MARK', 34],
        ['AMPERSAND', 38],
        ['APOSTROPHE', 39],
        ['HYPHEN_MINUS', 45],
        ['SOLIDUS', 47],
        ['DIGIT_0', 48],
        ['DIGIT_9', 57],
        ['SEMICOLON', 59],
        ['LESS_THAN_SIGN', 60],
        ['EQUALS_SIGN', 61],
        ['GREATER_THAN_SIGN', 62],
        ['QUESTION_MARK', 63],
        ['LATIN_CAPITAL_A', 65],
        ['LATIN_CAPITAL_Z', 90],
        ['RIGHT_SQUARE_BRACKET', 93],
        ['GRAVE_ACCENT', 96],
        ['LATIN_SMALL_A', 97],
        ['LATIN_SMALL_Z', 122],
    ];
    for (const [name, value] of namedCodePoints) {
        // Forward entry first (name -> value), then the reverse entry (value -> name).
        table[(table[name] = value)] = name;
    }
})(CODE_POINTS || (CODE_POINTS = {}));
/** Literal character sequences, keyed by a symbolic name. */
const SEQUENCES = {
    DASH_DASH: '--',
    CDATA_START: '[CDATA[',
    DOCTYPE: 'doctype',
    SCRIPT: 'script',
    PUBLIC: 'public',
    SYSTEM: 'system',
};
//Surrogates
/**
 * Tells whether `cp` lies in the UTF-16 surrogate range U+D800..U+DFFF.
 *
 * @param cp Code unit / code point to test.
 * @returns `true` when `cp` is inside the range (both ends inclusive).
 */
function isSurrogate(cp) {
    return 0xd800 <= cp && cp <= 0xdfff;
}
/**
 * Tells whether `cp` lies in U+DC00..U+DFFF, i.e. whether it can be the
 * second (trailing) half of a surrogate pair.
 *
 * @param cp Code unit to test.
 * @returns `true` when `cp` is inside the range (both ends inclusive).
 */
function isSurrogatePair(cp) {
    return 0xdc00 <= cp && cp <= 0xdfff;
}
/**
 * Combines the two halves of a surrogate pair into a single code point.
 *
 * @param cp1 Leading surrogate.
 * @param cp2 Trailing surrogate.
 * @returns `(cp1 - 0xD800) * 0x400 + 0x2400 + cp2`.
 */
function getSurrogatePairCodePoint(cp1, cp2) {
    const leadingBits = (cp1 - 0xd800) * 0x400;
    // 0x2400 === 0x10000 - 0xDC00: adds the supplementary-plane base and
    // removes the trailing-surrogate base in one step.
    return leadingBits + 0x2400 + cp2;
}
//NOTE: excluding NULL and ASCII whitespace
/**
 * Tells whether `cp` is a control code point: U+007F..U+009F, or
 * U+0001..U+001F other than TAB, LF, FF and CR.
 *
 * @param cp Code point to test.
 * @returns `true` for control code points, `false` otherwise.
 */
function isControlCodePoint(cp) {
    if (cp >= 0x7f && cp <= 0x9f) {
        return true;
    }
    const isInC0Range = cp >= 0x01 && cp <= 0x1f;
    if (!isInC0Range) {
        return false;
    }
    switch (cp) {
        case 0x09:
        case 0x0a:
        case 0x0c:
        case 0x0d:
        case 0x20:
            return false;
        default:
            return true;
    }
}
/**
 * Tells whether `cp` is in U+FDD0..U+FDEF or is a member of
 * `UNDEFINED_CODE_POINTS`.
 *
 * @param cp Code point to test.
 * @returns `true` when either condition holds.
 */
function isUndefinedCodePoint(cp) {
    if (cp >= 0xfdd0 && cp <= 0xfdef) {
        return true;
    }
    return UNDEFINED_CODE_POINTS.has(cp);
}
/**
 * Parse error codes. Each key is the camelCase identifier used in code and
 * each value is the kebab-case code string reported to the error handler.
 */
var ERR;
(function (codes) {
    Object.assign(codes, {
        controlCharacterInInputStream: "control-character-in-input-stream",
        noncharacterInInputStream: "noncharacter-in-input-stream",
        surrogateInInputStream: "surrogate-in-input-stream",
        nonVoidHtmlElementStartTagWithTrailingSolidus: "non-void-html-element-start-tag-with-trailing-solidus",
        endTagWithAttributes: "end-tag-with-attributes",
        endTagWithTrailingSolidus: "end-tag-with-trailing-solidus",
        unexpectedSolidusInTag: "unexpected-solidus-in-tag",
        unexpectedNullCharacter: "unexpected-null-character",
        unexpectedQuestionMarkInsteadOfTagName: "unexpected-question-mark-instead-of-tag-name",
        invalidFirstCharacterOfTagName: "invalid-first-character-of-tag-name",
        unexpectedEqualsSignBeforeAttributeName: "unexpected-equals-sign-before-attribute-name",
        missingEndTagName: "missing-end-tag-name",
        unexpectedCharacterInAttributeName: "unexpected-character-in-attribute-name",
        unknownNamedCharacterReference: "unknown-named-character-reference",
        missingSemicolonAfterCharacterReference: "missing-semicolon-after-character-reference",
        unexpectedCharacterAfterDoctypeSystemIdentifier: "unexpected-character-after-doctype-system-identifier",
        unexpectedCharacterInUnquotedAttributeValue: "unexpected-character-in-unquoted-attribute-value",
        eofBeforeTagName: "eof-before-tag-name",
        eofInTag: "eof-in-tag",
        missingAttributeValue: "missing-attribute-value",
        missingWhitespaceBetweenAttributes: "missing-whitespace-between-attributes",
        missingWhitespaceAfterDoctypePublicKeyword: "missing-whitespace-after-doctype-public-keyword",
        missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers: "missing-whitespace-between-doctype-public-and-system-identifiers",
        missingWhitespaceAfterDoctypeSystemKeyword: "missing-whitespace-after-doctype-system-keyword",
        missingQuoteBeforeDoctypePublicIdentifier: "missing-quote-before-doctype-public-identifier",
        missingQuoteBeforeDoctypeSystemIdentifier: "missing-quote-before-doctype-system-identifier",
        missingDoctypePublicIdentifier: "missing-doctype-public-identifier",
        missingDoctypeSystemIdentifier: "missing-doctype-system-identifier",
        abruptDoctypePublicIdentifier: "abrupt-doctype-public-identifier",
        abruptDoctypeSystemIdentifier: "abrupt-doctype-system-identifier",
        cdataInHtmlContent: "cdata-in-html-content",
        incorrectlyOpenedComment: "incorrectly-opened-comment",
        eofInScriptHtmlCommentLikeText: "eof-in-script-html-comment-like-text",
        eofInDoctype: "eof-in-doctype",
        nestedComment: "nested-comment",
        abruptClosingOfEmptyComment: "abrupt-closing-of-empty-comment",
        eofInComment: "eof-in-comment",
        incorrectlyClosedComment: "incorrectly-closed-comment",
        eofInCdata: "eof-in-cdata",
        absenceOfDigitsInNumericCharacterReference: "absence-of-digits-in-numeric-character-reference",
        nullCharacterReference: "null-character-reference",
        surrogateCharacterReference: "surrogate-character-reference",
        characterReferenceOutsideUnicodeRange: "character-reference-outside-unicode-range",
        controlCharacterReference: "control-character-reference",
        noncharacterCharacterReference: "noncharacter-character-reference",
        missingWhitespaceBeforeDoctypeName: "missing-whitespace-before-doctype-name",
        missingDoctypeName: "missing-doctype-name",
        invalidCharacterSequenceAfterDoctypeName: "invalid-character-sequence-after-doctype-name",
        duplicateAttribute: "duplicate-attribute",
        nonConformingDoctype: "non-conforming-doctype",
        missingDoctype: "missing-doctype",
        misplacedDoctype: "misplaced-doctype",
        endTagWithoutMatchingOpenElement: "end-tag-without-matching-open-element",
        closingOfElementWithOpenChildElements: "closing-of-element-with-open-child-elements",
        disallowedContentInNoscriptInHead: "disallowed-content-in-noscript-in-head",
        openElementsLeftAfterEof: "open-elements-left-after-eof",
        abandonedHeadElementChild: "abandoned-head-element-child",
        misplacedStartTagForHeadElement: "misplaced-start-tag-for-head-element",
        nestedNoscriptInHead: "nested-noscript-in-head",
        eofInElementThatCanContainOnlyText: "eof-in-element-that-can-contain-only-text",
    });
})(ERR || (ERR = {}));
//Const
/** Default `bufferWaterline` of a new Preprocessor: 65536 (1 << 16). */
const DEFAULT_BUFFER_WATERLINE = 0x10000;
//Preprocessor
//NOTE: HTML input preprocessing
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
/**
 * Cursor over buffered HTML text. It hands out one code point at a time,
 * normalises CR and CR LF to a single LF, merges surrogate pairs, tracks
 * line/column/offset, and reports problematic characters to
 * `handler.onParseError` when that callback is set.
 */
class Preprocessor {
    /**
     * @param handler Object whose optional `onParseError` callback receives error records.
     */
    constructor(handler) {
        this.handler = handler;
        this.html = '';
        this.pos = -1;
        // NOTE: Initial `lastGapPos` is -2, to ensure `col` on initialisation is 0
        this.lastGapPos = -2;
        this.gapStack = [];
        this.skipNextNewLine = false;
        this.lastChunkWritten = false;
        this.endOfChunkHit = false;
        this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
        this.isEol = false;
        this.lineStartPos = 0;
        this.droppedBufferSize = 0;
        this.line = 1;
        //NOTE: avoid reporting errors twice on advance/retreat
        this.lastErrOffset = -1;
    }
    /** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */
    get col() {
        const sittingOnGap = this.lastGapPos === this.pos;
        return this.pos - this.lineStartPos + (sittingOnGap ? 0 : 1);
    }
    /** Position in the whole input: the size of the dropped prefix plus the position in the current buffer. */
    get offset() {
        return this.droppedBufferSize + this.pos;
    }
    /**
     * Builds a zero-width error record located at the current position
     * shifted by `cpOffset`.
     *
     * @param code Error code string.
     * @param cpOffset Shift applied to both the column and the offset.
     * @returns Record with `code` and identical start/end line, column and offset.
     */
    getError(code, cpOffset) {
        const currentLine = this.line;
        const startCol = this.col + cpOffset;
        const startOffset = this.offset + cpOffset;
        return {
            code,
            startLine: currentLine,
            endLine: currentLine,
            startCol,
            endCol: startCol,
            startOffset,
            endOffset: startOffset,
        };
    }
    /**
     * Reports `code` at the current position, unless there is no
     * `onParseError` callback or an error was already reported at this offset.
     */
    _err(code) {
        if (!this.handler.onParseError || this.lastErrOffset === this.offset) {
            return;
        }
        this.lastErrOffset = this.offset;
        this.handler.onParseError(this.getError(code, 0));
    }
    /** Saves the previous gap position on the stack and marks the current position as the latest gap. */
    _addGap() {
        this.gapStack.push(this.lastGapPos);
        this.lastGapPos = this.pos;
    }
    /**
     * Handles a surrogate code unit found at the current position.
     *
     * @param cp The surrogate code unit at `this.pos`.
     * @returns The combined code point when a trailing surrogate follows, EOF
     *   when more input is needed to decide, otherwise `cp` itself (after
     *   reporting an isolated surrogate).
     */
    _processSurrogate(cp) {
        const isLastBufferedUnit = this.pos === this.html.length - 1;
        if (isLastBufferedUnit) {
            //NOTE: we are at the end of a chunk, therefore we can't infer the surrogate pair yet.
            if (!this.lastChunkWritten) {
                this.endOfChunkHit = true;
                return CODE_POINTS.EOF;
            }
        }
        else {
            //NOTE: try to peek a surrogate pair
            const trailing = this.html.charCodeAt(this.pos + 1);
            if (isSurrogatePair(trailing)) {
                //NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
                this.pos += 1;
                //NOTE: add a gap that should be avoided during retreat
                this._addGap();
                return getSurrogatePairCodePoint(cp, trailing);
            }
        }
        //NOTE: isolated surrogate
        this._err(ERR.surrogateInInputStream);
        return cp;
    }
    /** @returns Whether the current position is past `bufferWaterline`. */
    willDropParsedChunk() {
        return this.pos > this.bufferWaterline;
    }
    /**
     * When past the waterline, discards the buffer before the current
     * position and rebases the position bookkeeping so that `offset` and
     * `col` keep their values. Gap history is reset.
     */
    dropParsedChunk() {
        if (!this.willDropParsedChunk()) {
            return;
        }
        const droppedLength = this.pos;
        this.html = this.html.substring(droppedLength);
        this.lineStartPos -= droppedLength;
        this.droppedBufferSize += droppedLength;
        this.pos = 0;
        this.lastGapPos = -2;
        this.gapStack.length = 0;
    }
    /**
     * Appends `chunk` to the buffer (or adopts it as the buffer when the
     * buffer is empty) and clears the end-of-chunk flag.
     *
     * @param chunk Text to append.
     * @param isLastChunk Whether no further chunks will follow.
     */
    write(chunk, isLastChunk) {
        this.html = this.html.length > 0 ? this.html + chunk : chunk;
        this.endOfChunkHit = false;
        this.lastChunkWritten = isLastChunk;
    }
    /**
     * Inserts `chunk` right after the current position and clears the
     * end-of-chunk flag.
     */
    insertHtmlAtCurrentPos(chunk) {
        const splitAt = this.pos + 1;
        const head = this.html.substring(0, splitAt);
        const tail = this.html.substring(splitAt);
        this.html = head + chunk + tail;
        this.endOfChunkHit = false;
    }
    /**
     * Tests whether the buffer matches `pattern` at the current position.
     *
     * @param pattern Text to look for. In the case-insensitive mode each
     *   buffered code unit is OR-ed with 0x20 before being compared to the
     *   pattern's code unit.
     * @param caseSensitive Whether to compare exactly.
     * @returns `true` on a match. Returns `false` (and flags the end of the
     *   chunk unless the last chunk was written) when too little is buffered.
     */
    startsWith(pattern, caseSensitive) {
        // Check if our buffer has enough characters
        if (this.pos + pattern.length > this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return false;
        }
        if (caseSensitive) {
            return this.html.startsWith(pattern, this.pos);
        }
        let idx = 0;
        while (idx < pattern.length) {
            const lowered = this.html.charCodeAt(this.pos + idx) | 0x20;
            if (lowered !== pattern.charCodeAt(idx)) {
                return false;
            }
            idx++;
        }
        return true;
    }
    /**
     * Reads the code unit `offset` positions ahead without moving.
     *
     * @returns The code unit (CR is reported as LF), or EOF when the position
     *   is beyond the buffer; in that case the end of the chunk is flagged
     *   unless the last chunk was written.
     */
    peek(offset) {
        const target = this.pos + offset;
        if (target >= this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return CODE_POINTS.EOF;
        }
        const unit = this.html.charCodeAt(target);
        if (unit === CODE_POINTS.CARRIAGE_RETURN) {
            return CODE_POINTS.LINE_FEED;
        }
        return unit;
    }
    /**
     * Moves to the next code point and returns it.
     *
     * @returns The code point, or EOF when the buffer is exhausted.
     */
    advance() {
        this.pos++;
        //NOTE: LF should be in the last column of the line
        if (this.isEol) {
            this.isEol = false;
            this.line++;
            this.lineStartPos = this.pos;
        }
        if (this.pos >= this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return CODE_POINTS.EOF;
        }
        let current = this.html.charCodeAt(this.pos);
        //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
        if (current === CODE_POINTS.CARRIAGE_RETURN) {
            this.isEol = true;
            this.skipNextNewLine = true;
            return CODE_POINTS.LINE_FEED;
        }
        //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
        //must be ignored.
        if (current === CODE_POINTS.LINE_FEED) {
            this.isEol = true;
            if (this.skipNextNewLine) {
                // `line` will be bumped again in the recursive call.
                this.line--;
                this.skipNextNewLine = false;
                this._addGap();
                return this.advance();
            }
        }
        this.skipNextNewLine = false;
        if (isSurrogate(current)) {
            current = this._processSurrogate(current);
        }
        //OPTIMIZATION: first check if code point is in the common allowed
        //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
        //before going into detailed performance cost validation.
        const needsNoValidation = this.handler.onParseError === null ||
            (current > 0x1f && current < 0x7f) ||
            current === CODE_POINTS.LINE_FEED ||
            current === CODE_POINTS.CARRIAGE_RETURN ||
            (current > 0x9f && current < 0xfdd0);
        if (!needsNoValidation) {
            this._checkForProblematicCharacters(current);
        }
        return current;
    }
    /** Reports a control character or, failing that, a noncharacter found in the input. */
    _checkForProblematicCharacters(cp) {
        if (isControlCodePoint(cp)) {
            this._err(ERR.controlCharacterInInputStream);
            return;
        }
        if (isUndefinedCodePoint(cp)) {
            this._err(ERR.noncharacterInInputStream);
        }
    }
    /**
     * Moves back by `count` positions, taking one extra step for every
     * recorded gap that is crossed, and clears the end-of-line flag.
     */
    retreat(count) {
        this.pos -= count;
        for (; this.pos < this.lastGapPos; this.pos--) {
            this.lastGapPos = this.gapStack.pop();
        }
        this.isEol = false;
    }
}
/**
 * Kinds of tokens. The table is two-way: every name maps to its ordinal and
 * every ordinal maps back to its name.
 */
var TokenType;
(function (kinds) {
    const orderedNames = [
        "CHARACTER",
        "NULL_CHARACTER",
        "WHITESPACE_CHARACTER",
        "START_TAG",
        "END_TAG",
        "COMMENT",
        "DOCTYPE",
        "EOF",
        "HIBERNATION",
    ];
    orderedNames.forEach((label, ordinal) => {
        kinds[(kinds[label] = ordinal)] = label;
    });
})(TokenType || (TokenType = {}));
/**
 * Looks up an attribute value on a token, scanning `token.attrs` from the
 * end so that the last attribute with a matching name wins.
 *
 * @param token Object with an `attrs` array of `{ name, value }` entries.
 * @param attrName Attribute name to look for (compared with `===`).
 * @returns The matching attribute's value, or `null` when there is no match.
 */
function getTokenAttr(token, attrName) {
    let idx = token.attrs.length;
    while (idx-- > 0) {
        const candidate = token.attrs[idx];
        if (candidate.name === attrName) {
            return candidate.value;
        }
    }
    return null;
}
// Generated using scripts/write-decode-map.ts
var htmlDecodeTree = new Uint16Array( // prettier-ignore
"\u1d41<\xd5\u0131\u028a\u049d\u057b\u05d0\u0675\u06de\u07a2\u07d6\u080f\u0a4a\u0a91\u0da1\u0e6d\u0f09\u0f26\u10ca\u1228\u12e1\u1415\u149d\u14c3\u14df\u1525\0\0\0\0\0\0\u156b\u16cd\u198d\u1c12\u1ddd\u1f7e\u2060\u21b0\u228d\u23c0\u23fb\u2442\u2824\u2912\u2d08\u2e48\u2fce\u3016\u32ba\u3639\u37ac\u38fe\u3a28\u3a71\u3ae0\u3b2e\u0800EMabcfglmnoprstu\\bfms\x7f\x84\x8b\x90\x95\x98\xa6\xb3\xb9\xc8\xcflig\u803b\xc6\u40c6P\u803b&\u4026cute\u803b\xc1\u40c1reve;\u4102\u0100iyx}rc\u803b\xc2\u40c2;\u4410r;\uc000\ud835\udd04rave\u803b\xc0\u40c0pha;\u4391acr;\u4100d;\u6a53\u0100gp\x9d\xa1on;\u4104f;\uc000\ud835\udd38plyFunction;\u6061ing\u803b\xc5\u40c5\u0100cs\xbe\xc3r;\uc000\ud835\udc9cign;\u6254ilde\u803b\xc3\u40c3ml\u803b\xc4\u40c4\u0400aceforsu\xe5\xfb\xfe\u0117\u011c\u0122\u0127\u012a\u0100cr\xea\xf2kslash;\u6216\u0176\xf6\xf8;\u6ae7ed;\u6306y;\u4411\u0180crt\u0105\u010b\u0114ause;\u6235noullis;\u612ca;\u4392r;\uc000\ud835\udd05pf;\uc000\ud835\udd39eve;\u42d8c\xf2\u0113mpeq;\u624e\u0700HOacdefhilorsu\u014d\u0151\u0156\u0180\u019e\u01a2\u01b5\u01b7\u01ba\u01dc\u0215\u0273\u0278\u027ecy;\u4427PY\u803b\xa9\u40a9\u0180cpy\u015d\u0162\u017aute;\u4106\u0100;i\u0167\u0168\u62d2talDifferentialD;\u6145leys;\u612d\u0200aeio\u0189\u018e\u0194\u0198ron;\u410cdil\u803b\xc7\u40c7rc;\u4108nint;\u6230ot;\u410a\u0100dn\u01a7\u01adilla;\u40b8terDot;\u40b7\xf2\u017fi;\u43a7rcle\u0200DMPT\u01c7\u01cb\u01d1\u01d6ot;\u6299inus;\u6296lus;\u6295imes;\u6297o\u0100cs\u01e2\u01f8kwiseContourIntegral;\u6232eCurly\u0100DQ\u0203\u020foubleQuote;\u601duote;\u6019\u0200lnpu\u021e\u0228\u0247\u0255on\u0100;e\u0225\u0226\u6237;\u6a74\u0180git\u022f\u0236\u023aruent;\u6261nt;\u622fourIntegral;\u622e\u0100fr\u024c\u024e;\u6102oduct;\u6210nterClockwiseContourIntegral;\u6233oss;\u6a2fcr;\uc000\ud835\udc9ep\u0100;C\u0284\u0285\u62d3ap;\u624d\u0580DJSZacefios\u02a0\u02ac\u02b0\u02b4\u02b8\u02cb\u02d7\u02e1\u02e6\u0333\u048d\u0100;o\u0179\u02a5trahd;\u6911cy;\u4402cy;\u4405cy;\u440f\u0180grs\u02bf\u02c4\u02c7ger;\u6
021r;\u61a1hv;\u6ae4\u0100ay\u02d0\u02d5ron;\u410e;\u4414l\u0100;t\u02dd\u02de\u6207a;\u4394r;\uc000\ud835\udd07\u0100af\u02eb\u0327\u0100cm\u02f0\u0322ritical\u0200ADGT\u0300\u0306\u0316\u031ccute;\u40b4o\u0174\u030b\u030d;\u42d9bleAcute;\u42ddrave;\u4060ilde;\u42dcond;\u62c4ferentialD;\u6146\u0470\u033d\0\0\0\u0342\u0354\0\u0405f;\uc000\ud835\udd3b\u0180;DE\u0348\u0349\u034d\u40a8ot;\u60dcqual;\u6250ble\u0300CDLRUV\u0363\u0372\u0382\u03cf\u03e2\u03f8ontourIntegra\xec\u0239o\u0274\u0379\0\0\u037b\xbb\u0349nArrow;\u61d3\u0100eo\u0387\u03a4ft\u0180ART\u0390\u0396\u03a1rrow;\u61d0ightArrow;\u61d4e\xe5\u02cang\u0100LR\u03ab\u03c4eft\u0100AR\u03b3\u03b9rrow;\u67f8ightArrow;\u67faightArrow;\u67f9ight\u0100AT\u03d8\u03derrow;\u61d2ee;\u62a8p\u0241\u03e9\0\0\u03efrrow;\u61d1ownArrow;\u61d5erticalBar;\u6225n\u0300ABLRTa\u0412\u042a\u0430\u045e\u047f\u037crrow\u0180;BU\u041d\u041e\u0422\u6193ar;\u6913pArrow;\u61f5reve;\u4311eft\u02d2\u043a\0\u0446\0\u0450ightVector;\u6950eeVector;\u695eector\u0100;B\u0459\u045a\u61bdar;\u6956ight\u01d4\u0467\0\u0471eeVector;\u695fector\u0100;B\u047a\u047b\u61c1ar;\u6957ee\u0100;A\u0486\u0487\u62a4rrow;\u61a7\u0100ct\u0492\u0497r;\uc000\ud835\udc9frok;\u4110\u0800NTacdfglmopqstux\u04bd\u04c0\u04c4\u04cb\u04de\u04e2\u04e7\u04ee\u04f5\u0521\u052f\u0536\u0552\u055d\u0560\u0565G;\u414aH\u803b\xd0\u40d0cute\u803b\xc9\u40c9\u0180aiy\u04d2\u04d7\u04dcron;\u411arc\u803b\xca\u40ca;\u442dot;\u4116r;\uc000\ud835\udd08rave\u803b\xc8\u40c8ement;\u6208\u0100ap\u04fa\u04fecr;\u4112ty\u0253\u0506\0\0\u0512mallSquare;\u65fberySmallSquare;\u65ab\u0100gp\u0526\u052aon;\u4118f;\uc000\ud835\udd3csilon;\u4395u\u0100ai\u053c\u0549l\u0100;T\u0542\u0543\u6a75ilde;\u6242librium;\u61cc\u0100ci\u0557\u055ar;\u6130m;\u6a73a;\u4397ml\u803b\xcb\u40cb\u0100ip\u056a\u056fsts;\u6203onentialE;\u6147\u0280cfios\u0585\u0588\u058d\u05b2\u05ccy;\u4424r;\uc000\ud835\udd09lled\u0253\u0597\0\0\u05a3mallSquare;\u65fcerySmallSquare;\u65aa\u0370\u05ba\0\u05bf\0\0\u05c4f;\uc000\ud835\udd
3dAll;\u6200riertrf;\u6131c\xf2\u05cb\u0600JTabcdfgorst\u05e8\u05ec\u05ef\u05fa\u0600\u0612\u06 .split("") .map((c) => c.charCodeAt(0)));
// Generated using scripts/write-decode-map.ts
// Builds a Uint16Array holding the UTF-16 code unit of every character of the
// string literal below.
// NOTE(review): the array is neither assigned nor exported here, so the value
// is discarded as soon as it is built — presumably a bundling leftover;
// confirm before removing.
new Uint16Array(
// prettier-ignore
"\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
    .split("")
    .map((c) => c.charCodeAt(0)));
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
/**
 * Code points that are swapped for another code point when they appear as a
 * numeric character reference.
 */
const decodeMap = new Map([
    [0x00, 0xfffd],
    // C1 Unicode control character reference replacements
    [0x80, 0x20ac],
    [0x82, 0x201a],
    [0x83, 0x0192],
    [0x84, 0x201e],
    [0x85, 0x2026],
    [0x86, 0x2020],
    [0x87, 0x2021],
    [0x88, 0x02c6],
    [0x89, 0x2030],
    [0x8a, 0x0160],
    [0x8b, 0x2039],
    [0x8c, 0x0152],
    [0x8e, 0x017d],
    [0x91, 0x2018],
    [0x92, 0x2019],
    [0x93, 0x201c],
    [0x94, 0x201d],
    [0x95, 0x2022],
    [0x96, 0x2013],
    [0x97, 0x2014],
    [0x98, 0x02dc],
    [0x99, 0x2122],
    [0x9a, 0x0161],
    [0x9b, 0x203a],
    [0x9c, 0x0153],
    [0x9e, 0x017e],
    [0x9f, 0x0178],
]);
/**
 * Maps a decoded numeric reference to the code point that should be emitted.
 *
 * @param codePoint The numeric value of the reference.
 * @returns U+FFFD for surrogates (U+D800..U+DFFF) and for values above
 *   U+10FFFF; the `decodeMap` replacement when one exists; otherwise
 *   `codePoint` unchanged.
 */
function replaceCodePoint(codePoint) {
    const isSurrogateValue = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (isSurrogateValue || codePoint > 0x10ffff) {
        return 0xfffd;
    }
    const replacement = decodeMap.get(codePoint);
    if (replacement === null || replacement === undefined) {
        return codePoint;
    }
    return replacement;
}
/**
 * Character codes the entity decoder compares its input against. Two-way
 * table: name -> code and code -> name.
 */
var CharCodes;
(function (codes) {
    const namedCodes = [
        ["NUM", 35],
        ["SEMI", 59],
        ["EQUALS", 61],
        ["ZERO", 48],
        ["NINE", 57],
        ["LOWER_A", 97],
        ["LOWER_F", 102],
        ["LOWER_X", 120],
        ["LOWER_Z", 122],
        ["UPPER_A", 65],
        ["UPPER_F", 70],
        ["UPPER_Z", 90],
    ];
    for (const [label, code] of namedCodes) {
        codes[(codes[label] = code)] = label;
    }
})(CharCodes || (CharCodes = {}));
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
const TO_LOWER_BIT = 0x20;
/**
 * Bit masks applied to a decode-tree node header: bits 14-15 hold the value
 * length, bits 7-13 the branch count, bits 0-6 the jump offset.
 */
var BinTrieFlags;
(function (flags) {
    const namedMasks = [
        ["VALUE_LENGTH", 0xc000],
        ["BRANCH_LENGTH", 0x3f80],
        ["JUMP_TABLE", 0x007f],
    ];
    for (const [label, mask] of namedMasks) {
        flags[(flags[label] = mask)] = label;
    }
})(BinTrieFlags || (BinTrieFlags = {}));
/** @returns Whether `code` is an ASCII digit (`0`-`9`). */
function isNumber(code) {
    return CharCodes.ZERO <= code && code <= CharCodes.NINE;
}
/** @returns Whether `code` is one of the letters `A`-`F` or `a`-`f` (digits are not covered). */
function isHexadecimalCharacter(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) {
        return true;
    }
    return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
/** @returns Whether `code` is an ASCII letter or an ASCII digit. */
function isAsciiAlphaNumeric$1(code) {
    const isUpper = code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z;
    const isLower = code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z;
    return isUpper || isLower || isNumber(code);
}
/**
 * Checks if the given character is a valid end character for an entity in an attribute.
 *
 * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
 * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 *
 * @returns `true` when `code` is `=` or an ASCII alphanumeric character.
 */
function isEntityInAttributeInvalidEnd(code) {
    if (code === CharCodes.EQUALS) {
        return true;
    }
    return isAsciiAlphaNumeric$1(code);
}
/** States of the entity decoder's state machine (two-way table). */
var EntityDecoderState;
(function (states) {
    ["EntityStart", "NumericStart", "NumericDecimal", "NumericHex", "NamedEntity"].forEach((label, ordinal) => {
        states[(states[label] = ordinal)] = label;
    });
})(EntityDecoderState || (EntityDecoderState = {}));
/**
 * Decoding modes (two-way table):
 * - `Legacy` (0): entities in text nodes that can end with any character.
 * - `Strict` (1): only allow entities terminated with a semicolon.
 * - `Attribute` (2): entities in attributes have limitations on ending characters.
 */
var DecodingMode;
(function (modes) {
    ["Legacy", "Strict", "Attribute"].forEach((label, ordinal) => {
        modes[(modes[label] = ordinal)] = label;
    });
})(DecodingMode || (DecodingMode = {}));
/**
 * Token decoder with support of writing partial entities.
 */
class EntityDecoder {
    /**
     * @param decodeTree The tree used to decode entities.
     * @param emitCodePoint Called as `emitCodePoint(codepoint, consumed)` for
     *   every decoded code point. A named entity whose value consists of two
     *   code points triggers two calls with the same `consumed` value.
     * @param errors Optional object whose methods are called to report errors.
     */
    constructor(decodeTree, emitCodePoint, errors) {
        this.decodeTree = decodeTree;
        this.emitCodePoint = emitCodePoint;
        this.errors = errors;
        /** The current state of the decoder. */
        this.state = EntityDecoderState.EntityStart;
        /** Characters that were consumed while parsing an entity. */
        this.consumed = 1;
        /**
         * For named entities: the decode-tree index of the last recorded
         * match (0 while there is none). For numeric entities: the value
         * accumulated from the digits read so far.
         */
        this.result = 0;
        /** The current index in the decode tree. */
        this.treeIndex = 0;
        /**
         * Characters read since the last recorded match; added to `consumed`
         * whenever a match is recorded.
         */
        this.excess = 1;
        /** The mode in which the decoder is operating. */
        this.decodeMode = DecodingMode.Strict;
    }
    /**
     * Resets the instance to make it reusable.
     *
     * @param decodeMode The mode to decode the next entity in.
     */
    startEntity(decodeMode) {
        this.decodeMode = decodeMode;
        this.state = EntityDecoderState.EntityStart;
        this.result = 0;
        this.treeIndex = 0;
        this.excess = 1;
        this.consumed = 1;
    }
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(str, offset) {
        const { state } = this;
        if (state === EntityDecoderState.EntityStart) {
            if (str.charCodeAt(offset) === CharCodes.NUM) {
                this.state = EntityDecoderState.NumericStart;
                this.consumed += 1;
                return this.stateNumericStart(str, offset + 1);
            }
            this.state = EntityDecoderState.NamedEntity;
            return this.stateNamedEntity(str, offset);
        }
        if (state === EntityDecoderState.NumericStart) {
            return this.stateNumericStart(str, offset);
        }
        if (state === EntityDecoderState.NumericDecimal) {
            return this.stateNumericDecimal(str, offset);
        }
        if (state === EntityDecoderState.NumericHex) {
            return this.stateNumericHex(str, offset);
        }
        if (state === EntityDecoderState.NamedEntity) {
            return this.stateNamedEntity(str, offset);
        }
        return undefined;
    }
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericStart(str, offset) {
        if (offset >= str.length) {
            return -1;
        }
        const loweredCode = str.charCodeAt(offset) | TO_LOWER_BIT;
        if (loweredCode === CharCodes.LOWER_X) {
            this.state = EntityDecoderState.NumericHex;
            this.consumed += 1;
            return this.stateNumericHex(str, offset + 1);
        }
        this.state = EntityDecoderState.NumericDecimal;
        return this.stateNumericDecimal(str, offset);
    }
    /**
     * Folds the digits `str[start..end)` into `result` and counts them as
     * consumed. Does nothing for an empty range.
     *
     * @param str The string holding the digits.
     * @param start Index of the first digit.
     * @param end Index right after the last digit.
     * @param base Radix of the digits.
     */
    addToNumericResult(str, start, end, base) {
        if (start === end) {
            return;
        }
        const digitCount = end - start;
        const digits = str.slice(start, end);
        this.result = this.result * Math.pow(base, digitCount) + parseInt(digits, base);
        this.consumed += digitCount;
    }
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericHex(str, offset) {
        let cursor = offset;
        for (; cursor < str.length; cursor++) {
            const code = str.charCodeAt(cursor);
            if (!isNumber(code) && !isHexadecimalCharacter(code)) {
                this.addToNumericResult(str, offset, cursor, 16);
                return this.emitNumericEntity(code, 3);
            }
        }
        // Ran out of input: keep what was read and wait for more.
        this.addToNumericResult(str, offset, cursor, 16);
        return -1;
    }
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericDecimal(str, offset) {
        let cursor = offset;
        for (; cursor < str.length; cursor++) {
            const code = str.charCodeAt(cursor);
            if (!isNumber(code)) {
                this.addToNumericResult(str, offset, cursor, 10);
                return this.emitNumericEntity(code, 2);
            }
        }
        // Ran out of input: keep what was read and wait for more.
        this.addToNumericResult(str, offset, cursor, 10);
        return -1;
    }
    /**
     * Validate and emit a numeric entity.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *   entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *   consumed. Used to validate that at least one digit was consumed.
     * @returns The number of characters that were consumed.
     */
    emitNumericEntity(lastCp, expectedLength) {
        const { errors } = this;
        // Ensure we consumed at least one digit.
        if (this.consumed <= expectedLength) {
            if (errors !== null && errors !== undefined) {
                errors.absenceOfDigitsInNumericCharacterReference(this.consumed);
            }
            return 0;
        }
        // Figure out if this is a legit end of the entity
        const endsWithSemicolon = lastCp === CharCodes.SEMI;
        if (endsWithSemicolon) {
            this.consumed += 1;
        }
        else if (this.decodeMode === DecodingMode.Strict) {
            return 0;
        }
        this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
        if (this.errors) {
            if (!endsWithSemicolon) {
                this.errors.missingSemicolonAfterCharacterReference();
            }
            this.errors.validateNumericCharacterReference(this.result);
        }
        return this.consumed;
    }
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNamedEntity(str, offset) {
        const tree = this.decodeTree;
        let node = tree[this.treeIndex];
        // The mask is the number of bytes of the value, including the current byte.
        let valueLength = (node & BinTrieFlags.VALUE_LENGTH) >> 14;
        while (offset < str.length) {
            const char = str.charCodeAt(offset);
            this.treeIndex = determineBranch(tree, node, this.treeIndex + Math.max(1, valueLength), char);
            if (this.treeIndex < 0) {
                // No branch for this character. Without a recorded match there is nothing to emit.
                if (this.result === 0) {
                    return 0;
                }
                // If we are parsing an attribute, we shouldn't have consumed any
                // characters after the entity, and there should be no invalid characters.
                const rejectedInAttribute = this.decodeMode === DecodingMode.Attribute &&
                    (valueLength === 0 || isEntityInAttributeInvalidEnd(char));
                return rejectedInAttribute ? 0 : this.emitNotTerminatedNamedEntity();
            }
            node = tree[this.treeIndex];
            valueLength = (node & BinTrieFlags.VALUE_LENGTH) >> 14;
            // If the branch is a value, store it and continue
            if (valueLength !== 0) {
                // If the entity is terminated by a semicolon, we are done.
                if (char === CharCodes.SEMI) {
                    return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
                }
                // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
                if (this.decodeMode !== DecodingMode.Strict) {
                    this.result = this.treeIndex;
                    this.consumed += this.excess;
                    this.excess = 0;
                }
            }
            offset++;
            this.excess++;
        }
        return -1;
    }
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    emitNotTerminatedNamedEntity() {
        const matchIndex = this.result;
        const valueLength = (this.decodeTree[matchIndex] & BinTrieFlags.VALUE_LENGTH) >> 14;
        this.emitNamedEntityData(matchIndex, valueLength, this.consumed);
        const { errors } = this;
        if (errors !== null && errors !== undefined) {
            errors.missingSemicolonAfterCharacterReference();
        }
        return this.consumed;
    }
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The value length read from the node header (1, 2 or 3).
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    emitNamedEntityData(result, valueLength, consumed) {
        const tree = this.decodeTree;
        let firstCodePoint;
        if (valueLength === 1) {
            // The value shares the header word; strip the length bits.
            firstCodePoint = tree[result] & ~BinTrieFlags.VALUE_LENGTH;
        }
        else {
            firstCodePoint = tree[result + 1];
        }
        this.emitCodePoint(firstCodePoint, consumed);
        if (valueLength === 3) {
            // For multi-byte values, we need to emit the second byte.
            this.emitCodePoint(tree[result + 2], consumed);
        }
        return consumed;
    }
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end() {
        const { state } = this;
        if (state === EntityDecoderState.NamedEntity) {
            // Emit a named entity if we have one.
            const hasMatch = this.result !== 0;
            const isAllowedByMode = this.decodeMode !== DecodingMode.Attribute || this.result === this.treeIndex;
            return hasMatch && isAllowedByMode ? this.emitNotTerminatedNamedEntity() : 0;
        }
        // Otherwise, emit a numeric entity if we have one.
        if (state === EntityDecoderState.NumericDecimal) {
            return this.emitNumericEntity(0, 2);
        }
        if (state === EntityDecoderState.NumericHex) {
            return this.emitNumericEntity(0, 3);
        }
        if (state === EntityDecoderState.NumericStart) {
            const { errors } = this;
            if (errors !== null && errors !== undefined) {
                errors.absenceOfDigitsInNumericCharacterReference(this.consumed);
            }
            return 0;
        }
        if (state === EntityDecoderState.EntityStart) {
            // Return 0 if we have no entity.
            return 0;
        }
        return undefined;
    }
}
/**
 * Determines the branch of the current node that is taken given the current
 * character. This function is used to traverse the trie.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIdx The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
function determineBranch(decodeTree, current, nodeIdx, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        const takesOnlyBranch = jumpOffset !== 0 && char === jumpOffset;
        return takesOnlyBranch ? nodeIdx : -1;
    }
    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        const slot = char - jumpOffset;
        if (slot < 0 || slot >= branchCount) {
            return -1;
        }
        return decodeTree[nodeIdx + slot] - 1;
    }
    // Case 3: Multiple branches encoded in dictionary
    // Binary search for the character.
    let low = nodeIdx;
    let high = low + branchCount - 1;
    while (low <= high) {
        const middle = (low + high) >>> 1;
        const key = decodeTree[middle];
        if (key === char) {
            return decodeTree[middle + branchCount];
        }
        if (key < char) {
            low = middle + 1;
        }
        else {
            high = middle - 1;
        }
    }
    return -1;
}
/** Namespace URIs that are valid in HTML. */
var NS = {
    HTML: 'http://www.w3.org/1999/xhtml',
    MATHML: 'http://www.w3.org/1998/Math/MathML',
    SVG: 'http://www.w3.org/2000/svg',
    XLINK: 'http://www.w3.org/1999/xlink',
    XML: 'http://www.w3.org/XML/1998/namespace',
    XMLNS: 'http://www.w3.org/2000/xmlns/',
};
/** Attribute-name constants. */
var ATTRS = {
    TYPE: 'type',
    ACTION: 'action',
    ENCODING: 'encoding',
    PROMPT: 'prompt',
    NAME: 'name',
    COLOR: 'color',
    FACE: 'face',
    SIZE: 'size',
};
/**
 * The mode of the document.
 *
 * @see {@link https://dom.spec.whatwg.org/#concept-document-limited-quirks}
 */
var DOCUMENT_MODE = {
    NO_QUIRKS: 'no-quirks',
    QUIRKS: 'quirks',
    LIMITED_QUIRKS: 'limited-quirks',
};
/**
 * Known tag names, keyed by constant name.
 *
 * The entry order is significant: `TAG_ID` and `TAG_NAME_TO_ID` below are
 * derived from it, so new entries must not be inserted in the middle.
 */
var TAG_NAMES = {
    A: 'a', ADDRESS: 'address', ANNOTATION_XML: 'annotation-xml', APPLET: 'applet', AREA: 'area',
    ARTICLE: 'article', ASIDE: 'aside',
    B: 'b', BASE: 'base', BASEFONT: 'basefont', BGSOUND: 'bgsound', BIG: 'big',
    BLOCKQUOTE: 'blockquote', BODY: 'body', BR: 'br', BUTTON: 'button',
    CAPTION: 'caption', CENTER: 'center', CODE: 'code', COL: 'col', COLGROUP: 'colgroup',
    DD: 'dd', DESC: 'desc', DETAILS: 'details', DIALOG: 'dialog', DIR: 'dir', DIV: 'div',
    DL: 'dl', DT: 'dt',
    EM: 'em', EMBED: 'embed',
    FIELDSET: 'fieldset', FIGCAPTION: 'figcaption', FIGURE: 'figure', FONT: 'font', FOOTER: 'footer',
    FOREIGN_OBJECT: 'foreignObject', FORM: 'form', FRAME: 'frame', FRAMESET: 'frameset',
    H1: 'h1', H2: 'h2', H3: 'h3', H4: 'h4', H5: 'h5', H6: 'h6',
    HEAD: 'head', HEADER: 'header', HGROUP: 'hgroup', HR: 'hr', HTML: 'html',
    I: 'i', IMG: 'img', IMAGE: 'image', INPUT: 'input', IFRAME: 'iframe',
    KEYGEN: 'keygen',
    LABEL: 'label', LI: 'li', LINK: 'link', LISTING: 'listing',
    MAIN: 'main', MALIGNMARK: 'malignmark', MARQUEE: 'marquee', MATH: 'math', MENU: 'menu',
    META: 'meta', MGLYPH: 'mglyph', MI: 'mi', MO: 'mo', MN: 'mn', MS: 'ms', MTEXT: 'mtext',
    NAV: 'nav', NOBR: 'nobr', NOFRAMES: 'noframes', NOEMBED: 'noembed', NOSCRIPT: 'noscript',
    OBJECT: 'object', OL: 'ol', OPTGROUP: 'optgroup', OPTION: 'option',
    P: 'p', PARAM: 'param', PLAINTEXT: 'plaintext', PRE: 'pre',
    RB: 'rb', RP: 'rp', RT: 'rt', RTC: 'rtc', RUBY: 'ruby',
    S: 's', SCRIPT: 'script', SEARCH: 'search', SECTION: 'section', SELECT: 'select',
    SOURCE: 'source', SMALL: 'small', SPAN: 'span', STRIKE: 'strike', STRONG: 'strong',
    STYLE: 'style', SUB: 'sub', SUMMARY: 'summary', SUP: 'sup',
    TABLE: 'table', TBODY: 'tbody', TEMPLATE: 'template', TEXTAREA: 'textarea', TFOOT: 'tfoot',
    TD: 'td', TH: 'th', THEAD: 'thead', TITLE: 'title', TR: 'tr', TRACK: 'track', TT: 'tt',
    U: 'u', UL: 'ul',
    SVG: 'svg', VAR: 'var', WBR: 'wbr', XMP: 'xmp',
};
/**
 * Tag IDs are numeric IDs for known tag names.
 *
 * We use tag IDs to improve the performance of tag name comparisons.
 *
 * `UNKNOWN` is 0 and every other key receives its 1-based position in
 * `TAG_NAMES`. The object also carries the reverse (ID -> key) entries.
 */
var TAG_ID = {};
['UNKNOWN'].concat(Object.keys(TAG_NAMES)).forEach((key, id) => {
    TAG_ID[key] = id;
    TAG_ID[id] = key;
});
/** Lookup table from a tag name string to its numeric tag ID. */
const TAG_NAME_TO_ID = new Map(Object.keys(TAG_NAMES).map((key) => [TAG_NAMES[key], TAG_ID[key]]));
/**
 * Resolves a tag name to its numeric ID.
 *
 * @param tagName The tag name to look up.
 * @returns The matching tag ID, or `TAG_ID.UNKNOWN` when the name is not in the table.
 */
function getTagID(tagName) {
    const id = TAG_NAME_TO_ID.get(tagName);
    if (id === null || id === undefined) {
        return TAG_ID.UNKNOWN;
    }
    return id;
}
const $ = TAG_ID;
/** Per-namespace sets of tag IDs, keyed by namespace URI. */
const SPECIAL_ELEMENTS = {
    [NS.HTML]: new Set([
        $.ADDRESS, $.APPLET, $.AREA, $.ARTICLE, $.ASIDE,
        $.BASE, $.BASEFONT, $.BGSOUND, $.BLOCKQUOTE, $.BODY, $.BR, $.BUTTON,
        $.CAPTION, $.CENTER, $.COL, $.COLGROUP,
        $.DD, $.DETAILS, $.DIR, $.DIV, $.DL, $.DT,
        $.EMBED,
        $.FIELDSET, $.FIGCAPTION, $.FIGURE, $.FOOTER, $.FORM, $.FRAME, $.FRAMESET,
        $.H1, $.H2, $.H3, $.H4, $.H5, $.H6,
        $.HEAD, $.HEADER, $.HGROUP, $.HR, $.HTML,
        $.IFRAME, $.IMG, $.INPUT,
        $.LI, $.LINK, $.LISTING,
        $.MAIN, $.MARQUEE, $.MENU, $.META,
        $.NAV, $.NOEMBED, $.NOFRAMES, $.NOSCRIPT,
        $.OBJECT, $.OL,
        $.P, $.PARAM, $.PLAINTEXT, $.PRE,
        $.SCRIPT, $.SECTION, $.SELECT, $.SOURCE, $.STYLE, $.SUMMARY,
        $.TABLE, $.TBODY, $.TD, $.TEMPLATE, $.TEXTAREA, $.TFOOT, $.TH, $.THEAD, $.TITLE, $.TR, $.TRACK,
        $.UL,
        $.WBR,
        $.XMP,
    ]),
    [NS.MATHML]: new Set([$.MI, $.MO, $.MN, $.MS, $.MTEXT, $.ANNOTATION_XML]),
    [NS.SVG]: new Set([$.TITLE, $.FOREIGN_OBJECT, $.DESC]),
    [NS.XLINK]: new Set(),
    [NS.XML]: new Set(),
    [NS.XMLNS]: new Set(),
};
/** Tag IDs of the `h1`–`h6` heading elements. */
const NUMBERED_HEADERS = new Set([$.H1, $.H2, $.H3, $.H4, $.H5, $.H6]);
//States
/**
 * Tokenizer states as a numeric enum with reverse (number -> name) entries.
 *
 * A state's value is its position in the list below, so the order must not
 * change and new states may only be appended.
 */
var State = {};
[
    'DATA',
    'RCDATA',
    'RAWTEXT',
    'SCRIPT_DATA',
    'PLAINTEXT',
    'TAG_OPEN',
    'END_TAG_OPEN',
    'TAG_NAME',
    'RCDATA_LESS_THAN_SIGN',
    'RCDATA_END_TAG_OPEN',
    'RCDATA_END_TAG_NAME',
    'RAWTEXT_LESS_THAN_SIGN',
    'RAWTEXT_END_TAG_OPEN',
    'RAWTEXT_END_TAG_NAME',
    'SCRIPT_DATA_LESS_THAN_SIGN',
    'SCRIPT_DATA_END_TAG_OPEN',
    'SCRIPT_DATA_END_TAG_NAME',
    'SCRIPT_DATA_ESCAPE_START',
    'SCRIPT_DATA_ESCAPE_START_DASH',
    'SCRIPT_DATA_ESCAPED',
    'SCRIPT_DATA_ESCAPED_DASH',
    'SCRIPT_DATA_ESCAPED_DASH_DASH',
    'SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN',
    'SCRIPT_DATA_ESCAPED_END_TAG_OPEN',
    'SCRIPT_DATA_ESCAPED_END_TAG_NAME',
    'SCRIPT_DATA_DOUBLE_ESCAPE_START',
    'SCRIPT_DATA_DOUBLE_ESCAPED',
    'SCRIPT_DATA_DOUBLE_ESCAPED_DASH',
    'SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH',
    'SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN',
    'SCRIPT_DATA_DOUBLE_ESCAPE_END',
    'BEFORE_ATTRIBUTE_NAME',
    'ATTRIBUTE_NAME',
    'AFTER_ATTRIBUTE_NAME',
    'BEFORE_ATTRIBUTE_VALUE',
    'ATTRIBUTE_VALUE_DOUBLE_QUOTED',
    'ATTRIBUTE_VALUE_SINGLE_QUOTED',
    'ATTRIBUTE_VALUE_UNQUOTED',
    'AFTER_ATTRIBUTE_VALUE_QUOTED',
    'SELF_CLOSING_START_TAG',
    'BOGUS_COMMENT',
    'MARKUP_DECLARATION_OPEN',
    'COMMENT_START',
    'COMMENT_START_DASH',
    'COMMENT',
    'COMMENT_LESS_THAN_SIGN',
    'COMMENT_LESS_THAN_SIGN_BANG',
    'COMMENT_LESS_THAN_SIGN_BANG_DASH',
    'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH',
    'COMMENT_END_DASH',
    'COMMENT_END',
    'COMMENT_END_BANG',
    'DOCTYPE',
    'BEFORE_DOCTYPE_NAME',
    'DOCTYPE_NAME',
    'AFTER_DOCTYPE_NAME',
    'AFTER_DOCTYPE_PUBLIC_KEYWORD',
    'BEFORE_DOCTYPE_PUBLIC_IDENTIFIER',
    'DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED',
    'DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED',
    'AFTER_DOCTYPE_PUBLIC_IDENTIFIER',
    'BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS',
    'AFTER_DOCTYPE_SYSTEM_KEYWORD',
    'BEFORE_DOCTYPE_SYSTEM_IDENTIFIER',
    'DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED',
    'DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED',
    'AFTER_DOCTYPE_SYSTEM_IDENTIFIER',
    'BOGUS_DOCTYPE',
    'CDATA_SECTION',
    'CDATA_SECTION_BRACKET',
    'CDATA_SECTION_END',
    'CHARACTER_REFERENCE',
    'AMBIGUOUS_AMPERSAND',
].forEach((name, value) => {
    State[name] = value;
    State[value] = name;
});
//Tokenizer initial states for different modes
const TokenizerMode = {
    RCDATA: State.RCDATA,
    RAWTEXT: State.RAWTEXT,
    SCRIPT_DATA: State.SCRIPT_DATA,
    PLAINTEXT: State.PLAINTEXT,
};
//Utils
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
/** True when `cp` lies in the range DIGIT_0..DIGIT_9 (inclusive). */
function isAsciiDigit(cp) {
    return CODE_POINTS.DIGIT_0 <= cp && cp <= CODE_POINTS.DIGIT_9;
}
/** True when `cp` lies in the range LATIN_CAPITAL_A..LATIN_CAPITAL_Z (inclusive). */
function isAsciiUpper(cp) {
    return CODE_POINTS.LATIN_CAPITAL_A <= cp && cp <= CODE_POINTS.LATIN_CAPITAL_Z;
}
/** True when `cp` lies in the range LATIN_SMALL_A..LATIN_SMALL_Z (inclusive). */
function isAsciiLower(cp) {
    return CODE_POINTS.LATIN_SMALL_A <= cp && cp <= CODE_POINTS.LATIN_SMALL_Z;
}
/** True when `cp` is an ASCII letter of either case. */
function isAsciiLetter(cp) {
    if (isAsciiLower(cp)) {
        return true;
    }
    return isAsciiUpper(cp);
}
/** True when `cp` is an ASCII letter or an ASCII digit. */
function isAsciiAlphaNumeric(cp) {
    if (isAsciiLetter(cp)) {
        return true;
    }
    return isAsciiDigit(cp);
}
/**
 * Shifts a code point up by 0x20 (the distance between 'A' and 'a').
 * No range check is performed on the argument.
 */
function toAsciiLower(cp) {
    return cp + 0x20;
}
/** True for SPACE, LINE_FEED, TABULATION and FORM_FEED. CARRIAGE_RETURN is not in the list. */
function isWhitespace(cp) {
    switch (cp) {
        case CODE_POINTS.SPACE:
        case CODE_POINTS.LINE_FEED:
        case CODE_POINTS.TABULATION:
        case CODE_POINTS.FORM_FEED: {
            return true;
        }
        default: {
            return false;
        }
    }
}
/** True for whitespace, SOLIDUS and GREATER_THAN_SIGN. */
function isScriptDataDoubleEscapeSequenceEnd(cp) {
    if (isWhitespace(cp)) {
        return true;
    }
    return cp === CODE_POINTS.SOLIDUS || cp === CODE_POINTS.GREATER_THAN_SIGN;
}
/**
 * Classifies a numeric character reference value.
 *
 * The checks run in a fixed order and the first one that matches wins.
 *
 * @param code The numeric value of the character reference.
 * @returns The matching `ERR` code, or `null` when none of the checks match.
 */
function getErrorForNumericCharacterReference(code) {
    if (code === CODE_POINTS.NULL) {
        return ERR.nullCharacterReference;
    }
    // 1114111 === 0x10FFFF
    if (code > 1114111) {
        return ERR.characterReferenceOutsideUnicodeRange;
    }
    if (isSurrogate(code)) {
        return ERR.surrogateCharacterReference;
    }
    if (isUndefinedCodePoint(code)) {
        return ERR.noncharacterCharacterReference;
    }
    if (isControlCodePoint(code) || code === CODE_POINTS.CARRIAGE_RETURN) {
        return ERR.controlCharacterReference;
    }
    return null;
}
//Tokenizer
class Tokenizer { constructor(options, handler) { this.options = options; this.handler = handler; this.paused = false; /** Ensures that the parsing loop isn't run multiple times at once. */ this.inLoop = false; /** * Indicates that the current adjusted node exists, is not an element in the HTML namespace, * and that it is not an integration point for either MathML or HTML. * * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
*/ this.inForeignNode = false; this.lastStartTagName = ''; this.active = false; this.state = State.DATA; this.returnState = State.DATA; this.entityStartPos = 0; this.consumedAfterSnapshot = -1; this.currentCharacterToken = null; this.currentToken = null; this.currentAttr = { name: '', value: '' }; this.preprocessor = new Preprocessor(handler); this.currentLocation = this.getCurrentLocation(-1); this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => { // Note: Set `pos` _before_ flushing, as flushing might drop
// the current chunk and invalidate `entityStartPos`.
this.preprocessor.pos = this.entityStartPos + consumed - 1; this._flushCodePointConsumedAsCharacterReference(cp); }, handler.onParseError ? { missingSemicolonAfterCharacterReference: () => { this._err(ERR.missingSemicolonAfterCharacterReference, 1); }, absenceOfDigitsInNumericCharacterReference: (consumed) => { this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed); }, validateNumericCharacterReference: (code) => { const error = getErrorForNumericCharacterReference(code); if (error) this._err(error, 1); }, } : undefined); } //Errors
_err(code, cpOffset = 0) { var _a, _b; (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset)); } // NOTE: `offset` may never run across line boundaries.
getCurrentLocation(offset) { if (!this.options.sourceCodeLocationInfo) { return null; } return { startLine: this.preprocessor.line, startCol: this.preprocessor.col - offset, startOffset: this.preprocessor.offset - offset, endLine: -1, endCol: -1, endOffset: -1, }; } _runParsingLoop() { if (this.inLoop) return; this.inLoop = true; while (this.active && !this.paused) { this.consumedAfterSnapshot = 0; const cp = this._consume(); if (!this._ensureHibernation()) { this._callState(cp); } } this.inLoop = false; } //API
pause() { this.paused = true; } resume(writeCallback) { if (!this.paused) { throw new Error('Parser was already resumed'); } this.paused = false; // Necessary for synchronous resume.
if (this.inLoop) return; this._runParsingLoop(); if (!this.paused) { writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); } } write(chunk, isLastChunk, writeCallback) { this.active = true; this.preprocessor.write(chunk, isLastChunk); this._runParsingLoop(); if (!this.paused) { writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); } } insertHtmlAtCurrentPos(chunk) { this.active = true; this.preprocessor.insertHtmlAtCurrentPos(chunk); this._runParsingLoop(); } //Hibernation
_ensureHibernation() { if (this.preprocessor.endOfChunkHit) { this.preprocessor.retreat(this.consumedAfterSnapshot); this.consumedAfterSnapshot = 0; this.active = false; return true; } return false; } //Consumption
_consume() { this.consumedAfterSnapshot++; return this.preprocessor.advance(); } _advanceBy(count) { this.consumedAfterSnapshot += count; for (let i = 0; i < count; i++) { this.preprocessor.advance(); } } _consumeSequenceIfMatch(pattern, caseSensitive) { if (this.preprocessor.startsWith(pattern, caseSensitive)) { // We will already have consumed one character before calling this method.
this._advanceBy(pattern.length - 1); return true; } return false; } //Token creation
_createStartTagToken() { this.currentToken = { type: TokenType.START_TAG, tagName: '', tagID: TAG_ID.UNKNOWN, selfClosing: false, ackSelfClosing: false, attrs: [], location: this.getCurrentLocation(1), }; } _createEndTagToken() { this.currentToken = { type: TokenType.END_TAG, tagName: '', tagID: TAG_ID.UNKNOWN, selfClosing: false, ackSelfClosing: false, attrs: [], location: this.getCurrentLocation(2), }; } _createCommentToken(offset) { this.currentToken = { type: TokenType.COMMENT, data: '', location: this.getCurrentLocation(offset), }; } _createDoctypeToken(initialName) { this.currentToken = { type: TokenType.DOCTYPE, name: initialName, forceQuirks: false, publicId: null, systemId: null, location: this.currentLocation, }; } _createCharacterToken(type, chars) { this.currentCharacterToken = { type, chars, location: this.currentLocation, }; } //Tag attributes
_createAttr(attrNameFirstCh) { this.currentAttr = { name: attrNameFirstCh, value: '', }; this.currentLocation = this.getCurrentLocation(0); } _leaveAttrName() { var _a; var _b; const token = this.currentToken; if (getTokenAttr(token, this.currentAttr.name) === null) { token.attrs.push(this.currentAttr); if (token.location && this.currentLocation) { const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null))); attrLocations[this.currentAttr.name] = this.currentLocation; // Set end location
this._leaveAttrValue(); } } else { this._err(ERR.duplicateAttribute); } } _leaveAttrValue() { if (this.currentLocation) { this.currentLocation.endLine = this.preprocessor.line; this.currentLocation.endCol = this.preprocessor.col; this.currentLocation.endOffset = this.preprocessor.offset; } } //Token emission
prepareToken(ct) { this._emitCurrentCharacterToken(ct.location); this.currentToken = null; if (ct.location) { ct.location.endLine = this.preprocessor.line; ct.location.endCol = this.preprocessor.col + 1; ct.location.endOffset = this.preprocessor.offset + 1; } this.currentLocation = this.getCurrentLocation(-1); } emitCurrentTagToken() { const ct = this.currentToken; this.prepareToken(ct); ct.tagID = getTagID(ct.tagName); if (ct.type === TokenType.START_TAG) { this.lastStartTagName = ct.tagName; this.handler.onStartTag(ct); } else { if (ct.attrs.length > 0) { this._err(ERR.endTagWithAttributes); } if (ct.selfClosing) { this._err(ERR.endTagWithTrailingSolidus); } this.handler.onEndTag(ct); } this.preprocessor.dropParsedChunk(); } emitCurrentComment(ct) { this.prepareToken(ct); this.handler.onComment(ct); this.preprocessor.dropParsedChunk(); } emitCurrentDoctype(ct) { this.prepareToken(ct); this.handler.onDoctype(ct); this.preprocessor.dropParsedChunk(); } _emitCurrentCharacterToken(nextLocation) { if (this.currentCharacterToken) { //NOTE: if we have a pending character token, make it's end location equal to the
//current token's start location.
if (nextLocation && this.currentCharacterToken.location) { this.currentCharacterToken.location.endLine = nextLocation.startLine; this.currentCharacterToken.location.endCol = nextLocation.startCol; this.currentCharacterToken.location.endOffset = nextLocation.startOffset; } switch (this.currentCharacterToken.type) { case TokenType.CHARACTER: { this.handler.onCharacter(this.currentCharacterToken); break; } case TokenType.NULL_CHARACTER: { this.handler.onNullCharacter(this.currentCharacterToken); break; } case TokenType.WHITESPACE_CHARACTER: { this.handler.onWhitespaceCharacter(this.currentCharacterToken); break; } } this.currentCharacterToken = null; } } _emitEOFToken() { const location = this.getCurrentLocation(0); if (location) { location.endLine = location.startLine; location.endCol = location.startCol; location.endOffset = location.startOffset; } this._emitCurrentCharacterToken(location); this.handler.onEof({ type: TokenType.EOF, location }); this.active = false; } //Characters emission
//OPTIMIZATION: The specification uses only one type of character token (one token per character).
//This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
//If we have a sequence of characters that belong to the same group, the parser can process it
//as a single solid character token.
//So, there are 3 types of character tokens in parse5:
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
//3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
_appendCharToCurrentCharacterToken(type, ch) { if (this.currentCharacterToken) { if (this.currentCharacterToken.type === type) { this.currentCharacterToken.chars += ch; return; } else { this.currentLocation = this.getCurrentLocation(0); this._emitCurrentCharacterToken(this.currentLocation); this.preprocessor.dropParsedChunk(); } } this._createCharacterToken(type, ch); } _emitCodePoint(cp) { const type = isWhitespace(cp) ? TokenType.WHITESPACE_CHARACTER : cp === CODE_POINTS.NULL ? TokenType.NULL_CHARACTER : TokenType.CHARACTER; this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp)); } //NOTE: used when we emit characters explicitly.
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
_emitChars(ch) { this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch); } // Character reference helpers
_startCharacterReference() { this.returnState = this.state; this.state = State.CHARACTER_REFERENCE; this.entityStartPos = this.preprocessor.pos; this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy); } _isCharacterReferenceInAttribute() { return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED || this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED || this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED); } _flushCodePointConsumedAsCharacterReference(cp) { if (this._isCharacterReferenceInAttribute()) { this.currentAttr.value += String.fromCodePoint(cp); } else { this._emitCodePoint(cp); } } // Calling states this way turns out to be much faster than any other approach.
// Dispatches the code point `cp` to the handler method that corresponds to
// `this.state`. Every case passes `cp` through, except CHARACTER_REFERENCE,
// whose handler is called without arguments.
// Throws Error('Unknown state') when `this.state` matches none of the cases.
_callState(cp) { switch (this.state) { case State.DATA: { this._stateData(cp); break; } case State.RCDATA: { this._stateRcdata(cp); break; } case State.RAWTEXT: { this._stateRawtext(cp); break; } case State.SCRIPT_DATA: { this._stateScriptData(cp); break; } case State.PLAINTEXT: { this._statePlaintext(cp); break; } case State.TAG_OPEN: { this._stateTagOpen(cp); break; } case State.END_TAG_OPEN: { this._stateEndTagOpen(cp); break; } case State.TAG_NAME: { this._stateTagName(cp); break; } case State.RCDATA_LESS_THAN_SIGN: { this._stateRcdataLessThanSign(cp); break; } case State.RCDATA_END_TAG_OPEN: { this._stateRcdataEndTagOpen(cp); break; } case State.RCDATA_END_TAG_NAME: { this._stateRcdataEndTagName(cp); break; } case State.RAWTEXT_LESS_THAN_SIGN: { this._stateRawtextLessThanSign(cp); break; } case State.RAWTEXT_END_TAG_OPEN: { this._stateRawtextEndTagOpen(cp); break; } case State.RAWTEXT_END_TAG_NAME: { this._stateRawtextEndTagName(cp); break; } case State.SCRIPT_DATA_LESS_THAN_SIGN: { this._stateScriptDataLessThanSign(cp); break; } case State.SCRIPT_DATA_END_TAG_OPEN: { this._stateScriptDataEndTagOpen(cp); break; } case State.SCRIPT_DATA_END_TAG_NAME: { this._stateScriptDataEndTagName(cp); break; } case State.SCRIPT_DATA_ESCAPE_START: { this._stateScriptDataEscapeStart(cp); break; } case State.SCRIPT_DATA_ESCAPE_START_DASH: { this._stateScriptDataEscapeStartDash(cp); break; } case State.SCRIPT_DATA_ESCAPED: { this._stateScriptDataEscaped(cp); break; } case State.SCRIPT_DATA_ESCAPED_DASH: { this._stateScriptDataEscapedDash(cp); break; } case State.SCRIPT_DATA_ESCAPED_DASH_DASH: { this._stateScriptDataEscapedDashDash(cp); break; } case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { this._stateScriptDataEscapedLessThanSign(cp); break; } case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: { this._stateScriptDataEscapedEndTagOpen(cp); break; } case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: { this._stateScriptDataEscapedEndTagName(cp); break; } case 
State.SCRIPT_DATA_DOUBLE_ESCAPE_START: { this._stateScriptDataDoubleEscapeStart(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED: { this._stateScriptDataDoubleEscaped(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { this._stateScriptDataDoubleEscapedDash(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { this._stateScriptDataDoubleEscapedDashDash(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { this._stateScriptDataDoubleEscapedLessThanSign(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: { this._stateScriptDataDoubleEscapeEnd(cp); break; } case State.BEFORE_ATTRIBUTE_NAME: { this._stateBeforeAttributeName(cp); break; } case State.ATTRIBUTE_NAME: { this._stateAttributeName(cp); break; } case State.AFTER_ATTRIBUTE_NAME: { this._stateAfterAttributeName(cp); break; } case State.BEFORE_ATTRIBUTE_VALUE: { this._stateBeforeAttributeValue(cp); break; } case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: { this._stateAttributeValueDoubleQuoted(cp); break; } case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: { this._stateAttributeValueSingleQuoted(cp); break; } case State.ATTRIBUTE_VALUE_UNQUOTED: { this._stateAttributeValueUnquoted(cp); break; } case State.AFTER_ATTRIBUTE_VALUE_QUOTED: { this._stateAfterAttributeValueQuoted(cp); break; } case State.SELF_CLOSING_START_TAG: { this._stateSelfClosingStartTag(cp); break; } case State.BOGUS_COMMENT: { this._stateBogusComment(cp); break; } case State.MARKUP_DECLARATION_OPEN: { this._stateMarkupDeclarationOpen(cp); break; } case State.COMMENT_START: { this._stateCommentStart(cp); break; } case State.COMMENT_START_DASH: { this._stateCommentStartDash(cp); break; } case State.COMMENT: { this._stateComment(cp); break; } case State.COMMENT_LESS_THAN_SIGN: { this._stateCommentLessThanSign(cp); break; } case State.COMMENT_LESS_THAN_SIGN_BANG: { this._stateCommentLessThanSignBang(cp); break; } case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: { this._stateCommentLessThanSignBangDash(cp); break; } case 
State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: { this._stateCommentLessThanSignBangDashDash(cp); break; } case State.COMMENT_END_DASH: { this._stateCommentEndDash(cp); break; } case State.COMMENT_END: { this._stateCommentEnd(cp); break; } case State.COMMENT_END_BANG: { this._stateCommentEndBang(cp); break; } case State.DOCTYPE: { this._stateDoctype(cp); break; } case State.BEFORE_DOCTYPE_NAME: { this._stateBeforeDoctypeName(cp); break; } case State.DOCTYPE_NAME: { this._stateDoctypeName(cp); break; } case State.AFTER_DOCTYPE_NAME: { this._stateAfterDoctypeName(cp); break; } case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: { this._stateAfterDoctypePublicKeyword(cp); break; } case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { this._stateBeforeDoctypePublicIdentifier(cp); break; } case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { this._stateDoctypePublicIdentifierDoubleQuoted(cp); break; } case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { this._stateDoctypePublicIdentifierSingleQuoted(cp); break; } case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { this._stateAfterDoctypePublicIdentifier(cp); break; } case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { this._stateBetweenDoctypePublicAndSystemIdentifiers(cp); break; } case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: { this._stateAfterDoctypeSystemKeyword(cp); break; } case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { this._stateBeforeDoctypeSystemIdentifier(cp); break; } case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { this._stateDoctypeSystemIdentifierDoubleQuoted(cp); break; } case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { this._stateDoctypeSystemIdentifierSingleQuoted(cp); break; } case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { this._stateAfterDoctypeSystemIdentifier(cp); break; } case State.BOGUS_DOCTYPE: { this._stateBogusDoctype(cp); break; } case State.CDATA_SECTION: { this._stateCdataSection(cp); break; } case State.CDATA_SECTION_BRACKET: { this._stateCdataSectionBracket(cp); break; } case State.CDATA_SECTION_END: 
{ this._stateCdataSectionEnd(cp); break; } case State.CHARACTER_REFERENCE: { this._stateCharacterReference(); break; } case State.AMBIGUOUS_AMPERSAND: { this._stateAmbiguousAmpersand(cp); break; } default: { throw new Error('Unknown state'); } } } // State machine
// Data state
//------------------------------------------------------------------
_stateData(cp) { switch (cp) { case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.TAG_OPEN; break; } case CODE_POINTS.AMPERSAND: { this._startCharacterReference(); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitCodePoint(cp); break; } case CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // RCDATA state
//------------------------------------------------------------------
_stateRcdata(cp) { switch (cp) { case CODE_POINTS.AMPERSAND: { this._startCharacterReference(); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.RCDATA_LESS_THAN_SIGN; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // RAWTEXT state
//------------------------------------------------------------------
_stateRawtext(cp) { switch (cp) { case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.RAWTEXT_LESS_THAN_SIGN; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // Script data state
//------------------------------------------------------------------
_stateScriptData(cp) { switch (cp) { case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_LESS_THAN_SIGN; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // PLAINTEXT state
//------------------------------------------------------------------
_statePlaintext(cp) { switch (cp) { case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // Tag open state
//------------------------------------------------------------------
_stateTagOpen(cp) { if (isAsciiLetter(cp)) { this._createStartTagToken(); this.state = State.TAG_NAME; this._stateTagName(cp); } else switch (cp) { case CODE_POINTS.EXCLAMATION_MARK: { this.state = State.MARKUP_DECLARATION_OPEN; break; } case CODE_POINTS.SOLIDUS: { this.state = State.END_TAG_OPEN; break; } case CODE_POINTS.QUESTION_MARK: { this._err(ERR.unexpectedQuestionMarkInsteadOfTagName); this._createCommentToken(1); this.state = State.BOGUS_COMMENT; this._stateBogusComment(cp); break; } case CODE_POINTS.EOF: { this._err(ERR.eofBeforeTagName); this._emitChars('<'); this._emitEOFToken(); break; } default: { this._err(ERR.invalidFirstCharacterOfTagName); this._emitChars('<'); this.state = State.DATA; this._stateData(cp); } } } // End tag open state
//------------------------------------------------------------------
_stateEndTagOpen(cp) { if (isAsciiLetter(cp)) { this._createEndTagToken(); this.state = State.TAG_NAME; this._stateTagName(cp); } else switch (cp) { case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingEndTagName); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofBeforeTagName); this._emitChars('</'); this._emitEOFToken(); break; } default: { this._err(ERR.invalidFirstCharacterOfTagName); this._createCommentToken(2); this.state = State.BOGUS_COMMENT; this._stateBogusComment(cp); } } } // Tag name state
//------------------------------------------------------------------
_stateTagName(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this.state = State.BEFORE_ATTRIBUTE_NAME; break; } case CODE_POINTS.SOLIDUS: { this.state = State.SELF_CLOSING_START_TAG; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentTagToken(); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.tagName += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); } } } // RCDATA less-than sign state
//------------------------------------------------------------------
_stateRcdataLessThanSign(cp) { if (cp === CODE_POINTS.SOLIDUS) { this.state = State.RCDATA_END_TAG_OPEN; } else { this._emitChars('<'); this.state = State.RCDATA; this._stateRcdata(cp); } } // RCDATA end tag open state
//------------------------------------------------------------------
_stateRcdataEndTagOpen(cp) { if (isAsciiLetter(cp)) { this.state = State.RCDATA_END_TAG_NAME; this._stateRcdataEndTagName(cp); } else { this._emitChars('</'); this.state = State.RCDATA; this._stateRcdata(cp); } } handleSpecialEndTag(_cp) { if (!this.preprocessor.startsWith(this.lastStartTagName, false)) { return !this._ensureHibernation(); } this._createEndTagToken(); const token = this.currentToken; token.tagName = this.lastStartTagName; const cp = this.preprocessor.peek(this.lastStartTagName.length); switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this._advanceBy(this.lastStartTagName.length); this.state = State.BEFORE_ATTRIBUTE_NAME; return false; } case CODE_POINTS.SOLIDUS: { this._advanceBy(this.lastStartTagName.length); this.state = State.SELF_CLOSING_START_TAG; return false; } case CODE_POINTS.GREATER_THAN_SIGN: { this._advanceBy(this.lastStartTagName.length); this.emitCurrentTagToken(); this.state = State.DATA; return false; } default: { return !this._ensureHibernation(); } } } // RCDATA end tag name state
//------------------------------------------------------------------
_stateRcdataEndTagName(cp) { if (this.handleSpecialEndTag(cp)) { this._emitChars('</'); this.state = State.RCDATA; this._stateRcdata(cp); } } // RAWTEXT less-than sign state
//------------------------------------------------------------------
_stateRawtextLessThanSign(cp) { if (cp === CODE_POINTS.SOLIDUS) { this.state = State.RAWTEXT_END_TAG_OPEN; } else { this._emitChars('<'); this.state = State.RAWTEXT; this._stateRawtext(cp); } } // RAWTEXT end tag open state
//------------------------------------------------------------------
_stateRawtextEndTagOpen(cp) { if (isAsciiLetter(cp)) { this.state = State.RAWTEXT_END_TAG_NAME; this._stateRawtextEndTagName(cp); } else { this._emitChars('</'); this.state = State.RAWTEXT; this._stateRawtext(cp); } } // RAWTEXT end tag name state
//------------------------------------------------------------------
_stateRawtextEndTagName(cp) { if (this.handleSpecialEndTag(cp)) { this._emitChars('</'); this.state = State.RAWTEXT; this._stateRawtext(cp); } } // Script data less-than sign state
//------------------------------------------------------------------
_stateScriptDataLessThanSign(cp) { switch (cp) { case CODE_POINTS.SOLIDUS: { this.state = State.SCRIPT_DATA_END_TAG_OPEN; break; } case CODE_POINTS.EXCLAMATION_MARK: { this.state = State.SCRIPT_DATA_ESCAPE_START; this._emitChars('<!'); break; } default: { this._emitChars('<'); this.state = State.SCRIPT_DATA; this._stateScriptData(cp); } } } // Script data end tag open state
//------------------------------------------------------------------
_stateScriptDataEndTagOpen(cp) { if (isAsciiLetter(cp)) { this.state = State.SCRIPT_DATA_END_TAG_NAME; this._stateScriptDataEndTagName(cp); } else { this._emitChars('</'); this.state = State.SCRIPT_DATA; this._stateScriptData(cp); } } // Script data end tag name state
//------------------------------------------------------------------
_stateScriptDataEndTagName(cp) { if (this.handleSpecialEndTag(cp)) { this._emitChars('</'); this.state = State.SCRIPT_DATA; this._stateScriptData(cp); } } // Script data escape start state
//------------------------------------------------------------------
_stateScriptDataEscapeStart(cp) { if (cp === CODE_POINTS.HYPHEN_MINUS) { this.state = State.SCRIPT_DATA_ESCAPE_START_DASH; this._emitChars('-'); } else { this.state = State.SCRIPT_DATA; this._stateScriptData(cp); } } // Script data escape start dash state
//------------------------------------------------------------------
_stateScriptDataEscapeStartDash(cp) { if (cp === CODE_POINTS.HYPHEN_MINUS) { this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; this._emitChars('-'); } else { this.state = State.SCRIPT_DATA; this._stateScriptData(cp); } } // Script data escaped state
//------------------------------------------------------------------
_stateScriptDataEscaped(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.SCRIPT_DATA_ESCAPED_DASH; this._emitChars('-'); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // Script data escaped dash state
//------------------------------------------------------------------
_stateScriptDataEscapedDash(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; this._emitChars('-'); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.state = State.SCRIPT_DATA_ESCAPED; this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this.state = State.SCRIPT_DATA_ESCAPED; this._emitCodePoint(cp); } } } // Script data escaped dash dash state
//------------------------------------------------------------------
_stateScriptDataEscapedDashDash(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this._emitChars('-'); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.SCRIPT_DATA; this._emitChars('>'); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.state = State.SCRIPT_DATA_ESCAPED; this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this.state = State.SCRIPT_DATA_ESCAPED; this._emitCodePoint(cp); } } } // Script data escaped less-than sign state
//------------------------------------------------------------------
_stateScriptDataEscapedLessThanSign(cp) { if (cp === CODE_POINTS.SOLIDUS) { this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN; } else if (isAsciiLetter(cp)) { this._emitChars('<'); this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START; this._stateScriptDataDoubleEscapeStart(cp); } else { this._emitChars('<'); this.state = State.SCRIPT_DATA_ESCAPED; this._stateScriptDataEscaped(cp); } } // Script data escaped end tag open state
//------------------------------------------------------------------
_stateScriptDataEscapedEndTagOpen(cp) { if (isAsciiLetter(cp)) { this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME; this._stateScriptDataEscapedEndTagName(cp); } else { this._emitChars('</'); this.state = State.SCRIPT_DATA_ESCAPED; this._stateScriptDataEscaped(cp); } } // Script data escaped end tag name state
//------------------------------------------------------------------
_stateScriptDataEscapedEndTagName(cp) { if (this.handleSpecialEndTag(cp)) { this._emitChars('</'); this.state = State.SCRIPT_DATA_ESCAPED; this._stateScriptDataEscaped(cp); } } // Script data double escape start state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapeStart(cp) { if (this.preprocessor.startsWith(SEQUENCES.SCRIPT, false) && isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(SEQUENCES.SCRIPT.length))) { this._emitCodePoint(cp); for (let i = 0; i < SEQUENCES.SCRIPT.length; i++) { this._emitCodePoint(this._consume()); } this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; } else if (!this._ensureHibernation()) { this.state = State.SCRIPT_DATA_ESCAPED; this._stateScriptDataEscaped(cp); } } // Script data double escaped state
//------------------------------------------------------------------
_stateScriptDataDoubleEscaped(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH; this._emitChars('-'); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; this._emitChars('<'); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // Script data double escaped dash state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapedDash(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH; this._emitChars('-'); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; this._emitChars('<'); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._emitCodePoint(cp); } } } // Script data double escaped dash dash state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapedDashDash(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this._emitChars('-'); break; } case CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; this._emitChars('<'); break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.SCRIPT_DATA; this._emitChars('>'); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._emitChars(REPLACEMENT_CHARACTER); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._emitCodePoint(cp); } } } // Script data double escaped less-than sign state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapedLessThanSign(cp) { if (cp === CODE_POINTS.SOLIDUS) { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END; this._emitChars('/'); } else { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._stateScriptDataDoubleEscaped(cp); } } // Script data double escape end state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapeEnd(cp) { if (this.preprocessor.startsWith(SEQUENCES.SCRIPT, false) && isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(SEQUENCES.SCRIPT.length))) { this._emitCodePoint(cp); for (let i = 0; i < SEQUENCES.SCRIPT.length; i++) { this._emitCodePoint(this._consume()); } this.state = State.SCRIPT_DATA_ESCAPED; } else if (!this._ensureHibernation()) { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._stateScriptDataDoubleEscaped(cp); } } // Before attribute name state
//------------------------------------------------------------------
_stateBeforeAttributeName(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.SOLIDUS: case CODE_POINTS.GREATER_THAN_SIGN: case CODE_POINTS.EOF: { this.state = State.AFTER_ATTRIBUTE_NAME; this._stateAfterAttributeName(cp); break; } case CODE_POINTS.EQUALS_SIGN: { this._err(ERR.unexpectedEqualsSignBeforeAttributeName); this._createAttr('='); this.state = State.ATTRIBUTE_NAME; break; } default: { this._createAttr(''); this.state = State.ATTRIBUTE_NAME; this._stateAttributeName(cp); } } } // Attribute name state
//------------------------------------------------------------------
_stateAttributeName(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: case CODE_POINTS.SOLIDUS: case CODE_POINTS.GREATER_THAN_SIGN: case CODE_POINTS.EOF: { this._leaveAttrName(); this.state = State.AFTER_ATTRIBUTE_NAME; this._stateAfterAttributeName(cp); break; } case CODE_POINTS.EQUALS_SIGN: { this._leaveAttrName(); this.state = State.BEFORE_ATTRIBUTE_VALUE; break; } case CODE_POINTS.QUOTATION_MARK: case CODE_POINTS.APOSTROPHE: case CODE_POINTS.LESS_THAN_SIGN: { this._err(ERR.unexpectedCharacterInAttributeName); this.currentAttr.name += String.fromCodePoint(cp); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.currentAttr.name += REPLACEMENT_CHARACTER; break; } default: { this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); } } } // After attribute name state
//------------------------------------------------------------------
_stateAfterAttributeName(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.SOLIDUS: { this.state = State.SELF_CLOSING_START_TAG; break; } case CODE_POINTS.EQUALS_SIGN: { this.state = State.BEFORE_ATTRIBUTE_VALUE; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentTagToken(); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { this._createAttr(''); this.state = State.ATTRIBUTE_NAME; this._stateAttributeName(cp); } } } // Before attribute value state
//------------------------------------------------------------------
_stateBeforeAttributeValue(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.QUOTATION_MARK: { this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingAttributeValue); this.state = State.DATA; this.emitCurrentTagToken(); break; } default: { this.state = State.ATTRIBUTE_VALUE_UNQUOTED; this._stateAttributeValueUnquoted(cp); } } } // Attribute value (double-quoted) state
//------------------------------------------------------------------
_stateAttributeValueDoubleQuoted(cp) { switch (cp) { case CODE_POINTS.QUOTATION_MARK: { this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; break; } case CODE_POINTS.AMPERSAND: { this._startCharacterReference(); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.currentAttr.value += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { this.currentAttr.value += String.fromCodePoint(cp); } } } // Attribute value (single-quoted) state
//------------------------------------------------------------------
_stateAttributeValueSingleQuoted(cp) { switch (cp) { case CODE_POINTS.APOSTROPHE: { this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; break; } case CODE_POINTS.AMPERSAND: { this._startCharacterReference(); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.currentAttr.value += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { this.currentAttr.value += String.fromCodePoint(cp); } } } // Attribute value (unquoted) state
//------------------------------------------------------------------
_stateAttributeValueUnquoted(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this._leaveAttrValue(); this.state = State.BEFORE_ATTRIBUTE_NAME; break; } case CODE_POINTS.AMPERSAND: { this._startCharacterReference(); break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._leaveAttrValue(); this.state = State.DATA; this.emitCurrentTagToken(); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this.currentAttr.value += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.QUOTATION_MARK: case CODE_POINTS.APOSTROPHE: case CODE_POINTS.LESS_THAN_SIGN: case CODE_POINTS.EQUALS_SIGN: case CODE_POINTS.GRAVE_ACCENT: { this._err(ERR.unexpectedCharacterInUnquotedAttributeValue); this.currentAttr.value += String.fromCodePoint(cp); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { this.currentAttr.value += String.fromCodePoint(cp); } } } // After attribute value (quoted) state
//------------------------------------------------------------------
_stateAfterAttributeValueQuoted(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this._leaveAttrValue(); this.state = State.BEFORE_ATTRIBUTE_NAME; break; } case CODE_POINTS.SOLIDUS: { this._leaveAttrValue(); this.state = State.SELF_CLOSING_START_TAG; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._leaveAttrValue(); this.state = State.DATA; this.emitCurrentTagToken(); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { this._err(ERR.missingWhitespaceBetweenAttributes); this.state = State.BEFORE_ATTRIBUTE_NAME; this._stateBeforeAttributeName(cp); } } } // Self-closing start tag state
//------------------------------------------------------------------
_stateSelfClosingStartTag(cp) { switch (cp) { case CODE_POINTS.GREATER_THAN_SIGN: { const token = this.currentToken; token.selfClosing = true; this.state = State.DATA; this.emitCurrentTagToken(); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInTag); this._emitEOFToken(); break; } default: { this._err(ERR.unexpectedSolidusInTag); this.state = State.BEFORE_ATTRIBUTE_NAME; this._stateBeforeAttributeName(cp); } } } // Bogus comment state
//------------------------------------------------------------------
_stateBogusComment(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentComment(token); break; } case CODE_POINTS.EOF: { this.emitCurrentComment(token); this._emitEOFToken(); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.data += REPLACEMENT_CHARACTER; break; } default: { token.data += String.fromCodePoint(cp); } } } // Markup declaration open state
//------------------------------------------------------------------
_stateMarkupDeclarationOpen(cp) { if (this._consumeSequenceIfMatch(SEQUENCES.DASH_DASH, true)) { this._createCommentToken(SEQUENCES.DASH_DASH.length + 1); this.state = State.COMMENT_START; } else if (this._consumeSequenceIfMatch(SEQUENCES.DOCTYPE, false)) { // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
this.currentLocation = this.getCurrentLocation(SEQUENCES.DOCTYPE.length + 1); this.state = State.DOCTYPE; } else if (this._consumeSequenceIfMatch(SEQUENCES.CDATA_START, true)) { if (this.inForeignNode) { this.state = State.CDATA_SECTION; } else { this._err(ERR.cdataInHtmlContent); this._createCommentToken(SEQUENCES.CDATA_START.length + 1); this.currentToken.data = '[CDATA['; this.state = State.BOGUS_COMMENT; } } //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
//results are no longer valid and we will need to start over.
else if (!this._ensureHibernation()) { this._err(ERR.incorrectlyOpenedComment); this._createCommentToken(2); this.state = State.BOGUS_COMMENT; this._stateBogusComment(cp); } } // Comment start state
//------------------------------------------------------------------
_stateCommentStart(cp) { switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.COMMENT_START_DASH; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.abruptClosingOfEmptyComment); this.state = State.DATA; const token = this.currentToken; this.emitCurrentComment(token); break; } default: { this.state = State.COMMENT; this._stateComment(cp); } } } // Comment start dash state
//------------------------------------------------------------------
_stateCommentStartDash(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.COMMENT_END; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.abruptClosingOfEmptyComment); this.state = State.DATA; this.emitCurrentComment(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInComment); this.emitCurrentComment(token); this._emitEOFToken(); break; } default: { token.data += '-'; this.state = State.COMMENT; this._stateComment(cp); } } } // Comment state
//------------------------------------------------------------------
_stateComment(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.COMMENT_END_DASH; break; } case CODE_POINTS.LESS_THAN_SIGN: { token.data += '<'; this.state = State.COMMENT_LESS_THAN_SIGN; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.data += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInComment); this.emitCurrentComment(token); this._emitEOFToken(); break; } default: { token.data += String.fromCodePoint(cp); } } } // Comment less-than sign state
//------------------------------------------------------------------
_stateCommentLessThanSign(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.EXCLAMATION_MARK: { token.data += '!'; this.state = State.COMMENT_LESS_THAN_SIGN_BANG; break; } case CODE_POINTS.LESS_THAN_SIGN: { token.data += '<'; break; } default: { this.state = State.COMMENT; this._stateComment(cp); } } } // Comment less-than sign bang state
//------------------------------------------------------------------
_stateCommentLessThanSignBang(cp) { if (cp === CODE_POINTS.HYPHEN_MINUS) { this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH; } else { this.state = State.COMMENT; this._stateComment(cp); } } // Comment less-than sign bang dash state
//------------------------------------------------------------------
_stateCommentLessThanSignBangDash(cp) { if (cp === CODE_POINTS.HYPHEN_MINUS) { this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH; } else { this.state = State.COMMENT_END_DASH; this._stateCommentEndDash(cp); } } // Comment less-than sign bang dash dash state
//------------------------------------------------------------------
_stateCommentLessThanSignBangDashDash(cp) { if (cp !== CODE_POINTS.GREATER_THAN_SIGN && cp !== CODE_POINTS.EOF) { this._err(ERR.nestedComment); } this.state = State.COMMENT_END; this._stateCommentEnd(cp); } // Comment end dash state
//------------------------------------------------------------------
_stateCommentEndDash(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { this.state = State.COMMENT_END; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInComment); this.emitCurrentComment(token); this._emitEOFToken(); break; } default: { token.data += '-'; this.state = State.COMMENT; this._stateComment(cp); } } } // Comment end state
//------------------------------------------------------------------
_stateCommentEnd(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentComment(token); break; } case CODE_POINTS.EXCLAMATION_MARK: { this.state = State.COMMENT_END_BANG; break; } case CODE_POINTS.HYPHEN_MINUS: { token.data += '-'; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInComment); this.emitCurrentComment(token); this._emitEOFToken(); break; } default: { token.data += '--'; this.state = State.COMMENT; this._stateComment(cp); } } } // Comment end bang state
//------------------------------------------------------------------
_stateCommentEndBang(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.HYPHEN_MINUS: { token.data += '--!'; this.state = State.COMMENT_END_DASH; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.incorrectlyClosedComment); this.state = State.DATA; this.emitCurrentComment(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInComment); this.emitCurrentComment(token); this._emitEOFToken(); break; } default: { token.data += '--!'; this.state = State.COMMENT; this._stateComment(cp); } } } // DOCTYPE state
//------------------------------------------------------------------
_stateDoctype(cp) { switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this.state = State.BEFORE_DOCTYPE_NAME; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.BEFORE_DOCTYPE_NAME; this._stateBeforeDoctypeName(cp); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); this._createDoctypeToken(null); const token = this.currentToken; token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingWhitespaceBeforeDoctypeName); this.state = State.BEFORE_DOCTYPE_NAME; this._stateBeforeDoctypeName(cp); } } } // Before DOCTYPE name state
//------------------------------------------------------------------
_stateBeforeDoctypeName(cp) { if (isAsciiUpper(cp)) { this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp))); this.state = State.DOCTYPE_NAME; } else switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); this._createDoctypeToken(REPLACEMENT_CHARACTER); this.state = State.DOCTYPE_NAME; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingDoctypeName); this._createDoctypeToken(null); const token = this.currentToken; token.forceQuirks = true; this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); this._createDoctypeToken(null); const token = this.currentToken; token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._createDoctypeToken(String.fromCodePoint(cp)); this.state = State.DOCTYPE_NAME; } } } // DOCTYPE name state
//------------------------------------------------------------------
_stateDoctypeName(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this.state = State.AFTER_DOCTYPE_NAME; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.name += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); } } } // After DOCTYPE name state
//------------------------------------------------------------------
_stateAfterDoctypeName(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { if (this._consumeSequenceIfMatch(SEQUENCES.PUBLIC, false)) { this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD; } else if (this._consumeSequenceIfMatch(SEQUENCES.SYSTEM, false)) { this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD; } //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
//results are no longer valid and we will need to start over.
else if (!this._ensureHibernation()) { this._err(ERR.invalidCharacterSequenceAfterDoctypeName); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } } // After DOCTYPE public keyword state
//------------------------------------------------------------------
_stateAfterDoctypePublicKeyword(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; break; } case CODE_POINTS.QUOTATION_MARK: { this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); token.publicId = ''; this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); token.publicId = ''; this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingDoctypePublicIdentifier); token.forceQuirks = true; this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // Before DOCTYPE public identifier state
//------------------------------------------------------------------
_stateBeforeDoctypePublicIdentifier(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.QUOTATION_MARK: { token.publicId = ''; this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { token.publicId = ''; this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingDoctypePublicIdentifier); token.forceQuirks = true; this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // DOCTYPE public identifier (double-quoted) state
//------------------------------------------------------------------
_stateDoctypePublicIdentifierDoubleQuoted(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.QUOTATION_MARK: { this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.publicId += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.abruptDoctypePublicIdentifier); token.forceQuirks = true; this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { token.publicId += String.fromCodePoint(cp); } } } // DOCTYPE public identifier (single-quoted) state
//------------------------------------------------------------------
_stateDoctypePublicIdentifierSingleQuoted(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.APOSTROPHE: { this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.publicId += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.abruptDoctypePublicIdentifier); token.forceQuirks = true; this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { token.publicId += String.fromCodePoint(cp); } } } // After DOCTYPE public identifier state
//------------------------------------------------------------------
_stateAfterDoctypePublicIdentifier(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.QUOTATION_MARK: { this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // Between DOCTYPE public and system identifiers state
//------------------------------------------------------------------
_stateBetweenDoctypePublicAndSystemIdentifiers(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.QUOTATION_MARK: { token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // After DOCTYPE system keyword state
//------------------------------------------------------------------
_stateAfterDoctypeSystemKeyword(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; break; } case CODE_POINTS.QUOTATION_MARK: { this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingDoctypeSystemIdentifier); token.forceQuirks = true; this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // Before DOCTYPE system identifier state
//------------------------------------------------------------------
_stateBeforeDoctypeSystemIdentifier(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.QUOTATION_MARK: { token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; break; } case CODE_POINTS.APOSTROPHE: { token.systemId = ''; this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.missingDoctypeSystemIdentifier); token.forceQuirks = true; this.state = State.DATA; this.emitCurrentDoctype(token); break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); token.forceQuirks = true; this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // DOCTYPE system identifier (double-quoted) state
//------------------------------------------------------------------
_stateDoctypeSystemIdentifierDoubleQuoted(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.QUOTATION_MARK: { this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.systemId += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.abruptDoctypeSystemIdentifier); token.forceQuirks = true; this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { token.systemId += String.fromCodePoint(cp); } } } // DOCTYPE system identifier (single-quoted) state
//------------------------------------------------------------------
_stateDoctypeSystemIdentifierSingleQuoted(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.APOSTROPHE: { this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); token.systemId += REPLACEMENT_CHARACTER; break; } case CODE_POINTS.GREATER_THAN_SIGN: { this._err(ERR.abruptDoctypeSystemIdentifier); token.forceQuirks = true; this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { token.systemId += String.fromCodePoint(cp); } } } // After DOCTYPE system identifier state
//------------------------------------------------------------------
_stateAfterDoctypeSystemIdentifier(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.SPACE: case CODE_POINTS.LINE_FEED: case CODE_POINTS.TABULATION: case CODE_POINTS.FORM_FEED: { // Ignore whitespace
break; } case CODE_POINTS.GREATER_THAN_SIGN: { this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier); this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // Bogus DOCTYPE state
//------------------------------------------------------------------
_stateBogusDoctype(cp) { const token = this.currentToken; switch (cp) { case CODE_POINTS.GREATER_THAN_SIGN: { this.emitCurrentDoctype(token); this.state = State.DATA; break; } case CODE_POINTS.NULL: { this._err(ERR.unexpectedNullCharacter); break; } case CODE_POINTS.EOF: { this.emitCurrentDoctype(token); this._emitEOFToken(); break; } // Do nothing
} } // CDATA section state
//------------------------------------------------------------------
_stateCdataSection(cp) { switch (cp) { case CODE_POINTS.RIGHT_SQUARE_BRACKET: { this.state = State.CDATA_SECTION_BRACKET; break; } case CODE_POINTS.EOF: { this._err(ERR.eofInCdata); this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // CDATA section bracket state
//------------------------------------------------------------------
_stateCdataSectionBracket(cp) { if (cp === CODE_POINTS.RIGHT_SQUARE_BRACKET) { this.state = State.CDATA_SECTION_END; } else { this._emitChars(']'); this.state = State.CDATA_SECTION; this._stateCdataSection(cp); } } // CDATA section end state
//------------------------------------------------------------------
_stateCdataSectionEnd(cp) { switch (cp) { case CODE_POINTS.GREATER_THAN_SIGN: { this.state = State.DATA; break; } case CODE_POINTS.RIGHT_SQUARE_BRACKET: { this._emitChars(']'); break; } default: { this._emitChars(']]'); this.state = State.CDATA_SECTION; this._stateCdataSection(cp); } } } // Character reference state
//------------------------------------------------------------------
_stateCharacterReference() { let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos); if (length < 0) { if (this.preprocessor.lastChunkWritten) { length = this.entityDecoder.end(); } else { // Wait for the rest of the entity.
this.active = false; // Mark the entire buffer as read.
this.preprocessor.pos = this.preprocessor.html.length - 1; this.consumedAfterSnapshot = 0; this.preprocessor.endOfChunkHit = true; return; } } if (length === 0) { // This was not a valid entity. Go back to the beginning, and
// figure out what to do.
this.preprocessor.pos = this.entityStartPos; this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND); this.state = !this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1)) ? State.AMBIGUOUS_AMPERSAND : this.returnState; } else { // We successfully parsed an entity. Switch to the return state.
this.state = this.returnState; } } // Ambiguos ampersand state
//------------------------------------------------------------------
_stateAmbiguousAmpersand(cp) { if (isAsciiAlphaNumeric(cp)) { this._flushCodePointConsumedAsCharacterReference(cp); } else { if (cp === CODE_POINTS.SEMICOLON) { this._err(ERR.unknownNamedCharacterReference); } this.state = this.returnState; this._callState(cp); } } }
//Element utils
// Tag IDs popped by OpenElementStack.generateImpliedEndTags().
const IMPLICIT_END_TAG_REQUIRED = new Set([
    TAG_ID.DD,
    TAG_ID.DT,
    TAG_ID.LI,
    TAG_ID.OPTGROUP,
    TAG_ID.OPTION,
    TAG_ID.P,
    TAG_ID.RB,
    TAG_ID.RP,
    TAG_ID.RT,
    TAG_ID.RTC,
]);
// Superset used by the "thoroughly" and "with exclusion" variants.
const IMPLICIT_END_TAG_REQUIRED_THOROUGHLY = new Set(IMPLICIT_END_TAG_REQUIRED)
    .add(TAG_ID.CAPTION)
    .add(TAG_ID.COLGROUP)
    .add(TAG_ID.TBODY)
    .add(TAG_ID.TD)
    .add(TAG_ID.TFOOT)
    .add(TAG_ID.TH)
    .add(TAG_ID.THEAD)
    .add(TAG_ID.TR);
// HTML-namespace tag IDs that terminate a scope lookup.
const SCOPING_ELEMENTS_HTML = new Set([
    TAG_ID.APPLET,
    TAG_ID.CAPTION,
    TAG_ID.HTML,
    TAG_ID.MARQUEE,
    TAG_ID.OBJECT,
    TAG_ID.TABLE,
    TAG_ID.TD,
    TAG_ID.TEMPLATE,
    TAG_ID.TH,
]);
// List-item scope and button scope extend the base HTML scoping set.
const SCOPING_ELEMENTS_HTML_LIST = new Set(SCOPING_ELEMENTS_HTML).add(TAG_ID.OL).add(TAG_ID.UL);
const SCOPING_ELEMENTS_HTML_BUTTON = new Set(SCOPING_ELEMENTS_HTML).add(TAG_ID.BUTTON);
// Scope terminators for elements in the MathML and SVG namespaces.
const SCOPING_ELEMENTS_MATHML = new Set([
    TAG_ID.ANNOTATION_XML,
    TAG_ID.MI,
    TAG_ID.MN,
    TAG_ID.MO,
    TAG_ID.MS,
    TAG_ID.MTEXT,
]);
const SCOPING_ELEMENTS_SVG = new Set([TAG_ID.DESC, TAG_ID.FOREIGN_OBJECT, TAG_ID.TITLE]);
// Targets for the OpenElementStack "clear back to ... context" helpers.
const TABLE_ROW_CONTEXT = new Set([TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML]);
const TABLE_BODY_CONTEXT = new Set([
    TAG_ID.TBODY,
    TAG_ID.TFOOT,
    TAG_ID.THEAD,
    TAG_ID.TEMPLATE,
    TAG_ID.HTML,
]);
const TABLE_CONTEXT = new Set([TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML]);
const TABLE_CELLS = new Set([TAG_ID.TD, TAG_ID.TH]);
//Stack of open elements
/**
 * Stack of open elements.
 *
 * Elements and their tag IDs are kept in two parallel arrays (`items` and
 * `tagIDs`); `stackTop` is the index of the current element. Popping only
 * lowers `stackTop`, so slots above it may still hold stale values. The
 * handler is notified through `onItemPush` / `onItemPop` on every change.
 */
class OpenElementStack {
    constructor(document, treeAdapter, handler) {
        this.treeAdapter = treeAdapter;
        this.handler = handler;
        this.items = [];
        this.tagIDs = [];
        this.stackTop = -1;
        this.tmplCount = 0;
        this.currentTagId = TAG_ID.UNKNOWN;
        this.current = document;
    }

    /** The template content when the current node is an HTML `<template>`, otherwise the current node. */
    get currentTmplContentOrNode() {
        if (this._isInTemplate()) {
            return this.treeAdapter.getTemplateContent(this.current);
        }
        return this.current;
    }

    //Index of element
    _indexOf(element) {
        // Search downwards from the top so stale slots above it are ignored.
        return this.items.lastIndexOf(element, this.stackTop);
    }

    //Update current element
    _isInTemplate() {
        if (this.currentTagId !== TAG_ID.TEMPLATE) {
            return false;
        }
        return this.treeAdapter.getNamespaceURI(this.current) === NS.HTML;
    }

    _updateCurrentElement() {
        const top = this.stackTop;
        this.current = this.items[top];
        this.currentTagId = this.tagIDs[top];
    }

    //Mutations
    push(element, tagID) {
        const top = ++this.stackTop;
        this.items[top] = element;
        this.current = element;
        this.tagIDs[top] = tagID;
        this.currentTagId = tagID;
        if (this._isInTemplate()) {
            this.tmplCount += 1;
        }
        this.handler.onItemPush(element, tagID, true);
    }

    pop() {
        const removed = this.current;
        if (this.tmplCount > 0 && this._isInTemplate()) {
            this.tmplCount -= 1;
        }
        this.stackTop -= 1;
        this._updateCurrentElement();
        this.handler.onItemPop(removed, true);
    }

    replace(oldElement, newElement) {
        const position = this._indexOf(oldElement);
        this.items[position] = newElement;
        if (position === this.stackTop) {
            this.current = newElement;
        }
    }

    insertAfter(referenceElement, newElement, newElementID) {
        const position = this._indexOf(referenceElement) + 1;
        this.items.splice(position, 0, newElement);
        this.tagIDs.splice(position, 0, newElementID);
        this.stackTop += 1;
        const insertedOnTop = position === this.stackTop;
        if (insertedOnTop) {
            this._updateCurrentElement();
        }
        this.handler.onItemPush(this.current, this.currentTagId, insertedOnTop);
    }

    popUntilTagNamePopped(tagName) {
        // Find the topmost entry with this tag ID that lives in the HTML namespace.
        let position = this.tagIDs.lastIndexOf(tagName, this.stackTop);
        while (position > 0 && this.treeAdapter.getNamespaceURI(this.items[position]) !== NS.HTML) {
            position = this.tagIDs.lastIndexOf(tagName, position - 1);
        }
        this.shortenToLength(Math.max(position, 0));
    }

    shortenToLength(idx) {
        while (this.stackTop >= idx) {
            const removed = this.current;
            if (this.tmplCount > 0 && this._isInTemplate()) {
                this.tmplCount -= 1;
            }
            this.stackTop -= 1;
            this._updateCurrentElement();
            // The flag is true only for the last element removed by this call.
            this.handler.onItemPop(removed, this.stackTop < idx);
        }
    }

    popUntilElementPopped(element) {
        this.shortenToLength(Math.max(this._indexOf(element), 0));
    }

    popUntilPopped(tagNames, targetNS) {
        this.shortenToLength(Math.max(this._indexOfTagNames(tagNames, targetNS), 0));
    }

    popUntilNumberedHeaderPopped() {
        this.popUntilPopped(NUMBERED_HEADERS, NS.HTML);
    }

    popUntilTableCellPopped() {
        this.popUntilPopped(TABLE_CELLS, NS.HTML);
    }

    popAllUpToHtmlElement() {
        //NOTE: here we assume that the root <html> element is always first in the open element stack, so
        //we perform this fast stack clean up.
        this.tmplCount = 0;
        this.shortenToLength(1);
    }

    _indexOfTagNames(tagNames, namespace) {
        let position = this.stackTop;
        while (position >= 0) {
            const matches =
                tagNames.has(this.tagIDs[position]) &&
                this.treeAdapter.getNamespaceURI(this.items[position]) === namespace;
            if (matches) {
                return position;
            }
            position -= 1;
        }
        return -1;
    }

    clearBackTo(tagNames, targetNS) {
        // Keep the matching element itself: shorten to just above it.
        this.shortenToLength(this._indexOfTagNames(tagNames, targetNS) + 1);
    }

    clearBackToTableContext() {
        this.clearBackTo(TABLE_CONTEXT, NS.HTML);
    }

    clearBackToTableBodyContext() {
        this.clearBackTo(TABLE_BODY_CONTEXT, NS.HTML);
    }

    clearBackToTableRowContext() {
        this.clearBackTo(TABLE_ROW_CONTEXT, NS.HTML);
    }

    remove(element) {
        const position = this._indexOf(element);
        if (position < 0) {
            return;
        }
        if (position === this.stackTop) {
            this.pop();
            return;
        }
        this.items.splice(position, 1);
        this.tagIDs.splice(position, 1);
        this.stackTop -= 1;
        this._updateCurrentElement();
        this.handler.onItemPop(element, false);
    }

    //Search
    tryPeekProperlyNestedBodyElement() {
        //Properly nested <body> element (should be second element in stack).
        const hasBody = this.stackTop >= 1 && this.tagIDs[1] === TAG_ID.BODY;
        return hasBody ? this.items[1] : null;
    }

    contains(element) {
        return this._indexOf(element) > -1;
    }

    getCommonAncestor(element) {
        const below = this._indexOf(element) - 1;
        return below >= 0 ? this.items[below] : null;
    }

    isRootHtmlElementCurrent() {
        return this.stackTop === 0 && this.tagIDs[0] === TAG_ID.HTML;
    }

    //Element in scope
    hasInDynamicScope(tagName, htmlScope) {
        for (let depth = this.stackTop; depth >= 0; depth--) {
            const id = this.tagIDs[depth];
            const ns = this.treeAdapter.getNamespaceURI(this.items[depth]);
            if (ns === NS.HTML) {
                if (id === tagName) return true;
                if (htmlScope.has(id)) return false;
            } else if (ns === NS.SVG) {
                if (SCOPING_ELEMENTS_SVG.has(id)) return false;
            } else if (ns === NS.MATHML) {
                if (SCOPING_ELEMENTS_MATHML.has(id)) return false;
            }
        }
        return true;
    }

    hasInScope(tagName) {
        return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML);
    }

    hasInListItemScope(tagName) {
        return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_LIST);
    }

    hasInButtonScope(tagName) {
        return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_BUTTON);
    }

    hasNumberedHeaderInScope() {
        for (let depth = this.stackTop; depth >= 0; depth--) {
            const id = this.tagIDs[depth];
            const ns = this.treeAdapter.getNamespaceURI(this.items[depth]);
            if (ns === NS.HTML) {
                if (NUMBERED_HEADERS.has(id)) return true;
                if (SCOPING_ELEMENTS_HTML.has(id)) return false;
            } else if (ns === NS.SVG) {
                if (SCOPING_ELEMENTS_SVG.has(id)) return false;
            } else if (ns === NS.MATHML) {
                if (SCOPING_ELEMENTS_MATHML.has(id)) return false;
            }
        }
        return true;
    }

    hasInTableScope(tagName) {
        for (let depth = this.stackTop; depth >= 0; depth--) {
            // Only HTML-namespace elements take part in table scope.
            if (this.treeAdapter.getNamespaceURI(this.items[depth]) !== NS.HTML) {
                continue;
            }
            const id = this.tagIDs[depth];
            if (id === tagName) return true;
            if (id === TAG_ID.TABLE || id === TAG_ID.HTML) return false;
        }
        return true;
    }

    hasTableBodyContextInTableScope() {
        for (let depth = this.stackTop; depth >= 0; depth--) {
            if (this.treeAdapter.getNamespaceURI(this.items[depth]) !== NS.HTML) {
                continue;
            }
            const id = this.tagIDs[depth];
            if (id === TAG_ID.TBODY || id === TAG_ID.THEAD || id === TAG_ID.TFOOT) return true;
            if (id === TAG_ID.TABLE || id === TAG_ID.HTML) return false;
        }
        return true;
    }

    hasInSelectScope(tagName) {
        for (let depth = this.stackTop; depth >= 0; depth--) {
            if (this.treeAdapter.getNamespaceURI(this.items[depth]) !== NS.HTML) {
                continue;
            }
            const id = this.tagIDs[depth];
            if (id === tagName) return true;
            // Only <option> and <optgroup> may sit between the target and the top.
            if (id !== TAG_ID.OPTION && id !== TAG_ID.OPTGROUP) return false;
        }
        return true;
    }

    //Implied end tags
    generateImpliedEndTags() {
        while (IMPLICIT_END_TAG_REQUIRED.has(this.currentTagId)) {
            this.pop();
        }
    }

    generateImpliedEndTagsThoroughly() {
        while (IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.has(this.currentTagId)) {
            this.pop();
        }
    }

    generateImpliedEndTagsWithExclusion(exclusionId) {
        while (
            this.currentTagId !== exclusionId &&
            IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.has(this.currentTagId)
        ) {
            this.pop();
        }
    }
}
//Const
// Threshold of identical formatting elements used by the "Noah's Ark" check.
const NOAH_ARK_CAPACITY = 3;
// Two-way enum (name -> number and number -> name) of formatting-list entry kinds.
var EntryType;
(function (kinds) {
    kinds.Marker = 0;
    kinds[0] = 'Marker';
    kinds.Element = 1;
    kinds[1] = 'Element';
})(EntryType || (EntryType = {}));
// Single shared marker entry; it is compared by identity.
const MARKER = { type: EntryType.Marker };
//List of formatting elements
/**
 * List of active formatting elements.
 *
 * New entries are added at the FRONT of `entries`, so index 0 is the most
 * recently added entry and markers separate newer entries from older ones.
 */
class FormattingElementList {
    constructor(treeAdapter) {
        this.treeAdapter = treeAdapter;
        this.entries = [];
        this.bookmark = null;
    }

    //Noah Ark's condition
    //OPTIMIZATION: at first we try to find possible candidates for exclusion using
    //lightweight heuristics without thorough attributes check.
    _getNoahArkConditionCandidates(newElement, neAttrs) {
        const expectedAttrCount = neAttrs.length;
        const expectedTagName = this.treeAdapter.getTagName(newElement);
        const expectedNamespace = this.treeAdapter.getNamespaceURI(newElement);
        const found = [];
        let idx = 0;
        for (const entry of this.entries) {
            // Only entries newer than the closest marker are considered.
            if (entry.type === EntryType.Marker) {
                break;
            }
            const candidate = entry.element;
            const sameKind =
                this.treeAdapter.getTagName(candidate) === expectedTagName &&
                this.treeAdapter.getNamespaceURI(candidate) === expectedNamespace;
            if (sameKind) {
                const attrs = this.treeAdapter.getAttrList(candidate);
                if (attrs.length === expectedAttrCount) {
                    found.push({ idx, attrs });
                }
            }
            idx += 1;
        }
        return found;
    }

    _ensureNoahArkCondition(newElement) {
        if (this.entries.length < NOAH_ARK_CAPACITY) {
            return;
        }
        const newAttrs = this.treeAdapter.getAttrList(newElement);
        const candidates = this._getNoahArkConditionCandidates(newElement, newAttrs);
        if (candidates.length < NOAH_ARK_CAPACITY) {
            return;
        }
        //NOTE: build attrs map for the new element, so we can perform fast lookups
        const valueByName = new Map();
        for (const { name, value } of newAttrs) {
            valueByName.set(name, value);
        }
        //NOTE: remove bottommost candidates, until Noah's Ark condition will not be met
        let identicalSoFar = 0;
        for (const { idx, attrs } of candidates) {
            // Attribute counts already match, so a one-directional comparison is enough.
            const identical = attrs.every(({ name, value }) => valueByName.get(name) === value);
            if (!identical) {
                continue;
            }
            identicalSoFar += 1;
            if (identicalSoFar >= NOAH_ARK_CAPACITY) {
                this.entries.splice(idx, 1);
            }
        }
    }

    //Mutations
    insertMarker() {
        this.entries.unshift(MARKER);
    }

    pushElement(element, token) {
        this._ensureNoahArkCondition(element);
        const entry = { type: EntryType.Element, element, token };
        this.entries.unshift(entry);
    }

    insertElementAfterBookmark(element, token) {
        // The new entry takes the bookmark's index; the bookmark shifts one slot later.
        const at = this.entries.indexOf(this.bookmark);
        const entry = { type: EntryType.Element, element, token };
        this.entries.splice(at, 0, entry);
    }

    removeEntry(entry) {
        const at = this.entries.indexOf(entry);
        if (at < 0) {
            return;
        }
        this.entries.splice(at, 1);
    }

    /**
     * Clears the list of formatting elements up to the last marker.
     *
     * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
     */
    clearToLastMarker() {
        const markerAt = this.entries.indexOf(MARKER);
        if (markerAt < 0) {
            // No marker: the whole list is cleared.
            this.entries.length = 0;
            return;
        }
        this.entries.splice(0, markerAt + 1);
    }

    //Search
    getElementEntryInScopeWithTagName(tagName) {
        for (const entry of this.entries) {
            // A marker ends the search: older entries are out of scope.
            if (entry.type === EntryType.Marker) {
                return null;
            }
            if (this.treeAdapter.getTagName(entry.element) === tagName) {
                return entry.type === EntryType.Element ? entry : null;
            }
        }
        return null;
    }

    getElementEntry(element) {
        const isWanted = (entry) => entry.type === EntryType.Element && entry.element === element;
        return this.entries.find(isWanted);
    }
}
/**
 * Default tree adapter: builds and queries a tree made of plain objects.
 *
 * Nodes are identified by `nodeName` (`#document`, `#document-fragment`,
 * `#documentType`, `#comment`, `#text`, or the tag name for elements). Containers
 * keep their children in a `childNodes` array; child nodes point back through
 * `parentNode`.
 */
const defaultTreeAdapter = {
    //Node construction
    /** Creates an empty document node in no-quirks mode. */
    createDocument() {
        return {
            nodeName: '#document',
            mode: DOCUMENT_MODE.NO_QUIRKS,
            childNodes: [],
        };
    },
    /** Creates an empty document fragment node. */
    createDocumentFragment() {
        return {
            nodeName: '#document-fragment',
            childNodes: [],
        };
    },
    /**
     * Creates a detached element node.
     *
     * @param tagName Stored as both `nodeName` and `tagName`.
     * @param namespaceURI Namespace of the element.
     * @param attrs Attribute list; stored by reference, not copied.
     */
    createElement(tagName, namespaceURI, attrs) {
        return {
            nodeName: tagName,
            tagName,
            attrs,
            namespaceURI,
            childNodes: [],
            parentNode: null,
        };
    },
    /** Creates a detached comment node holding `data`. */
    createCommentNode(data) {
        return {
            nodeName: '#comment',
            data,
            parentNode: null,
        };
    },
    /** Creates a detached text node holding `value`. */
    createTextNode(value) {
        return {
            nodeName: '#text',
            value,
            parentNode: null,
        };
    },
    //Tree mutation
    /** Appends `newNode` as the last child of `parentNode`. */
    appendChild(parentNode, newNode) {
        parentNode.childNodes.push(newNode);
        newNode.parentNode = parentNode;
    },
    /**
     * Inserts `newNode` into `parentNode` directly before `referenceNode`.
     *
     * NOTE(review): `referenceNode` is expected to be a child of `parentNode`; if it
     * is not, `indexOf` returns -1 and the node lands before the last child.
     */
    insertBefore(parentNode, newNode, referenceNode) {
        const insertionIdx = parentNode.childNodes.indexOf(referenceNode);
        parentNode.childNodes.splice(insertionIdx, 0, newNode);
        newNode.parentNode = parentNode;
    },
    /** Stores `contentElement` as the `content` of a template element. */
    setTemplateContent(templateElement, contentElement) {
        templateElement.content = contentElement;
    },
    /** Returns the `content` previously stored on a template element. */
    getTemplateContent(templateElement) {
        return templateElement.content;
    },
    /**
     * Sets the document type of `document`: updates the existing `#documentType`
     * child when there is one, otherwise appends a new one.
     */
    setDocumentType(document, name, publicId, systemId) {
        const doctypeNode = document.childNodes.find((node) => node.nodeName === '#documentType');
        if (doctypeNode) {
            doctypeNode.name = name;
            doctypeNode.publicId = publicId;
            doctypeNode.systemId = systemId;
        }
        else {
            const node = {
                nodeName: '#documentType',
                name,
                publicId,
                systemId,
                parentNode: null,
            };
            defaultTreeAdapter.appendChild(document, node);
        }
    },
    /** Stores the document mode on the document node. */
    setDocumentMode(document, mode) {
        document.mode = mode;
    },
    /** Returns the document mode stored on the document node. */
    getDocumentMode(document) {
        return document.mode;
    },
    /**
     * Detaches `node` from its parent. Nodes without a parent are left untouched.
     */
    detachNode(node) {
        if (node.parentNode) {
            const idx = node.parentNode.childNodes.indexOf(node);
            // Guard against a stale `parentNode` pointer: `splice(-1, 1)` would
            // silently delete the parent's last child instead of this node.
            if (idx >= 0) {
                node.parentNode.childNodes.splice(idx, 1);
            }
            node.parentNode = null;
        }
    },
    /**
     * Appends `text` to `parentNode`, merging it into the last child when that child
     * is already a text node.
     */
    insertText(parentNode, text) {
        if (parentNode.childNodes.length > 0) {
            const prevNode = parentNode.childNodes[parentNode.childNodes.length - 1];
            if (defaultTreeAdapter.isTextNode(prevNode)) {
                prevNode.value += text;
                return;
            }
        }
        defaultTreeAdapter.appendChild(parentNode, defaultTreeAdapter.createTextNode(text));
    },
    /**
     * Inserts `text` before `referenceNode`, merging it into the preceding sibling
     * when that sibling is a text node.
     */
    insertTextBefore(parentNode, text, referenceNode) {
        const prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
        if (prevNode && defaultTreeAdapter.isTextNode(prevNode)) {
            prevNode.value += text;
        }
        else {
            defaultTreeAdapter.insertBefore(parentNode, defaultTreeAdapter.createTextNode(text), referenceNode);
        }
    },
    /**
     * Copies onto `recipient` every attribute from `attrs` whose name `recipient`
     * did not have when the call started.
     */
    adoptAttributes(recipient, attrs) {
        // Snapshot of the recipient's attribute names, taken once up front.
        const recipientAttrNames = new Set(recipient.attrs.map((attr) => attr.name));
        for (const attr of attrs) {
            if (!recipientAttrNames.has(attr.name)) {
                recipient.attrs.push(attr);
            }
        }
    },
    //Tree traversing
    /** Returns the first child of `node`, or `undefined` when it has none. */
    getFirstChild(node) {
        return node.childNodes[0];
    },
    /** Returns the live `childNodes` array of `node`. */
    getChildNodes(node) {
        return node.childNodes;
    },
    /** Returns the parent of `node`. */
    getParentNode(node) {
        return node.parentNode;
    },
    /** Returns the live attribute list of `element`. */
    getAttrList(element) {
        return element.attrs;
    },
    //Node data
    /** Returns the tag name of `element`. */
    getTagName(element) {
        return element.tagName;
    },
    /** Returns the namespace URI of `element`. */
    getNamespaceURI(element) {
        return element.namespaceURI;
    },
    /** Returns the text held by a text node. */
    getTextNodeContent(textNode) {
        return textNode.value;
    },
    /** Returns the text held by a comment node. */
    getCommentNodeContent(commentNode) {
        return commentNode.data;
    },
    /** Returns the name of a document type node. */
    getDocumentTypeNodeName(doctypeNode) {
        return doctypeNode.name;
    },
    /** Returns the public identifier of a document type node. */
    getDocumentTypeNodePublicId(doctypeNode) {
        return doctypeNode.publicId;
    },
    /** Returns the system identifier of a document type node. */
    getDocumentTypeNodeSystemId(doctypeNode) {
        return doctypeNode.systemId;
    },
    //Node types
    /** True when `node.nodeName` is `#text`. */
    isTextNode(node) {
        return node.nodeName === '#text';
    },
    /** True when `node.nodeName` is `#comment`. */
    isCommentNode(node) {
        return node.nodeName === '#comment';
    },
    /** True when `node.nodeName` is `#documentType`. */
    isDocumentTypeNode(node) {
        return node.nodeName === '#documentType';
    },
    /** True when `node` has its own `tagName` property. */
    isElementNode(node) {
        return Object.prototype.hasOwnProperty.call(node, 'tagName');
    },
    // Source code location
    /** Replaces the source location stored on `node`. */
    setNodeSourceCodeLocation(node, location) {
        node.sourceCodeLocation = location;
    },
    /** Returns the source location stored on `node`. */
    getNodeSourceCodeLocation(node) {
        return node.sourceCodeLocation;
    },
    /** Merges `endLocation` over the existing source location of `node`. */
    updateNodeSourceCodeLocation(node, endLocation) {
        node.sourceCodeLocation = { ...node.sourceCodeLocation, ...endLocation };
    },
};
//Const
// Doctype name compared against `token.name` by the doctype helpers.
const VALID_DOCTYPE_NAME = 'html';
// The only non-null system identifier `isConforming` accepts.
const VALID_SYSTEM_ID = 'about:legacy-compat';
// System identifier that selects quirks mode (compared after lower-casing).
const QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';
// Lower-case public identifier prefixes that select quirks mode.
const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
    '+//silmaril//dtd html pro v0r11 19970101//',
    '-//as//dtd html 3.0 aswedit + extensions//',
    '-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
    '-//ietf//dtd html 2.0 level 1//',
    '-//ietf//dtd html 2.0 level 2//',
    '-//ietf//dtd html 2.0 strict level 1//',
    '-//ietf//dtd html 2.0 strict level 2//',
    '-//ietf//dtd html 2.0 strict//',
    '-//ietf//dtd html 2.0//',
    '-//ietf//dtd html 2.1e//',
    '-//ietf//dtd html 3.0//',
    '-//ietf//dtd html 3.2 final//',
    '-//ietf//dtd html 3.2//',
    '-//ietf//dtd html 3//',
    '-//ietf//dtd html level 0//',
    '-//ietf//dtd html level 1//',
    '-//ietf//dtd html level 2//',
    '-//ietf//dtd html level 3//',
    '-//ietf//dtd html strict level 0//',
    '-//ietf//dtd html strict level 1//',
    '-//ietf//dtd html strict level 2//',
    '-//ietf//dtd html strict level 3//',
    '-//ietf//dtd html strict//',
    '-//ietf//dtd html//',
    '-//metrius//dtd metrius presentational//',
    '-//microsoft//dtd internet explorer 2.0 html strict//',
    '-//microsoft//dtd internet explorer 2.0 html//',
    '-//microsoft//dtd internet explorer 2.0 tables//',
    '-//microsoft//dtd internet explorer 3.0 html strict//',
    '-//microsoft//dtd internet explorer 3.0 html//',
    '-//microsoft//dtd internet explorer 3.0 tables//',
    '-//netscape comm. corp.//dtd html//',
    // Kept on a single line: a quoted string literal must not contain a line break.
    '-//netscape comm. corp.//dtd strict html//',
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
    '-//sq//dtd html 2.0 hotmetal + extensions//',
    '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
    '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
    '-//spyglass//dtd html 2.0 extended//',
    '-//sun microsystems corp.//dtd hotjava html//',
    '-//sun microsystems corp.//dtd hotjava strict html//',
    '-//w3c//dtd html 3 1995-03-24//',
    '-//w3c//dtd html 3.2 draft//',
    '-//w3c//dtd html 3.2 final//',
    '-//w3c//dtd html 3.2//',
    '-//w3c//dtd html 3.2s draft//',
    '-//w3c//dtd html 4.0 frameset//',
    '-//w3c//dtd html 4.0 transitional//',
    '-//w3c//dtd html experimental 19960712//',
    '-//w3c//dtd html experimental 970421//',
    '-//w3c//dtd w3 html//',
    '-//w3o//dtd w3 html 3.0//',
    '-//webtechs//dtd mozilla html 2.0//',
    '-//webtechs//dtd mozilla html//',
];
// Prefixes that select quirks mode when the doctype has no system identifier.
const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
    ...QUIRKS_MODE_PUBLIC_ID_PREFIXES,
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
];
// Complete (lower-case) public identifiers that select quirks mode.
const QUIRKS_MODE_PUBLIC_IDS = new Set([
    '-//w3o//dtd w3 html strict 3.0//en//',
    '-/w3c/dtd html 4.0 transitional/en',
    'html',
]);
// Prefixes that select limited-quirks mode regardless of the system identifier.
const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = ['-//w3c//dtd xhtml 1.0 frameset//', '-//w3c//dtd xhtml 1.0 transitional//'];
// Prefixes that select limited-quirks mode when a system identifier is present.
const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
    ...LIMITED_QUIRKS_PUBLIC_ID_PREFIXES,
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
];
//Utils
/**
 * Tells whether `publicId` starts with at least one of the given prefixes.
 *
 * @param publicId String to test.
 * @param prefixes Array of candidate prefixes.
 * @returns `true` on the first prefix that matches, `false` when none does.
 */
function hasPrefix(publicId, prefixes) {
    for (const candidate of prefixes) {
        if (publicId.startsWith(candidate)) {
            return true;
        }
    }
    return false;
}
//API
/**
 * Checks whether a doctype token is the conforming one: name `html`, no public
 * identifier, and a system identifier that is either absent (`null`) or
 * `about:legacy-compat`.
 *
 * @param token Doctype token with `name`, `publicId` and `systemId`.
 * @returns `true` when the token conforms.
 */
function isConforming(token) {
    if (token.name !== VALID_DOCTYPE_NAME || token.publicId !== null) {
        return false;
    }
    return token.systemId === null || token.systemId === VALID_SYSTEM_ID;
}
/**
 * Derives the document mode from a doctype token.
 *
 * Quirks mode is chosen for a name other than `html`, for the known quirks system
 * identifier, for a known quirks public identifier, or for a public identifier with
 * a quirks prefix. Limited-quirks mode is chosen for the limited-quirks prefixes.
 * Which prefix list applies depends on whether `systemId` is `null`. Identifiers are
 * compared after lower-casing.
 *
 * @param token Doctype token with `name`, `publicId` and `systemId`.
 * @returns One of the `DOCUMENT_MODE` values.
 */
function getDocumentMode(token) {
    if (token.name !== VALID_DOCTYPE_NAME) {
        return DOCUMENT_MODE.QUIRKS;
    }
    const systemId = token.systemId;
    if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) {
        return DOCUMENT_MODE.QUIRKS;
    }
    const rawPublicId = token.publicId;
    if (rawPublicId === null) {
        return DOCUMENT_MODE.NO_QUIRKS;
    }
    const lowerPublicId = rawPublicId.toLowerCase();
    const lacksSystemId = systemId === null;
    const quirksPrefixes = lacksSystemId
        ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES
        : QUIRKS_MODE_PUBLIC_ID_PREFIXES;
    if (QUIRKS_MODE_PUBLIC_IDS.has(lowerPublicId) || hasPrefix(lowerPublicId, quirksPrefixes)) {
        return DOCUMENT_MODE.QUIRKS;
    }
    const limitedQuirksPrefixes = lacksSystemId
        ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES
        : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES;
    return hasPrefix(lowerPublicId, limitedQuirksPrefixes)
        ? DOCUMENT_MODE.LIMITED_QUIRKS
        : DOCUMENT_MODE.NO_QUIRKS;
}
//MIME types
// MIME types compared against the `encoding` attribute of <annotation-xml>.
const MIME_TYPES = {
    TEXT_HTML: 'text/html',
    APPLICATION_XML: 'application/xhtml+xml',
};
//Attributes
// MathML `definitionurl` attribute name before and after case adjustment.
const DEFINITION_URL_ATTR = 'definitionurl';
const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL';
// Maps lower-cased SVG attribute names to their mixed-case spelling.
const SVG_ATTRS_ADJUSTMENT_MAP = new Map();
for (const svgAttrName of [
    'attributeName',
    'attributeType',
    'baseFrequency',
    'baseProfile',
    'calcMode',
    'clipPathUnits',
    'diffuseConstant',
    'edgeMode',
    'filterUnits',
    'glyphRef',
    'gradientTransform',
    'gradientUnits',
    'kernelMatrix',
    'kernelUnitLength',
    'keyPoints',
    'keySplines',
    'keyTimes',
    'lengthAdjust',
    'limitingConeAngle',
    'markerHeight',
    'markerUnits',
    'markerWidth',
    'maskContentUnits',
    'maskUnits',
    'numOctaves',
    'pathLength',
    'patternContentUnits',
    'patternTransform',
    'patternUnits',
    'pointsAtX',
    'pointsAtY',
    'pointsAtZ',
    'preserveAlpha',
    'preserveAspectRatio',
    'primitiveUnits',
    'refX',
    'refY',
    'repeatCount',
    'repeatDur',
    'requiredExtensions',
    'requiredFeatures',
    'specularConstant',
    'specularExponent',
    'spreadMethod',
    'startOffset',
    'stdDeviation',
    'stitchTiles',
    'surfaceScale',
    'systemLanguage',
    'tableValues',
    'targetX',
    'targetY',
    'textLength',
    'viewBox',
    'viewTarget',
    'xChannelSelector',
    'yChannelSelector',
    'zoomAndPan',
]) {
    SVG_ATTRS_ADJUSTMENT_MAP.set(svgAttrName.toLowerCase(), svgAttrName);
}
// Maps qualified attribute names to their prefix / local name / namespace triple.
const XML_ATTRS_ADJUSTMENT_MAP = new Map()
    .set('xlink:actuate', { prefix: 'xlink', name: 'actuate', namespace: NS.XLINK })
    .set('xlink:arcrole', { prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK })
    .set('xlink:href', { prefix: 'xlink', name: 'href', namespace: NS.XLINK })
    .set('xlink:role', { prefix: 'xlink', name: 'role', namespace: NS.XLINK })
    .set('xlink:show', { prefix: 'xlink', name: 'show', namespace: NS.XLINK })
    .set('xlink:title', { prefix: 'xlink', name: 'title', namespace: NS.XLINK })
    .set('xlink:type', { prefix: 'xlink', name: 'type', namespace: NS.XLINK })
    .set('xml:lang', { prefix: 'xml', name: 'lang', namespace: NS.XML })
    .set('xml:space', { prefix: 'xml', name: 'space', namespace: NS.XML })
    .set('xmlns', { prefix: '', name: 'xmlns', namespace: NS.XMLNS })
    .set('xmlns:xlink', { prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS });
//SVG tag names adjustment map
// Maps lower-cased SVG tag names (as produced by the tokenizer) to their mixed-case
// spelling. `feDropShadow` is part of the HTML Standard's SVG tag-name adjustment
// table and was missing here, so <feDropShadow> kept its lower-case name.
const SVG_TAG_NAMES_ADJUSTMENT_MAP = new Map([
    'altGlyph',
    'altGlyphDef',
    'altGlyphItem',
    'animateColor',
    'animateMotion',
    'animateTransform',
    'clipPath',
    'feBlend',
    'feColorMatrix',
    'feComponentTransfer',
    'feComposite',
    'feConvolveMatrix',
    'feDiffuseLighting',
    'feDisplacementMap',
    'feDistantLight',
    'feDropShadow',
    'feFlood',
    'feFuncA',
    'feFuncB',
    'feFuncG',
    'feFuncR',
    'feGaussianBlur',
    'feImage',
    'feMerge',
    'feMergeNode',
    'feMorphology',
    'feOffset',
    'fePointLight',
    'feSpecularLighting',
    'feSpotLight',
    'feTile',
    'feTurbulence',
    'foreignObject',
    'glyphRef',
    'linearGradient',
    'radialGradient',
    'textPath',
].map((tn) => [tn.toLowerCase(), tn]));
//Tags that causes exit from foreign content
// Tag ids of the start tags that `causesExit` reports unconditionally.
const EXITS_FOREIGN_CONTENT = new Set();
for (const exitingTagId of [
    TAG_ID.B,
    TAG_ID.BIG,
    TAG_ID.BLOCKQUOTE,
    TAG_ID.BODY,
    TAG_ID.BR,
    TAG_ID.CENTER,
    TAG_ID.CODE,
    TAG_ID.DD,
    TAG_ID.DIV,
    TAG_ID.DL,
    TAG_ID.DT,
    TAG_ID.EM,
    TAG_ID.EMBED,
    TAG_ID.H1,
    TAG_ID.H2,
    TAG_ID.H3,
    TAG_ID.H4,
    TAG_ID.H5,
    TAG_ID.H6,
    TAG_ID.HEAD,
    TAG_ID.HR,
    TAG_ID.I,
    TAG_ID.IMG,
    TAG_ID.LI,
    TAG_ID.LISTING,
    TAG_ID.MENU,
    TAG_ID.META,
    TAG_ID.NOBR,
    TAG_ID.OL,
    TAG_ID.P,
    TAG_ID.PRE,
    TAG_ID.RUBY,
    TAG_ID.S,
    TAG_ID.SMALL,
    TAG_ID.SPAN,
    TAG_ID.STRONG,
    TAG_ID.STRIKE,
    TAG_ID.SUB,
    TAG_ID.SUP,
    TAG_ID.TABLE,
    TAG_ID.TT,
    TAG_ID.U,
    TAG_ID.UL,
    TAG_ID.VAR,
]) {
    EXITS_FOREIGN_CONTENT.add(exitingTagId);
}
//Check exit from foreign content
/**
 * Tells whether a start tag forces the parser out of foreign content.
 *
 * That is the case for every tag id listed in `EXITS_FOREIGN_CONTENT`, and for
 * `<font>` when it carries a `color`, `size` or `face` attribute.
 *
 * @param startTagToken Start tag token with `tagID` and `attrs`.
 * @returns `true` when the tag causes an exit.
 */
function causesExit(startTagToken) {
    const tagId = startTagToken.tagID;
    if (tagId === TAG_ID.FONT) {
        for (const { name } of startTagToken.attrs) {
            if (name === ATTRS.COLOR || name === ATTRS.SIZE || name === ATTRS.FACE) {
                return true;
            }
        }
    }
    return EXITS_FOREIGN_CONTENT.has(tagId);
}
//Token adjustments
/**
 * Renames the first `definitionurl` attribute of `token` to `definitionURL`.
 * The token's attribute objects are modified in place.
 */
function adjustTokenMathMLAttrs(token) {
    const definitionUrlAttr = token.attrs.find(({ name }) => name === DEFINITION_URL_ATTR);
    if (definitionUrlAttr) {
        definitionUrlAttr.name = ADJUSTED_DEFINITION_URL_ATTR;
    }
}
/**
 * Restores the mixed-case spelling of every SVG attribute of `token` that has an
 * entry in `SVG_ATTRS_ADJUSTMENT_MAP`. Attribute objects are modified in place.
 */
function adjustTokenSVGAttrs(token) {
    for (const attr of token.attrs) {
        const mixedCaseName = SVG_ATTRS_ADJUSTMENT_MAP.get(attr.name);
        if (mixedCaseName != null) {
            attr.name = mixedCaseName;
        }
    }
}
/**
 * Splits every attribute of `token` that has an entry in `XML_ATTRS_ADJUSTMENT_MAP`
 * into `prefix`, local `name` and `namespace`. Attribute objects are modified in place.
 */
function adjustTokenXMLAttrs(token) {
    for (const attr of token.attrs) {
        const adjustment = XML_ATTRS_ADJUSTMENT_MAP.get(attr.name);
        if (!adjustment) {
            continue;
        }
        const { prefix, name, namespace } = adjustment;
        attr.prefix = prefix;
        attr.name = name;
        attr.namespace = namespace;
    }
}
/**
 * Restores the mixed-case spelling of an SVG tag name listed in
 * `SVG_TAG_NAMES_ADJUSTMENT_MAP` and refreshes `token.tagID` to match.
 */
function adjustTokenSVGTagName(token) {
    const mixedCaseTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
    if (mixedCaseTagName == null) {
        return;
    }
    token.tagName = mixedCaseTagName;
    token.tagID = getTagID(token.tagName);
}
//Integration points
/**
 * True for the MathML elements `mi`, `mo`, `mn`, `ms` and `mtext`.
 *
 * @param tn Tag id of the element.
 * @param ns Namespace URI of the element.
 */
function isMathMLTextIntegrationPoint(tn, ns) {
    if (ns !== NS.MATHML) {
        return false;
    }
    switch (tn) {
        case TAG_ID.MI:
        case TAG_ID.MO:
        case TAG_ID.MN:
        case TAG_ID.MS:
        case TAG_ID.MTEXT:
            return true;
        default:
            return false;
    }
}
/**
 * True for MathML `annotation-xml` whose first `encoding` attribute is (case
 * insensitively) `text/html` or `application/xhtml+xml`, and for the SVG elements
 * `foreignObject`, `desc` and `title`.
 *
 * @param tn Tag id of the element.
 * @param ns Namespace URI of the element.
 * @param attrs Attribute list of the element.
 */
function isHtmlIntegrationPoint(tn, ns, attrs) {
    if (ns === NS.MATHML && tn === TAG_ID.ANNOTATION_XML) {
        // Only the first `encoding` attribute decides the outcome.
        const encodingAttr = attrs.find((attr) => attr.name === ATTRS.ENCODING);
        if (encodingAttr) {
            const encoding = encodingAttr.value.toLowerCase();
            return encoding === MIME_TYPES.TEXT_HTML || encoding === MIME_TYPES.APPLICATION_XML;
        }
    }
    if (ns !== NS.SVG) {
        return false;
    }
    return tn === TAG_ID.FOREIGN_OBJECT || tn === TAG_ID.DESC || tn === TAG_ID.TITLE;
}
/**
 * Tells whether an element is an integration point.
 *
 * @param tn Tag id of the element.
 * @param ns Namespace URI of the element.
 * @param attrs Attribute list of the element.
 * @param foreignNS Optional filter: `NS.HTML` tests only the HTML integration point
 *     rule, `NS.MATHML` only the MathML text one; a falsy value tests both.
 */
function isIntegrationPoint(tn, ns, attrs, foreignNS) {
    const anyKind = !foreignNS;
    if ((anyKind || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) {
        return true;
    }
    return (anyKind || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns);
}
//Misc constants
// The literal 'hidden'. NOTE(review): presumably compared against an <input>
// element's `type` attribute; the usage is outside this chunk, confirm there.
const HIDDEN_INPUT_TYPE = 'hidden';
//Adoption agency loops iteration count
// Iteration limits (8 outer, 3 inner) for the two loops of the adoption agency
// algorithm; the loops themselves are defined outside this chunk.
const AA_OUTER_LOOP_ITER = 8;
const AA_INNER_LOOP_ITER = 3;
//Insertion modes
/**
 * Insertion modes of the parser as a two-way lookup: `InsertionMode.NAME` gives
 * the numeric value and `InsertionMode[value]` gives the name back. Values are the
 * positions in the list below, starting at 0.
 */
var InsertionMode;
(function (modes) {
    [
        'INITIAL',
        'BEFORE_HTML',
        'BEFORE_HEAD',
        'IN_HEAD',
        'IN_HEAD_NO_SCRIPT',
        'AFTER_HEAD',
        'IN_BODY',
        'TEXT',
        'IN_TABLE',
        'IN_TABLE_TEXT',
        'IN_CAPTION',
        'IN_COLUMN_GROUP',
        'IN_TABLE_BODY',
        'IN_ROW',
        'IN_CELL',
        'IN_SELECT',
        'IN_SELECT_IN_TABLE',
        'IN_TEMPLATE',
        'AFTER_BODY',
        'IN_FRAMESET',
        'AFTER_FRAMESET',
        'AFTER_AFTER_BODY',
        'AFTER_AFTER_FRAMESET',
    ].forEach((modeName, modeValue) => {
        modes[modeName] = modeValue;
        modes[modeValue] = modeName;
    });
})(InsertionMode || (InsertionMode = {}));
// Fallback location (every field -1) used by `Parser#_err` when a token carries no location.
const BASE_LOC = {
    startLine: -1,
    startCol: -1,
    startOffset: -1,
    endLine: -1,
    endCol: -1,
    endOffset: -1,
};
// Tag ids of <table>, <tbody>, <tfoot>, <thead> and <tr>.
const TABLE_STRUCTURE_TAGS = new Set([
    TAG_ID.TABLE,
    TAG_ID.TBODY,
    TAG_ID.TFOOT,
    TAG_ID.THEAD,
    TAG_ID.TR,
]);
// Option defaults; the `Parser` constructor spreads caller options over these.
const defaultParserOptions = {
    scriptingEnabled: true,
    sourceCodeLocationInfo: false,
    treeAdapter: defaultTreeAdapter,
    onParseError: null,
};
//Parser
/**
 * HTML parser. Owns a `Tokenizer` (created with this parser as its second argument),
 * a list of active formatting elements and a stack of open elements, and builds the
 * resulting tree exclusively through `options.treeAdapter`.
 */
class Parser {
    /**
     * @param options Parser options; merged over `defaultParserOptions`.
     * @param document Node to parse into; when `null`/`undefined` a new document is
     *     created through the tree adapter.
     * @param fragmentContext Context element for fragment parsing, or `null`.
     * @param scriptHandler Stored as-is on the instance; not used in the constructor.
     */
    constructor(options, document, 
    /** @internal */
    fragmentContext = null, 
    /** @internal */
    scriptHandler = null) {
        this.fragmentContext = fragmentContext;
        this.scriptHandler = scriptHandler;
        this.currentToken = null;
        this.stopped = false;
        /** @internal */
        this.insertionMode = InsertionMode.INITIAL;
        /** @internal */
        this.originalInsertionMode = InsertionMode.INITIAL;
        /** @internal */
        this.headElement = null;
        /** @internal */
        this.formElement = null;
        /** Indicates that the current node is not an element in the HTML namespace */
        this.currentNotInHTML = false;
        /**
         * The template insertion mode stack is maintained from the left.
         * Ie. the topmost element will always have index 0.
         *
         * @internal
         */
        this.tmplInsertionModeStack = [];
        /** @internal */
        this.pendingCharacterTokens = [];
        /** @internal */
        this.hasNonWhitespacePendingCharacterToken = false;
        /** @internal */
        this.framesetOk = true;
        /** @internal */
        this.skipNextNewLine = false;
        /** @internal */
        this.fosterParentingEnabled = false;
        // Shallow copy, so the flag forced below never leaks into the caller's object.
        this.options = {
            ...defaultParserOptions,
            ...options,
        };
        this.treeAdapter = this.options.treeAdapter;
        this.onParseError = this.options.onParseError;
        // Always enable location info if we report parse errors.
        if (this.onParseError) {
            this.options.sourceCodeLocationInfo = true;
        }
        this.document = document !== null && document !== void 0 ? document : this.treeAdapter.createDocument();
        this.tokenizer = new Tokenizer(this.options, this);
        this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
        this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : TAG_ID.UNKNOWN;
        // Runs before `openElements` exists; `_setContextModes` only needs the
        // document, the tree adapter and the tokenizer, all assigned above.
        this._setContextModes(fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : this.document, this.fragmentContextID);
        this.openElements = new OpenElementStack(this.document, this.treeAdapter, this);
    }
    // API
static parse(html, options) { const parser = new this(options); parser.tokenizer.write(html, true); return parser.document; } static getFragmentParser(fragmentContext, options) { const opts = { ...defaultParserOptions, ...options, }; //NOTE: use a <template> element as the fragment context if no context element was provided,
//so we will parse in a "forgiving" manner
fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : (fragmentContext = opts.treeAdapter.createElement(TAG_NAMES.TEMPLATE, NS.HTML, [])); //NOTE: create a fake element which will be used as the `document` for fragment parsing.
//This is important for jsdom, where a new `document` cannot be created. This led to
//fragment parsing messing with the main `document`.
const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []); const parser = new this(opts, documentMock, fragmentContext); if (parser.fragmentContextID === TAG_ID.TEMPLATE) { parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE); } parser._initTokenizerForFragmentParsing(); parser._insertFakeRootElement(); parser._resetInsertionMode(); parser._findFormInFragmentContext(); return parser; } getFragment() { const rootElement = this.treeAdapter.getFirstChild(this.document); const fragment = this.treeAdapter.createDocumentFragment(); this._adoptNodes(rootElement, fragment); return fragment; } //Errors
/** @internal */ _err(token, code, beforeToken) { var _a; if (!this.onParseError) return; const loc = (_a = token.location) !== null && _a !== void 0 ? _a : BASE_LOC; const err = { code, startLine: loc.startLine, startCol: loc.startCol, startOffset: loc.startOffset, endLine: beforeToken ? loc.startLine : loc.endLine, endCol: beforeToken ? loc.startCol : loc.endCol, endOffset: beforeToken ? loc.startOffset : loc.endOffset, }; this.onParseError(err); } //Stack events
/** @internal */ onItemPush(node, tid, isTop) { var _a, _b; (_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node); if (isTop && this.openElements.stackTop > 0) this._setContextModes(node, tid); } /** @internal */ onItemPop(node, isTop) { var _a, _b; if (this.options.sourceCodeLocationInfo) { this._setEndLocation(node, this.currentToken); } (_b = (_a = this.treeAdapter).onItemPop) === null || _b === void 0 ? void 0 : _b.call(_a, node, this.openElements.current); if (isTop) { let current; let currentTagId; if (this.openElements.stackTop === 0 && this.fragmentContext) { current = this.fragmentContext; currentTagId = this.fragmentContextID; } else { ({ current, currentTagId } = this.openElements); } this._setContextModes(current, currentTagId); } } _setContextModes(current, tid) { const isHTML = current === this.document || this.treeAdapter.getNamespaceURI(current) === NS.HTML; this.currentNotInHTML = !isHTML; this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current); } /** @protected */ _switchToTextParsing(currentToken, nextTokenizerState) { this._insertElement(currentToken, NS.HTML); this.tokenizer.state = nextTokenizerState; this.originalInsertionMode = this.insertionMode; this.insertionMode = InsertionMode.TEXT; } switchToPlaintextParsing() { this.insertionMode = InsertionMode.TEXT; this.originalInsertionMode = InsertionMode.IN_BODY; this.tokenizer.state = TokenizerMode.PLAINTEXT; } //Fragment parsing
/** @protected */ _getAdjustedCurrentElement() { return this.openElements.stackTop === 0 && this.fragmentContext ? this.fragmentContext : this.openElements.current; } /** @protected */ _findFormInFragmentContext() { let node = this.fragmentContext; while (node) { if (this.treeAdapter.getTagName(node) === TAG_NAMES.FORM) { this.formElement = node; break; } node = this.treeAdapter.getParentNode(node); } } _initTokenizerForFragmentParsing() { if (!this.fragmentContext || this.treeAdapter.getNamespaceURI(this.fragmentContext) !== NS.HTML) { return; } switch (this.fragmentContextID) { case TAG_ID.TITLE: case TAG_ID.TEXTAREA: { this.tokenizer.state = TokenizerMode.RCDATA; break; } case TAG_ID.STYLE: case TAG_ID.XMP: case TAG_ID.IFRAME: case TAG_ID.NOEMBED: case TAG_ID.NOFRAMES: case TAG_ID.NOSCRIPT: { this.tokenizer.state = TokenizerMode.RAWTEXT; break; } case TAG_ID.SCRIPT: { this.tokenizer.state = TokenizerMode.SCRIPT_DATA; break; } case TAG_ID.PLAINTEXT: { this.tokenizer.state = TokenizerMode.PLAINTEXT; break; } // Do nothing
} } //Tree mutation
/** @protected */ _setDocumentType(token) { const name = token.name || ''; const publicId = token.publicId || ''; const systemId = token.systemId || ''; this.treeAdapter.setDocumentType(this.document, name, publicId, systemId); if (token.location) { const documentChildren = this.treeAdapter.getChildNodes(this.document); const docTypeNode = documentChildren.find((node) => this.treeAdapter.isDocumentTypeNode(node)); if (docTypeNode) { this.treeAdapter.setNodeSourceCodeLocation(docTypeNode, token.location); } } } /** @protected */ _attachElementToTree(element, location) { if (this.options.sourceCodeLocationInfo) { const loc = location && { ...location, startTag: location, }; this.treeAdapter.setNodeSourceCodeLocation(element, loc); } if (this._shouldFosterParentOnInsertion()) { this._fosterParentElement(element); } else { const parent = this.openElements.currentTmplContentOrNode; this.treeAdapter.appendChild(parent, element); } } /** * For self-closing tags. Add an element to the tree, but skip adding it * to the stack. 
*/ /** @protected */ _appendElement(token, namespaceURI) { const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs); this._attachElementToTree(element, token.location); } /** @protected */ _insertElement(token, namespaceURI) { const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs); this._attachElementToTree(element, token.location); this.openElements.push(element, token.tagID); } /** @protected */ _insertFakeElement(tagName, tagID) { const element = this.treeAdapter.createElement(tagName, NS.HTML, []); this._attachElementToTree(element, null); this.openElements.push(element, tagID); } /** @protected */ _insertTemplate(token) { const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs); const content = this.treeAdapter.createDocumentFragment(); this.treeAdapter.setTemplateContent(tmpl, content); this._attachElementToTree(tmpl, token.location); this.openElements.push(tmpl, token.tagID); if (this.options.sourceCodeLocationInfo) this.treeAdapter.setNodeSourceCodeLocation(content, null); } /** @protected */ _insertFakeRootElement() { const element = this.treeAdapter.createElement(TAG_NAMES.HTML, NS.HTML, []); if (this.options.sourceCodeLocationInfo) this.treeAdapter.setNodeSourceCodeLocation(element, null); this.treeAdapter.appendChild(this.openElements.current, element); this.openElements.push(element, TAG_ID.HTML); } /** @protected */ _appendCommentNode(token, parent) { const commentNode = this.treeAdapter.createCommentNode(token.data); this.treeAdapter.appendChild(parent, commentNode); if (this.options.sourceCodeLocationInfo) { this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location); } } /** @protected */ _insertCharacters(token) { let parent; let beforeElement; if (this._shouldFosterParentOnInsertion()) { ({ parent, beforeElement } = this._findFosterParentingLocation()); if (beforeElement) { this.treeAdapter.insertTextBefore(parent, token.chars, beforeElement); } 
else { this.treeAdapter.insertText(parent, token.chars); } } else { parent = this.openElements.currentTmplContentOrNode; this.treeAdapter.insertText(parent, token.chars); } if (!token.location) return; const siblings = this.treeAdapter.getChildNodes(parent); const textNodeIdx = beforeElement ? siblings.lastIndexOf(beforeElement) : siblings.length; const textNode = siblings[textNodeIdx - 1]; //NOTE: if we have a location assigned by another token, then just update the end position
const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode); if (tnLoc) { const { endLine, endCol, endOffset } = token.location; this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset }); } else if (this.options.sourceCodeLocationInfo) { this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location); } } /** @protected */ _adoptNodes(donor, recipient) { for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) { this.treeAdapter.detachNode(child); this.treeAdapter.appendChild(recipient, child); } } /** @protected */ _setEndLocation(element, closingToken) { if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) { const ctLoc = closingToken.location; const tn = this.treeAdapter.getTagName(element); const endLoc = // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
// tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
closingToken.type === TokenType.END_TAG && tn === closingToken.tagName ? { endTag: { ...ctLoc }, endLine: ctLoc.endLine, endCol: ctLoc.endCol, endOffset: ctLoc.endOffset, } : { endLine: ctLoc.startLine, endCol: ctLoc.startCol, endOffset: ctLoc.startOffset, }; this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc); } } //Token processing
shouldProcessStartTagTokenInForeignContent(token) { // Check that neither current === document, or ns === NS.HTML
if (!this.currentNotInHTML) return false; let current; let currentTagId; if (this.openElements.stackTop === 0 && this.fragmentContext) { current = this.fragmentContext; currentTagId = this.fragmentContextID; } else { ({ current, currentTagId } = this.openElements); } if (token.tagID === TAG_ID.SVG && this.treeAdapter.getTagName(current) === TAG_NAMES.ANNOTATION_XML && this.treeAdapter.getNamespaceURI(current) === NS.MATHML) { return false; } return ( // Check that `current` is not an integration point for HTML or MathML elements.
this.tokenizer.inForeignNode || // If it _is_ an integration point, then we might have to check that it is not an HTML
// integration point.
((token.tagID === TAG_ID.MGLYPH || token.tagID === TAG_ID.MALIGNMARK) && !this._isIntegrationPoint(currentTagId, current, NS.HTML))); } /** @protected */ _processToken(token) { switch (token.type) { case TokenType.CHARACTER: { this.onCharacter(token); break; } case TokenType.NULL_CHARACTER: { this.onNullCharacter(token); break; } case TokenType.COMMENT: { this.onComment(token); break; } case TokenType.DOCTYPE: { this.onDoctype(token); break; } case TokenType.START_TAG: { this._processStartTag(token); break; } case TokenType.END_TAG: { this.onEndTag(token); break; } case TokenType.EOF: { this.onEof(token); break; } case TokenType.WHITESPACE_CHARACTER: { this.onWhitespaceCharacter(token); break; } } } //Integration points
/** @protected */ _isIntegrationPoint(tid, element, foreignNS) { const ns = this.treeAdapter.getNamespaceURI(element); const attrs = this.treeAdapter.getAttrList(element); return isIntegrationPoint(tid, ns, attrs, foreignNS); } //Active formatting elements reconstruction
/** @protected */ _reconstructActiveFormattingElements() { const listLength = this.activeFormattingElements.entries.length; if (listLength) { const endIndex = this.activeFormattingElements.entries.findIndex((entry) => entry.type === EntryType.Marker || this.openElements.contains(entry.element)); const unopenIdx = endIndex < 0 ? listLength - 1 : endIndex - 1; for (let i = unopenIdx; i >= 0; i--) { const entry = this.activeFormattingElements.entries[i]; this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element)); entry.element = this.openElements.current; } } } //Close elements
/** @protected */ _closeTableCell() { this.openElements.generateImpliedEndTags(); this.openElements.popUntilTableCellPopped(); this.activeFormattingElements.clearToLastMarker(); this.insertionMode = InsertionMode.IN_ROW; } /** @protected */ _closePElement() { this.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.P); this.openElements.popUntilTagNamePopped(TAG_ID.P); } //Insertion modes
/** @protected */ _resetInsertionMode() { for (let i = this.openElements.stackTop; i >= 0; i--) { //Insertion mode reset map
switch (i === 0 && this.fragmentContext ? this.fragmentContextID : this.openElements.tagIDs[i]) { case TAG_ID.TR: { this.insertionMode = InsertionMode.IN_ROW; return; } case TAG_ID.TBODY: case TAG_ID.THEAD: case TAG_ID.TFOOT: { this.insertionMode = InsertionMode.IN_TABLE_BODY; return; } case TAG_ID.CAPTION: { this.insertionMode = InsertionMode.IN_CAPTION; return; } case TAG_ID.COLGROUP: { this.insertionMode = InsertionMode.IN_COLUMN_GROUP; return; } case TAG_ID.TABLE: { this.insertionMode = InsertionMode.IN_TABLE; return; } case TAG_ID.BODY: { this.insertionMode = InsertionMode.IN_BODY; return; } case TAG_ID.FRAMESET: { this.insertionMode = InsertionMode.IN_FRAMESET; return; } case TAG_ID.SELECT: { this._resetInsertionModeForSelect(i); return; } case TAG_ID.TEMPLATE: { this.insertionMode = this.tmplInsertionModeStack[0]; return; } case TAG_ID.HTML: { this.insertionMode = this.headElement ? InsertionMode.AFTER_HEAD : InsertionMode.BEFORE_HEAD; return; } case TAG_ID.TD: case TAG_ID.TH: { if (i > 0) { this.insertionMode = InsertionMode.IN_CELL; return; } break; } case TAG_ID.HEAD: { if (i > 0) { this.insertionMode = InsertionMode.IN_HEAD; return; } break; } } } this.insertionMode = InsertionMode.IN_BODY; } /** @protected */ _resetInsertionModeForSelect(selectIdx) { if (selectIdx > 0) { for (let i = selectIdx - 1; i > 0; i--) { const tn = this.openElements.tagIDs[i]; if (tn === TAG_ID.TEMPLATE) { break; } else if (tn === TAG_ID.TABLE) { this.insertionMode = InsertionMode.IN_SELECT_IN_TABLE; return; } } } this.insertionMode = InsertionMode.IN_SELECT; } //Foster parenting
/** @protected */ _isElementCausesFosterParenting(tn) { return TABLE_STRUCTURE_TAGS.has(tn); } /** @protected */ _shouldFosterParentOnInsertion() { return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId); } /** @protected */ _findFosterParentingLocation() { for (let i = this.openElements.stackTop; i >= 0; i--) { const openElement = this.openElements.items[i]; switch (this.openElements.tagIDs[i]) { case TAG_ID.TEMPLATE: { if (this.treeAdapter.getNamespaceURI(openElement) === NS.HTML) { return { parent: this.treeAdapter.getTemplateContent(openElement), beforeElement: null }; } break; } case TAG_ID.TABLE: { const parent = this.treeAdapter.getParentNode(openElement); if (parent) { return { parent, beforeElement: openElement }; } return { parent: this.openElements.items[i - 1], beforeElement: null }; } // Do nothing
} } return { parent: this.openElements.items[0], beforeElement: null }; } /** @protected */ _fosterParentElement(element) { const location = this._findFosterParentingLocation(); if (location.beforeElement) { this.treeAdapter.insertBefore(location.parent, element, location.beforeElement); } else { this.treeAdapter.appendChild(location.parent, element); } } //Special elements
/** @protected */ _isSpecialElement(element, id) { const ns = this.treeAdapter.getNamespaceURI(element); return SPECIAL_ELEMENTS[ns].has(id); } /** @internal */ onCharacter(token) { this.skipNextNewLine = false; if (this.tokenizer.inForeignNode) { characterInForeignContent(this, token); return; } switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { tokenBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { tokenBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { tokenInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { tokenInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { tokenAfterHead(this, token); break; } case InsertionMode.IN_BODY: case InsertionMode.IN_CAPTION: case InsertionMode.IN_CELL: case InsertionMode.IN_TEMPLATE: { characterInBody(this, token); break; } case InsertionMode.TEXT: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: { this._insertCharacters(token); break; } case InsertionMode.IN_TABLE: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: { characterInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { characterInTableText(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { tokenInColumnGroup(this, token); break; } case InsertionMode.AFTER_BODY: { tokenAfterBody(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { tokenAfterAfterBody(this, token); break; } // Do nothing
} } /** @internal */ onNullCharacter(token) { this.skipNextNewLine = false; if (this.tokenizer.inForeignNode) { nullCharacterInForeignContent(this, token); return; } switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { tokenBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { tokenBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { tokenInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { tokenInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { tokenAfterHead(this, token); break; } case InsertionMode.TEXT: { this._insertCharacters(token); break; } case InsertionMode.IN_TABLE: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: { characterInTable(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { tokenInColumnGroup(this, token); break; } case InsertionMode.AFTER_BODY: { tokenAfterBody(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { tokenAfterAfterBody(this, token); break; } // Do nothing
} } /** @internal */ onComment(token) { this.skipNextNewLine = false; if (this.currentNotInHTML) { appendComment(this, token); return; } switch (this.insertionMode) { case InsertionMode.INITIAL: case InsertionMode.BEFORE_HTML: case InsertionMode.BEFORE_HEAD: case InsertionMode.IN_HEAD: case InsertionMode.IN_HEAD_NO_SCRIPT: case InsertionMode.AFTER_HEAD: case InsertionMode.IN_BODY: case InsertionMode.IN_TABLE: case InsertionMode.IN_CAPTION: case InsertionMode.IN_COLUMN_GROUP: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: case InsertionMode.IN_CELL: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: case InsertionMode.IN_TEMPLATE: case InsertionMode.IN_FRAMESET: case InsertionMode.AFTER_FRAMESET: { appendComment(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.AFTER_BODY: { appendCommentToRootHtmlElement(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: case InsertionMode.AFTER_AFTER_FRAMESET: { appendCommentToDocument(this, token); break; } // Do nothing
} } /** @internal */ onDoctype(token) { this.skipNextNewLine = false; switch (this.insertionMode) { case InsertionMode.INITIAL: { doctypeInInitialMode(this, token); break; } case InsertionMode.BEFORE_HEAD: case InsertionMode.IN_HEAD: case InsertionMode.IN_HEAD_NO_SCRIPT: case InsertionMode.AFTER_HEAD: { this._err(token, ERR.misplacedDoctype); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } // Do nothing
} } /** @internal */ onStartTag(token) { this.skipNextNewLine = false; this.currentToken = token; this._processStartTag(token); if (token.selfClosing && !token.ackSelfClosing) { this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus); } } /** * Processes a given start tag. * * `onStartTag` checks if a self-closing tag was recognized. When a token * is moved inbetween multiple insertion modes, this check for self-closing * could lead to false positives. To avoid this, `_processStartTag` is used * for nested calls. * * @param token The token to process. * @protected */ _processStartTag(token) { if (this.shouldProcessStartTagTokenInForeignContent(token)) { startTagInForeignContent(this, token); } else { this._startTagOutsideForeignContent(token); } } /** @protected */ _startTagOutsideForeignContent(token) { switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { startTagBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { startTagBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { startTagInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { startTagInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { startTagAfterHead(this, token); break; } case InsertionMode.IN_BODY: { startTagInBody(this, token); break; } case InsertionMode.IN_TABLE: { startTagInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.IN_CAPTION: { startTagInCaption(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { startTagInColumnGroup(this, token); break; } case InsertionMode.IN_TABLE_BODY: { startTagInTableBody(this, token); break; } case InsertionMode.IN_ROW: { startTagInRow(this, token); break; } case InsertionMode.IN_CELL: { startTagInCell(this, token); break; } case InsertionMode.IN_SELECT: { startTagInSelect(this, token); break; } case 
InsertionMode.IN_SELECT_IN_TABLE: { startTagInSelectInTable(this, token); break; } case InsertionMode.IN_TEMPLATE: { startTagInTemplate(this, token); break; } case InsertionMode.AFTER_BODY: { startTagAfterBody(this, token); break; } case InsertionMode.IN_FRAMESET: { startTagInFrameset(this, token); break; } case InsertionMode.AFTER_FRAMESET: { startTagAfterFrameset(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { startTagAfterAfterBody(this, token); break; } case InsertionMode.AFTER_AFTER_FRAMESET: { startTagAfterAfterFrameset(this, token); break; } // Do nothing
} } /** @internal */ onEndTag(token) { this.skipNextNewLine = false; this.currentToken = token; if (this.currentNotInHTML) { endTagInForeignContent(this, token); } else { this._endTagOutsideForeignContent(token); } } /** @protected */ _endTagOutsideForeignContent(token) { switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { endTagBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { endTagBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { endTagInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { endTagInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { endTagAfterHead(this, token); break; } case InsertionMode.IN_BODY: { endTagInBody(this, token); break; } case InsertionMode.TEXT: { endTagInText(this, token); break; } case InsertionMode.IN_TABLE: { endTagInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.IN_CAPTION: { endTagInCaption(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { endTagInColumnGroup(this, token); break; } case InsertionMode.IN_TABLE_BODY: { endTagInTableBody(this, token); break; } case InsertionMode.IN_ROW: { endTagInRow(this, token); break; } case InsertionMode.IN_CELL: { endTagInCell(this, token); break; } case InsertionMode.IN_SELECT: { endTagInSelect(this, token); break; } case InsertionMode.IN_SELECT_IN_TABLE: { endTagInSelectInTable(this, token); break; } case InsertionMode.IN_TEMPLATE: { endTagInTemplate(this, token); break; } case InsertionMode.AFTER_BODY: { endTagAfterBody(this, token); break; } case InsertionMode.IN_FRAMESET: { endTagInFrameset(this, token); break; } case InsertionMode.AFTER_FRAMESET: { endTagAfterFrameset(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { tokenAfterAfterBody(this, token); break; } // Do nothing
} } /** @internal */ onEof(token) { switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { tokenBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { tokenBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { tokenInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { tokenInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { tokenAfterHead(this, token); break; } case InsertionMode.IN_BODY: case InsertionMode.IN_TABLE: case InsertionMode.IN_CAPTION: case InsertionMode.IN_COLUMN_GROUP: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: case InsertionMode.IN_CELL: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: { eofInBody(this, token); break; } case InsertionMode.TEXT: { eofInText(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.IN_TEMPLATE: { eofInTemplate(this, token); break; } case InsertionMode.AFTER_BODY: case InsertionMode.IN_FRAMESET: case InsertionMode.AFTER_FRAMESET: case InsertionMode.AFTER_AFTER_BODY: case InsertionMode.AFTER_AFTER_FRAMESET: { stopParsing(this, token); break; } // Do nothing
} } /** @internal */ onWhitespaceCharacter(token) { if (this.skipNextNewLine) { this.skipNextNewLine = false; if (token.chars.charCodeAt(0) === CODE_POINTS.LINE_FEED) { if (token.chars.length === 1) { return; } token.chars = token.chars.substr(1); } } if (this.tokenizer.inForeignNode) { this._insertCharacters(token); return; } switch (this.insertionMode) { case InsertionMode.IN_HEAD: case InsertionMode.IN_HEAD_NO_SCRIPT: case InsertionMode.AFTER_HEAD: case InsertionMode.TEXT: case InsertionMode.IN_COLUMN_GROUP: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: case InsertionMode.IN_FRAMESET: case InsertionMode.AFTER_FRAMESET: { this._insertCharacters(token); break; } case InsertionMode.IN_BODY: case InsertionMode.IN_CAPTION: case InsertionMode.IN_CELL: case InsertionMode.IN_TEMPLATE: case InsertionMode.AFTER_BODY: case InsertionMode.AFTER_AFTER_BODY: case InsertionMode.AFTER_AFTER_FRAMESET: { whitespaceCharacterInBody(this, token); break; } case InsertionMode.IN_TABLE: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: { characterInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { whitespaceCharacterInTableText(this, token); break; } // Do nothing
} } } //Adoption agency algorithm
//(see: https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm)
//------------------------------------------------------------------
//Steps 5-8 of the algorithm
/**
 * Looks up the active formatting entry the adoption agency algorithm
 * should work on for `token`.
 *
 * - No entry in scope for the tag name: the token is handed to
 *   `genericEndTagInBody` and the (falsy) lookup result is returned.
 * - Entry found but its element is not on the stack of open elements: the
 *   entry is removed from the active formatting list and `null` is returned.
 * - Entry found but the tag is not in scope: `null` is returned.
 *
 * @param p Parser instance.
 * @param token Token that triggered the algorithm.
 * @returns The usable entry, or a falsy value when there is none.
 */
function aaObtainFormattingElementEntry(p, token) {
  const entry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
  if (!entry) {
    genericEndTagInBody(p, token);
    return entry;
  }
  if (!p.openElements.contains(entry.element)) {
    p.activeFormattingElements.removeEntry(entry);
    return null;
  }
  return p.openElements.hasInScope(token.tagID) ? entry : null;
}
//Steps 9 and 10 of the algorithm
/**
 * Searches the stack of open elements, from the top down to the formatting
 * element, for special elements and keeps the last one seen (the one
 * closest to the formatting element).
 *
 * When none is found the stack is shortened to the index where the scan
 * stopped (clamped to 0) and the entry is removed from the active
 * formatting list.
 *
 * @param p Parser instance.
 * @param formattingElementEntry Active formatting entry being processed.
 * @returns The furthest block, or `null` when there is none.
 */
function aaObtainFurthestBlock(p, formattingElementEntry) {
  const { openElements } = p;
  let furthestBlock = null;
  let idx = openElements.stackTop;
  while (idx >= 0) {
    const candidate = openElements.items[idx];
    if (candidate === formattingElementEntry.element) {
      break;
    }
    if (p._isSpecialElement(candidate, openElements.tagIDs[idx])) {
      furthestBlock = candidate;
    }
    idx--;
  }
  if (furthestBlock) {
    return furthestBlock;
  }
  openElements.shortenToLength(Math.max(idx, 0));
  p.activeFormattingElements.removeEntry(formattingElementEntry);
  return furthestBlock;
}
//Step 13 of the algorithm
/**
 * Inner loop of the adoption agency algorithm.
 *
 * Starting from the result of `getCommonAncestor(furthestBlock)` and
 * following `getCommonAncestor` until `formattingElement` is reached
 * (presumably this walks down the stack of open elements; confirm against
 * `getCommonAncestor`), each visited element is either dropped from the
 * stack of open elements or recreated and made the new parent of the node
 * processed before it.
 *
 * @param p Parser instance.
 * @param furthestBlock Furthest block found for the formatting element.
 * @param formattingElement Element at which the walk stops.
 * @returns The last node that was re-parented (`furthestBlock` if none was).
 */
function aaInnerLoop(p, furthestBlock, formattingElement) {
  let lastElement = furthestBlock;
  let nextElement = p.openElements.getCommonAncestor(furthestBlock);
  for (let i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) {
    //NOTE: store the next element for the next loop iteration (it may be deleted from the stack by step 9.5)
    nextElement = p.openElements.getCommonAncestor(element);
    const elementEntry = p.activeFormattingElements.getElementEntry(element);
    // An element that still has an active formatting entry is given up on
    // once the iteration count reaches AA_INNER_LOOP_ITER.
    const counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER;
    const shouldRemoveFromOpenElements = !elementEntry || counterOverflow;
    if (shouldRemoveFromOpenElements) {
      if (counterOverflow) {
        p.activeFormattingElements.removeEntry(elementEntry);
      }
      p.openElements.remove(element);
    } else {
      // `element` is rebound to the fresh copy; the loop's update clause
      // resets it to `nextElement` before the next pass.
      element = aaRecreateElementFromEntry(p, elementEntry);
      // Only the first recreated element moves the bookmark.
      if (lastElement === furthestBlock) {
        p.activeFormattingElements.bookmark = elementEntry;
      }
      p.treeAdapter.detachNode(lastElement);
      p.treeAdapter.appendChild(element, lastElement);
      lastElement = element;
    }
  }
  return lastElement;
}
//Step 13.7 of the algorithm
/**
 * Creates a fresh element from an active formatting entry's token, in the
 * namespace of the entry's current element, and swaps it in for the old
 * element both on the stack of open elements and in the entry itself.
 *
 * @param p Parser instance.
 * @param elementEntry Active formatting entry; its `element` is replaced.
 * @returns The newly created element.
 */
function aaRecreateElementFromEntry(p, elementEntry) {
  const { element: staleElement, token } = elementEntry;
  const namespace = p.treeAdapter.getNamespaceURI(staleElement);
  const replacement = p.treeAdapter.createElement(token.tagName, namespace, token.attrs);
  p.openElements.replace(staleElement, replacement);
  elementEntry.element = replacement;
  return replacement;
}
//Step 14 of the algorithm
/**
 * Places `lastElement` relative to `commonAncestor`.
 *
 * If the ancestor's tag causes foster parenting, the node is foster
 * parented. Otherwise it is appended to the ancestor, or to the ancestor's
 * template content when the ancestor is an HTML `template`.
 *
 * @param p Parser instance.
 * @param commonAncestor Element the node should end up under.
 * @param lastElement Node to insert.
 */
function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) {
  const { treeAdapter } = p;
  const tid = getTagID(treeAdapter.getTagName(commonAncestor));
  if (p._isElementCausesFosterParenting(tid)) {
    p._fosterParentElement(lastElement);
    return;
  }
  const ns = treeAdapter.getNamespaceURI(commonAncestor);
  const isHtmlTemplate = tid === TAG_ID.TEMPLATE && ns === NS.HTML;
  const target = isHtmlTemplate ? treeAdapter.getTemplateContent(commonAncestor) : commonAncestor;
  treeAdapter.appendChild(target, lastElement);
}
//Steps 15-19 of the algorithm
/**
 * Replaces the formatting element with a fresh copy nested inside the
 * furthest block.
 *
 * The copy is created from the entry's token, takes over all children of
 * `furthestBlock` and is appended to it. It is then registered after the
 * bookmark in the active formatting list and after `furthestBlock` on the
 * stack of open elements, while the old entry and element are removed.
 *
 * @param p Parser instance.
 * @param furthestBlock Furthest block of the formatting element.
 * @param formattingElementEntry Entry of the element being replaced.
 */
function aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry) {
  const { treeAdapter, activeFormattingElements, openElements } = p;
  const { token } = formattingElementEntry;
  const namespace = treeAdapter.getNamespaceURI(formattingElementEntry.element);
  const replacement = treeAdapter.createElement(token.tagName, namespace, token.attrs);

  p._adoptNodes(furthestBlock, replacement);
  treeAdapter.appendChild(furthestBlock, replacement);

  activeFormattingElements.insertElementAfterBookmark(replacement, token);
  activeFormattingElements.removeEntry(formattingElementEntry);

  openElements.remove(formattingElementEntry.element);
  openElements.insertAfter(furthestBlock, replacement, token.tagID);
}
//Algorithm entry point
/**
 * Runs the adoption agency algorithm for `token`.
 *
 * Repeats at most `AA_OUTER_LOOP_ITER` times and stops early as soon as
 * there is no usable formatting element entry or no furthest block.
 *
 * @param p Parser instance.
 * @param token Token that triggered the algorithm.
 */
function callAdoptionAgency(p, token) {
  let round = 0;
  while (round < AA_OUTER_LOOP_ITER) {
    const entry = aaObtainFormattingElementEntry(p, token);
    if (!entry) {
      return;
    }
    const furthestBlock = aaObtainFurthestBlock(p, entry);
    if (!furthestBlock) {
      return;
    }
    p.activeFormattingElements.bookmark = entry;
    const lastElement = aaInnerLoop(p, furthestBlock, entry.element);
    const commonAncestor = p.openElements.getCommonAncestor(entry.element);
    p.treeAdapter.detachNode(lastElement);
    if (commonAncestor) {
      aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
    }
    aaReplaceFormattingElement(p, furthestBlock, entry);
    round++;
  }
}
//Generic token handlers
//------------------------------------------------------------------
/**
 * Appends a comment node under the current node (or its template content).
 *
 * @param p Parser instance.
 * @param token Comment token.
 */
function appendComment(p, token) {
  const parent = p.openElements.currentTmplContentOrNode;
  p._appendCommentNode(token, parent);
}
/**
 * Appends a comment node under the bottom entry of the stack of open
 * elements.
 *
 * @param p Parser instance.
 * @param token Comment token.
 */
function appendCommentToRootHtmlElement(p, token) {
  const root = p.openElements.items[0];
  p._appendCommentNode(token, root);
}
/**
 * Appends a comment node directly under the document.
 *
 * @param p Parser instance.
 * @param token Comment token.
 */
function appendCommentToDocument(p, token) {
  p._appendCommentNode(token, p.document);
}
/**
 * Marks the parser as stopped and, when the token carries location info,
 * sets end locations for the elements still on the stack of open elements.
 *
 * @param p Parser instance.
 * @param token Token that ended parsing.
 */
function stopParsing(p, token) {
  p.stopped = true;
  // NOTE: Set end locations for elements that remain on the open element stack.
  if (!token.location) {
    return;
  }
  const { openElements } = p;
  // NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
  // This is a problem, as we might overwrite their end position here.
  const lowestIdx = p.fragmentContext ? 0 : 2;
  for (let idx = openElements.stackTop; idx >= lowestIdx; idx--) {
    p._setEndLocation(openElements.items[idx], token);
  }
  // Handle `html` and `body`
  if (p.fragmentContext || openElements.stackTop < 0) {
    return;
  }
  const htmlElement = openElements.items[0];
  const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
  // An element that already recorded an end tag keeps its end position.
  if (!htmlLocation || htmlLocation.endTag) {
    return;
  }
  p._setEndLocation(htmlElement, token);
  if (openElements.stackTop < 1) {
    return;
  }
  const bodyElement = openElements.items[1];
  const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
  if (bodyLocation && !bodyLocation.endTag) {
    p._setEndLocation(bodyElement, token);
  }
}
// The "initial" insertion mode
//------------------------------------------------------------------
/**
 * Handles a DOCTYPE token in the "initial" insertion mode: stores the
 * document type, derives and applies the document mode, reports a
 * non-conforming DOCTYPE and moves on to "before html".
 *
 * @param p Parser instance.
 * @param token DOCTYPE token.
 */
function doctypeInInitialMode(p, token) {
  p._setDocumentType(token);
  let mode;
  if (token.forceQuirks) {
    mode = DOCUMENT_MODE.QUIRKS;
  } else {
    mode = getDocumentMode(token);
  }
  const conforming = isConforming(token);
  if (!conforming) {
    p._err(token, ERR.nonConformingDoctype);
  }
  p.treeAdapter.setDocumentMode(p.document, mode);
  p.insertionMode = InsertionMode.BEFORE_HTML;
}
/**
 * Handles any other token in the "initial" insertion mode: reports the
 * missing DOCTYPE, puts the document in quirks mode, switches to "before
 * html" and processes the token again.
 *
 * @param p Parser instance.
 * @param token Token to reprocess.
 */
function tokenInInitialMode(p, token) {
  p._err(token, ERR.missingDoctype, true);
  const { document, treeAdapter } = p;
  treeAdapter.setDocumentMode(document, DOCUMENT_MODE.QUIRKS);
  p.insertionMode = InsertionMode.BEFORE_HTML;
  p._processToken(token);
}
// The "before html" insertion mode
//------------------------------------------------------------------
/**
 * Handles a start tag in the "before html" insertion mode. An `html` tag
 * is inserted and the mode becomes "before head"; any other tag goes
 * through `tokenBeforeHtml`.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagBeforeHtml(p, token) {
  if (token.tagID !== TAG_ID.HTML) {
    tokenBeforeHtml(p, token);
    return;
  }
  p._insertElement(token, NS.HTML);
  p.insertionMode = InsertionMode.BEFORE_HEAD;
}
/**
 * Handles an end tag in the "before html" insertion mode. Only `html`,
 * `head`, `body` and `br` end tags are processed; all others are dropped.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagBeforeHtml(p, token) {
  switch (token.tagID) {
    case TAG_ID.HTML:
    case TAG_ID.HEAD:
    case TAG_ID.BODY:
    case TAG_ID.BR:
      tokenBeforeHtml(p, token);
      break;
    default:
      break;
  }
}
/**
 * Default handling for the "before html" insertion mode: inserts a fake
 * root element, switches to "before head" and processes the token again.
 *
 * @param p Parser instance.
 * @param token Token to reprocess.
 */
function tokenBeforeHtml(p, token) {
  p._insertFakeRootElement();
  p.insertionMode = InsertionMode.BEFORE_HEAD;
  p._processToken(token);
}
// The "before head" insertion mode
//------------------------------------------------------------------
/**
 * Handles a start tag in the "before head" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagBeforeHead(p, token) {
  const { tagID } = token;
  if (tagID === TAG_ID.HTML) {
    startTagInBody(p, token);
  } else if (tagID === TAG_ID.HEAD) {
    p._insertElement(token, NS.HTML);
    p.headElement = p.openElements.current;
    p.insertionMode = InsertionMode.IN_HEAD;
  } else {
    tokenBeforeHead(p, token);
  }
}
/**
 * Handles an end tag in the "before head" insertion mode. `head`, `body`,
 * `html` and `br` end tags are processed; any other end tag is reported as
 * having no matching open element.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagBeforeHead(p, token) {
  switch (token.tagID) {
    case TAG_ID.HEAD:
    case TAG_ID.BODY:
    case TAG_ID.HTML:
    case TAG_ID.BR:
      tokenBeforeHead(p, token);
      break;
    default:
      p._err(token, ERR.endTagWithoutMatchingOpenElement);
  }
}
/**
 * Default handling for the "before head" insertion mode: inserts a fake
 * `head`, remembers it as the head element, switches to "in head" and
 * processes the token again.
 *
 * @param p Parser instance.
 * @param token Token to reprocess.
 */
function tokenBeforeHead(p, token) {
  p._insertFakeElement(TAG_NAMES.HEAD, TAG_ID.HEAD);
  p.headElement = p.openElements.current;
  p.insertionMode = InsertionMode.IN_HEAD;
  p._processToken(token);
}
// The "in head" insertion mode
//------------------------------------------------------------------
/**
 * Handles a start tag in the "in head" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInHead(p, token) {
  switch (token.tagID) {
    case TAG_ID.HTML:
      startTagInBody(p, token);
      break;
    case TAG_ID.BASE:
    case TAG_ID.BASEFONT:
    case TAG_ID.BGSOUND:
    case TAG_ID.LINK:
    case TAG_ID.META:
      p._appendElement(token, NS.HTML);
      token.ackSelfClosing = true;
      break;
    case TAG_ID.TITLE:
      p._switchToTextParsing(token, TokenizerMode.RCDATA);
      break;
    case TAG_ID.NOSCRIPT:
      // With scripting enabled the content is treated as raw text.
      if (p.options.scriptingEnabled) {
        p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
        break;
      }
      p._insertElement(token, NS.HTML);
      p.insertionMode = InsertionMode.IN_HEAD_NO_SCRIPT;
      break;
    case TAG_ID.NOFRAMES:
    case TAG_ID.STYLE:
      p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
      break;
    case TAG_ID.SCRIPT:
      p._switchToTextParsing(token, TokenizerMode.SCRIPT_DATA);
      break;
    case TAG_ID.TEMPLATE:
      p._insertTemplate(token);
      p.activeFormattingElements.insertMarker();
      p.framesetOk = false;
      p.insertionMode = InsertionMode.IN_TEMPLATE;
      p.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
      break;
    case TAG_ID.HEAD:
      p._err(token, ERR.misplacedStartTagForHeadElement);
      break;
    default:
      tokenInHead(p, token);
  }
}
/**
 * Handles an end tag in the "in head" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInHead(p, token) {
  const { tagID } = token;
  if (tagID === TAG_ID.HEAD) {
    p.openElements.pop();
    p.insertionMode = InsertionMode.AFTER_HEAD;
  } else if (tagID === TAG_ID.BODY || tagID === TAG_ID.BR || tagID === TAG_ID.HTML) {
    tokenInHead(p, token);
  } else if (tagID === TAG_ID.TEMPLATE) {
    templateEndTagInHead(p, token);
  } else {
    p._err(token, ERR.endTagWithoutMatchingOpenElement);
  }
}
/**
 * Handles a `template` end tag. Without an open template the tag is
 * reported as unmatched. Otherwise the stack is popped through the
 * template, the active formatting list is cleared to the last marker, the
 * template insertion mode is dropped and the insertion mode is reset.
 *
 * @param p Parser instance.
 * @param token `template` end tag token.
 */
function templateEndTagInHead(p, token) {
  if (!(p.openElements.tmplCount > 0)) {
    p._err(token, ERR.endTagWithoutMatchingOpenElement);
    return;
  }
  p.openElements.generateImpliedEndTagsThoroughly();
  if (p.openElements.currentTagId !== TAG_ID.TEMPLATE) {
    p._err(token, ERR.closingOfElementWithOpenChildElements);
  }
  p.openElements.popUntilTagNamePopped(TAG_ID.TEMPLATE);
  p.activeFormattingElements.clearToLastMarker();
  p.tmplInsertionModeStack.shift();
  p._resetInsertionMode();
}
/**
 * Default handling for the "in head" insertion mode: pops the current
 * node, switches to "after head" and processes the token again.
 *
 * @param p Parser instance.
 * @param token Token to reprocess.
 */
function tokenInHead(p, token) {
  p.openElements.pop();
  p.insertionMode = InsertionMode.AFTER_HEAD;
  p._processToken(token);
}
// The "in head no script" insertion mode
//------------------------------------------------------------------
/**
 * Handles a start tag in the "in head noscript" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInHeadNoScript(p, token) {
  switch (token.tagID) {
    case TAG_ID.HTML:
      startTagInBody(p, token);
      break;
    case TAG_ID.BASEFONT:
    case TAG_ID.BGSOUND:
    case TAG_ID.HEAD:
    case TAG_ID.LINK:
    case TAG_ID.META:
    case TAG_ID.NOFRAMES:
    case TAG_ID.STYLE:
      startTagInHead(p, token);
      break;
    case TAG_ID.NOSCRIPT:
      p._err(token, ERR.nestedNoscriptInHead);
      break;
    default:
      tokenInHeadNoScript(p, token);
  }
}
/**
 * Handles an end tag in the "in head noscript" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInHeadNoScript(p, token) {
  const { tagID } = token;
  if (tagID === TAG_ID.NOSCRIPT) {
    p.openElements.pop();
    p.insertionMode = InsertionMode.IN_HEAD;
  } else if (tagID === TAG_ID.BR) {
    tokenInHeadNoScript(p, token);
  } else {
    p._err(token, ERR.endTagWithoutMatchingOpenElement);
  }
}
/**
 * Default handling for the "in head noscript" insertion mode: reports an
 * error (a different one for EOF), pops the current node, switches to "in
 * head" and processes the token again.
 *
 * @param p Parser instance.
 * @param token Token to reprocess.
 */
function tokenInHeadNoScript(p, token) {
  const isEof = token.type === TokenType.EOF;
  p._err(token, isEof ? ERR.openElementsLeftAfterEof : ERR.disallowedContentInNoscriptInHead);
  p.openElements.pop();
  p.insertionMode = InsertionMode.IN_HEAD;
  p._processToken(token);
}
// The "after head" insertion mode
//------------------------------------------------------------------
/**
 * Handles a start tag in the "after head" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagAfterHead(p, token) {
  switch (token.tagID) {
    case TAG_ID.HTML:
      startTagInBody(p, token);
      break;
    case TAG_ID.BODY:
      p._insertElement(token, NS.HTML);
      p.framesetOk = false;
      p.insertionMode = InsertionMode.IN_BODY;
      break;
    case TAG_ID.FRAMESET:
      p._insertElement(token, NS.HTML);
      p.insertionMode = InsertionMode.IN_FRAMESET;
      break;
    case TAG_ID.BASE:
    case TAG_ID.BASEFONT:
    case TAG_ID.BGSOUND:
    case TAG_ID.LINK:
    case TAG_ID.META:
    case TAG_ID.NOFRAMES:
    case TAG_ID.SCRIPT:
    case TAG_ID.STYLE:
    case TAG_ID.TEMPLATE:
    case TAG_ID.TITLE:
      p._err(token, ERR.abandonedHeadElementChild);
      // The head element is put back on the stack just for the duration
      // of the "in head" handling and removed again afterwards.
      p.openElements.push(p.headElement, TAG_ID.HEAD);
      startTagInHead(p, token);
      p.openElements.remove(p.headElement);
      break;
    case TAG_ID.HEAD:
      p._err(token, ERR.misplacedStartTagForHeadElement);
      break;
    default:
      tokenAfterHead(p, token);
  }
}
/**
 * Handles an end tag in the "after head" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagAfterHead(p, token) {
  const { tagID } = token;
  if (tagID === TAG_ID.BODY || tagID === TAG_ID.HTML || tagID === TAG_ID.BR) {
    tokenAfterHead(p, token);
  } else if (tagID === TAG_ID.TEMPLATE) {
    templateEndTagInHead(p, token);
  } else {
    p._err(token, ERR.endTagWithoutMatchingOpenElement);
  }
}
/**
 * Default handling for the "after head" insertion mode: inserts a fake
 * `body`, switches to "in body" and handles the token with the "in body"
 * rules.
 *
 * @param p Parser instance.
 * @param token Token to handle.
 */
function tokenAfterHead(p, token) {
  p._insertFakeElement(TAG_NAMES.BODY, TAG_ID.BODY);
  p.insertionMode = InsertionMode.IN_BODY;
  modeInBody(p, token);
}
// The "in body" insertion mode
//------------------------------------------------------------------
/**
 * Applies the "in body" rules to a token according to its type. Token
 * types that are not listed are ignored.
 *
 * @param p Parser instance.
 * @param token Token to handle.
 */
function modeInBody(p, token) {
  switch (token.type) {
    case TokenType.CHARACTER:
      characterInBody(p, token);
      break;
    case TokenType.WHITESPACE_CHARACTER:
      whitespaceCharacterInBody(p, token);
      break;
    case TokenType.COMMENT:
      appendComment(p, token);
      break;
    case TokenType.START_TAG:
      startTagInBody(p, token);
      break;
    case TokenType.END_TAG:
      endTagInBody(p, token);
      break;
    case TokenType.EOF:
      eofInBody(p, token);
      break;
    default:
      // Do nothing
      break;
  }
}
/**
 * Inserts whitespace characters after reconstructing the active
 * formatting elements.
 */
function whitespaceCharacterInBody(p, token) {
  p._reconstructActiveFormattingElements();
  p._insertCharacters(token);
}
/**
 * Inserts non-whitespace characters after reconstructing the active
 * formatting elements, and clears `framesetOk`.
 */
function characterInBody(p, token) {
  p._reconstructActiveFormattingElements();
  p._insertCharacters(token);
  p.framesetOk = false;
}
/**
 * `html` start tag: adopts the token's attributes onto the bottom stack
 * entry, unless a template is open.
 */
function htmlStartTagInBody(p, token) {
  const { openElements } = p;
  if (openElements.tmplCount !== 0) {
    return;
  }
  p.treeAdapter.adoptAttributes(openElements.items[0], token.attrs);
}
/**
 * `body` start tag: adopts the token's attributes onto the existing body
 * element and clears `framesetOk`, unless a template is open or no
 * properly nested body element exists.
 */
function bodyStartTagInBody(p, token) {
  const { openElements } = p;
  const bodyElement = openElements.tryPeekProperlyNestedBodyElement();
  if (!bodyElement || openElements.tmplCount !== 0) {
    return;
  }
  p.framesetOk = false;
  p.treeAdapter.adoptAttributes(bodyElement, token.attrs);
}
/**
 * `frameset` start tag: when `framesetOk` is still set and a body element
 * exists, the body is detached and replaced by the frameset.
 */
function framesetStartTagInBody(p, token) {
  const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
  if (!p.framesetOk || !bodyElement) {
    return;
  }
  p.treeAdapter.detachNode(bodyElement);
  p.openElements.popAllUpToHtmlElement();
  p._insertElement(token, NS.HTML);
  p.insertionMode = InsertionMode.IN_FRAMESET;
}
/** Closes a `p` in button scope, then inserts the element. */
function addressStartTagInBody(p, token) {
  const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
  if (hasOpenParagraph) {
    p._closePElement();
  }
  p._insertElement(token, NS.HTML);
}
/**
 * `h1`-`h6` start tags: closes a `p` in button scope, pops a numbered
 * header that is the current node, then inserts the element.
 */
function numberedHeaderStartTagInBody(p, token) {
  const { openElements } = p;
  if (openElements.hasInButtonScope(TAG_ID.P)) {
    p._closePElement();
  }
  const currentIsHeader = NUMBERED_HEADERS.has(openElements.currentTagId);
  if (currentIsHeader) {
    openElements.pop();
  }
  p._insertElement(token, NS.HTML);
}
/** Closes a `p` in button scope, inserts the element and arms newline skipping. */
function preStartTagInBody(p, token) {
  const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
  if (hasOpenParagraph) {
    p._closePElement();
  }
  p._insertElement(token, NS.HTML);
  //NOTE: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move
  //on to the next one. (Newlines at the start of pre blocks are ignored as an authoring convenience.)
  p.skipNextNewLine = true;
  p.framesetOk = false;
}
/**
 * `form` start tag: ignored when a form element is already set and no
 * template is open. Outside templates the inserted form becomes the form
 * element.
 */
function formStartTagInBody(p, token) {
  const inTemplate = p.openElements.tmplCount > 0;
  if (p.formElement && !inTemplate) {
    return;
  }
  if (p.openElements.hasInButtonScope(TAG_ID.P)) {
    p._closePElement();
  }
  p._insertElement(token, NS.HTML);
  if (!inTemplate) {
    p.formElement = p.openElements.current;
  }
}
/**
 * `li`, `dd` and `dt` start tags: closes a matching open item found while
 * scanning the stack from the top, closes a `p` in button scope, then
 * inserts the element.
 */
function listItemStartTagInBody(p, token) {
  p.framesetOk = false;
  const tn = token.tagID;
  const isListItem = tn === TAG_ID.LI;
  const isDefinitionItem = tn === TAG_ID.DD || tn === TAG_ID.DT;
  for (let idx = p.openElements.stackTop; idx >= 0; idx--) {
    const elementId = p.openElements.tagIDs[idx];
    const closesOpenItem = isListItem
      ? elementId === TAG_ID.LI
      : isDefinitionItem && (elementId === TAG_ID.DD || elementId === TAG_ID.DT);
    if (closesOpenItem) {
      p.openElements.generateImpliedEndTagsWithExclusion(elementId);
      p.openElements.popUntilTagNamePopped(elementId);
      break;
    }
    // `address`, `div` and `p` never stop the scan; other special elements do.
    const isTransparent = elementId === TAG_ID.ADDRESS || elementId === TAG_ID.DIV || elementId === TAG_ID.P;
    if (!isTransparent && p._isSpecialElement(p.openElements.items[idx], elementId)) {
      break;
    }
  }
  if (p.openElements.hasInButtonScope(TAG_ID.P)) {
    p._closePElement();
  }
  p._insertElement(token, NS.HTML);
}
/** Closes a `p` in button scope, inserts the element and switches the tokenizer to PLAINTEXT. */
function plaintextStartTagInBody(p, token) {
  const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
  if (hasOpenParagraph) {
    p._closePElement();
  }
  p._insertElement(token, NS.HTML);
  p.tokenizer.state = TokenizerMode.PLAINTEXT;
}
/** `button` start tag: closes a `button` already in scope before inserting the new one. */
function buttonStartTagInBody(p, token) {
  const { openElements } = p;
  if (openElements.hasInScope(TAG_ID.BUTTON)) {
    openElements.generateImpliedEndTags();
    openElements.popUntilTagNamePopped(TAG_ID.BUTTON);
  }
  p._reconstructActiveFormattingElements();
  p._insertElement(token, NS.HTML);
  p.framesetOk = false;
}
/**
 * `a` start tag: if an `a` entry is in the active formatting list, runs
 * the adoption agency algorithm and removes that entry and its element.
 * Then inserts the element and records it as a formatting element.
 */
function aStartTagInBody(p, token) {
  const { activeFormattingElements } = p;
  const openAnchorEntry = activeFormattingElements.getElementEntryInScopeWithTagName(TAG_NAMES.A);
  if (openAnchorEntry) {
    callAdoptionAgency(p, token);
    p.openElements.remove(openAnchorEntry.element);
    activeFormattingElements.removeEntry(openAnchorEntry);
  }
  p._reconstructActiveFormattingElements();
  p._insertElement(token, NS.HTML);
  activeFormattingElements.pushElement(p.openElements.current, token);
}
/** Formatting start tag: inserts the element and records it as a formatting element. */
function bStartTagInBody(p, token) {
  p._reconstructActiveFormattingElements();
  p._insertElement(token, NS.HTML);
  const inserted = p.openElements.current;
  p.activeFormattingElements.pushElement(inserted, token);
}
/** `nobr` start tag: runs the adoption agency algorithm first when a `nobr` is in scope. */
function nobrStartTagInBody(p, token) {
  p._reconstructActiveFormattingElements();
  const hasOpenNobr = p.openElements.hasInScope(TAG_ID.NOBR);
  if (hasOpenNobr) {
    callAdoptionAgency(p, token);
    p._reconstructActiveFormattingElements();
  }
  p._insertElement(token, NS.HTML);
  p.activeFormattingElements.pushElement(p.openElements.current, token);
}
/** Inserts the element, adds a marker to the active formatting list and clears `framesetOk`. */
function appletStartTagInBody(p, token) {
  p._reconstructActiveFormattingElements();
  p._insertElement(token, NS.HTML);
  p.activeFormattingElements.insertMarker();
  p.framesetOk = false;
}
/** `table` start tag: outside quirks mode a `p` in button scope is closed first. */
function tableStartTagInBody(p, token) {
  const inQuirksMode = p.treeAdapter.getDocumentMode(p.document) === DOCUMENT_MODE.QUIRKS;
  if (!inQuirksMode && p.openElements.hasInButtonScope(TAG_ID.P)) {
    p._closePElement();
  }
  p._insertElement(token, NS.HTML);
  p.framesetOk = false;
  p.insertionMode = InsertionMode.IN_TABLE;
}
/** Appends the element without pushing it on the stack and acknowledges self-closing. */
function areaStartTagInBody(p, token) {
  p._reconstructActiveFormattingElements();
  p._appendElement(token, NS.HTML);
  p.framesetOk = false;
  token.ackSelfClosing = true;
}
/**
 * Tells whether a token's `type` attribute equals `HIDDEN_INPUT_TYPE`,
 * ignoring case.
 *
 * @param token Start tag token.
 * @returns `true` for a hidden input, `false` otherwise.
 */
function isHiddenInput(token) {
  const inputType = getTokenAttr(token, ATTRS.TYPE);
  if (inputType == null) {
    return false;
  }
  return inputType.toLowerCase() === HIDDEN_INPUT_TYPE;
}
/** `input` start tag: only non-hidden inputs clear `framesetOk`. */
function inputStartTagInBody(p, token) {
  p._reconstructActiveFormattingElements();
  p._appendElement(token, NS.HTML);
  const hidden = isHiddenInput(token);
  if (!hidden) {
    p.framesetOk = false;
  }
  token.ackSelfClosing = true;
}
/** Appends the element and acknowledges self-closing. */
function paramStartTagInBody(p, token) {
  p._appendElement(token, NS.HTML);
  token.ackSelfClosing = true;
}
/** `hr` start tag: closes a `p` in button scope, then appends the element. */
function hrStartTagInBody(p, token) {
  const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
  if (hasOpenParagraph) {
    p._closePElement();
  }
  p._appendElement(token, NS.HTML);
  p.framesetOk = false;
  token.ackSelfClosing = true;
}
/** `image` start tag: the token is renamed to `img` and handled like `area`. */
function imageStartTagInBody(p, token) {
  token.tagName = TAG_NAMES.IMG;
  token.tagID = TAG_ID.IMG;
  areaStartTagInBody(p, token);
}
/**
 * Handles a `<textarea>` start tag in the "in body" insertion mode.
 *
 * Inserts the element, puts the tokenizer into RCDATA and the parser into the
 * "text" insertion mode, remembering the mode that was active before.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function textareaStartTagInBody(p, token) {
    p._insertElement(token, NS.HTML);
    // NOTE: if the next token is a U+000A LINE FEED (LF) character token, it has to be
    // ignored (newlines at the start of textarea elements are dropped as an authoring
    // convenience), hence the flag below.
    p.skipNextNewLine = true;
    p.tokenizer.state = TokenizerMode.RCDATA;
    // Save the current mode before overwriting it with the text mode.
    p.originalInsertionMode = p.insertionMode;
    p.framesetOk = false;
    p.insertionMode = InsertionMode.TEXT;
}

/**
 * Handles an `<xmp>` start tag in the "in body" insertion mode: closes an open `<p>`
 * in button scope, reconstructs formatting elements and parses the content as raw text.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function xmpStartTagInBody(p, token) {
    const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
    if (hasOpenParagraph) {
        p._closePElement();
    }
    p._reconstructActiveFormattingElements();
    p.framesetOk = false;
    p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
}

/**
 * Handles an `<iframe>` start tag in the "in body" insertion mode: clears the
 * frameset-ok flag and parses the content as raw text.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function iframeStartTagInBody(p, token) {
    p.framesetOk = false;
    p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
}
//NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse
//<noembed>/<noframes> as rawtext.
/**
 * Handles start tags whose content is tokenized as raw text (`<noembed>`, `<noframes>`
 * and, when scripting is enabled, `<noscript>`).
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function rawTextStartTagInBody(p, token) {
    p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
}

/**
 * Handles a `<select>` start tag in the "in body" insertion mode. The next insertion
 * mode depends on whether the parser is currently inside table-related content.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function selectStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
    p.framesetOk = false;
    switch (p.insertionMode) {
        case InsertionMode.IN_TABLE:
        case InsertionMode.IN_CAPTION:
        case InsertionMode.IN_TABLE_BODY:
        case InsertionMode.IN_ROW:
        case InsertionMode.IN_CELL:
            p.insertionMode = InsertionMode.IN_SELECT_IN_TABLE;
            break;
        default:
            p.insertionMode = InsertionMode.IN_SELECT;
    }
}

/**
 * Handles `<option>` / `<optgroup>` start tags in the "in body" insertion mode:
 * an `<option>` that is the current node is popped first.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function optgroupStartTagInBody(p, token) {
    const stack = p.openElements;
    if (stack.currentTagId === TAG_ID.OPTION) {
        stack.pop();
    }
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
}

/**
 * Handles `<rb>` / `<rtc>` start tags in the "in body" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function rbStartTagInBody(p, token) {
    const stack = p.openElements;
    if (stack.hasInScope(TAG_ID.RUBY)) {
        stack.generateImpliedEndTags();
    }
    p._insertElement(token, NS.HTML);
}

/**
 * Handles `<rt>` / `<rp>` start tags in the "in body" insertion mode. Implied end tags
 * are generated except for `<rtc>`.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function rtStartTagInBody(p, token) {
    const stack = p.openElements;
    if (stack.hasInScope(TAG_ID.RUBY)) {
        stack.generateImpliedEndTagsWithExclusion(TAG_ID.RTC);
    }
    p._insertElement(token, NS.HTML);
}

/**
 * Handles a `<math>` start tag in the "in body" insertion mode. Self-closing tags are
 * appended without being pushed on the stack of open elements.
 *
 * @param p Parser instance.
 * @param token Start tag token; its attributes are adjusted in place.
 */
function mathStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    adjustTokenMathMLAttrs(token);
    adjustTokenXMLAttrs(token);
    if (!token.selfClosing) {
        p._insertElement(token, NS.MATHML);
    } else {
        p._appendElement(token, NS.MATHML);
    }
    token.ackSelfClosing = true;
}

/**
 * Handles an `<svg>` start tag in the "in body" insertion mode. Self-closing tags are
 * appended without being pushed on the stack of open elements.
 *
 * @param p Parser instance.
 * @param token Start tag token; its attributes are adjusted in place.
 */
function svgStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    adjustTokenSVGAttrs(token);
    adjustTokenXMLAttrs(token);
    if (!token.selfClosing) {
        p._insertElement(token, NS.SVG);
    } else {
        p._appendElement(token, NS.SVG);
    }
    token.ackSelfClosing = true;
}

/**
 * Fallback for start tags without a dedicated "in body" handler.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function genericStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
}

/**
 * Dispatches a start tag token to its "in body" handler, keyed by `token.tagID`.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInBody(p, token) {
    switch (token.tagID) {
        case TAG_ID.I:
        case TAG_ID.S:
        case TAG_ID.B:
        case TAG_ID.U:
        case TAG_ID.EM:
        case TAG_ID.TT:
        case TAG_ID.BIG:
        case TAG_ID.CODE:
        case TAG_ID.FONT:
        case TAG_ID.SMALL:
        case TAG_ID.STRIKE:
        case TAG_ID.STRONG:
            bStartTagInBody(p, token);
            return;
        case TAG_ID.A:
            aStartTagInBody(p, token);
            return;
        case TAG_ID.H1:
        case TAG_ID.H2:
        case TAG_ID.H3:
        case TAG_ID.H4:
        case TAG_ID.H5:
        case TAG_ID.H6:
            numberedHeaderStartTagInBody(p, token);
            return;
        case TAG_ID.P:
        case TAG_ID.DL:
        case TAG_ID.OL:
        case TAG_ID.UL:
        case TAG_ID.DIV:
        case TAG_ID.DIR:
        case TAG_ID.NAV:
        case TAG_ID.MAIN:
        case TAG_ID.MENU:
        case TAG_ID.ASIDE:
        case TAG_ID.CENTER:
        case TAG_ID.FIGURE:
        case TAG_ID.FOOTER:
        case TAG_ID.HEADER:
        case TAG_ID.HGROUP:
        case TAG_ID.DIALOG:
        case TAG_ID.DETAILS:
        case TAG_ID.ADDRESS:
        case TAG_ID.ARTICLE:
        case TAG_ID.SEARCH:
        case TAG_ID.SECTION:
        case TAG_ID.SUMMARY:
        case TAG_ID.FIELDSET:
        case TAG_ID.BLOCKQUOTE:
        case TAG_ID.FIGCAPTION:
            addressStartTagInBody(p, token);
            return;
        case TAG_ID.LI:
        case TAG_ID.DD:
        case TAG_ID.DT:
            listItemStartTagInBody(p, token);
            return;
        case TAG_ID.BR:
        case TAG_ID.IMG:
        case TAG_ID.WBR:
        case TAG_ID.AREA:
        case TAG_ID.EMBED:
        case TAG_ID.KEYGEN:
            areaStartTagInBody(p, token);
            return;
        case TAG_ID.HR:
            hrStartTagInBody(p, token);
            return;
        case TAG_ID.RB:
        case TAG_ID.RTC:
            rbStartTagInBody(p, token);
            return;
        case TAG_ID.RT:
        case TAG_ID.RP:
            rtStartTagInBody(p, token);
            return;
        case TAG_ID.PRE:
        case TAG_ID.LISTING:
            preStartTagInBody(p, token);
            return;
        case TAG_ID.XMP:
            xmpStartTagInBody(p, token);
            return;
        case TAG_ID.SVG:
            svgStartTagInBody(p, token);
            return;
        case TAG_ID.HTML:
            htmlStartTagInBody(p, token);
            return;
        case TAG_ID.BASE:
        case TAG_ID.LINK:
        case TAG_ID.META:
        case TAG_ID.STYLE:
        case TAG_ID.TITLE:
        case TAG_ID.SCRIPT:
        case TAG_ID.BGSOUND:
        case TAG_ID.BASEFONT:
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            return;
        case TAG_ID.BODY:
            bodyStartTagInBody(p, token);
            return;
        case TAG_ID.FORM:
            formStartTagInBody(p, token);
            return;
        case TAG_ID.NOBR:
            nobrStartTagInBody(p, token);
            return;
        case TAG_ID.MATH:
            mathStartTagInBody(p, token);
            return;
        case TAG_ID.TABLE:
            tableStartTagInBody(p, token);
            return;
        case TAG_ID.INPUT:
            inputStartTagInBody(p, token);
            return;
        case TAG_ID.PARAM:
        case TAG_ID.TRACK:
        case TAG_ID.SOURCE:
            paramStartTagInBody(p, token);
            return;
        case TAG_ID.IMAGE:
            imageStartTagInBody(p, token);
            return;
        case TAG_ID.BUTTON:
            buttonStartTagInBody(p, token);
            return;
        case TAG_ID.APPLET:
        case TAG_ID.OBJECT:
        case TAG_ID.MARQUEE:
            appletStartTagInBody(p, token);
            return;
        case TAG_ID.IFRAME:
            iframeStartTagInBody(p, token);
            return;
        case TAG_ID.SELECT:
            selectStartTagInBody(p, token);
            return;
        case TAG_ID.OPTION:
        case TAG_ID.OPTGROUP:
            optgroupStartTagInBody(p, token);
            return;
        case TAG_ID.NOEMBED:
        case TAG_ID.NOFRAMES:
            rawTextStartTagInBody(p, token);
            return;
        case TAG_ID.FRAMESET:
            framesetStartTagInBody(p, token);
            return;
        case TAG_ID.TEXTAREA:
            textareaStartTagInBody(p, token);
            return;
        case TAG_ID.NOSCRIPT: {
            // With scripting enabled the content is raw text, otherwise regular markup.
            const handler = p.options.scriptingEnabled ? rawTextStartTagInBody : genericStartTagInBody;
            handler(p, token);
            return;
        }
        case TAG_ID.PLAINTEXT:
            plaintextStartTagInBody(p, token);
            return;
        case TAG_ID.COL:
        case TAG_ID.TH:
        case TAG_ID.TD:
        case TAG_ID.TR:
        case TAG_ID.HEAD:
        case TAG_ID.FRAME:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.CAPTION:
        case TAG_ID.COLGROUP:
            // These start tags are ignored in the "in body" mode.
            return;
        default:
            genericStartTagInBody(p, token);
    }
}

/**
 * Handles a `</body>` end tag in the "in body" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function bodyEndTagInBody(p, token) {
    if (!p.openElements.hasInScope(TAG_ID.BODY)) {
        return;
    }
    p.insertionMode = InsertionMode.AFTER_BODY;
    //NOTE: <body> is never popped from the stack, so we need to update
    //the end location explicitly.
    if (!p.options.sourceCodeLocationInfo) {
        return;
    }
    const body = p.openElements.tryPeekProperlyNestedBodyElement();
    if (body) {
        p._setEndLocation(body, token);
    }
}

/**
 * Handles a `</html>` end tag in the "in body" insertion mode by switching to
 * "after body" and re-dispatching the token there.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function htmlEndTagInBody(p, token) {
    if (!p.openElements.hasInScope(TAG_ID.BODY)) {
        return;
    }
    p.insertionMode = InsertionMode.AFTER_BODY;
    endTagAfterBody(p, token);
}

/**
 * Handles end tags of block-level elements (the group dispatched to this handler by
 * `endTagInBody`).
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function addressEndTagInBody(p, token) {
    const stack = p.openElements;
    const closedId = token.tagID;
    if (stack.hasInScope(closedId)) {
        stack.generateImpliedEndTags();
        stack.popUntilTagNamePopped(closedId);
    }
}

/**
 * Handles a `</form>` end tag in the "in body" insertion mode.
 *
 * Outside templates the form element pointer is reset and the pointed-to element
 * is removed from the stack; inside templates the stack is popped up to the form.
 *
 * @param p Parser instance.
 */
function formEndTagInBody(p) {
    const stack = p.openElements;
    const inTemplate = stack.tmplCount > 0;
    const { formElement } = p;
    if (!inTemplate) {
        p.formElement = null;
    }
    if (!formElement && !inTemplate) {
        return;
    }
    if (!stack.hasInScope(TAG_ID.FORM)) {
        return;
    }
    stack.generateImpliedEndTags();
    if (inTemplate) {
        stack.popUntilTagNamePopped(TAG_ID.FORM);
    } else {
        // Not in a template, so the guard above guarantees `formElement` is set.
        stack.remove(formElement);
    }
}

/**
 * Handles a `</p>` end tag in the "in body" insertion mode. When no `<p>` is in
 * button scope, a fake one is inserted first so that there is something to close.
 *
 * @param p Parser instance.
 */
function pEndTagInBody(p) {
    const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
    if (!hasOpenParagraph) {
        p._insertFakeElement(TAG_NAMES.P, TAG_ID.P);
    }
    p._closePElement();
}

/**
 * Handles a `</li>` end tag in the "in body" insertion mode.
 *
 * @param p Parser instance.
 */
function liEndTagInBody(p) {
    const stack = p.openElements;
    if (stack.hasInListItemScope(TAG_ID.LI)) {
        stack.generateImpliedEndTagsWithExclusion(TAG_ID.LI);
        stack.popUntilTagNamePopped(TAG_ID.LI);
    }
}

/**
 * Handles `</dd>` / `</dt>` end tags in the "in body" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function ddEndTagInBody(p, token) {
    const stack = p.openElements;
    const closedId = token.tagID;
    if (stack.hasInScope(closedId)) {
        stack.generateImpliedEndTagsWithExclusion(closedId);
        stack.popUntilTagNamePopped(closedId);
    }
}

/**
 * Handles `</h1>`…`</h6>` end tags in the "in body" insertion mode.
 *
 * @param p Parser instance.
 */
function numberedHeaderEndTagInBody(p) {
    const stack = p.openElements;
    if (stack.hasNumberedHeaderInScope()) {
        stack.generateImpliedEndTags();
        stack.popUntilNumberedHeaderPopped();
    }
}

/**
 * Handles `</applet>`, `</object>` and `</marquee>` end tags in the "in body"
 * insertion mode; also clears the active formatting elements up to the last marker.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function appletEndTagInBody(p, token) {
    const stack = p.openElements;
    const closedId = token.tagID;
    if (stack.hasInScope(closedId)) {
        stack.generateImpliedEndTags();
        stack.popUntilTagNamePopped(closedId);
        p.activeFormattingElements.clearToLastMarker();
    }
}

/**
 * Handles a `</br>` end tag in the "in body" insertion mode by inserting a fake
 * `<br>` element and popping it right away.
 *
 * @param p Parser instance.
 */
function brEndTagInBody(p) {
    p._reconstructActiveFormattingElements();
    p._insertFakeElement(TAG_NAMES.BR, TAG_ID.BR);
    p.openElements.pop();
    p.framesetOk = false;
}

/**
 * Fallback for end tags without a dedicated "in body" handler.
 *
 * Walks the stack of open elements from the top (index 0 is never inspected) looking
 * for an element matching the token; stops early at the first special element.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function genericEndTagInBody(p, token) {
    const { tagName, tagID } = token;
    const stack = p.openElements;
    for (let idx = stack.stackTop; idx > 0; idx--) {
        const node = stack.items[idx];
        const nodeId = stack.tagIDs[idx];
        // IDs alone are not enough for unknown tags, so their names are compared too.
        const matches = tagID === nodeId && (tagID !== TAG_ID.UNKNOWN || p.treeAdapter.getTagName(node) === tagName);
        if (matches) {
            stack.generateImpliedEndTagsWithExclusion(tagID);
            // Generating implied end tags may already have shortened the stack below `idx`.
            if (stack.stackTop >= idx) {
                stack.shortenToLength(idx);
            }
            return;
        }
        if (p._isSpecialElement(node, nodeId)) {
            return;
        }
    }
}

/**
 * Dispatches an end tag token to its "in body" handler, keyed by `token.tagID`.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInBody(p, token) {
    switch (token.tagID) {
        case TAG_ID.A:
        case TAG_ID.B:
        case TAG_ID.I:
        case TAG_ID.S:
        case TAG_ID.U:
        case TAG_ID.EM:
        case TAG_ID.TT:
        case TAG_ID.BIG:
        case TAG_ID.CODE:
        case TAG_ID.FONT:
        case TAG_ID.NOBR:
        case TAG_ID.SMALL:
        case TAG_ID.STRIKE:
        case TAG_ID.STRONG:
            callAdoptionAgency(p, token);
            return;
        case TAG_ID.P:
            pEndTagInBody(p);
            return;
        case TAG_ID.DL:
        case TAG_ID.UL:
        case TAG_ID.OL:
        case TAG_ID.DIR:
        case TAG_ID.DIV:
        case TAG_ID.NAV:
        case TAG_ID.PRE:
        case TAG_ID.MAIN:
        case TAG_ID.MENU:
        case TAG_ID.ASIDE:
        case TAG_ID.BUTTON:
        case TAG_ID.CENTER:
        case TAG_ID.FIGURE:
        case TAG_ID.FOOTER:
        case TAG_ID.HEADER:
        case TAG_ID.HGROUP:
        case TAG_ID.DIALOG:
        case TAG_ID.ADDRESS:
        case TAG_ID.ARTICLE:
        case TAG_ID.DETAILS:
        case TAG_ID.SEARCH:
        case TAG_ID.SECTION:
        case TAG_ID.SUMMARY:
        case TAG_ID.LISTING:
        case TAG_ID.FIELDSET:
        case TAG_ID.BLOCKQUOTE:
        case TAG_ID.FIGCAPTION:
            addressEndTagInBody(p, token);
            return;
        case TAG_ID.LI:
            liEndTagInBody(p);
            return;
        case TAG_ID.DD:
        case TAG_ID.DT:
            ddEndTagInBody(p, token);
            return;
        case TAG_ID.H1:
        case TAG_ID.H2:
        case TAG_ID.H3:
        case TAG_ID.H4:
        case TAG_ID.H5:
        case TAG_ID.H6:
            numberedHeaderEndTagInBody(p);
            return;
        case TAG_ID.BR:
            brEndTagInBody(p);
            return;
        case TAG_ID.BODY:
            bodyEndTagInBody(p, token);
            return;
        case TAG_ID.HTML:
            htmlEndTagInBody(p, token);
            return;
        case TAG_ID.FORM:
            formEndTagInBody(p);
            return;
        case TAG_ID.APPLET:
        case TAG_ID.OBJECT:
        case TAG_ID.MARQUEE:
            appletEndTagInBody(p, token);
            return;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        default:
            genericEndTagInBody(p, token);
    }
}

/**
 * Handles end-of-file in the "in body" insertion mode: defers to the template
 * handling while template insertion modes are pending, otherwise stops parsing.
 *
 * @param p Parser instance.
 * @param token EOF token.
 */
function eofInBody(p, token) {
    const hasPendingTemplates = p.tmplInsertionModeStack.length > 0;
    if (hasPendingTemplates) {
        eofInTemplate(p, token);
        return;
    }
    stopParsing(p, token);
}
// The "text" insertion mode
//------------------------------------------------------------------
/**
 * Handles an end tag in the "text" insertion mode: pops the current element and
 * restores the insertion mode saved when text parsing started. For `</script>` the
 * optional `scriptHandler` is invoked with the script element first.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInText(p, token) {
    if (token.tagID === TAG_ID.SCRIPT) {
        const handler = p.scriptHandler;
        if (handler !== null && handler !== undefined) {
            handler.call(p, p.openElements.current);
        }
    }
    p.openElements.pop();
    p.insertionMode = p.originalInsertionMode;
}

/**
 * Handles end-of-file in the "text" insertion mode: reports the error, pops the
 * current element, restores the saved insertion mode and re-dispatches the EOF token.
 *
 * @param p Parser instance.
 * @param token EOF token.
 */
function eofInText(p, token) {
    p._err(token, ERR.eofInElementThatCanContainOnlyText);
    p.openElements.pop();
    p.insertionMode = p.originalInsertionMode;
    p.onEof(token);
}
// The "in table" insertion mode
//------------------------------------------------------------------
/**
 * Handles character tokens in the "in table" insertion mode.
 *
 * When the current node is a table structure element, the parser starts collecting
 * pending character tokens in the "in table text" mode; otherwise the token goes
 * through the generic "anything else" table handling.
 *
 * @param p Parser instance.
 * @param token Character token.
 */
function characterInTable(p, token) {
    if (!TABLE_STRUCTURE_TAGS.has(p.openElements.currentTagId)) {
        tokenInTable(p, token);
        return;
    }
    p.pendingCharacterTokens.length = 0;
    p.hasNonWhitespacePendingCharacterToken = false;
    p.originalInsertionMode = p.insertionMode;
    p.insertionMode = InsertionMode.IN_TABLE_TEXT;
    const { type } = token;
    if (type === TokenType.CHARACTER) {
        characterInTableText(p, token);
    } else if (type === TokenType.WHITESPACE_CHARACTER) {
        whitespaceCharacterInTableText(p, token);
    }
    // Remaining token types (null characters) are ignored.
}

/**
 * Handles a `<caption>` start tag in the "in table" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function captionStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p.activeFormattingElements.insertMarker();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_CAPTION;
}

/**
 * Handles a `<colgroup>` start tag in the "in table" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function colgroupStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
}

/**
 * Handles a `<col>` start tag in the "in table" insertion mode: a fake `<colgroup>`
 * is inserted and the token is reprocessed in the "in column group" mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function colStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p._insertFakeElement(TAG_NAMES.COLGROUP, TAG_ID.COLGROUP);
    p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
    startTagInColumnGroup(p, token);
}

/**
 * Handles `<tbody>`, `<tfoot>` and `<thead>` start tags in the "in table" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function tbodyStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_TABLE_BODY;
}

/**
 * Handles `<td>`, `<th>` and `<tr>` start tags in the "in table" insertion mode: a fake
 * `<tbody>` is inserted and the token is reprocessed in the "in table body" mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function tdStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p._insertFakeElement(TAG_NAMES.TBODY, TAG_ID.TBODY);
    p.insertionMode = InsertionMode.IN_TABLE_BODY;
    startTagInTableBody(p, token);
}

/**
 * Handles a nested `<table>` start tag in the "in table" insertion mode: the open
 * table is closed and the token is reprocessed. Ignored when no table is in table scope.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function tableStartTagInTable(p, token) {
    const stack = p.openElements;
    if (!stack.hasInTableScope(TAG_ID.TABLE)) {
        return;
    }
    stack.popUntilTagNamePopped(TAG_ID.TABLE);
    p._resetInsertionMode();
    p._processStartTag(token);
}

/**
 * Handles an `<input>` start tag in the "in table" insertion mode. Hidden inputs are
 * appended in place; anything else goes through the generic table handling.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function inputStartTagInTable(p, token) {
    if (!isHiddenInput(token)) {
        tokenInTable(p, token);
    } else {
        p._appendElement(token, NS.HTML);
    }
    token.ackSelfClosing = true;
}

/**
 * Handles a `<form>` start tag in the "in table" insertion mode. The form is only
 * inserted (and immediately popped) when no form element pointer is set and the
 * parser is not inside a template.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function formStartTagInTable(p, token) {
    if (p.formElement || p.openElements.tmplCount !== 0) {
        return;
    }
    p._insertElement(token, NS.HTML);
    p.formElement = p.openElements.current;
    p.openElements.pop();
}

/**
 * Dispatches a start tag token in the "in table" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInTable(p, token) {
    switch (token.tagID) {
        case TAG_ID.TD:
        case TAG_ID.TH:
        case TAG_ID.TR:
            tdStartTagInTable(p, token);
            return;
        case TAG_ID.STYLE:
        case TAG_ID.SCRIPT:
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            return;
        case TAG_ID.COL:
            colStartTagInTable(p, token);
            return;
        case TAG_ID.FORM:
            formStartTagInTable(p, token);
            return;
        case TAG_ID.TABLE:
            tableStartTagInTable(p, token);
            return;
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            tbodyStartTagInTable(p, token);
            return;
        case TAG_ID.INPUT:
            inputStartTagInTable(p, token);
            return;
        case TAG_ID.CAPTION:
            captionStartTagInTable(p, token);
            return;
        case TAG_ID.COLGROUP:
            colgroupStartTagInTable(p, token);
            return;
        default:
            tokenInTable(p, token);
    }
}

/**
 * Dispatches an end tag token in the "in table" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInTable(p, token) {
    switch (token.tagID) {
        case TAG_ID.TABLE: {
            const stack = p.openElements;
            if (stack.hasInTableScope(TAG_ID.TABLE)) {
                stack.popUntilTagNamePopped(TAG_ID.TABLE);
                p._resetInsertionMode();
            }
            return;
        }
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TBODY:
        case TAG_ID.TD:
        case TAG_ID.TFOOT:
        case TAG_ID.TH:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            // These end tags are ignored here.
            return;
        default:
            tokenInTable(p, token);
    }
}

/**
 * "Anything else" handling for the "in table" insertion mode: the token is processed
 * with the "in body" rules while foster parenting is enabled; the previous foster
 * parenting state is restored afterwards.
 *
 * @param p Parser instance.
 * @param token Token to process.
 */
function tokenInTable(p, token) {
    const previousFosterParenting = p.fosterParentingEnabled;
    p.fosterParentingEnabled = true;
    modeInBody(p, token);
    p.fosterParentingEnabled = previousFosterParenting;
}
// The "in table text" insertion mode
//------------------------------------------------------------------
/**
 * Queues a whitespace character token while in the "in table text" insertion mode.
 *
 * @param p Parser instance.
 * @param token Whitespace character token.
 */
function whitespaceCharacterInTableText(p, token) {
    p.pendingCharacterTokens.push(token);
}

/**
 * Queues a non-whitespace character token while in the "in table text" insertion
 * mode and records that the pending run is no longer whitespace-only.
 *
 * @param p Parser instance.
 * @param token Character token.
 */
function characterInTableText(p, token) {
    p.pendingCharacterTokens.push(token);
    p.hasNonWhitespacePendingCharacterToken = true;
}

/**
 * Handles any other token in the "in table text" insertion mode.
 *
 * The pending character tokens are flushed first — through the generic table handling
 * when any of them was non-whitespace, otherwise inserted directly — then the saved
 * insertion mode is restored and the triggering token is reprocessed.
 *
 * @param p Parser instance.
 * @param token Token that ended the character run.
 */
function tokenInTableText(p, token) {
    const flush = p.hasNonWhitespacePendingCharacterToken
        ? (pending) => tokenInTable(p, pending)
        : (pending) => p._insertCharacters(pending);
    for (let idx = 0; idx < p.pendingCharacterTokens.length; idx++) {
        flush(p.pendingCharacterTokens[idx]);
    }
    p.insertionMode = p.originalInsertionMode;
    p._processToken(token);
}
// The "in caption" insertion mode
//------------------------------------------------------------------
// Table-structure start tags that the caption handler below uses to close an open
// `<caption>` before reprocessing the token in the "in table" mode.
const TABLE_VOID_ELEMENTS = new Set([
    TAG_ID.CAPTION,
    TAG_ID.COL,
    TAG_ID.COLGROUP,
    TAG_ID.TBODY,
    TAG_ID.TD,
    TAG_ID.TFOOT,
    TAG_ID.TH,
    TAG_ID.THEAD,
    TAG_ID.TR,
]);

/**
 * Handles start tags in the "in caption" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInCaption(p, token) {
    if (!TABLE_VOID_ELEMENTS.has(token.tagID)) {
        startTagInBody(p, token);
        return;
    }
    const stack = p.openElements;
    if (!stack.hasInTableScope(TAG_ID.CAPTION)) {
        // No caption to close: the token is ignored.
        return;
    }
    stack.generateImpliedEndTags();
    stack.popUntilTagNamePopped(TAG_ID.CAPTION);
    p.activeFormattingElements.clearToLastMarker();
    p.insertionMode = InsertionMode.IN_TABLE;
    startTagInTable(p, token);
}

/**
 * Handles end tags in the "in caption" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInCaption(p, token) {
    const closedId = token.tagID;
    switch (closedId) {
        case TAG_ID.CAPTION:
        case TAG_ID.TABLE: {
            const stack = p.openElements;
            if (!stack.hasInTableScope(TAG_ID.CAPTION)) {
                return;
            }
            stack.generateImpliedEndTags();
            stack.popUntilTagNamePopped(TAG_ID.CAPTION);
            p.activeFormattingElements.clearToLastMarker();
            p.insertionMode = InsertionMode.IN_TABLE;
            // `</table>` additionally closes the table itself.
            if (closedId === TAG_ID.TABLE) {
                endTagInTable(p, token);
            }
            return;
        }
        case TAG_ID.BODY:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TBODY:
        case TAG_ID.TD:
        case TAG_ID.TFOOT:
        case TAG_ID.TH:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            // These end tags are ignored here.
            return;
        default:
            endTagInBody(p, token);
    }
}
// The "in column group" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in column group" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInColumnGroup(p, token) {
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            return;
        case TAG_ID.COL:
            p._appendElement(token, NS.HTML);
            token.ackSelfClosing = true;
            return;
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            return;
        default:
            tokenInColumnGroup(p, token);
    }
}

/**
 * Handles end tags in the "in column group" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInColumnGroup(p, token) {
    switch (token.tagID) {
        case TAG_ID.COLGROUP:
            if (p.openElements.currentTagId === TAG_ID.COLGROUP) {
                p.openElements.pop();
                p.insertionMode = InsertionMode.IN_TABLE;
            }
            return;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        case TAG_ID.COL:
            // `</col>` is ignored.
            return;
        default:
            tokenInColumnGroup(p, token);
    }
}

/**
 * "Anything else" handling for the "in column group" insertion mode: when the current
 * node is a `<colgroup>`, it is popped and the token is reprocessed in the "in table"
 * mode; otherwise the token is dropped.
 *
 * @param p Parser instance.
 * @param token Token to process.
 */
function tokenInColumnGroup(p, token) {
    if (p.openElements.currentTagId !== TAG_ID.COLGROUP) {
        return;
    }
    p.openElements.pop();
    p.insertionMode = InsertionMode.IN_TABLE;
    p._processToken(token);
}
// The "in table body" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in table body" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInTableBody(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.TR:
            stack.clearBackToTableBodyContext();
            p._insertElement(token, NS.HTML);
            p.insertionMode = InsertionMode.IN_ROW;
            return;
        case TAG_ID.TH:
        case TAG_ID.TD:
            // A cell without a row: insert a fake <tr> and reprocess in the row mode.
            stack.clearBackToTableBodyContext();
            p._insertFakeElement(TAG_NAMES.TR, TAG_ID.TR);
            p.insertionMode = InsertionMode.IN_ROW;
            startTagInRow(p, token);
            return;
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            if (stack.hasTableBodyContextInTableScope()) {
                stack.clearBackToTableBodyContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE;
                startTagInTable(p, token);
            }
            return;
        default:
            startTagInTable(p, token);
    }
}

/**
 * Handles end tags in the "in table body" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInTableBody(p, token) {
    const stack = p.openElements;
    const closedId = token.tagID;
    switch (closedId) {
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            if (stack.hasInTableScope(closedId)) {
                stack.clearBackToTableBodyContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE;
            }
            return;
        case TAG_ID.TABLE:
            if (stack.hasTableBodyContextInTableScope()) {
                stack.clearBackToTableBodyContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE;
                endTagInTable(p, token);
            }
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TD:
        case TAG_ID.TH:
        case TAG_ID.TR:
            // These end tags are ignored here.
            return;
        default:
            endTagInTable(p, token);
    }
}
// The "in row" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in row" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInRow(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.TH:
        case TAG_ID.TD:
            stack.clearBackToTableRowContext();
            p._insertElement(token, NS.HTML);
            p.insertionMode = InsertionMode.IN_CELL;
            p.activeFormattingElements.insertMarker();
            return;
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            // Close the open row (if any) and reprocess in the table body mode.
            if (stack.hasInTableScope(TAG_ID.TR)) {
                stack.clearBackToTableRowContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
                startTagInTableBody(p, token);
            }
            return;
        default:
            startTagInTable(p, token);
    }
}

/**
 * Handles end tags in the "in row" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInRow(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.TR:
            if (stack.hasInTableScope(TAG_ID.TR)) {
                stack.clearBackToTableRowContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
            }
            return;
        case TAG_ID.TABLE:
            if (stack.hasInTableScope(TAG_ID.TR)) {
                stack.clearBackToTableRowContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
                endTagInTableBody(p, token);
            }
            return;
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            if (stack.hasInTableScope(token.tagID) || stack.hasInTableScope(TAG_ID.TR)) {
                stack.clearBackToTableRowContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
                endTagInTableBody(p, token);
            }
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TD:
        case TAG_ID.TH:
            // These end tags are ignored here.
            return;
        default:
            endTagInTable(p, token);
    }
}
// The "in cell" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in cell" insertion mode. Table-structure tags close the
 * open cell (when a `<td>` or `<th>` is in table scope) and are reprocessed in the
 * "in row" mode; everything else follows the "in body" rules.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInCell(p, token) {
    if (!TABLE_VOID_ELEMENTS.has(token.tagID)) {
        startTagInBody(p, token);
        return;
    }
    const stack = p.openElements;
    if (stack.hasInTableScope(TAG_ID.TD) || stack.hasInTableScope(TAG_ID.TH)) {
        p._closeTableCell();
        startTagInRow(p, token);
    }
}

/**
 * Handles end tags in the "in cell" insertion mode.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInCell(p, token) {
    const stack = p.openElements;
    const closedId = token.tagID;
    switch (closedId) {
        case TAG_ID.TD:
        case TAG_ID.TH:
            if (stack.hasInTableScope(closedId)) {
                stack.generateImpliedEndTags();
                stack.popUntilTagNamePopped(closedId);
                p.activeFormattingElements.clearToLastMarker();
                p.insertionMode = InsertionMode.IN_ROW;
            }
            return;
        case TAG_ID.TABLE:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            // Close the cell first, then let the row mode handle the end tag.
            if (stack.hasInTableScope(closedId)) {
                p._closeTableCell();
                endTagInRow(p, token);
            }
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
            // These end tags are ignored here.
            return;
        default:
            endTagInBody(p, token);
    }
}
// The "in select" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in select" insertion mode. Tags without a case below
 * are ignored.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInSelect(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            return;
        case TAG_ID.OPTION:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            p._insertElement(token, NS.HTML);
            return;
        case TAG_ID.OPTGROUP:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            if (stack.currentTagId === TAG_ID.OPTGROUP) {
                stack.pop();
            }
            p._insertElement(token, NS.HTML);
            return;
        case TAG_ID.HR:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            if (stack.currentTagId === TAG_ID.OPTGROUP) {
                stack.pop();
            }
            p._appendElement(token, NS.HTML);
            token.ackSelfClosing = true;
            return;
        case TAG_ID.INPUT:
        case TAG_ID.KEYGEN:
        case TAG_ID.TEXTAREA:
        case TAG_ID.SELECT:
            if (stack.hasInSelectScope(TAG_ID.SELECT)) {
                stack.popUntilTagNamePopped(TAG_ID.SELECT);
                p._resetInsertionMode();
                // A nested <select> only closes the open one; the other tags are reprocessed.
                if (token.tagID !== TAG_ID.SELECT) {
                    p._processStartTag(token);
                }
            }
            return;
        case TAG_ID.SCRIPT:
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            return;
        default:
        // Do nothing
    }
}

/**
 * Handles end tags in the "in select" insertion mode. Tags without a case below
 * are ignored.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInSelect(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.OPTGROUP: {
            // An <option> directly inside the <optgroup> is closed implicitly.
            const optionInsideOptgroup = stack.stackTop > 0 &&
                stack.currentTagId === TAG_ID.OPTION &&
                stack.tagIDs[stack.stackTop - 1] === TAG_ID.OPTGROUP;
            if (optionInsideOptgroup) {
                stack.pop();
            }
            if (stack.currentTagId === TAG_ID.OPTGROUP) {
                stack.pop();
            }
            return;
        }
        case TAG_ID.OPTION:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            return;
        case TAG_ID.SELECT:
            if (stack.hasInSelectScope(TAG_ID.SELECT)) {
                stack.popUntilTagNamePopped(TAG_ID.SELECT);
                p._resetInsertionMode();
            }
            return;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        default:
        // Do nothing
    }
}
// The "in select in table" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in select in table" insertion mode. Table-related tags
 * close the open `<select>` and are reprocessed; all other tags follow the
 * "in select" rules.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInSelectInTable(p, token) {
    switch (token.tagID) {
        case TAG_ID.CAPTION:
        case TAG_ID.TABLE:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
        case TAG_ID.TD:
        case TAG_ID.TH:
            p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
            p._resetInsertionMode();
            p._processStartTag(token);
            return;
        default:
            startTagInSelect(p, token);
    }
}

/**
 * Handles end tags in the "in select in table" insertion mode. A table-related end
 * tag is only acted upon when a matching element is in table scope; it then closes
 * the open `<select>` and is reprocessed.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInSelectInTable(p, token) {
    const closedId = token.tagID;
    switch (closedId) {
        case TAG_ID.CAPTION:
        case TAG_ID.TABLE:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
        case TAG_ID.TD:
        case TAG_ID.TH:
            if (p.openElements.hasInTableScope(closedId)) {
                p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
                p._resetInsertionMode();
                p.onEndTag(token);
            }
            return;
        default:
            endTagInSelect(p, token);
    }
}
// The "in template" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in template" insertion mode.
 *
 * Head-level tags are handled in place. Any other tag replaces the first entry of the
 * template insertion mode stack and the current insertion mode with the mode matching
 * the tag, and is then processed by that mode's start tag handler.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInTemplate(p, token) {
    let nextMode;
    let handler;
    switch (token.tagID) {
        // Tags that can start without a mode change.
        case TAG_ID.BASE:
        case TAG_ID.BASEFONT:
        case TAG_ID.BGSOUND:
        case TAG_ID.LINK:
        case TAG_ID.META:
        case TAG_ID.NOFRAMES:
        case TAG_ID.SCRIPT:
        case TAG_ID.STYLE:
        case TAG_ID.TEMPLATE:
        case TAG_ID.TITLE:
            startTagInHead(p, token);
            return;
        // Everything below re-processes the token in the appropriate mode.
        case TAG_ID.CAPTION:
        case TAG_ID.COLGROUP:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            nextMode = InsertionMode.IN_TABLE;
            handler = startTagInTable;
            break;
        case TAG_ID.COL:
            nextMode = InsertionMode.IN_COLUMN_GROUP;
            handler = startTagInColumnGroup;
            break;
        case TAG_ID.TR:
            nextMode = InsertionMode.IN_TABLE_BODY;
            handler = startTagInTableBody;
            break;
        case TAG_ID.TD:
        case TAG_ID.TH:
            nextMode = InsertionMode.IN_ROW;
            handler = startTagInRow;
            break;
        default:
            nextMode = InsertionMode.IN_BODY;
            handler = startTagInBody;
    }
    p.tmplInsertionModeStack[0] = nextMode;
    p.insertionMode = nextMode;
    handler(p, token);
}

/**
 * Handles end tags in the "in template" insertion mode: only `</template>` is
 * processed, every other end tag is ignored.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInTemplate(p, token) {
    if (token.tagID !== TAG_ID.TEMPLATE) {
        return;
    }
    templateEndTagInHead(p, token);
}

/**
 * Handles end-of-file in the "in template" insertion mode. While a template is open
 * it is closed, the insertion mode is reset and the EOF token is re-dispatched;
 * otherwise parsing stops.
 *
 * @param p Parser instance.
 * @param token EOF token.
 */
function eofInTemplate(p, token) {
    if (p.openElements.tmplCount <= 0) {
        stopParsing(p, token);
        return;
    }
    p.openElements.popUntilTagNamePopped(TAG_ID.TEMPLATE);
    p.activeFormattingElements.clearToLastMarker();
    p.tmplInsertionModeStack.shift();
    p._resetInsertionMode();
    p.onEof(token);
}
// The "after body" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "after body" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagAfterBody(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        tokenAfterBody(p, token);
        return;
    }
    startTagInBody(p, token);
}

/**
 * Handles end tags in the "after body" insertion mode.
 *
 * `</html>` moves the parser to "after after body" (unless a fragment is being
 * parsed) and, with source location tracking enabled, records end locations.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagAfterBody(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        tokenAfterBody(p, token);
        return;
    }
    if (!p.fragmentContext) {
        p.insertionMode = InsertionMode.AFTER_AFTER_BODY;
    }
    //NOTE: <html> is never popped from the stack, so we need to update
    //the end location explicitly.
    if (!p.options.sourceCodeLocationInfo || p.openElements.tagIDs[0] !== TAG_ID.HTML) {
        return;
    }
    p._setEndLocation(p.openElements.items[0], token);
    // Also update the body element when it has no end tag location of its own.
    const body = p.openElements.items[1];
    if (!body) {
        return;
    }
    const location = p.treeAdapter.getNodeSourceCodeLocation(body);
    const bodyEndTag = location === null || location === undefined ? undefined : location.endTag;
    if (!bodyEndTag) {
        p._setEndLocation(body, token);
    }
}

/**
 * "Anything else" handling for the "after body" insertion mode: switches back to
 * "in body" and reprocesses the token there.
 *
 * @param p Parser instance.
 * @param token Token to process.
 */
function tokenAfterBody(p, token) {
    p.insertionMode = InsertionMode.IN_BODY;
    modeInBody(p, token);
}
// The "in frameset" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "in frameset" insertion mode. Tags without a case below
 * are ignored.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagInFrameset(p, token) {
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            return;
        case TAG_ID.FRAMESET:
            p._insertElement(token, NS.HTML);
            return;
        case TAG_ID.FRAME:
            p._appendElement(token, NS.HTML);
            token.ackSelfClosing = true;
            return;
        case TAG_ID.NOFRAMES:
            startTagInHead(p, token);
            return;
        default:
        // Do nothing
    }
}

/**
 * Handles end tags in the "in frameset" insertion mode. Only `</frameset>` is acted
 * upon, and only while the current node is not the root `<html>` element.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInFrameset(p, token) {
    if (token.tagID !== TAG_ID.FRAMESET || p.openElements.isRootHtmlElementCurrent()) {
        return;
    }
    p.openElements.pop();
    // Leaving the outermost frameset (outside fragment parsing) ends the frameset mode.
    if (!p.fragmentContext && p.openElements.currentTagId !== TAG_ID.FRAMESET) {
        p.insertionMode = InsertionMode.AFTER_FRAMESET;
    }
}
// The "after frameset" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "after frameset" insertion mode. Only `<html>` and
 * `<noframes>` are processed; every other start tag is ignored.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagAfterFrameset(p, token) {
    const { tagID } = token;
    if (tagID === TAG_ID.HTML) {
        startTagInBody(p, token);
    } else if (tagID === TAG_ID.NOFRAMES) {
        startTagInHead(p, token);
    }
    // Anything else: do nothing.
}

/**
 * Handles end tags in the "after frameset" insertion mode: `</html>` switches the
 * parser to "after after frameset", every other end tag is ignored.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagAfterFrameset(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        return;
    }
    p.insertionMode = InsertionMode.AFTER_AFTER_FRAMESET;
}
// The "after after body" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "after after body" insertion mode.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagAfterAfterBody(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        tokenAfterAfterBody(p, token);
        return;
    }
    startTagInBody(p, token);
}

/**
 * "Anything else" handling for the "after after body" insertion mode: switches back
 * to "in body" and reprocesses the token there.
 *
 * @param p Parser instance.
 * @param token Token to process.
 */
function tokenAfterAfterBody(p, token) {
    p.insertionMode = InsertionMode.IN_BODY;
    modeInBody(p, token);
}
// The "after after frameset" insertion mode
//------------------------------------------------------------------
/**
 * Handles start tags in the "after after frameset" insertion mode. Only `<html>` and
 * `<noframes>` are processed; every other start tag is ignored.
 *
 * @param p Parser instance.
 * @param token Start tag token.
 */
function startTagAfterAfterFrameset(p, token) {
    const { tagID } = token;
    if (tagID === TAG_ID.HTML) {
        startTagInBody(p, token);
    } else if (tagID === TAG_ID.NOFRAMES) {
        startTagInHead(p, token);
    }
    // Anything else: do nothing.
}
// The rules for parsing tokens in foreign content
//------------------------------------------------------------------
/**
 * Handles a null character in foreign content by replacing it with the replacement
 * character before inserting it.
 *
 * @param p Parser instance.
 * @param token Null character token; its `chars` are overwritten.
 */
function nullCharacterInForeignContent(p, token) {
    token.chars = REPLACEMENT_CHARACTER;
    p._insertCharacters(token);
}

/**
 * Handles a non-whitespace character token in foreign content.
 *
 * @param p Parser instance.
 * @param token Character token.
 */
function characterInForeignContent(p, token) {
    p._insertCharacters(token);
    p.framesetOk = false;
}

/**
 * Pops the stack of open elements until the current node is either an element in the
 * HTML namespace or an integration point.
 *
 * @param p Parser instance.
 */
function popUntilHtmlOrIntegrationPoint(p) {
    const stack = p.openElements;
    for (;;) {
        if (p.treeAdapter.getNamespaceURI(stack.current) === NS.HTML) {
            return;
        }
        if (p._isIntegrationPoint(stack.currentTagId, stack.current)) {
            return;
        }
        stack.pop();
    }
}

/**
 * Handles start tags in foreign content.
 *
 * Tags that force an exit from foreign content are reprocessed with the regular HTML
 * rules; all others are inserted in the namespace of the adjusted current node after
 * their names/attributes have been adjusted for that namespace.
 *
 * @param p Parser instance.
 * @param token Start tag token; adjusted in place.
 */
function startTagInForeignContent(p, token) {
    if (causesExit(token)) {
        popUntilHtmlOrIntegrationPoint(p);
        p._startTagOutsideForeignContent(token);
        return;
    }
    const adjustedCurrent = p._getAdjustedCurrentElement();
    const namespace = p.treeAdapter.getNamespaceURI(adjustedCurrent);
    switch (namespace) {
        case NS.MATHML:
            adjustTokenMathMLAttrs(token);
            break;
        case NS.SVG:
            adjustTokenSVGTagName(token);
            adjustTokenSVGAttrs(token);
            break;
        default:
    }
    adjustTokenXMLAttrs(token);
    if (!token.selfClosing) {
        p._insertElement(token, namespace);
    } else {
        p._appendElement(token, namespace);
    }
    token.ackSelfClosing = true;
}

/**
 * Handles end tags in foreign content.
 *
 * `</p>` and `</br>` always leave foreign content. Otherwise the stack is searched
 * from the top (index 0 excluded): a case-insensitively matching foreign element is
 * closed, while reaching an HTML element hands the token to the regular HTML rules.
 *
 * @param p Parser instance.
 * @param token End tag token.
 */
function endTagInForeignContent(p, token) {
    if (token.tagID === TAG_ID.P || token.tagID === TAG_ID.BR) {
        popUntilHtmlOrIntegrationPoint(p);
        p._endTagOutsideForeignContent(token);
        return;
    }
    const stack = p.openElements;
    for (let idx = stack.stackTop; idx > 0; idx--) {
        const node = stack.items[idx];
        if (p.treeAdapter.getNamespaceURI(node) === NS.HTML) {
            p._endTagOutsideForeignContent(token);
            return;
        }
        const nodeName = p.treeAdapter.getTagName(node);
        if (nodeName.toLowerCase() === token.tagName) {
            //NOTE: update the token tag name for `_setEndLocation`.
            token.tagName = nodeName;
            stack.shortenToLength(idx);
            return;
        }
    }
}
// Shorthands
/**
 * Parses an HTML string.
 *
 * Thin convenience wrapper that forwards both arguments to `Parser.parse`.
 *
 * @param html Input HTML string.
 * @param options Parsing options.
 * @returns Document
 *
 * @example
 *
 * ```js
 * import { parse } from 'parse5';
 *
 * const document = parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 *
 * console.log(document.childNodes[1].tagName); //> 'html'
 * ```
 */
function parse(html, options) {
    const document = Parser.parse(html, options);
    return document;
}
export { ERR as ErrorCodes, Parser, Tokenizer, TokenizerMode, defaultTreeAdapter, parse };
|