You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

8218 lines
332 KiB

  /**
   * The last two code points (U+xFFFE and U+xFFFF) of each of the 17 Unicode
   * planes. The U+FDD0..U+FDEF range is tested separately by
   * `isUndefinedCodePoint`.
   */
  const UNDEFINED_CODE_POINTS = new Set();
  for (let plane = 0; plane <= 0x10; plane++) {
      const planeBase = plane * 0x10000;
      UNDEFINED_CODE_POINTS.add(planeBase + 0xfffe);
      UNDEFINED_CODE_POINTS.add(planeBase + 0xffff);
  }
  /** U+FFFD REPLACEMENT CHARACTER. */
  const REPLACEMENT_CHARACTER = '\uFFFD';
  /**
   * Named code points used by the tokenizer. Built as a two-way enum:
   * `CODE_POINTS.NAME` yields the number and `CODE_POINTS[number]` yields the name.
   */
  var CODE_POINTS;
  (function (members) {
      const table = [
          ['EOF', -1],
          ['NULL', 0],
          ['TABULATION', 9],
          ['CARRIAGE_RETURN', 13],
          ['LINE_FEED', 10],
          ['FORM_FEED', 12],
          ['SPACE', 32],
          ['EXCLAMATION_MARK', 33],
          ['QUOTATION_MARK', 34],
          ['AMPERSAND', 38],
          ['APOSTROPHE', 39],
          ['HYPHEN_MINUS', 45],
          ['SOLIDUS', 47],
          ['DIGIT_0', 48],
          ['DIGIT_9', 57],
          ['SEMICOLON', 59],
          ['LESS_THAN_SIGN', 60],
          ['EQUALS_SIGN', 61],
          ['GREATER_THAN_SIGN', 62],
          ['QUESTION_MARK', 63],
          ['LATIN_CAPITAL_A', 65],
          ['LATIN_CAPITAL_Z', 90],
          ['RIGHT_SQUARE_BRACKET', 93],
          ['GRAVE_ACCENT', 96],
          ['LATIN_SMALL_A', 97],
          ['LATIN_SMALL_Z', 122],
      ];
      for (const [name, value] of table) {
          // Forward mapping first, then the reverse one, like a compiled TS enum.
          members[name] = value;
          members[value] = name;
      }
  })(CODE_POINTS || (CODE_POINTS = {}));
  /**
   * Literal character sequences looked up by name.
   * NOTE(review): the keyword entries are lowercase; presumably they are
   * matched case-insensitively against the input — confirm against callers.
   */
  const SEQUENCES = {
      // Comment delimiter
      DASH_DASH: '--',
      // CDATA section opener (without the leading "<!")
      CDATA_START: '[CDATA[',
      // Keywords
      DOCTYPE: 'doctype',
      SCRIPT: 'script',
      PUBLIC: 'public',
      SYSTEM: 'system',
  };
  // Surrogates
  /** Returns true when `cp` lies in the UTF-16 surrogate range U+D800..U+DFFF. */
  function isSurrogate(cp) {
      const atOrAboveFirst = cp >= 0xd800;
      return atOrAboveFirst && cp <= 0xdfff;
  }
  /**
   * Returns true when `cp` is a trailing (low) surrogate, U+DC00..U+DFFF,
   * i.e. a code unit that can complete a surrogate pair.
   */
  function isSurrogatePair(cp) {
      const atOrAboveFirstLow = cp >= 0xdc00;
      return atOrAboveFirstLow && cp <= 0xdfff;
  }
  /**
   * Combines a leading surrogate `cp1` and a trailing surrogate `cp2` into
   * the code point they encode.
   */
  function getSurrogatePairCodePoint(cp1, cp2) {
      // 0x2400 === 0x10000 - 0xdc00: the supplementary-plane base minus the
      // first trailing surrogate, folded into a single constant.
      const highPart = (cp1 - 0xd800) * 0x400;
      return highPart + 0x2400 + cp2;
  }
  /**
   * Returns true for control code points: U+0001..U+001F except TAB, LF, FF
   * and CR, plus U+007F..U+009F.
   * NOTE: NULL and ASCII whitespace are deliberately excluded.
   */
  function isControlCodePoint(cp) {
      if (cp >= 0x7f && cp <= 0x9f) {
          return true;
      }
      if (!(cp >= 0x01 && cp <= 0x1f)) {
          return false;
      }
      // Inside the C0 range only the whitespace controls are exempt.
      switch (cp) {
          case 0x09:
          case 0x0a:
          case 0x0c:
          case 0x0d:
              return false;
          default:
              return true;
      }
  }
  /**
   * Returns true when `cp` is in U+FDD0..U+FDEF or is listed in
   * `UNDEFINED_CODE_POINTS`.
   */
  function isUndefinedCodePoint(cp) {
      if (cp >= 0xfdd0 && cp <= 0xfdef) {
          return true;
      }
      return UNDEFINED_CODE_POINTS.has(cp);
  }
  /** Parse error codes: camelCase key mapped to the kebab-case code string. */
  var ERR;
  (function (codes) {
      Object.assign(codes, {
          controlCharacterInInputStream: "control-character-in-input-stream",
          noncharacterInInputStream: "noncharacter-in-input-stream",
          surrogateInInputStream: "surrogate-in-input-stream",
          nonVoidHtmlElementStartTagWithTrailingSolidus: "non-void-html-element-start-tag-with-trailing-solidus",
          endTagWithAttributes: "end-tag-with-attributes",
          endTagWithTrailingSolidus: "end-tag-with-trailing-solidus",
          unexpectedSolidusInTag: "unexpected-solidus-in-tag",
          unexpectedNullCharacter: "unexpected-null-character",
          unexpectedQuestionMarkInsteadOfTagName: "unexpected-question-mark-instead-of-tag-name",
          invalidFirstCharacterOfTagName: "invalid-first-character-of-tag-name",
          unexpectedEqualsSignBeforeAttributeName: "unexpected-equals-sign-before-attribute-name",
          missingEndTagName: "missing-end-tag-name",
          unexpectedCharacterInAttributeName: "unexpected-character-in-attribute-name",
          unknownNamedCharacterReference: "unknown-named-character-reference",
          missingSemicolonAfterCharacterReference: "missing-semicolon-after-character-reference",
          unexpectedCharacterAfterDoctypeSystemIdentifier: "unexpected-character-after-doctype-system-identifier",
          unexpectedCharacterInUnquotedAttributeValue: "unexpected-character-in-unquoted-attribute-value",
          eofBeforeTagName: "eof-before-tag-name",
          eofInTag: "eof-in-tag",
          missingAttributeValue: "missing-attribute-value",
          missingWhitespaceBetweenAttributes: "missing-whitespace-between-attributes",
          missingWhitespaceAfterDoctypePublicKeyword: "missing-whitespace-after-doctype-public-keyword",
          missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers: "missing-whitespace-between-doctype-public-and-system-identifiers",
          missingWhitespaceAfterDoctypeSystemKeyword: "missing-whitespace-after-doctype-system-keyword",
          missingQuoteBeforeDoctypePublicIdentifier: "missing-quote-before-doctype-public-identifier",
          missingQuoteBeforeDoctypeSystemIdentifier: "missing-quote-before-doctype-system-identifier",
          missingDoctypePublicIdentifier: "missing-doctype-public-identifier",
          missingDoctypeSystemIdentifier: "missing-doctype-system-identifier",
          abruptDoctypePublicIdentifier: "abrupt-doctype-public-identifier",
          abruptDoctypeSystemIdentifier: "abrupt-doctype-system-identifier",
          cdataInHtmlContent: "cdata-in-html-content",
          incorrectlyOpenedComment: "incorrectly-opened-comment",
          eofInScriptHtmlCommentLikeText: "eof-in-script-html-comment-like-text",
          eofInDoctype: "eof-in-doctype",
          nestedComment: "nested-comment",
          abruptClosingOfEmptyComment: "abrupt-closing-of-empty-comment",
          eofInComment: "eof-in-comment",
          incorrectlyClosedComment: "incorrectly-closed-comment",
          eofInCdata: "eof-in-cdata",
          absenceOfDigitsInNumericCharacterReference: "absence-of-digits-in-numeric-character-reference",
          nullCharacterReference: "null-character-reference",
          surrogateCharacterReference: "surrogate-character-reference",
          characterReferenceOutsideUnicodeRange: "character-reference-outside-unicode-range",
          controlCharacterReference: "control-character-reference",
          noncharacterCharacterReference: "noncharacter-character-reference",
          missingWhitespaceBeforeDoctypeName: "missing-whitespace-before-doctype-name",
          missingDoctypeName: "missing-doctype-name",
          invalidCharacterSequenceAfterDoctypeName: "invalid-character-sequence-after-doctype-name",
          duplicateAttribute: "duplicate-attribute",
          nonConformingDoctype: "non-conforming-doctype",
          missingDoctype: "missing-doctype",
          misplacedDoctype: "misplaced-doctype",
          endTagWithoutMatchingOpenElement: "end-tag-without-matching-open-element",
          closingOfElementWithOpenChildElements: "closing-of-element-with-open-child-elements",
          disallowedContentInNoscriptInHead: "disallowed-content-in-noscript-in-head",
          openElementsLeftAfterEof: "open-elements-left-after-eof",
          abandonedHeadElementChild: "abandoned-head-element-child",
          misplacedStartTagForHeadElement: "misplaced-start-tag-for-head-element",
          nestedNoscriptInHead: "nested-noscript-in-head",
          eofInElementThatCanContainOnlyText: "eof-in-element-that-can-contain-only-text",
      });
  })(ERR || (ERR = {}));
  // Constants
  /** Initial value of `Preprocessor#bufferWaterline` (65536). */
  const DEFAULT_BUFFER_WATERLINE = 0x10000;
  // Preprocessor
  // NOTE: HTML input stream preprocessing
  // (see: https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream)
  class Preprocessor {
      /**
       * @param handler Object whose `onParseError` member, when truthy, is
       *                called with an error record for each reported problem.
       */
      constructor(handler) {
          this.handler = handler;
          // Buffered input and the index of the code unit last returned.
          this.html = '';
          this.pos = -1;
          // NOTE: `lastGapPos` starts at -2 so that `col` is 0 right after construction.
          this.lastGapPos = -2;
          this.gapStack = [];
          this.skipNextNewLine = false;
          this.lastChunkWritten = false;
          this.endOfChunkHit = false;
          this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
          this.isEol = false;
          this.lineStartPos = 0;
          this.droppedBufferSize = 0;
          this.line = 1;
          // NOTE: remembered so the same offset is not reported twice after advance/retreat.
          this.lastErrOffset = -1;
      }
      /** The column on the current line. If a gap (e.g. a surrogate pair) was just seen, the index before it is returned. */
      get col() {
          const atGap = this.lastGapPos === this.pos;
          return this.pos - this.lineStartPos + (atGap ? 0 : 1);
      }
      /** Position in the whole input, counting the part of the buffer already dropped. */
      get offset() {
          return this.droppedBufferSize + this.pos;
      }
      /**
       * Builds a zero-width error record located `cpOffset` units from the
       * current position.
       */
      getError(code, cpOffset) {
          const startCol = this.col + cpOffset;
          const startOffset = this.offset + cpOffset;
          return {
              code,
              startLine: this.line,
              endLine: this.line,
              startCol,
              endCol: startCol,
              startOffset,
              endOffset: startOffset,
          };
      }
      /** Reports `code` at the current position, at most once per offset. */
      _err(code) {
          if (!this.handler.onParseError) {
              return;
          }
          const here = this.offset;
          if (this.lastErrOffset === here) {
              return;
          }
          this.lastErrOffset = here;
          this.handler.onParseError(this.getError(code, 0));
      }
      /** Records the current position as a gap, remembering the previous one. */
      _addGap() {
          this.gapStack.push(this.lastGapPos);
          this.lastGapPos = this.pos;
      }
      /**
       * Handles a surrogate found at the current position: returns the combined
       * code point of a pair, EOF when the pair may continue in a later chunk,
       * or the lone surrogate itself after reporting an error.
       */
      _processSurrogate(cp) {
          const isLastUnit = this.pos === this.html.length - 1;
          if (!isLastUnit) {
              //NOTE: try to peek the second half of a surrogate pair
              const trailing = this.html.charCodeAt(this.pos + 1);
              if (isSurrogatePair(trailing)) {
                  //NOTE: consume the trailing unit and recalculate the code point
                  this.pos++;
                  //NOTE: add a gap that should be skipped during retreat
                  this._addGap();
                  return getSurrogatePairCodePoint(cp, trailing);
              }
          }
          else if (!this.lastChunkWritten) {
              //NOTE: end of chunk, the pair cannot be inferred yet
              this.endOfChunkHit = true;
              return CODE_POINTS.EOF;
          }
          //NOTE: isolated surrogate
          this._err(ERR.surrogateInInputStream);
          return cp;
      }
      /** True when the current position is beyond `bufferWaterline`. */
      willDropParsedChunk() {
          return this.pos > this.bufferWaterline;
      }
      /** Discards the buffer before the current position and rebases positions, when past the waterline. */
      dropParsedChunk() {
          if (!this.willDropParsedChunk()) {
              return;
          }
          const consumed = this.pos;
          this.html = this.html.substring(consumed);
          this.lineStartPos -= consumed;
          this.droppedBufferSize += consumed;
          this.pos = 0;
          this.lastGapPos = -2;
          this.gapStack.length = 0;
      }
      /** Appends `chunk` to the buffer; `isLastChunk` marks the end of input. */
      write(chunk, isLastChunk) {
          this.html = this.html.length > 0 ? this.html + chunk : chunk;
          this.endOfChunkHit = false;
          this.lastChunkWritten = isLastChunk;
      }
      /** Inserts `chunk` right after the current position. */
      insertHtmlAtCurrentPos(chunk) {
          const splitAt = this.pos + 1;
          const head = this.html.substring(0, splitAt);
          const tail = this.html.substring(splitAt);
          this.html = head + chunk + tail;
          this.endOfChunkHit = false;
      }
      /**
       * Tests whether the buffer at the current position starts with `pattern`.
       * When `caseSensitive` is falsy, each input unit is OR-ed with 0x20 before
       * the comparison, so `pattern` must be given in lowercase to match.
       */
      startsWith(pattern, caseSensitive) {
          const { length } = pattern;
          // Not enough buffered characters to decide
          if (this.pos + length > this.html.length) {
              this.endOfChunkHit = !this.lastChunkWritten;
              return false;
          }
          if (caseSensitive) {
              return this.html.startsWith(pattern, this.pos);
          }
          let idx = 0;
          while (idx < length) {
              const lowered = this.html.charCodeAt(this.pos + idx) | 0x20;
              if (lowered !== pattern.charCodeAt(idx)) {
                  return false;
              }
              idx++;
          }
          return true;
      }
      /** Returns the code unit `offset` positions ahead without consuming it (CR is reported as LF). */
      peek(offset) {
          const target = this.pos + offset;
          if (target >= this.html.length) {
              this.endOfChunkHit = !this.lastChunkWritten;
              return CODE_POINTS.EOF;
          }
          const unit = this.html.charCodeAt(target);
          if (unit === CODE_POINTS.CARRIAGE_RETURN) {
              return CODE_POINTS.LINE_FEED;
          }
          return unit;
      }
      /** Consumes and returns the next code point, or EOF when the buffer is exhausted. */
      advance() {
          this.pos++;
          //NOTE: LF stays in the last column of its line; the line counter is
          //bumped only when moving past it.
          if (this.isEol) {
              this.isEol = false;
              this.line++;
              this.lineStartPos = this.pos;
          }
          if (this.pos >= this.html.length) {
              this.endOfChunkHit = !this.lastChunkWritten;
              return CODE_POINTS.EOF;
          }
          let codePoint = this.html.charCodeAt(this.pos);
          //NOTE: every U+000D CARRIAGE RETURN (CR) is converted to U+000A LINE FEED (LF)
          if (codePoint === CODE_POINTS.CARRIAGE_RETURN) {
              this.isEol = true;
              this.skipNextNewLine = true;
              return CODE_POINTS.LINE_FEED;
          }
          //NOTE: an LF that immediately follows a CR must be ignored
          if (codePoint === CODE_POINTS.LINE_FEED) {
              this.isEol = true;
              if (this.skipNextNewLine) {
                  // `line` is bumped again by the recursive call.
                  this.line--;
                  this.skipNextNewLine = false;
                  this._addGap();
                  return this.advance();
              }
          }
          this.skipNextNewLine = false;
          if (isSurrogate(codePoint)) {
              codePoint = this._processSurrogate(codePoint);
          }
          //OPTIMIZATION: cheap test for the common allowed ranges (ASCII
          //printable, line breaks, most of the BMP) before the detailed checks.
          const inCommonValidRange = this.handler.onParseError === null ||
              (codePoint > 0x1f && codePoint < 0x7f) ||
              codePoint === CODE_POINTS.LINE_FEED ||
              codePoint === CODE_POINTS.CARRIAGE_RETURN ||
              (codePoint > 0x9f && codePoint < 64976);
          if (inCommonValidRange) {
              return codePoint;
          }
          this._checkForProblematicCharacters(codePoint);
          return codePoint;
      }
      /** Reports control characters and noncharacters found in the input. */
      _checkForProblematicCharacters(cp) {
          if (isControlCodePoint(cp)) {
              this._err(ERR.controlCharacterInInputStream);
              return;
          }
          if (isUndefinedCodePoint(cp)) {
              this._err(ERR.noncharacterInInputStream);
          }
      }
      /** Moves the position back by `count`, stepping one extra unit for each gap crossed. */
      retreat(count) {
          this.pos -= count;
          while (this.pos < this.lastGapPos) {
              this.lastGapPos = this.gapStack.pop();
              this.pos--;
          }
          this.isEol = false;
      }
  }
  /** Token kinds, numbered from 0 in declaration order (two-way enum). */
  var TokenType;
  (function (members) {
      const names = [
          'CHARACTER',
          'NULL_CHARACTER',
          'WHITESPACE_CHARACTER',
          'START_TAG',
          'END_TAG',
          'COMMENT',
          'DOCTYPE',
          'EOF',
          'HIBERNATION',
      ];
      names.forEach((name, ordinal) => {
          members[name] = ordinal;
          members[ordinal] = name;
      });
  })(TokenType || (TokenType = {}));
  /**
   * Looks up an attribute value on a tag token.
   *
   * @param token Token with an `attrs` array of `{ name, value }` entries.
   * @param attrName Attribute name to look for.
   * @returns The value of the last attribute with that name, or `null` if absent.
   */
  function getTokenAttr(token, attrName) {
      // Scan from the end so the last matching entry wins.
      let idx = token.attrs.length;
      while (idx-- > 0) {
          const attr = token.attrs[idx];
          if (attr.name === attrName) {
              return attr.value;
          }
      }
      return null;
  }
  340. // Generated using scripts/write-decode-map.ts
  341. var htmlDecodeTree = new Uint16Array(
  342. // prettier-ignore
  343. "\u1d41<\xd5\u0131\u028a\u049d\u057b\u05d0\u0675\u06de\u07a2\u07d6\u080f\u0a4a\u0a91\u0da1\u0e6d\u0f09\u0f26\u10ca\u1228\u12e1\u1415\u149d\u14c3\u14df\u1525\0\0\0\0\0\0\u156b\u16cd\u198d\u1c12\u1ddd\u1f7e\u2060\u21b0\u228d\u23c0\u23fb\u2442\u2824\u2912\u2d08\u2e48\u2fce\u3016\u32ba\u3639\u37ac\u38fe\u3a28\u3a71\u3ae0\u3b2e\u0800EMabcfglmnoprstu\\bfms\x7f\x84\x8b\x90\x95\x98\xa6\xb3\xb9\xc8\xcflig\u803b\xc6\u40c6P\u803b&\u4026cute\u803b\xc1\u40c1reve;\u4102\u0100iyx}rc\u803b\xc2\u40c2;\u4410r;\uc000\ud835\udd04rave\u803b\xc0\u40c0pha;\u4391acr;\u4100d;\u6a53\u0100gp\x9d\xa1on;\u4104f;\uc000\ud835\udd38plyFunction;\u6061ing\u803b\xc5\u40c5\u0100cs\xbe\xc3r;\uc000\ud835\udc9cign;\u6254ilde\u803b\xc3\u40c3ml\u803b\xc4\u40c4\u0400aceforsu\xe5\xfb\xfe\u0117\u011c\u0122\u0127\u012a\u0100cr\xea\xf2kslash;\u6216\u0176\xf6\xf8;\u6ae7ed;\u6306y;\u4411\u0180crt\u0105\u010b\u0114ause;\u6235noullis;\u612ca;\u4392r;\uc000\ud835\udd05pf;\uc000\ud835\udd39eve;\u42d8c\xf2\u0113mpeq;\u624e\u0700HOacdefhilorsu\u014d\u0151\u0156\u0180\u019e\u01a2\u01b5\u01b7\u01ba\u01dc\u0215\u0273\u0278\u027ecy;\u4427PY\u803b\xa9\u40a9\u0180cpy\u015d\u0162\u017aute;\u4106\u0100;i\u0167\u0168\u62d2talDifferentialD;\u6145leys;\u612d\u0200aeio\u0189\u018e\u0194\u0198ron;\u410cdil\u803b\xc7\u40c7rc;\u4108nint;\u6230ot;\u410a\u0100dn\u01a7\u01adilla;\u40b8terDot;\u40b7\xf2\u017fi;\u43a7rcle\u0200DMPT\u01c7\u01cb\u01d1\u01d6ot;\u6299inus;\u6296lus;\u6295imes;\u6297o\u0100cs\u01e2\u01f8kwiseContourIntegral;\u6232eCurly\u0100DQ\u0203\u020foubleQuote;\u601duote;\u6019\u0200lnpu\u021e\u0228\u0247\u0255on\u0100;e\u0225\u0226\u6237;\u6a74\u0180git\u022f\u0236\u023aruent;\u6261nt;\u622fourIntegral;\u622e\u0100fr\u024c\u024e;\u6102oduct;\u6210nterClockwiseContourIntegral;\u6233oss;\u6a2fcr;\uc000\ud835\udc9ep\u0100;C\u0284\u0285\u62d3ap;\u624d\u0580DJSZacefios\u02a0\u02ac\u02b0\u02b4\u02b8\u02cb\u02d7\u02e1\u02e6\u0333\u048d\u0100;o\u0179\u02a5trahd;\u6911cy;\u4402cy;\u4405cy;\u440f\u0180grs\u02bf\u02c4\u02c7
ger;\u6021r;\u61a1hv;\u6ae4\u0100ay\u02d0\u02d5ron;\u410e;\u4414l\u0100;t\u02dd\u02de\u6207a;\u4394r;\uc000\ud835\udd07\u0100af\u02eb\u0327\u0100cm\u02f0\u0322ritical\u0200ADGT\u0300\u0306\u0316\u031ccute;\u40b4o\u0174\u030b\u030d;\u42d9bleAcute;\u42ddrave;\u4060ilde;\u42dcond;\u62c4ferentialD;\u6146\u0470\u033d\0\0\0\u0342\u0354\0\u0405f;\uc000\ud835\udd3b\u0180;DE\u0348\u0349\u034d\u40a8ot;\u60dcqual;\u6250ble\u0300CDLRUV\u0363\u0372\u0382\u03cf\u03e2\u03f8ontourIntegra\xec\u0239o\u0274\u0379\0\0\u037b\xbb\u0349nArrow;\u61d3\u0100eo\u0387\u03a4ft\u0180ART\u0390\u0396\u03a1rrow;\u61d0ightArrow;\u61d4e\xe5\u02cang\u0100LR\u03ab\u03c4eft\u0100AR\u03b3\u03b9rrow;\u67f8ightArrow;\u67faightArrow;\u67f9ight\u0100AT\u03d8\u03derrow;\u61d2ee;\u62a8p\u0241\u03e9\0\0\u03efrrow;\u61d1ownArrow;\u61d5erticalBar;\u6225n\u0300ABLRTa\u0412\u042a\u0430\u045e\u047f\u037crrow\u0180;BU\u041d\u041e\u0422\u6193ar;\u6913pArrow;\u61f5reve;\u4311eft\u02d2\u043a\0\u0446\0\u0450ightVector;\u6950eeVector;\u695eector\u0100;B\u0459\u045a\u61bdar;\u6956ight\u01d4\u0467\0\u0471eeVector;\u695fector\u0100;B\u047a\u047b\u61c1ar;\u6957ee\u0100;A\u0486\u0487\u62a4rrow;\u61a7\u0100ct\u0492\u0497r;\uc000\ud835\udc9frok;\u4110\u0800NTacdfglmopqstux\u04bd\u04c0\u04c4\u04cb\u04de\u04e2\u04e7\u04ee\u04f5\u0521\u052f\u0536\u0552\u055d\u0560\u0565G;\u414aH\u803b\xd0\u40d0cute\u803b\xc9\u40c9\u0180aiy\u04d2\u04d7\u04dcron;\u411arc\u803b\xca\u40ca;\u442dot;\u4116r;\uc000\ud835\udd08rave\u803b\xc8\u40c8ement;\u6208\u0100ap\u04fa\u04fecr;\u4112ty\u0253\u0506\0\0\u0512mallSquare;\u65fberySmallSquare;\u65ab\u0100gp\u0526\u052aon;\u4118f;\uc000\ud835\udd3csilon;\u4395u\u0100ai\u053c\u0549l\u0100;T\u0542\u0543\u6a75ilde;\u6242librium;\u61cc\u0100ci\u0557\u055ar;\u6130m;\u6a73a;\u4397ml\u803b\xcb\u40cb\u0100ip\u056a\u056fsts;\u6203onentialE;\u6147\u0280cfios\u0585\u0588\u058d\u05b2\u05ccy;\u4424r;\uc000\ud835\udd09lled\u0253\u0597\0\0\u05a3mallSquare;\u65fcerySmallSquare;\u65aa\u0370\u05ba\0\u05bf\0\0\u05c4f;\uc000\ud
835\udd3dAll;\u6200riertrf;\u6131c\xf2\u05cb\u0600JTabcdfgorst\u05e8\u05ec\u05ef\u05fa\u0600\u0612\u06
  344. .split("")
  345. .map((c) => c.charCodeAt(0)));
  // Generated using scripts/write-decode-map.ts
  // NOTE(review): the typed array built here is not bound to any name and is
  // discarded immediately; presumably a bundling leftover. Confirm before removing.
  new Uint16Array(
  // prettier-ignore
  "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
      .split("")
      .map((unit) => unit.charCodeAt(0)));
  // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
  /**
   * Replacement code points for numeric character references: maps the
   * referenced number to the code point that is emitted instead.
   */
  const decodeMap = new Map();
  for (const [referenced, replacement] of [
      // NULL becomes U+FFFD REPLACEMENT CHARACTER
      [0, 65533],
      // C1 Unicode control character reference replacements
      [128, 8364],
      [130, 8218],
      [131, 402],
      [132, 8222],
      [133, 8230],
      [134, 8224],
      [135, 8225],
      [136, 710],
      [137, 8240],
      [138, 352],
      [139, 8249],
      [140, 338],
      [142, 381],
      [145, 8216],
      [146, 8217],
      [147, 8220],
      [148, 8221],
      [149, 8226],
      [150, 8211],
      [151, 8212],
      [152, 732],
      [153, 8482],
      [154, 353],
      [155, 8250],
      [156, 339],
      [158, 382],
      [159, 376],
  ]) {
      decodeMap.set(referenced, replacement);
  }
  /**
   * Sanitizes a code point obtained from a numeric character reference.
   *
   * Surrogates and values above U+10FFFF become U+FFFD; code points listed in
   * `decodeMap` are swapped for their mapped value; anything else is returned
   * unchanged.
   */
  function replaceCodePoint(codePoint) {
      const isSurrogateHalf = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (isSurrogateHalf || codePoint > 0x10ffff) {
          return 0xfffd;
      }
      const mapped = decodeMap.get(codePoint);
      if (mapped === null || mapped === undefined) {
          return codePoint;
      }
      return mapped;
  }
  /** ASCII character codes used by the entity decoder (two-way enum). */
  var CharCodes;
  (function (members) {
      const table = [
          ['NUM', 35],
          ['SEMI', 59],
          ['EQUALS', 61],
          ['ZERO', 48],
          ['NINE', 57],
          ['LOWER_A', 97],
          ['LOWER_F', 102],
          ['LOWER_X', 120],
          ['LOWER_Z', 122],
          ['UPPER_A', 65],
          ['UPPER_F', 70],
          ['UPPER_Z', 90],
      ];
      for (const [name, code] of table) {
          members[name] = code;
          members[code] = name;
      }
  })(CharCodes || (CharCodes = {}));
  /** Bit that, when set on an upper case ASCII letter code, yields the lower case letter (0x20). */
  const TO_LOWER_BIT = 0x20;
  /**
   * Bit masks applied to entries of the decode tree (two-way enum):
   * the top two bits, the next seven bits and the low seven bits of a
   * 16-bit value.
   */
  var BinTrieFlags;
  (function (members) {
      const masks = [
          ['VALUE_LENGTH', 0xc000],
          ['BRANCH_LENGTH', 0x3f80],
          ['JUMP_TABLE', 0x007f],
      ];
      for (const [name, mask] of masks) {
          members[name] = mask;
          members[mask] = name;
      }
  })(BinTrieFlags || (BinTrieFlags = {}));
  /** Returns true when `code` is the character code of an ASCII digit `0`-`9`. */
  function isNumber(code) {
      if (code < CharCodes.ZERO) {
          return false;
      }
      return code <= CharCodes.NINE;
  }
  /**
   * Returns true when `code` is one of the hexadecimal letters `A`-`F` or
   * `a`-`f`. Decimal digits are not covered here.
   */
  function isHexadecimalCharacter(code) {
      const isUpperHex = code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F;
      if (isUpperHex) {
          return true;
      }
      return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
  }
  /** Returns true when `code` is an ASCII letter (either case) or an ASCII digit. */
  function isAsciiAlphaNumeric$1(code) {
      if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) {
          return true;
      }
      if (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) {
          return true;
      }
      return isNumber(code);
  }
  /**
   * Tells whether `code` is a character that may NOT follow an unterminated
   * entity inside an attribute value: `=` or any ASCII alphanumeric.
   *
   * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
   * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
   */
  function isEntityInAttributeInvalidEnd(code) {
      if (code === CharCodes.EQUALS) {
          return true;
      }
      return isAsciiAlphaNumeric$1(code);
  }
  /** States of the `EntityDecoder` state machine, numbered from 0 (two-way enum). */
  var EntityDecoderState;
  (function (members) {
      const names = [
          'EntityStart',
          'NumericStart',
          'NumericDecimal',
          'NumericHex',
          'NamedEntity',
      ];
      names.forEach((name, ordinal) => {
          members[name] = ordinal;
          members[ordinal] = name;
      });
  })(EntityDecoderState || (EntityDecoderState = {}));
  /** Modes that control how an entity may be terminated (two-way enum). */
  var DecodingMode;
  (function (members) {
      const names = [
          /** Entities in text nodes that can end with any character. */
          'Legacy',
          /** Only allow entities terminated with a semicolon. */
          'Strict',
          /** Entities in attributes have limitations on ending characters. */
          'Attribute',
      ];
      names.forEach((name, ordinal) => {
          members[name] = ordinal;
          members[ordinal] = name;
      });
  })(DecodingMode || (DecodingMode = {}));
  457. /**
  458. * Token decoder with support of writing partial entities.
  459. */
  460. class EntityDecoder {
  461. constructor(
  462. /** The tree used to decode entities. */
  463. decodeTree,
  464. /**
  465. * The function that is called when a codepoint is decoded.
  466. *
  467. * For multi-byte named entities, this will be called multiple times,
  468. * with the second codepoint, and the same `consumed` value.
  469. *
  470. * @param codepoint The decoded codepoint.
  471. * @param consumed The number of bytes consumed by the decoder.
  472. */
  473. emitCodePoint,
  474. /** An object that is used to produce errors. */
  475. errors) {
  476. this.decodeTree = decodeTree;
  477. this.emitCodePoint = emitCodePoint;
  478. this.errors = errors;
  479. /** The current state of the decoder. */
  480. this.state = EntityDecoderState.EntityStart;
  481. /** Characters that were consumed while parsing an entity. */
  482. this.consumed = 1;
  483. /**
  484. * The result of the entity.
  485. *
  486. * Either the result index of a numeric entity, or the codepoint of a
  487. * numeric entity.
  488. */
  489. this.result = 0;
  490. /** The current index in the decode tree. */
  491. this.treeIndex = 0;
  492. /** The number of characters that were consumed in excess. */
  493. this.excess = 1;
  494. /** The mode in which the decoder is operating. */
  495. this.decodeMode = DecodingMode.Strict;
  496. }
  497. /** Resets the instance to make it reusable. */
  498. startEntity(decodeMode) {
  499. this.decodeMode = decodeMode;
  500. this.state = EntityDecoderState.EntityStart;
  501. this.result = 0;
  502. this.treeIndex = 0;
  503. this.excess = 1;
  504. this.consumed = 1;
  505. }
  506. /**
  507. * Write an entity to the decoder. This can be called multiple times with partial entities.
  508. * If the entity is incomplete, the decoder will return -1.
  509. *
  510. * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
  511. * entity is incomplete, and resume when the next string is written.
  512. *
  513. * @param string The string containing the entity (or a continuation of the entity).
  514. * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
  515. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  516. */
  517. write(str, offset) {
  518. switch (this.state) {
  519. case EntityDecoderState.EntityStart: {
  520. if (str.charCodeAt(offset) === CharCodes.NUM) {
  521. this.state = EntityDecoderState.NumericStart;
  522. this.consumed += 1;
  523. return this.stateNumericStart(str, offset + 1);
  524. }
  525. this.state = EntityDecoderState.NamedEntity;
  526. return this.stateNamedEntity(str, offset);
  527. }
  528. case EntityDecoderState.NumericStart: {
  529. return this.stateNumericStart(str, offset);
  530. }
  531. case EntityDecoderState.NumericDecimal: {
  532. return this.stateNumericDecimal(str, offset);
  533. }
  534. case EntityDecoderState.NumericHex: {
  535. return this.stateNumericHex(str, offset);
  536. }
  537. case EntityDecoderState.NamedEntity: {
  538. return this.stateNamedEntity(str, offset);
  539. }
  540. }
  541. }
  542. /**
  543. * Switches between the numeric decimal and hexadecimal states.
  544. *
  545. * Equivalent to the `Numeric character reference state` in the HTML spec.
  546. *
  547. * @param str The string containing the entity (or a continuation of the entity).
  548. * @param offset The current offset.
  549. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  550. */
  551. stateNumericStart(str, offset) {
  552. if (offset >= str.length) {
  553. return -1;
  554. }
  555. if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
  556. this.state = EntityDecoderState.NumericHex;
  557. this.consumed += 1;
  558. return this.stateNumericHex(str, offset + 1);
  559. }
  560. this.state = EntityDecoderState.NumericDecimal;
  561. return this.stateNumericDecimal(str, offset);
  562. }
  563. addToNumericResult(str, start, end, base) {
  564. if (start !== end) {
  565. const digitCount = end - start;
  566. this.result =
  567. this.result * Math.pow(base, digitCount) +
  568. parseInt(str.substr(start, digitCount), base);
  569. this.consumed += digitCount;
  570. }
  571. }
  572. /**
  573. * Parses a hexadecimal numeric entity.
  574. *
  575. * Equivalent to the `Hexademical character reference state` in the HTML spec.
  576. *
  577. * @param str The string containing the entity (or a continuation of the entity).
  578. * @param offset The current offset.
  579. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  580. */
  581. stateNumericHex(str, offset) {
  582. const startIdx = offset;
  583. while (offset < str.length) {
  584. const char = str.charCodeAt(offset);
  585. if (isNumber(char) || isHexadecimalCharacter(char)) {
  586. offset += 1;
  587. }
  588. else {
  589. this.addToNumericResult(str, startIdx, offset, 16);
  590. return this.emitNumericEntity(char, 3);
  591. }
  592. }
  593. this.addToNumericResult(str, startIdx, offset, 16);
  594. return -1;
  595. }
  596. /**
  597. * Parses a decimal numeric entity.
  598. *
  599. * Equivalent to the `Decimal character reference state` in the HTML spec.
  600. *
  601. * @param str The string containing the entity (or a continuation of the entity).
  602. * @param offset The current offset.
  603. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  604. */
  605. stateNumericDecimal(str, offset) {
  606. const startIdx = offset;
  607. while (offset < str.length) {
  608. const char = str.charCodeAt(offset);
  609. if (isNumber(char)) {
  610. offset += 1;
  611. }
  612. else {
  613. this.addToNumericResult(str, startIdx, offset, 10);
  614. return this.emitNumericEntity(char, 2);
  615. }
  616. }
  617. this.addToNumericResult(str, startIdx, offset, 10);
  618. return -1;
  619. }
  620. /**
  621. * Validate and emit a numeric entity.
  622. *
  623. * Implements the logic from the `Hexademical character reference start
  624. * state` and `Numeric character reference end state` in the HTML spec.
  625. *
  626. * @param lastCp The last code point of the entity. Used to see if the
  627. * entity was terminated with a semicolon.
  628. * @param expectedLength The minimum number of characters that should be
  629. * consumed. Used to validate that at least one digit
  630. * was consumed.
  631. * @returns The number of characters that were consumed.
  632. */
  633. emitNumericEntity(lastCp, expectedLength) {
  634. var _a;
  635. // Ensure we consumed at least one digit.
  636. if (this.consumed <= expectedLength) {
  637. (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
  638. return 0;
  639. }
  640. // Figure out if this is a legit end of the entity
  641. if (lastCp === CharCodes.SEMI) {
  642. this.consumed += 1;
  643. }
  644. else if (this.decodeMode === DecodingMode.Strict) {
  645. return 0;
  646. }
  647. this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
  648. if (this.errors) {
  649. if (lastCp !== CharCodes.SEMI) {
  650. this.errors.missingSemicolonAfterCharacterReference();
  651. }
  652. this.errors.validateNumericCharacterReference(this.result);
  653. }
  654. return this.consumed;
  655. }
  656. /**
  657. * Parses a named entity.
  658. *
  659. * Equivalent to the `Named character reference state` in the HTML spec.
  660. *
  661. * @param str The string containing the entity (or a continuation of the entity).
  662. * @param offset The current offset.
  663. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  664. */
  665. stateNamedEntity(str, offset) {
  666. const { decodeTree } = this;
  667. let current = decodeTree[this.treeIndex];
  668. // The mask is the number of bytes of the value, including the current byte.
  669. let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
  670. for (; offset < str.length; offset++, this.excess++) {
  671. const char = str.charCodeAt(offset);
  672. this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
  673. if (this.treeIndex < 0) {
  674. return this.result === 0 ||
  675. // If we are parsing an attribute
  676. (this.decodeMode === DecodingMode.Attribute &&
  677. // We shouldn't have consumed any characters after the entity,
  678. (valueLength === 0 ||
  679. // And there should be no invalid characters.
  680. isEntityInAttributeInvalidEnd(char)))
  681. ? 0
  682. : this.emitNotTerminatedNamedEntity();
  683. }
  684. current = decodeTree[this.treeIndex];
  685. valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
  686. // If the branch is a value, store it and continue
  687. if (valueLength !== 0) {
  688. // If the entity is terminated by a semicolon, we are done.
  689. if (char === CharCodes.SEMI) {
  690. return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
  691. }
  692. // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
  693. if (this.decodeMode !== DecodingMode.Strict) {
  694. this.result = this.treeIndex;
  695. this.consumed += this.excess;
  696. this.excess = 0;
  697. }
  698. }
  699. }
  700. return -1;
  701. }
  702. /**
  703. * Emit a named entity that was not terminated with a semicolon.
  704. *
  705. * @returns The number of characters consumed.
  706. */
  707. emitNotTerminatedNamedEntity() {
  708. var _a;
  709. const { result, decodeTree } = this;
  710. const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
  711. this.emitNamedEntityData(result, valueLength, this.consumed);
  712. (_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
  713. return this.consumed;
  714. }
  715. /**
  716. * Emit a named entity.
  717. *
  718. * @param result The index of the entity in the decode tree.
  719. * @param valueLength The number of bytes in the entity.
  720. * @param consumed The number of characters consumed.
  721. *
  722. * @returns The number of characters consumed.
  723. */
  724. emitNamedEntityData(result, valueLength, consumed) {
  725. const { decodeTree } = this;
  726. this.emitCodePoint(valueLength === 1
  727. ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
  728. : decodeTree[result + 1], consumed);
  729. if (valueLength === 3) {
  730. // For multi-byte values, we need to emit the second byte.
  731. this.emitCodePoint(decodeTree[result + 2], consumed);
  732. }
  733. return consumed;
  734. }
  735. /**
  736. * Signal to the parser that the end of the input was reached.
  737. *
  738. * Remaining data will be emitted and relevant errors will be produced.
  739. *
  740. * @returns The number of characters consumed.
  741. */
  742. end() {
  743. var _a;
  744. switch (this.state) {
  745. case EntityDecoderState.NamedEntity: {
  746. // Emit a named entity if we have one.
  747. return this.result !== 0 &&
  748. (this.decodeMode !== DecodingMode.Attribute ||
  749. this.result === this.treeIndex)
  750. ? this.emitNotTerminatedNamedEntity()
  751. : 0;
  752. }
  753. // Otherwise, emit a numeric entity if we have one.
  754. case EntityDecoderState.NumericDecimal: {
  755. return this.emitNumericEntity(0, 2);
  756. }
  757. case EntityDecoderState.NumericHex: {
  758. return this.emitNumericEntity(0, 3);
  759. }
  760. case EntityDecoderState.NumericStart: {
  761. (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
  762. return 0;
  763. }
  764. case EntityDecoderState.EntityStart: {
  765. // Return 0 if we have no entity.
  766. return 0;
  767. }
  768. }
  769. }
  770. }
  /**
   * Picks the branch of the current trie node that matches the given
   * character. This function is used to traverse the trie.
   *
   * @param decodeTree The trie.
   * @param current The current node.
   * @param nodeIdx The index right after the current node and its value.
   * @param char The current character.
   * @returns The index of the next node, or -1 if no branch is taken.
   */
  function determineBranch(decodeTree, current, nodeIdx, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    // Case 1: Single branch encoded in jump offset.
    if (branchCount === 0) {
      if (jumpOffset === 0 || char !== jumpOffset) {
        return -1;
      }
      return nodeIdx;
    }
    // Case 2: Multiple branches encoded in jump table.
    if (jumpOffset) {
      const slot = char - jumpOffset;
      if (slot < 0 || slot >= branchCount) {
        return -1;
      }
      // The stored entry is one higher than the returned index.
      return decodeTree[nodeIdx + slot] - 1;
    }
    // Case 3: Multiple branches encoded in dictionary.
    // Binary search the keys; the matching target sits `branchCount`
    // cells after its key.
    let low = nodeIdx;
    let high = low + branchCount - 1;
    while (low <= high) {
      const middle = (low + high) >>> 1;
      const key = decodeTree[middle];
      if (key < char) {
        low = middle + 1;
        continue;
      }
      if (key > char) {
        high = middle - 1;
        continue;
      }
      return decodeTree[middle + branchCount];
    }
    return -1;
  }
  /** All valid namespaces in HTML. */
  var NS;
  (function (ns) {
    Object.assign(ns, {
      HTML: 'http://www.w3.org/1999/xhtml',
      MATHML: 'http://www.w3.org/1998/Math/MathML',
      SVG: 'http://www.w3.org/2000/svg',
      XLINK: 'http://www.w3.org/1999/xlink',
      XML: 'http://www.w3.org/XML/1998/namespace',
      XMLNS: 'http://www.w3.org/2000/xmlns/',
    });
  })(NS || (NS = {}));
  /** Attribute names referenced by constant, keyed by upper-case identifier. */
  var ATTRS;
  (function (attrs) {
    Object.assign(attrs, {
      TYPE: 'type',
      ACTION: 'action',
      ENCODING: 'encoding',
      PROMPT: 'prompt',
      NAME: 'name',
      COLOR: 'color',
      FACE: 'face',
      SIZE: 'size',
    });
  })(ATTRS || (ATTRS = {}));
  /**
   * The mode of the document.
   *
   * @see {@link https://dom.spec.whatwg.org/#concept-document-limited-quirks}
   */
  var DOCUMENT_MODE;
  (function (modes) {
    Object.assign(modes, {
      NO_QUIRKS: 'no-quirks',
      QUIRKS: 'quirks',
      LIMITED_QUIRKS: 'limited-quirks',
    });
  })(DOCUMENT_MODE || (DOCUMENT_MODE = {}));
  /**
   * Tag names referenced by constant, keyed by upper-case identifier.
   *
   * The entry order is kept exactly as declared; it is not strictly
   * alphabetical (e.g. SVG sits between UL and VAR).
   */
  var TAG_NAMES;
  (function (names) {
    Object.assign(names, {
      A: 'a', ADDRESS: 'address', ANNOTATION_XML: 'annotation-xml', APPLET: 'applet',
      AREA: 'area', ARTICLE: 'article', ASIDE: 'aside',
      B: 'b', BASE: 'base', BASEFONT: 'basefont', BGSOUND: 'bgsound', BIG: 'big',
      BLOCKQUOTE: 'blockquote', BODY: 'body', BR: 'br', BUTTON: 'button',
      CAPTION: 'caption', CENTER: 'center', CODE: 'code', COL: 'col', COLGROUP: 'colgroup',
      DD: 'dd', DESC: 'desc', DETAILS: 'details', DIALOG: 'dialog',
      DIR: 'dir', DIV: 'div', DL: 'dl', DT: 'dt',
      EM: 'em', EMBED: 'embed',
      FIELDSET: 'fieldset', FIGCAPTION: 'figcaption', FIGURE: 'figure', FONT: 'font',
      FOOTER: 'footer', FOREIGN_OBJECT: 'foreignObject', FORM: 'form',
      FRAME: 'frame', FRAMESET: 'frameset',
      H1: 'h1', H2: 'h2', H3: 'h3', H4: 'h4', H5: 'h5', H6: 'h6',
      HEAD: 'head', HEADER: 'header', HGROUP: 'hgroup', HR: 'hr', HTML: 'html',
      I: 'i', IMG: 'img', IMAGE: 'image', INPUT: 'input', IFRAME: 'iframe',
      KEYGEN: 'keygen',
      LABEL: 'label', LI: 'li', LINK: 'link', LISTING: 'listing',
      MAIN: 'main', MALIGNMARK: 'malignmark', MARQUEE: 'marquee', MATH: 'math',
      MENU: 'menu', META: 'meta', MGLYPH: 'mglyph',
      MI: 'mi', MO: 'mo', MN: 'mn', MS: 'ms', MTEXT: 'mtext',
      NAV: 'nav', NOBR: 'nobr', NOFRAMES: 'noframes', NOEMBED: 'noembed', NOSCRIPT: 'noscript',
      OBJECT: 'object', OL: 'ol', OPTGROUP: 'optgroup', OPTION: 'option',
      P: 'p', PARAM: 'param', PLAINTEXT: 'plaintext', PRE: 'pre',
      RB: 'rb', RP: 'rp', RT: 'rt', RTC: 'rtc', RUBY: 'ruby',
      S: 's', SCRIPT: 'script', SEARCH: 'search', SECTION: 'section', SELECT: 'select',
      SOURCE: 'source', SMALL: 'small', SPAN: 'span', STRIKE: 'strike', STRONG: 'strong',
      STYLE: 'style', SUB: 'sub', SUMMARY: 'summary', SUP: 'sup',
      TABLE: 'table', TBODY: 'tbody', TEMPLATE: 'template', TEXTAREA: 'textarea',
      TFOOT: 'tfoot', TD: 'td', TH: 'th', THEAD: 'thead',
      TITLE: 'title', TR: 'tr', TRACK: 'track', TT: 'tt',
      U: 'u', UL: 'ul', SVG: 'svg', VAR: 'var', WBR: 'wbr', XMP: 'xmp',
    });
  })(TAG_NAMES || (TAG_NAMES = {}));
  /**
   * Tag IDs are numeric IDs for known tag names.
   *
   * We use tag IDs to improve the performance of tag name comparisons.
   *
   * The object maps both ways: `TAG_ID.DIV === 27` and `TAG_ID[27] === "DIV"`.
   */
  var TAG_ID;
  (function (ids) {
    // A name's position in this list IS its numeric ID, so entries must
    // never be reordered or inserted in the middle.
    const orderedNames = [
      // 0-9
      'UNKNOWN', 'A', 'ADDRESS', 'ANNOTATION_XML', 'APPLET', 'AREA', 'ARTICLE', 'ASIDE', 'B', 'BASE',
      // 10-19
      'BASEFONT', 'BGSOUND', 'BIG', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CAPTION', 'CENTER', 'CODE',
      // 20-29
      'COL', 'COLGROUP', 'DD', 'DESC', 'DETAILS', 'DIALOG', 'DIR', 'DIV', 'DL', 'DT',
      // 30-39
      'EM', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FONT', 'FOOTER', 'FOREIGN_OBJECT', 'FORM', 'FRAME',
      // 40-49
      'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEAD', 'HEADER', 'HGROUP',
      // 50-59
      'HR', 'HTML', 'I', 'IMG', 'IMAGE', 'INPUT', 'IFRAME', 'KEYGEN', 'LABEL', 'LI',
      // 60-69
      'LINK', 'LISTING', 'MAIN', 'MALIGNMARK', 'MARQUEE', 'MATH', 'MENU', 'META', 'MGLYPH', 'MI',
      // 70-79
      'MO', 'MN', 'MS', 'MTEXT', 'NAV', 'NOBR', 'NOFRAMES', 'NOEMBED', 'NOSCRIPT', 'OBJECT',
      // 80-89
      'OL', 'OPTGROUP', 'OPTION', 'P', 'PARAM', 'PLAINTEXT', 'PRE', 'RB', 'RP', 'RT',
      // 90-99
      'RTC', 'RUBY', 'S', 'SCRIPT', 'SEARCH', 'SECTION', 'SELECT', 'SOURCE', 'SMALL', 'SPAN',
      // 100-109
      'STRIKE', 'STRONG', 'STYLE', 'SUB', 'SUMMARY', 'SUP', 'TABLE', 'TBODY', 'TEMPLATE', 'TEXTAREA',
      // 110-119
      'TFOOT', 'TD', 'TH', 'THEAD', 'TITLE', 'TR', 'TRACK', 'TT', 'U', 'UL',
      // 120-123
      'SVG', 'VAR', 'WBR', 'XMP',
    ];
    orderedNames.forEach((name, id) => {
      ids[name] = id;
      ids[id] = name;
    });
  })(TAG_ID || (TAG_ID = {}));
  /**
   * Lookup from tag name string (e.g. "div") to its numeric TAG_ID.
   *
   * Every key of TAG_NAMES has a same-named TAG_ID entry, so the map is
   * built by walking TAG_NAMES in declaration order and pairing each tag
   * name with the ID registered under the same key.
   */
  const TAG_NAME_TO_ID = new Map(Object.keys(TAG_NAMES).map((key) => [TAG_NAMES[key], TAG_ID[key]]));
  /**
   * Resolves a tag name to its numeric tag ID.
   *
   * @param tagName The tag name to look up in TAG_NAME_TO_ID.
   * @returns The matching ID, or TAG_ID.UNKNOWN when the lookup yields
   *   null or undefined.
   */
  function getTagID(tagName) {
    const knownId = TAG_NAME_TO_ID.get(tagName);
    if (knownId === null || knownId === undefined) {
      return TAG_ID.UNKNOWN;
    }
    return knownId;
  }
  // Short alias for TAG_ID, used to keep the tables below compact.
  const $ = TAG_ID;
  /**
   * Per-namespace sets of tag IDs, keyed by namespace URI.
   *
   * NOTE(review): presumably the "special" element category from the HTML
   * tree-construction rules — confirm against the consumers of this table.
   * The XLINK, XML and XMLNS namespaces have empty sets.
   */
  const SPECIAL_ELEMENTS = {
    [NS.HTML]: new Set([
      $.ADDRESS, $.APPLET, $.AREA, $.ARTICLE, $.ASIDE,
      $.BASE, $.BASEFONT, $.BGSOUND, $.BLOCKQUOTE, $.BODY, $.BR, $.BUTTON,
      $.CAPTION, $.CENTER, $.COL, $.COLGROUP,
      $.DD, $.DETAILS, $.DIR, $.DIV, $.DL, $.DT,
      $.EMBED,
      $.FIELDSET, $.FIGCAPTION, $.FIGURE, $.FOOTER, $.FORM, $.FRAME, $.FRAMESET,
      $.H1, $.H2, $.H3, $.H4, $.H5, $.H6,
      $.HEAD, $.HEADER, $.HGROUP, $.HR, $.HTML,
      $.IFRAME, $.IMG, $.INPUT,
      $.LI, $.LINK, $.LISTING,
      $.MAIN, $.MARQUEE, $.MENU, $.META,
      $.NAV, $.NOEMBED, $.NOFRAMES, $.NOSCRIPT,
      $.OBJECT, $.OL,
      $.P, $.PARAM, $.PLAINTEXT, $.PRE,
      $.SCRIPT, $.SECTION, $.SELECT, $.SOURCE, $.STYLE, $.SUMMARY,
      $.TABLE, $.TBODY, $.TD, $.TEMPLATE, $.TEXTAREA, $.TFOOT, $.TH, $.THEAD,
      $.TITLE, $.TR, $.TRACK,
      $.UL,
      $.WBR,
      $.XMP,
    ]),
    [NS.MATHML]: new Set([$.MI, $.MO, $.MN, $.MS, $.MTEXT, $.ANNOTATION_XML]),
    [NS.SVG]: new Set([$.TITLE, $.FOREIGN_OBJECT, $.DESC]),
    [NS.XLINK]: new Set(),
    [NS.XML]: new Set(),
    [NS.XMLNS]: new Set(),
  };
  /** Tag IDs of the numbered heading elements h1 through h6, in order. */
  const NUMBERED_HEADERS = new Set([1, 2, 3, 4, 5, 6].map((level) => $[`H${level}`]));
  //States
  /**
   * Tokenizer states as a two-way numeric enum: `State.DATA === 0` and
   * `State[0] === "DATA"`.
   */
  var State;
  (function (states) {
    // A name's position in this list IS its numeric value, so entries must
    // never be reordered or inserted in the middle.
    const orderedNames = [
      'DATA', // 0
      'RCDATA',
      'RAWTEXT',
      'SCRIPT_DATA',
      'PLAINTEXT',
      'TAG_OPEN', // 5
      'END_TAG_OPEN',
      'TAG_NAME',
      'RCDATA_LESS_THAN_SIGN',
      'RCDATA_END_TAG_OPEN',
      'RCDATA_END_TAG_NAME', // 10
      'RAWTEXT_LESS_THAN_SIGN',
      'RAWTEXT_END_TAG_OPEN',
      'RAWTEXT_END_TAG_NAME',
      'SCRIPT_DATA_LESS_THAN_SIGN',
      'SCRIPT_DATA_END_TAG_OPEN', // 15
      'SCRIPT_DATA_END_TAG_NAME',
      'SCRIPT_DATA_ESCAPE_START',
      'SCRIPT_DATA_ESCAPE_START_DASH',
      'SCRIPT_DATA_ESCAPED',
      'SCRIPT_DATA_ESCAPED_DASH', // 20
      'SCRIPT_DATA_ESCAPED_DASH_DASH',
      'SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN',
      'SCRIPT_DATA_ESCAPED_END_TAG_OPEN',
      'SCRIPT_DATA_ESCAPED_END_TAG_NAME',
      'SCRIPT_DATA_DOUBLE_ESCAPE_START', // 25
      'SCRIPT_DATA_DOUBLE_ESCAPED',
      'SCRIPT_DATA_DOUBLE_ESCAPED_DASH',
      'SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH',
      'SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN',
      'SCRIPT_DATA_DOUBLE_ESCAPE_END', // 30
      'BEFORE_ATTRIBUTE_NAME',
      'ATTRIBUTE_NAME',
      'AFTER_ATTRIBUTE_NAME',
      'BEFORE_ATTRIBUTE_VALUE',
      'ATTRIBUTE_VALUE_DOUBLE_QUOTED', // 35
      'ATTRIBUTE_VALUE_SINGLE_QUOTED',
      'ATTRIBUTE_VALUE_UNQUOTED',
      'AFTER_ATTRIBUTE_VALUE_QUOTED',
      'SELF_CLOSING_START_TAG',
      'BOGUS_COMMENT', // 40
      'MARKUP_DECLARATION_OPEN',
      'COMMENT_START',
      'COMMENT_START_DASH',
      'COMMENT',
      'COMMENT_LESS_THAN_SIGN', // 45
      'COMMENT_LESS_THAN_SIGN_BANG',
      'COMMENT_LESS_THAN_SIGN_BANG_DASH',
      'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH',
      'COMMENT_END_DASH',
      'COMMENT_END', // 50
      'COMMENT_END_BANG',
      'DOCTYPE',
      'BEFORE_DOCTYPE_NAME',
      'DOCTYPE_NAME',
      'AFTER_DOCTYPE_NAME', // 55
      'AFTER_DOCTYPE_PUBLIC_KEYWORD',
      'BEFORE_DOCTYPE_PUBLIC_IDENTIFIER',
      'DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED',
      'DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED',
      'AFTER_DOCTYPE_PUBLIC_IDENTIFIER', // 60
      'BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS',
      'AFTER_DOCTYPE_SYSTEM_KEYWORD',
      'BEFORE_DOCTYPE_SYSTEM_IDENTIFIER',
      'DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED',
      'DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED', // 65
      'AFTER_DOCTYPE_SYSTEM_IDENTIFIER',
      'BOGUS_DOCTYPE',
      'CDATA_SECTION',
      'CDATA_SECTION_BRACKET',
      'CDATA_SECTION_END', // 70
      'CHARACTER_REFERENCE',
      'AMBIGUOUS_AMPERSAND', // 72
    ];
    orderedNames.forEach((name, value) => {
      states[name] = value;
      states[value] = name;
    });
  })(State || (State = {}));
  //Tokenizer initial states for different modes
  // Each mode maps to the State value of the same name. Only these four
  // modes are listed here; DATA is not among them.
  const TokenizerMode = {};
  for (const mode of ['RCDATA', 'RAWTEXT', 'SCRIPT_DATA', 'PLAINTEXT']) {
    TokenizerMode[mode] = State[mode];
  }
  1408. //Utils
  //OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
  //these functions if they are located in another module, due to the context switch.
  //Always perform an inlining check before modifying these functions ('node --trace-inlining').
  /** Returns true when `cp` lies between DIGIT_0 and DIGIT_9, inclusive. */
  function isAsciiDigit(cp) {
    return CODE_POINTS.DIGIT_0 <= cp && cp <= CODE_POINTS.DIGIT_9;
  }
  /** Returns true when `cp` lies between LATIN_CAPITAL_A and LATIN_CAPITAL_Z, inclusive. */
  function isAsciiUpper(cp) {
    return CODE_POINTS.LATIN_CAPITAL_A <= cp && cp <= CODE_POINTS.LATIN_CAPITAL_Z;
  }
  /** Returns true when `cp` lies between LATIN_SMALL_A and LATIN_SMALL_Z, inclusive. */
  function isAsciiLower(cp) {
    return CODE_POINTS.LATIN_SMALL_A <= cp && cp <= CODE_POINTS.LATIN_SMALL_Z;
  }
  /**
   * Tells whether `cp` is accepted by `isAsciiLower` or, failing that,
   * by `isAsciiUpper`.
   *
   * @param cp Code point to test.
   * @returns `true` when either check accepts `cp`.
   */
  function isAsciiLetter(cp) {
    if (isAsciiLower(cp)) {
      return true;
    }
    return isAsciiUpper(cp);
  }
  /**
   * Tells whether `cp` is accepted by `isAsciiLetter` or, failing that,
   * by `isAsciiDigit`.
   *
   * @param cp Code point to test.
   * @returns `true` when either check accepts `cp`.
   */
  function isAsciiAlphaNumeric(cp) {
    if (isAsciiLetter(cp)) {
      return true;
    }
    return isAsciiDigit(cp);
  }
  /**
   * Shifts `cp` upward by 32 (0x20). No range check is performed here;
   * the visible caller only invokes it after `isAsciiUpper(cp)` succeeded.
   *
   * @param cp Code point to shift.
   * @returns `cp + 32`.
   */
  function toAsciiLower(cp) {
    const ASCII_CASE_DISTANCE = 0x20;
    return cp + ASCII_CASE_DISTANCE;
  }
  /**
   * Tells whether `cp` is strictly equal to one of `CODE_POINTS.SPACE`,
   * `LINE_FEED`, `TABULATION` or `FORM_FEED`.
   *
   * @param cp Code point to test.
   * @returns `true` for those four code points, otherwise `false`.
   */
  function isWhitespace(cp) {
    switch (cp) {
      case CODE_POINTS.SPACE:
      case CODE_POINTS.LINE_FEED:
      case CODE_POINTS.TABULATION:
      case CODE_POINTS.FORM_FEED: {
        return true;
      }
      default: {
        return false;
      }
    }
  }
  /**
   * Tells whether `cp` is accepted by `isWhitespace`, or equals
   * `CODE_POINTS.SOLIDUS` or `CODE_POINTS.GREATER_THAN_SIGN`.
   *
   * @param cp Code point to test.
   * @returns `true` when any of the three conditions holds.
   */
  function isScriptDataDoubleEscapeSequenceEnd(cp) {
    if (isWhitespace(cp)) {
      return true;
    }
    if (cp === CODE_POINTS.SOLIDUS) {
      return true;
    }
    return cp === CODE_POINTS.GREATER_THAN_SIGN;
  }
  /**
   * Picks the parse-error code that applies to a numeric character reference.
   *
   * The checks run in a fixed order and the first match wins: NULL, above
   * 0x10FFFF, `isSurrogate`, `isUndefinedCodePoint`, then
   * `isControlCodePoint` or CARRIAGE_RETURN.
   *
   * @param code Numeric value of the character reference.
   * @returns The matching `ERR` member, or `null` when no check matches.
   */
  function getErrorForNumericCharacterReference(code) {
    const HIGHEST_CODE_POINT = 0x10ffff;
    if (code === CODE_POINTS.NULL) {
      return ERR.nullCharacterReference;
    }
    if (code > HIGHEST_CODE_POINT) {
      return ERR.characterReferenceOutsideUnicodeRange;
    }
    if (isSurrogate(code)) {
      return ERR.surrogateCharacterReference;
    }
    if (isUndefinedCodePoint(code)) {
      return ERR.noncharacterCharacterReference;
    }
    const isControl = isControlCodePoint(code) || code === CODE_POINTS.CARRIAGE_RETURN;
    return isControl ? ERR.controlCharacterReference : null;
  }
  1454. //Tokenizer
  1455. class Tokenizer {
  1456. constructor(options, handler) {
  1457. this.options = options;
  1458. this.handler = handler;
  1459. this.paused = false;
  1460. /** Ensures that the parsing loop isn't run multiple times at once. */
  1461. this.inLoop = false;
  1462. /**
  1463. * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
  1464. * and that it is not an integration point for either MathML or HTML.
  1465. *
  1466. * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
  1467. */
  1468. this.inForeignNode = false;
  1469. this.lastStartTagName = '';
  1470. this.active = false;
  1471. this.state = State.DATA;
  1472. this.returnState = State.DATA;
  1473. this.entityStartPos = 0;
  1474. this.consumedAfterSnapshot = -1;
  1475. this.currentCharacterToken = null;
  1476. this.currentToken = null;
  1477. this.currentAttr = { name: '', value: '' };
  1478. this.preprocessor = new Preprocessor(handler);
  1479. this.currentLocation = this.getCurrentLocation(-1);
  1480. this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => {
  1481. // Note: Set `pos` _before_ flushing, as flushing might drop
  1482. // the current chunk and invalidate `entityStartPos`.
  1483. this.preprocessor.pos = this.entityStartPos + consumed - 1;
  1484. this._flushCodePointConsumedAsCharacterReference(cp);
  1485. }, handler.onParseError
  1486. ? {
  1487. missingSemicolonAfterCharacterReference: () => {
  1488. this._err(ERR.missingSemicolonAfterCharacterReference, 1);
  1489. },
  1490. absenceOfDigitsInNumericCharacterReference: (consumed) => {
  1491. this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
  1492. },
  1493. validateNumericCharacterReference: (code) => {
  1494. const error = getErrorForNumericCharacterReference(code);
  1495. if (error)
  1496. this._err(error, 1);
  1497. },
  1498. }
  1499. : undefined);
  1500. }
  1501. //Errors
  1502. _err(code, cpOffset = 0) {
  1503. var _a, _b;
  1504. (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
  1505. }
  1506. // NOTE: `offset` may never run across line boundaries.
  1507. getCurrentLocation(offset) {
  1508. if (!this.options.sourceCodeLocationInfo) {
  1509. return null;
  1510. }
  1511. return {
  1512. startLine: this.preprocessor.line,
  1513. startCol: this.preprocessor.col - offset,
  1514. startOffset: this.preprocessor.offset - offset,
  1515. endLine: -1,
  1516. endCol: -1,
  1517. endOffset: -1,
  1518. };
  1519. }
  1520. _runParsingLoop() {
  1521. if (this.inLoop)
  1522. return;
  1523. this.inLoop = true;
  1524. while (this.active && !this.paused) {
  1525. this.consumedAfterSnapshot = 0;
  1526. const cp = this._consume();
  1527. if (!this._ensureHibernation()) {
  1528. this._callState(cp);
  1529. }
  1530. }
  1531. this.inLoop = false;
  1532. }
  1533. //API
  /**
   * Sets the `paused` flag. `_runParsingLoop` checks this flag before each
   * iteration, and `write`/`resume` skip their `writeCallback` while it is set.
   */
  pause() {
  this.paused = true;
  }
  1537. resume(writeCallback) {
  1538. if (!this.paused) {
  1539. throw new Error('Parser was already resumed');
  1540. }
  1541. this.paused = false;
  1542. // Necessary for synchronous resume.
  1543. if (this.inLoop)
  1544. return;
  1545. this._runParsingLoop();
  1546. if (!this.paused) {
  1547. writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
  1548. }
  1549. }
  1550. write(chunk, isLastChunk, writeCallback) {
  1551. this.active = true;
  1552. this.preprocessor.write(chunk, isLastChunk);
  1553. this._runParsingLoop();
  1554. if (!this.paused) {
  1555. writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
  1556. }
  1557. }
  1558. insertHtmlAtCurrentPos(chunk) {
  1559. this.active = true;
  1560. this.preprocessor.insertHtmlAtCurrentPos(chunk);
  1561. this._runParsingLoop();
  1562. }
  1563. //Hibernation
  1564. _ensureHibernation() {
  1565. if (this.preprocessor.endOfChunkHit) {
  1566. this.preprocessor.retreat(this.consumedAfterSnapshot);
  1567. this.consumedAfterSnapshot = 0;
  1568. this.active = false;
  1569. return true;
  1570. }
  1571. return false;
  1572. }
  1573. //Consumption
  1574. _consume() {
  1575. this.consumedAfterSnapshot++;
  1576. return this.preprocessor.advance();
  1577. }
  1578. _advanceBy(count) {
  1579. this.consumedAfterSnapshot += count;
  1580. for (let i = 0; i < count; i++) {
  1581. this.preprocessor.advance();
  1582. }
  1583. }
  1584. _consumeSequenceIfMatch(pattern, caseSensitive) {
  1585. if (this.preprocessor.startsWith(pattern, caseSensitive)) {
  1586. // We will already have consumed one character before calling this method.
  1587. this._advanceBy(pattern.length - 1);
  1588. return true;
  1589. }
  1590. return false;
  1591. }
  1592. //Token creation
  1593. _createStartTagToken() {
  1594. this.currentToken = {
  1595. type: TokenType.START_TAG,
  1596. tagName: '',
  1597. tagID: TAG_ID.UNKNOWN,
  1598. selfClosing: false,
  1599. ackSelfClosing: false,
  1600. attrs: [],
  1601. location: this.getCurrentLocation(1),
  1602. };
  1603. }
  1604. _createEndTagToken() {
  1605. this.currentToken = {
  1606. type: TokenType.END_TAG,
  1607. tagName: '',
  1608. tagID: TAG_ID.UNKNOWN,
  1609. selfClosing: false,
  1610. ackSelfClosing: false,
  1611. attrs: [],
  1612. location: this.getCurrentLocation(2),
  1613. };
  1614. }
  1615. _createCommentToken(offset) {
  1616. this.currentToken = {
  1617. type: TokenType.COMMENT,
  1618. data: '',
  1619. location: this.getCurrentLocation(offset),
  1620. };
  1621. }
  1622. _createDoctypeToken(initialName) {
  1623. this.currentToken = {
  1624. type: TokenType.DOCTYPE,
  1625. name: initialName,
  1626. forceQuirks: false,
  1627. publicId: null,
  1628. systemId: null,
  1629. location: this.currentLocation,
  1630. };
  1631. }
  1632. _createCharacterToken(type, chars) {
  1633. this.currentCharacterToken = {
  1634. type,
  1635. chars,
  1636. location: this.currentLocation,
  1637. };
  1638. }
  1639. //Tag attributes
  1640. _createAttr(attrNameFirstCh) {
  1641. this.currentAttr = {
  1642. name: attrNameFirstCh,
  1643. value: '',
  1644. };
  1645. this.currentLocation = this.getCurrentLocation(0);
  1646. }
  1647. _leaveAttrName() {
  1648. var _a;
  1649. var _b;
  1650. const token = this.currentToken;
  1651. if (getTokenAttr(token, this.currentAttr.name) === null) {
  1652. token.attrs.push(this.currentAttr);
  1653. if (token.location && this.currentLocation) {
  1654. const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null)));
  1655. attrLocations[this.currentAttr.name] = this.currentLocation;
  1656. // Set end location
  1657. this._leaveAttrValue();
  1658. }
  1659. }
  1660. else {
  1661. this._err(ERR.duplicateAttribute);
  1662. }
  1663. }
  1664. _leaveAttrValue() {
  1665. if (this.currentLocation) {
  1666. this.currentLocation.endLine = this.preprocessor.line;
  1667. this.currentLocation.endCol = this.preprocessor.col;
  1668. this.currentLocation.endOffset = this.preprocessor.offset;
  1669. }
  1670. }
  1671. //Token emission
  1672. prepareToken(ct) {
  1673. this._emitCurrentCharacterToken(ct.location);
  1674. this.currentToken = null;
  1675. if (ct.location) {
  1676. ct.location.endLine = this.preprocessor.line;
  1677. ct.location.endCol = this.preprocessor.col + 1;
  1678. ct.location.endOffset = this.preprocessor.offset + 1;
  1679. }
  1680. this.currentLocation = this.getCurrentLocation(-1);
  1681. }
  1682. emitCurrentTagToken() {
  1683. const ct = this.currentToken;
  1684. this.prepareToken(ct);
  1685. ct.tagID = getTagID(ct.tagName);
  1686. if (ct.type === TokenType.START_TAG) {
  1687. this.lastStartTagName = ct.tagName;
  1688. this.handler.onStartTag(ct);
  1689. }
  1690. else {
  1691. if (ct.attrs.length > 0) {
  1692. this._err(ERR.endTagWithAttributes);
  1693. }
  1694. if (ct.selfClosing) {
  1695. this._err(ERR.endTagWithTrailingSolidus);
  1696. }
  1697. this.handler.onEndTag(ct);
  1698. }
  1699. this.preprocessor.dropParsedChunk();
  1700. }
  1701. emitCurrentComment(ct) {
  1702. this.prepareToken(ct);
  1703. this.handler.onComment(ct);
  1704. this.preprocessor.dropParsedChunk();
  1705. }
  1706. emitCurrentDoctype(ct) {
  1707. this.prepareToken(ct);
  1708. this.handler.onDoctype(ct);
  1709. this.preprocessor.dropParsedChunk();
  1710. }
  1711. _emitCurrentCharacterToken(nextLocation) {
  1712. if (this.currentCharacterToken) {
  1713. //NOTE: if we have a pending character token, make it's end location equal to the
  1714. //current token's start location.
  1715. if (nextLocation && this.currentCharacterToken.location) {
  1716. this.currentCharacterToken.location.endLine = nextLocation.startLine;
  1717. this.currentCharacterToken.location.endCol = nextLocation.startCol;
  1718. this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
  1719. }
  1720. switch (this.currentCharacterToken.type) {
  1721. case TokenType.CHARACTER: {
  1722. this.handler.onCharacter(this.currentCharacterToken);
  1723. break;
  1724. }
  1725. case TokenType.NULL_CHARACTER: {
  1726. this.handler.onNullCharacter(this.currentCharacterToken);
  1727. break;
  1728. }
  1729. case TokenType.WHITESPACE_CHARACTER: {
  1730. this.handler.onWhitespaceCharacter(this.currentCharacterToken);
  1731. break;
  1732. }
  1733. }
  1734. this.currentCharacterToken = null;
  1735. }
  1736. }
  1737. _emitEOFToken() {
  1738. const location = this.getCurrentLocation(0);
  1739. if (location) {
  1740. location.endLine = location.startLine;
  1741. location.endCol = location.startCol;
  1742. location.endOffset = location.startOffset;
  1743. }
  1744. this._emitCurrentCharacterToken(location);
  1745. this.handler.onEof({ type: TokenType.EOF, location });
  1746. this.active = false;
  1747. }
  1748. //Characters emission
  1749. //OPTIMIZATION: The specification uses only one type of character token (one token per character).
  1750. //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
  1751. //If we have a sequence of characters that belong to the same group, the parser can process it
  1752. //as a single solid character token.
  1753. //So, there are 3 types of character tokens in parse5:
  1754. //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
  1755. //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
  //3)TokenType.CHARACTER - any character sequence which doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
  1757. _appendCharToCurrentCharacterToken(type, ch) {
  1758. if (this.currentCharacterToken) {
  1759. if (this.currentCharacterToken.type === type) {
  1760. this.currentCharacterToken.chars += ch;
  1761. return;
  1762. }
  1763. else {
  1764. this.currentLocation = this.getCurrentLocation(0);
  1765. this._emitCurrentCharacterToken(this.currentLocation);
  1766. this.preprocessor.dropParsedChunk();
  1767. }
  1768. }
  1769. this._createCharacterToken(type, ch);
  1770. }
  1771. _emitCodePoint(cp) {
  1772. const type = isWhitespace(cp)
  1773. ? TokenType.WHITESPACE_CHARACTER
  1774. : cp === CODE_POINTS.NULL
  1775. ? TokenType.NULL_CHARACTER
  1776. : TokenType.CHARACTER;
  1777. this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
  1778. }
  1779. //NOTE: used when we emit characters explicitly.
  1780. //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
  1781. _emitChars(ch) {
  1782. this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
  1783. }
  1784. // Character reference helpers
  1785. _startCharacterReference() {
  1786. this.returnState = this.state;
  1787. this.state = State.CHARACTER_REFERENCE;
  1788. this.entityStartPos = this.preprocessor.pos;
  1789. this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy);
  1790. }
  1791. _isCharacterReferenceInAttribute() {
  1792. return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
  1793. this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
  1794. this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED);
  1795. }
  1796. _flushCodePointConsumedAsCharacterReference(cp) {
  1797. if (this._isCharacterReferenceInAttribute()) {
  1798. this.currentAttr.value += String.fromCodePoint(cp);
  1799. }
  1800. else {
  1801. this._emitCodePoint(cp);
  1802. }
  1803. }
  1804. // Calling states this way turns out to be much faster than any other approach.
  1805. _callState(cp) {
  1806. switch (this.state) {
  1807. case State.DATA: {
  1808. this._stateData(cp);
  1809. break;
  1810. }
  1811. case State.RCDATA: {
  1812. this._stateRcdata(cp);
  1813. break;
  1814. }
  1815. case State.RAWTEXT: {
  1816. this._stateRawtext(cp);
  1817. break;
  1818. }
  1819. case State.SCRIPT_DATA: {
  1820. this._stateScriptData(cp);
  1821. break;
  1822. }
  1823. case State.PLAINTEXT: {
  1824. this._statePlaintext(cp);
  1825. break;
  1826. }
  1827. case State.TAG_OPEN: {
  1828. this._stateTagOpen(cp);
  1829. break;
  1830. }
  1831. case State.END_TAG_OPEN: {
  1832. this._stateEndTagOpen(cp);
  1833. break;
  1834. }
  1835. case State.TAG_NAME: {
  1836. this._stateTagName(cp);
  1837. break;
  1838. }
  1839. case State.RCDATA_LESS_THAN_SIGN: {
  1840. this._stateRcdataLessThanSign(cp);
  1841. break;
  1842. }
  1843. case State.RCDATA_END_TAG_OPEN: {
  1844. this._stateRcdataEndTagOpen(cp);
  1845. break;
  1846. }
  1847. case State.RCDATA_END_TAG_NAME: {
  1848. this._stateRcdataEndTagName(cp);
  1849. break;
  1850. }
  1851. case State.RAWTEXT_LESS_THAN_SIGN: {
  1852. this._stateRawtextLessThanSign(cp);
  1853. break;
  1854. }
  1855. case State.RAWTEXT_END_TAG_OPEN: {
  1856. this._stateRawtextEndTagOpen(cp);
  1857. break;
  1858. }
  1859. case State.RAWTEXT_END_TAG_NAME: {
  1860. this._stateRawtextEndTagName(cp);
  1861. break;
  1862. }
  1863. case State.SCRIPT_DATA_LESS_THAN_SIGN: {
  1864. this._stateScriptDataLessThanSign(cp);
  1865. break;
  1866. }
  1867. case State.SCRIPT_DATA_END_TAG_OPEN: {
  1868. this._stateScriptDataEndTagOpen(cp);
  1869. break;
  1870. }
  1871. case State.SCRIPT_DATA_END_TAG_NAME: {
  1872. this._stateScriptDataEndTagName(cp);
  1873. break;
  1874. }
  1875. case State.SCRIPT_DATA_ESCAPE_START: {
  1876. this._stateScriptDataEscapeStart(cp);
  1877. break;
  1878. }
  1879. case State.SCRIPT_DATA_ESCAPE_START_DASH: {
  1880. this._stateScriptDataEscapeStartDash(cp);
  1881. break;
  1882. }
  1883. case State.SCRIPT_DATA_ESCAPED: {
  1884. this._stateScriptDataEscaped(cp);
  1885. break;
  1886. }
  1887. case State.SCRIPT_DATA_ESCAPED_DASH: {
  1888. this._stateScriptDataEscapedDash(cp);
  1889. break;
  1890. }
  1891. case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
  1892. this._stateScriptDataEscapedDashDash(cp);
  1893. break;
  1894. }
  1895. case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
  1896. this._stateScriptDataEscapedLessThanSign(cp);
  1897. break;
  1898. }
  1899. case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
  1900. this._stateScriptDataEscapedEndTagOpen(cp);
  1901. break;
  1902. }
  1903. case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
  1904. this._stateScriptDataEscapedEndTagName(cp);
  1905. break;
  1906. }
  1907. case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
  1908. this._stateScriptDataDoubleEscapeStart(cp);
  1909. break;
  1910. }
  1911. case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
  1912. this._stateScriptDataDoubleEscaped(cp);
  1913. break;
  1914. }
  1915. case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
  1916. this._stateScriptDataDoubleEscapedDash(cp);
  1917. break;
  1918. }
  1919. case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
  1920. this._stateScriptDataDoubleEscapedDashDash(cp);
  1921. break;
  1922. }
  1923. case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
  1924. this._stateScriptDataDoubleEscapedLessThanSign(cp);
  1925. break;
  1926. }
  1927. case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
  1928. this._stateScriptDataDoubleEscapeEnd(cp);
  1929. break;
  1930. }
  1931. case State.BEFORE_ATTRIBUTE_NAME: {
  1932. this._stateBeforeAttributeName(cp);
  1933. break;
  1934. }
  1935. case State.ATTRIBUTE_NAME: {
  1936. this._stateAttributeName(cp);
  1937. break;
  1938. }
  1939. case State.AFTER_ATTRIBUTE_NAME: {
  1940. this._stateAfterAttributeName(cp);
  1941. break;
  1942. }
  1943. case State.BEFORE_ATTRIBUTE_VALUE: {
  1944. this._stateBeforeAttributeValue(cp);
  1945. break;
  1946. }
  1947. case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
  1948. this._stateAttributeValueDoubleQuoted(cp);
  1949. break;
  1950. }
  1951. case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
  1952. this._stateAttributeValueSingleQuoted(cp);
  1953. break;
  1954. }
  1955. case State.ATTRIBUTE_VALUE_UNQUOTED: {
  1956. this._stateAttributeValueUnquoted(cp);
  1957. break;
  1958. }
  1959. case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
  1960. this._stateAfterAttributeValueQuoted(cp);
  1961. break;
  1962. }
  1963. case State.SELF_CLOSING_START_TAG: {
  1964. this._stateSelfClosingStartTag(cp);
  1965. break;
  1966. }
  1967. case State.BOGUS_COMMENT: {
  1968. this._stateBogusComment(cp);
  1969. break;
  1970. }
  1971. case State.MARKUP_DECLARATION_OPEN: {
  1972. this._stateMarkupDeclarationOpen(cp);
  1973. break;
  1974. }
  1975. case State.COMMENT_START: {
  1976. this._stateCommentStart(cp);
  1977. break;
  1978. }
  1979. case State.COMMENT_START_DASH: {
  1980. this._stateCommentStartDash(cp);
  1981. break;
  1982. }
  1983. case State.COMMENT: {
  1984. this._stateComment(cp);
  1985. break;
  1986. }
  1987. case State.COMMENT_LESS_THAN_SIGN: {
  1988. this._stateCommentLessThanSign(cp);
  1989. break;
  1990. }
  1991. case State.COMMENT_LESS_THAN_SIGN_BANG: {
  1992. this._stateCommentLessThanSignBang(cp);
  1993. break;
  1994. }
  1995. case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
  1996. this._stateCommentLessThanSignBangDash(cp);
  1997. break;
  1998. }
  1999. case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
  2000. this._stateCommentLessThanSignBangDashDash(cp);
  2001. break;
  2002. }
  2003. case State.COMMENT_END_DASH: {
  2004. this._stateCommentEndDash(cp);
  2005. break;
  2006. }
  2007. case State.COMMENT_END: {
  2008. this._stateCommentEnd(cp);
  2009. break;
  2010. }
  2011. case State.COMMENT_END_BANG: {
  2012. this._stateCommentEndBang(cp);
  2013. break;
  2014. }
  2015. case State.DOCTYPE: {
  2016. this._stateDoctype(cp);
  2017. break;
  2018. }
  2019. case State.BEFORE_DOCTYPE_NAME: {
  2020. this._stateBeforeDoctypeName(cp);
  2021. break;
  2022. }
  2023. case State.DOCTYPE_NAME: {
  2024. this._stateDoctypeName(cp);
  2025. break;
  2026. }
  2027. case State.AFTER_DOCTYPE_NAME: {
  2028. this._stateAfterDoctypeName(cp);
  2029. break;
  2030. }
  2031. case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
  2032. this._stateAfterDoctypePublicKeyword(cp);
  2033. break;
  2034. }
  2035. case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
  2036. this._stateBeforeDoctypePublicIdentifier(cp);
  2037. break;
  2038. }
  2039. case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
  2040. this._stateDoctypePublicIdentifierDoubleQuoted(cp);
  2041. break;
  2042. }
  2043. case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
  2044. this._stateDoctypePublicIdentifierSingleQuoted(cp);
  2045. break;
  2046. }
  2047. case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
  2048. this._stateAfterDoctypePublicIdentifier(cp);
  2049. break;
  2050. }
  2051. case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
  2052. this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
  2053. break;
  2054. }
  2055. case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
  2056. this._stateAfterDoctypeSystemKeyword(cp);
  2057. break;
  2058. }
  2059. case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
  2060. this._stateBeforeDoctypeSystemIdentifier(cp);
  2061. break;
  2062. }
  2063. case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
  2064. this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
  2065. break;
  2066. }
  2067. case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
  2068. this._stateDoctypeSystemIdentifierSingleQuoted(cp);
  2069. break;
  2070. }
  2071. case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
  2072. this._stateAfterDoctypeSystemIdentifier(cp);
  2073. break;
  2074. }
  2075. case State.BOGUS_DOCTYPE: {
  2076. this._stateBogusDoctype(cp);
  2077. break;
  2078. }
  2079. case State.CDATA_SECTION: {
  2080. this._stateCdataSection(cp);
  2081. break;
  2082. }
  2083. case State.CDATA_SECTION_BRACKET: {
  2084. this._stateCdataSectionBracket(cp);
  2085. break;
  2086. }
  2087. case State.CDATA_SECTION_END: {
  2088. this._stateCdataSectionEnd(cp);
  2089. break;
  2090. }
  2091. case State.CHARACTER_REFERENCE: {
  2092. this._stateCharacterReference();
  2093. break;
  2094. }
  2095. case State.AMBIGUOUS_AMPERSAND: {
  2096. this._stateAmbiguousAmpersand(cp);
  2097. break;
  2098. }
  2099. default: {
  2100. throw new Error('Unknown state');
  2101. }
  2102. }
  2103. }
  2104. // State machine
  2105. // Data state
  2106. //------------------------------------------------------------------
  2107. _stateData(cp) {
  2108. switch (cp) {
  2109. case CODE_POINTS.LESS_THAN_SIGN: {
  2110. this.state = State.TAG_OPEN;
  2111. break;
  2112. }
  2113. case CODE_POINTS.AMPERSAND: {
  2114. this._startCharacterReference();
  2115. break;
  2116. }
  2117. case CODE_POINTS.NULL: {
  2118. this._err(ERR.unexpectedNullCharacter);
  2119. this._emitCodePoint(cp);
  2120. break;
  2121. }
  2122. case CODE_POINTS.EOF: {
  2123. this._emitEOFToken();
  2124. break;
  2125. }
  2126. default: {
  2127. this._emitCodePoint(cp);
  2128. }
  2129. }
  2130. }
  2131. // RCDATA state
  2132. //------------------------------------------------------------------
  2133. _stateRcdata(cp) {
  2134. switch (cp) {
  2135. case CODE_POINTS.AMPERSAND: {
  2136. this._startCharacterReference();
  2137. break;
  2138. }
  2139. case CODE_POINTS.LESS_THAN_SIGN: {
  2140. this.state = State.RCDATA_LESS_THAN_SIGN;
  2141. break;
  2142. }
  2143. case CODE_POINTS.NULL: {
  2144. this._err(ERR.unexpectedNullCharacter);
  2145. this._emitChars(REPLACEMENT_CHARACTER);
  2146. break;
  2147. }
  2148. case CODE_POINTS.EOF: {
  2149. this._emitEOFToken();
  2150. break;
  2151. }
  2152. default: {
  2153. this._emitCodePoint(cp);
  2154. }
  2155. }
  2156. }
  2157. // RAWTEXT state
  2158. //------------------------------------------------------------------
  2159. _stateRawtext(cp) {
  2160. switch (cp) {
  2161. case CODE_POINTS.LESS_THAN_SIGN: {
  2162. this.state = State.RAWTEXT_LESS_THAN_SIGN;
  2163. break;
  2164. }
  2165. case CODE_POINTS.NULL: {
  2166. this._err(ERR.unexpectedNullCharacter);
  2167. this._emitChars(REPLACEMENT_CHARACTER);
  2168. break;
  2169. }
  2170. case CODE_POINTS.EOF: {
  2171. this._emitEOFToken();
  2172. break;
  2173. }
  2174. default: {
  2175. this._emitCodePoint(cp);
  2176. }
  2177. }
  2178. }
  2179. // Script data state
  2180. //------------------------------------------------------------------
  2181. _stateScriptData(cp) {
  2182. switch (cp) {
  2183. case CODE_POINTS.LESS_THAN_SIGN: {
  2184. this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
  2185. break;
  2186. }
  2187. case CODE_POINTS.NULL: {
  2188. this._err(ERR.unexpectedNullCharacter);
  2189. this._emitChars(REPLACEMENT_CHARACTER);
  2190. break;
  2191. }
  2192. case CODE_POINTS.EOF: {
  2193. this._emitEOFToken();
  2194. break;
  2195. }
  2196. default: {
  2197. this._emitCodePoint(cp);
  2198. }
  2199. }
  2200. }
  2201. // PLAINTEXT state
  2202. //------------------------------------------------------------------
  2203. _statePlaintext(cp) {
  2204. switch (cp) {
  2205. case CODE_POINTS.NULL: {
  2206. this._err(ERR.unexpectedNullCharacter);
  2207. this._emitChars(REPLACEMENT_CHARACTER);
  2208. break;
  2209. }
  2210. case CODE_POINTS.EOF: {
  2211. this._emitEOFToken();
  2212. break;
  2213. }
  2214. default: {
  2215. this._emitCodePoint(cp);
  2216. }
  2217. }
  2218. }
  2219. // Tag open state
  2220. //------------------------------------------------------------------
  2221. _stateTagOpen(cp) {
  2222. if (isAsciiLetter(cp)) {
  2223. this._createStartTagToken();
  2224. this.state = State.TAG_NAME;
  2225. this._stateTagName(cp);
  2226. }
  2227. else
  2228. switch (cp) {
  2229. case CODE_POINTS.EXCLAMATION_MARK: {
  2230. this.state = State.MARKUP_DECLARATION_OPEN;
  2231. break;
  2232. }
  2233. case CODE_POINTS.SOLIDUS: {
  2234. this.state = State.END_TAG_OPEN;
  2235. break;
  2236. }
  2237. case CODE_POINTS.QUESTION_MARK: {
  2238. this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
  2239. this._createCommentToken(1);
  2240. this.state = State.BOGUS_COMMENT;
  2241. this._stateBogusComment(cp);
  2242. break;
  2243. }
  2244. case CODE_POINTS.EOF: {
  2245. this._err(ERR.eofBeforeTagName);
  2246. this._emitChars('<');
  2247. this._emitEOFToken();
  2248. break;
  2249. }
  2250. default: {
  2251. this._err(ERR.invalidFirstCharacterOfTagName);
  2252. this._emitChars('<');
  2253. this.state = State.DATA;
  2254. this._stateData(cp);
  2255. }
  2256. }
  2257. }
  2258. // End tag open state
  2259. //------------------------------------------------------------------
  2260. _stateEndTagOpen(cp) {
  2261. if (isAsciiLetter(cp)) {
  2262. this._createEndTagToken();
  2263. this.state = State.TAG_NAME;
  2264. this._stateTagName(cp);
  2265. }
  2266. else
  2267. switch (cp) {
  2268. case CODE_POINTS.GREATER_THAN_SIGN: {
  2269. this._err(ERR.missingEndTagName);
  2270. this.state = State.DATA;
  2271. break;
  2272. }
  2273. case CODE_POINTS.EOF: {
  2274. this._err(ERR.eofBeforeTagName);
  2275. this._emitChars('</');
  2276. this._emitEOFToken();
  2277. break;
  2278. }
  2279. default: {
  2280. this._err(ERR.invalidFirstCharacterOfTagName);
  2281. this._createCommentToken(2);
  2282. this.state = State.BOGUS_COMMENT;
  2283. this._stateBogusComment(cp);
  2284. }
  2285. }
  2286. }
  2287. // Tag name state
  2288. //------------------------------------------------------------------
  2289. _stateTagName(cp) {
  2290. const token = this.currentToken;
  2291. switch (cp) {
  2292. case CODE_POINTS.SPACE:
  2293. case CODE_POINTS.LINE_FEED:
  2294. case CODE_POINTS.TABULATION:
  2295. case CODE_POINTS.FORM_FEED: {
  2296. this.state = State.BEFORE_ATTRIBUTE_NAME;
  2297. break;
  2298. }
  2299. case CODE_POINTS.SOLIDUS: {
  2300. this.state = State.SELF_CLOSING_START_TAG;
  2301. break;
  2302. }
  2303. case CODE_POINTS.GREATER_THAN_SIGN: {
  2304. this.state = State.DATA;
  2305. this.emitCurrentTagToken();
  2306. break;
  2307. }
  2308. case CODE_POINTS.NULL: {
  2309. this._err(ERR.unexpectedNullCharacter);
  2310. token.tagName += REPLACEMENT_CHARACTER;
  2311. break;
  2312. }
  2313. case CODE_POINTS.EOF: {
  2314. this._err(ERR.eofInTag);
  2315. this._emitEOFToken();
  2316. break;
  2317. }
  2318. default: {
  2319. token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
  2320. }
  2321. }
  2322. }
  2323. // RCDATA less-than sign state
  2324. //------------------------------------------------------------------
  2325. _stateRcdataLessThanSign(cp) {
  2326. if (cp === CODE_POINTS.SOLIDUS) {
  2327. this.state = State.RCDATA_END_TAG_OPEN;
  2328. }
  2329. else {
  2330. this._emitChars('<');
  2331. this.state = State.RCDATA;
  2332. this._stateRcdata(cp);
  2333. }
  2334. }
  2335. // RCDATA end tag open state
  2336. //------------------------------------------------------------------
  2337. _stateRcdataEndTagOpen(cp) {
  2338. if (isAsciiLetter(cp)) {
  2339. this.state = State.RCDATA_END_TAG_NAME;
  2340. this._stateRcdataEndTagName(cp);
  2341. }
  2342. else {
  2343. this._emitChars('</');
  2344. this.state = State.RCDATA;
  2345. this._stateRcdata(cp);
  2346. }
  2347. }
  2348. handleSpecialEndTag(_cp) {
  2349. if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
  2350. return !this._ensureHibernation();
  2351. }
  2352. this._createEndTagToken();
  2353. const token = this.currentToken;
  2354. token.tagName = this.lastStartTagName;
  2355. const cp = this.preprocessor.peek(this.lastStartTagName.length);
  2356. switch (cp) {
  2357. case CODE_POINTS.SPACE:
  2358. case CODE_POINTS.LINE_FEED:
  2359. case CODE_POINTS.TABULATION:
  2360. case CODE_POINTS.FORM_FEED: {
  2361. this._advanceBy(this.lastStartTagName.length);
  2362. this.state = State.BEFORE_ATTRIBUTE_NAME;
  2363. return false;
  2364. }
  2365. case CODE_POINTS.SOLIDUS: {
  2366. this._advanceBy(this.lastStartTagName.length);
  2367. this.state = State.SELF_CLOSING_START_TAG;
  2368. return false;
  2369. }
  2370. case CODE_POINTS.GREATER_THAN_SIGN: {
  2371. this._advanceBy(this.lastStartTagName.length);
  2372. this.emitCurrentTagToken();
  2373. this.state = State.DATA;
  2374. return false;
  2375. }
  2376. default: {
  2377. return !this._ensureHibernation();
  2378. }
  2379. }
  2380. }
  2381. // RCDATA end tag name state
  2382. //------------------------------------------------------------------
  2383. _stateRcdataEndTagName(cp) {
  2384. if (this.handleSpecialEndTag(cp)) {
  2385. this._emitChars('</');
  2386. this.state = State.RCDATA;
  2387. this._stateRcdata(cp);
  2388. }
  2389. }
  2390. // RAWTEXT less-than sign state
  2391. //------------------------------------------------------------------
  2392. _stateRawtextLessThanSign(cp) {
  2393. if (cp === CODE_POINTS.SOLIDUS) {
  2394. this.state = State.RAWTEXT_END_TAG_OPEN;
  2395. }
  2396. else {
  2397. this._emitChars('<');
  2398. this.state = State.RAWTEXT;
  2399. this._stateRawtext(cp);
  2400. }
  2401. }
  2402. // RAWTEXT end tag open state
  2403. //------------------------------------------------------------------
  2404. _stateRawtextEndTagOpen(cp) {
  2405. if (isAsciiLetter(cp)) {
  2406. this.state = State.RAWTEXT_END_TAG_NAME;
  2407. this._stateRawtextEndTagName(cp);
  2408. }
  2409. else {
  2410. this._emitChars('</');
  2411. this.state = State.RAWTEXT;
  2412. this._stateRawtext(cp);
  2413. }
  2414. }
  2415. // RAWTEXT end tag name state
  2416. //------------------------------------------------------------------
  2417. _stateRawtextEndTagName(cp) {
  2418. if (this.handleSpecialEndTag(cp)) {
  2419. this._emitChars('</');
  2420. this.state = State.RAWTEXT;
  2421. this._stateRawtext(cp);
  2422. }
  2423. }
  2424. // Script data less-than sign state
  2425. //------------------------------------------------------------------
  2426. _stateScriptDataLessThanSign(cp) {
  2427. switch (cp) {
  2428. case CODE_POINTS.SOLIDUS: {
  2429. this.state = State.SCRIPT_DATA_END_TAG_OPEN;
  2430. break;
  2431. }
  2432. case CODE_POINTS.EXCLAMATION_MARK: {
  2433. this.state = State.SCRIPT_DATA_ESCAPE_START;
  2434. this._emitChars('<!');
  2435. break;
  2436. }
  2437. default: {
  2438. this._emitChars('<');
  2439. this.state = State.SCRIPT_DATA;
  2440. this._stateScriptData(cp);
  2441. }
  2442. }
  2443. }
  2444. // Script data end tag open state
  2445. //------------------------------------------------------------------
  2446. _stateScriptDataEndTagOpen(cp) {
  2447. if (isAsciiLetter(cp)) {
  2448. this.state = State.SCRIPT_DATA_END_TAG_NAME;
  2449. this._stateScriptDataEndTagName(cp);
  2450. }
  2451. else {
  2452. this._emitChars('</');
  2453. this.state = State.SCRIPT_DATA;
  2454. this._stateScriptData(cp);
  2455. }
  2456. }
  2457. // Script data end tag name state
  2458. //------------------------------------------------------------------
  2459. _stateScriptDataEndTagName(cp) {
  2460. if (this.handleSpecialEndTag(cp)) {
  2461. this._emitChars('</');
  2462. this.state = State.SCRIPT_DATA;
  2463. this._stateScriptData(cp);
  2464. }
  2465. }
  2466. // Script data escape start state
  2467. //------------------------------------------------------------------
  2468. _stateScriptDataEscapeStart(cp) {
  2469. if (cp === CODE_POINTS.HYPHEN_MINUS) {
  2470. this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
  2471. this._emitChars('-');
  2472. }
  2473. else {
  2474. this.state = State.SCRIPT_DATA;
  2475. this._stateScriptData(cp);
  2476. }
  2477. }
  2478. // Script data escape start dash state
  2479. //------------------------------------------------------------------
  2480. _stateScriptDataEscapeStartDash(cp) {
  2481. if (cp === CODE_POINTS.HYPHEN_MINUS) {
  2482. this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
  2483. this._emitChars('-');
  2484. }
  2485. else {
  2486. this.state = State.SCRIPT_DATA;
  2487. this._stateScriptData(cp);
  2488. }
  2489. }
  2490. // Script data escaped state
  2491. //------------------------------------------------------------------
  2492. _stateScriptDataEscaped(cp) {
  2493. switch (cp) {
  2494. case CODE_POINTS.HYPHEN_MINUS: {
  2495. this.state = State.SCRIPT_DATA_ESCAPED_DASH;
  2496. this._emitChars('-');
  2497. break;
  2498. }
  2499. case CODE_POINTS.LESS_THAN_SIGN: {
  2500. this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
  2501. break;
  2502. }
  2503. case CODE_POINTS.NULL: {
  2504. this._err(ERR.unexpectedNullCharacter);
  2505. this._emitChars(REPLACEMENT_CHARACTER);
  2506. break;
  2507. }
  2508. case CODE_POINTS.EOF: {
  2509. this._err(ERR.eofInScriptHtmlCommentLikeText);
  2510. this._emitEOFToken();
  2511. break;
  2512. }
  2513. default: {
  2514. this._emitCodePoint(cp);
  2515. }
  2516. }
  2517. }
  2518. // Script data escaped dash state
  2519. //------------------------------------------------------------------
  2520. _stateScriptDataEscapedDash(cp) {
  2521. switch (cp) {
  2522. case CODE_POINTS.HYPHEN_MINUS: {
  2523. this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
  2524. this._emitChars('-');
  2525. break;
  2526. }
  2527. case CODE_POINTS.LESS_THAN_SIGN: {
  2528. this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
  2529. break;
  2530. }
  2531. case CODE_POINTS.NULL: {
  2532. this._err(ERR.unexpectedNullCharacter);
  2533. this.state = State.SCRIPT_DATA_ESCAPED;
  2534. this._emitChars(REPLACEMENT_CHARACTER);
  2535. break;
  2536. }
  2537. case CODE_POINTS.EOF: {
  2538. this._err(ERR.eofInScriptHtmlCommentLikeText);
  2539. this._emitEOFToken();
  2540. break;
  2541. }
  2542. default: {
  2543. this.state = State.SCRIPT_DATA_ESCAPED;
  2544. this._emitCodePoint(cp);
  2545. }
  2546. }
  2547. }
  2548. // Script data escaped dash dash state
  2549. //------------------------------------------------------------------
  2550. _stateScriptDataEscapedDashDash(cp) {
  2551. switch (cp) {
  2552. case CODE_POINTS.HYPHEN_MINUS: {
  2553. this._emitChars('-');
  2554. break;
  2555. }
  2556. case CODE_POINTS.LESS_THAN_SIGN: {
  2557. this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
  2558. break;
  2559. }
  2560. case CODE_POINTS.GREATER_THAN_SIGN: {
  2561. this.state = State.SCRIPT_DATA;
  2562. this._emitChars('>');
  2563. break;
  2564. }
  2565. case CODE_POINTS.NULL: {
  2566. this._err(ERR.unexpectedNullCharacter);
  2567. this.state = State.SCRIPT_DATA_ESCAPED;
  2568. this._emitChars(REPLACEMENT_CHARACTER);
  2569. break;
  2570. }
  2571. case CODE_POINTS.EOF: {
  2572. this._err(ERR.eofInScriptHtmlCommentLikeText);
  2573. this._emitEOFToken();
  2574. break;
  2575. }
  2576. default: {
  2577. this.state = State.SCRIPT_DATA_ESCAPED;
  2578. this._emitCodePoint(cp);
  2579. }
  2580. }
  2581. }
  2582. // Script data escaped less-than sign state
  2583. //------------------------------------------------------------------
  2584. _stateScriptDataEscapedLessThanSign(cp) {
  2585. if (cp === CODE_POINTS.SOLIDUS) {
  2586. this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
  2587. }
  2588. else if (isAsciiLetter(cp)) {
  2589. this._emitChars('<');
  2590. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
  2591. this._stateScriptDataDoubleEscapeStart(cp);
  2592. }
  2593. else {
  2594. this._emitChars('<');
  2595. this.state = State.SCRIPT_DATA_ESCAPED;
  2596. this._stateScriptDataEscaped(cp);
  2597. }
  2598. }
  2599. // Script data escaped end tag open state
  2600. //------------------------------------------------------------------
  2601. _stateScriptDataEscapedEndTagOpen(cp) {
  2602. if (isAsciiLetter(cp)) {
  2603. this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
  2604. this._stateScriptDataEscapedEndTagName(cp);
  2605. }
  2606. else {
  2607. this._emitChars('</');
  2608. this.state = State.SCRIPT_DATA_ESCAPED;
  2609. this._stateScriptDataEscaped(cp);
  2610. }
  2611. }
  2612. // Script data escaped end tag name state
  2613. //------------------------------------------------------------------
  2614. _stateScriptDataEscapedEndTagName(cp) {
  2615. if (this.handleSpecialEndTag(cp)) {
  2616. this._emitChars('</');
  2617. this.state = State.SCRIPT_DATA_ESCAPED;
  2618. this._stateScriptDataEscaped(cp);
  2619. }
  2620. }
  2621. // Script data double escape start state
  2622. //------------------------------------------------------------------
  2623. _stateScriptDataDoubleEscapeStart(cp) {
  2624. if (this.preprocessor.startsWith(SEQUENCES.SCRIPT, false) &&
  2625. isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(SEQUENCES.SCRIPT.length))) {
  2626. this._emitCodePoint(cp);
  2627. for (let i = 0; i < SEQUENCES.SCRIPT.length; i++) {
  2628. this._emitCodePoint(this._consume());
  2629. }
  2630. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2631. }
  2632. else if (!this._ensureHibernation()) {
  2633. this.state = State.SCRIPT_DATA_ESCAPED;
  2634. this._stateScriptDataEscaped(cp);
  2635. }
  2636. }
  2637. // Script data double escaped state
  2638. //------------------------------------------------------------------
  2639. _stateScriptDataDoubleEscaped(cp) {
  2640. switch (cp) {
  2641. case CODE_POINTS.HYPHEN_MINUS: {
  2642. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
  2643. this._emitChars('-');
  2644. break;
  2645. }
  2646. case CODE_POINTS.LESS_THAN_SIGN: {
  2647. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
  2648. this._emitChars('<');
  2649. break;
  2650. }
  2651. case CODE_POINTS.NULL: {
  2652. this._err(ERR.unexpectedNullCharacter);
  2653. this._emitChars(REPLACEMENT_CHARACTER);
  2654. break;
  2655. }
  2656. case CODE_POINTS.EOF: {
  2657. this._err(ERR.eofInScriptHtmlCommentLikeText);
  2658. this._emitEOFToken();
  2659. break;
  2660. }
  2661. default: {
  2662. this._emitCodePoint(cp);
  2663. }
  2664. }
  2665. }
  2666. // Script data double escaped dash state
  2667. //------------------------------------------------------------------
  2668. _stateScriptDataDoubleEscapedDash(cp) {
  2669. switch (cp) {
  2670. case CODE_POINTS.HYPHEN_MINUS: {
  2671. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
  2672. this._emitChars('-');
  2673. break;
  2674. }
  2675. case CODE_POINTS.LESS_THAN_SIGN: {
  2676. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
  2677. this._emitChars('<');
  2678. break;
  2679. }
  2680. case CODE_POINTS.NULL: {
  2681. this._err(ERR.unexpectedNullCharacter);
  2682. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2683. this._emitChars(REPLACEMENT_CHARACTER);
  2684. break;
  2685. }
  2686. case CODE_POINTS.EOF: {
  2687. this._err(ERR.eofInScriptHtmlCommentLikeText);
  2688. this._emitEOFToken();
  2689. break;
  2690. }
  2691. default: {
  2692. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2693. this._emitCodePoint(cp);
  2694. }
  2695. }
  2696. }
  2697. // Script data double escaped dash dash state
  2698. //------------------------------------------------------------------
  2699. _stateScriptDataDoubleEscapedDashDash(cp) {
  2700. switch (cp) {
  2701. case CODE_POINTS.HYPHEN_MINUS: {
  2702. this._emitChars('-');
  2703. break;
  2704. }
  2705. case CODE_POINTS.LESS_THAN_SIGN: {
  2706. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
  2707. this._emitChars('<');
  2708. break;
  2709. }
  2710. case CODE_POINTS.GREATER_THAN_SIGN: {
  2711. this.state = State.SCRIPT_DATA;
  2712. this._emitChars('>');
  2713. break;
  2714. }
  2715. case CODE_POINTS.NULL: {
  2716. this._err(ERR.unexpectedNullCharacter);
  2717. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2718. this._emitChars(REPLACEMENT_CHARACTER);
  2719. break;
  2720. }
  2721. case CODE_POINTS.EOF: {
  2722. this._err(ERR.eofInScriptHtmlCommentLikeText);
  2723. this._emitEOFToken();
  2724. break;
  2725. }
  2726. default: {
  2727. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2728. this._emitCodePoint(cp);
  2729. }
  2730. }
  2731. }
  2732. // Script data double escaped less-than sign state
  2733. //------------------------------------------------------------------
  2734. _stateScriptDataDoubleEscapedLessThanSign(cp) {
  2735. if (cp === CODE_POINTS.SOLIDUS) {
  2736. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
  2737. this._emitChars('/');
  2738. }
  2739. else {
  2740. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2741. this._stateScriptDataDoubleEscaped(cp);
  2742. }
  2743. }
  2744. // Script data double escape end state
  2745. //------------------------------------------------------------------
  2746. _stateScriptDataDoubleEscapeEnd(cp) {
  2747. if (this.preprocessor.startsWith(SEQUENCES.SCRIPT, false) &&
  2748. isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(SEQUENCES.SCRIPT.length))) {
  2749. this._emitCodePoint(cp);
  2750. for (let i = 0; i < SEQUENCES.SCRIPT.length; i++) {
  2751. this._emitCodePoint(this._consume());
  2752. }
  2753. this.state = State.SCRIPT_DATA_ESCAPED;
  2754. }
  2755. else if (!this._ensureHibernation()) {
  2756. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  2757. this._stateScriptDataDoubleEscaped(cp);
  2758. }
  2759. }
  2760. // Before attribute name state
  2761. //------------------------------------------------------------------
  2762. _stateBeforeAttributeName(cp) {
  2763. switch (cp) {
  2764. case CODE_POINTS.SPACE:
  2765. case CODE_POINTS.LINE_FEED:
  2766. case CODE_POINTS.TABULATION:
  2767. case CODE_POINTS.FORM_FEED: {
  2768. // Ignore whitespace
  2769. break;
  2770. }
  2771. case CODE_POINTS.SOLIDUS:
  2772. case CODE_POINTS.GREATER_THAN_SIGN:
  2773. case CODE_POINTS.EOF: {
  2774. this.state = State.AFTER_ATTRIBUTE_NAME;
  2775. this._stateAfterAttributeName(cp);
  2776. break;
  2777. }
  2778. case CODE_POINTS.EQUALS_SIGN: {
  2779. this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
  2780. this._createAttr('=');
  2781. this.state = State.ATTRIBUTE_NAME;
  2782. break;
  2783. }
  2784. default: {
  2785. this._createAttr('');
  2786. this.state = State.ATTRIBUTE_NAME;
  2787. this._stateAttributeName(cp);
  2788. }
  2789. }
  2790. }
  2791. // Attribute name state
  2792. //------------------------------------------------------------------
  2793. _stateAttributeName(cp) {
  2794. switch (cp) {
  2795. case CODE_POINTS.SPACE:
  2796. case CODE_POINTS.LINE_FEED:
  2797. case CODE_POINTS.TABULATION:
  2798. case CODE_POINTS.FORM_FEED:
  2799. case CODE_POINTS.SOLIDUS:
  2800. case CODE_POINTS.GREATER_THAN_SIGN:
  2801. case CODE_POINTS.EOF: {
  2802. this._leaveAttrName();
  2803. this.state = State.AFTER_ATTRIBUTE_NAME;
  2804. this._stateAfterAttributeName(cp);
  2805. break;
  2806. }
  2807. case CODE_POINTS.EQUALS_SIGN: {
  2808. this._leaveAttrName();
  2809. this.state = State.BEFORE_ATTRIBUTE_VALUE;
  2810. break;
  2811. }
  2812. case CODE_POINTS.QUOTATION_MARK:
  2813. case CODE_POINTS.APOSTROPHE:
  2814. case CODE_POINTS.LESS_THAN_SIGN: {
  2815. this._err(ERR.unexpectedCharacterInAttributeName);
  2816. this.currentAttr.name += String.fromCodePoint(cp);
  2817. break;
  2818. }
  2819. case CODE_POINTS.NULL: {
  2820. this._err(ERR.unexpectedNullCharacter);
  2821. this.currentAttr.name += REPLACEMENT_CHARACTER;
  2822. break;
  2823. }
  2824. default: {
  2825. this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
  2826. }
  2827. }
  2828. }
  2829. // After attribute name state
  2830. //------------------------------------------------------------------
  2831. _stateAfterAttributeName(cp) {
  2832. switch (cp) {
  2833. case CODE_POINTS.SPACE:
  2834. case CODE_POINTS.LINE_FEED:
  2835. case CODE_POINTS.TABULATION:
  2836. case CODE_POINTS.FORM_FEED: {
  2837. // Ignore whitespace
  2838. break;
  2839. }
  2840. case CODE_POINTS.SOLIDUS: {
  2841. this.state = State.SELF_CLOSING_START_TAG;
  2842. break;
  2843. }
  2844. case CODE_POINTS.EQUALS_SIGN: {
  2845. this.state = State.BEFORE_ATTRIBUTE_VALUE;
  2846. break;
  2847. }
  2848. case CODE_POINTS.GREATER_THAN_SIGN: {
  2849. this.state = State.DATA;
  2850. this.emitCurrentTagToken();
  2851. break;
  2852. }
  2853. case CODE_POINTS.EOF: {
  2854. this._err(ERR.eofInTag);
  2855. this._emitEOFToken();
  2856. break;
  2857. }
  2858. default: {
  2859. this._createAttr('');
  2860. this.state = State.ATTRIBUTE_NAME;
  2861. this._stateAttributeName(cp);
  2862. }
  2863. }
  2864. }
  2865. // Before attribute value state
  2866. //------------------------------------------------------------------
  2867. _stateBeforeAttributeValue(cp) {
  2868. switch (cp) {
  2869. case CODE_POINTS.SPACE:
  2870. case CODE_POINTS.LINE_FEED:
  2871. case CODE_POINTS.TABULATION:
  2872. case CODE_POINTS.FORM_FEED: {
  2873. // Ignore whitespace
  2874. break;
  2875. }
  2876. case CODE_POINTS.QUOTATION_MARK: {
  2877. this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
  2878. break;
  2879. }
  2880. case CODE_POINTS.APOSTROPHE: {
  2881. this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
  2882. break;
  2883. }
  2884. case CODE_POINTS.GREATER_THAN_SIGN: {
  2885. this._err(ERR.missingAttributeValue);
  2886. this.state = State.DATA;
  2887. this.emitCurrentTagToken();
  2888. break;
  2889. }
  2890. default: {
  2891. this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
  2892. this._stateAttributeValueUnquoted(cp);
  2893. }
  2894. }
  2895. }
  2896. // Attribute value (double-quoted) state
  2897. //------------------------------------------------------------------
  2898. _stateAttributeValueDoubleQuoted(cp) {
  2899. switch (cp) {
  2900. case CODE_POINTS.QUOTATION_MARK: {
  2901. this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
  2902. break;
  2903. }
  2904. case CODE_POINTS.AMPERSAND: {
  2905. this._startCharacterReference();
  2906. break;
  2907. }
  2908. case CODE_POINTS.NULL: {
  2909. this._err(ERR.unexpectedNullCharacter);
  2910. this.currentAttr.value += REPLACEMENT_CHARACTER;
  2911. break;
  2912. }
  2913. case CODE_POINTS.EOF: {
  2914. this._err(ERR.eofInTag);
  2915. this._emitEOFToken();
  2916. break;
  2917. }
  2918. default: {
  2919. this.currentAttr.value += String.fromCodePoint(cp);
  2920. }
  2921. }
  2922. }
  2923. // Attribute value (single-quoted) state
  2924. //------------------------------------------------------------------
  2925. _stateAttributeValueSingleQuoted(cp) {
  2926. switch (cp) {
  2927. case CODE_POINTS.APOSTROPHE: {
  2928. this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
  2929. break;
  2930. }
  2931. case CODE_POINTS.AMPERSAND: {
  2932. this._startCharacterReference();
  2933. break;
  2934. }
  2935. case CODE_POINTS.NULL: {
  2936. this._err(ERR.unexpectedNullCharacter);
  2937. this.currentAttr.value += REPLACEMENT_CHARACTER;
  2938. break;
  2939. }
  2940. case CODE_POINTS.EOF: {
  2941. this._err(ERR.eofInTag);
  2942. this._emitEOFToken();
  2943. break;
  2944. }
  2945. default: {
  2946. this.currentAttr.value += String.fromCodePoint(cp);
  2947. }
  2948. }
  2949. }
  2950. // Attribute value (unquoted) state
  2951. //------------------------------------------------------------------
  2952. _stateAttributeValueUnquoted(cp) {
  2953. switch (cp) {
  2954. case CODE_POINTS.SPACE:
  2955. case CODE_POINTS.LINE_FEED:
  2956. case CODE_POINTS.TABULATION:
  2957. case CODE_POINTS.FORM_FEED: {
  2958. this._leaveAttrValue();
  2959. this.state = State.BEFORE_ATTRIBUTE_NAME;
  2960. break;
  2961. }
  2962. case CODE_POINTS.AMPERSAND: {
  2963. this._startCharacterReference();
  2964. break;
  2965. }
  2966. case CODE_POINTS.GREATER_THAN_SIGN: {
  2967. this._leaveAttrValue();
  2968. this.state = State.DATA;
  2969. this.emitCurrentTagToken();
  2970. break;
  2971. }
  2972. case CODE_POINTS.NULL: {
  2973. this._err(ERR.unexpectedNullCharacter);
  2974. this.currentAttr.value += REPLACEMENT_CHARACTER;
  2975. break;
  2976. }
  2977. case CODE_POINTS.QUOTATION_MARK:
  2978. case CODE_POINTS.APOSTROPHE:
  2979. case CODE_POINTS.LESS_THAN_SIGN:
  2980. case CODE_POINTS.EQUALS_SIGN:
  2981. case CODE_POINTS.GRAVE_ACCENT: {
  2982. this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
  2983. this.currentAttr.value += String.fromCodePoint(cp);
  2984. break;
  2985. }
  2986. case CODE_POINTS.EOF: {
  2987. this._err(ERR.eofInTag);
  2988. this._emitEOFToken();
  2989. break;
  2990. }
  2991. default: {
  2992. this.currentAttr.value += String.fromCodePoint(cp);
  2993. }
  2994. }
  2995. }
  2996. // After attribute value (quoted) state
  2997. //------------------------------------------------------------------
  2998. _stateAfterAttributeValueQuoted(cp) {
  2999. switch (cp) {
  3000. case CODE_POINTS.SPACE:
  3001. case CODE_POINTS.LINE_FEED:
  3002. case CODE_POINTS.TABULATION:
  3003. case CODE_POINTS.FORM_FEED: {
  3004. this._leaveAttrValue();
  3005. this.state = State.BEFORE_ATTRIBUTE_NAME;
  3006. break;
  3007. }
  3008. case CODE_POINTS.SOLIDUS: {
  3009. this._leaveAttrValue();
  3010. this.state = State.SELF_CLOSING_START_TAG;
  3011. break;
  3012. }
  3013. case CODE_POINTS.GREATER_THAN_SIGN: {
  3014. this._leaveAttrValue();
  3015. this.state = State.DATA;
  3016. this.emitCurrentTagToken();
  3017. break;
  3018. }
  3019. case CODE_POINTS.EOF: {
  3020. this._err(ERR.eofInTag);
  3021. this._emitEOFToken();
  3022. break;
  3023. }
  3024. default: {
  3025. this._err(ERR.missingWhitespaceBetweenAttributes);
  3026. this.state = State.BEFORE_ATTRIBUTE_NAME;
  3027. this._stateBeforeAttributeName(cp);
  3028. }
  3029. }
  3030. }
  3031. // Self-closing start tag state
  3032. //------------------------------------------------------------------
  3033. _stateSelfClosingStartTag(cp) {
  3034. switch (cp) {
  3035. case CODE_POINTS.GREATER_THAN_SIGN: {
  3036. const token = this.currentToken;
  3037. token.selfClosing = true;
  3038. this.state = State.DATA;
  3039. this.emitCurrentTagToken();
  3040. break;
  3041. }
  3042. case CODE_POINTS.EOF: {
  3043. this._err(ERR.eofInTag);
  3044. this._emitEOFToken();
  3045. break;
  3046. }
  3047. default: {
  3048. this._err(ERR.unexpectedSolidusInTag);
  3049. this.state = State.BEFORE_ATTRIBUTE_NAME;
  3050. this._stateBeforeAttributeName(cp);
  3051. }
  3052. }
  3053. }
  3054. // Bogus comment state
  3055. //------------------------------------------------------------------
  3056. _stateBogusComment(cp) {
  3057. const token = this.currentToken;
  3058. switch (cp) {
  3059. case CODE_POINTS.GREATER_THAN_SIGN: {
  3060. this.state = State.DATA;
  3061. this.emitCurrentComment(token);
  3062. break;
  3063. }
  3064. case CODE_POINTS.EOF: {
  3065. this.emitCurrentComment(token);
  3066. this._emitEOFToken();
  3067. break;
  3068. }
  3069. case CODE_POINTS.NULL: {
  3070. this._err(ERR.unexpectedNullCharacter);
  3071. token.data += REPLACEMENT_CHARACTER;
  3072. break;
  3073. }
  3074. default: {
  3075. token.data += String.fromCodePoint(cp);
  3076. }
  3077. }
  3078. }
  3079. // Markup declaration open state
  3080. //------------------------------------------------------------------
  3081. _stateMarkupDeclarationOpen(cp) {
  3082. if (this._consumeSequenceIfMatch(SEQUENCES.DASH_DASH, true)) {
  3083. this._createCommentToken(SEQUENCES.DASH_DASH.length + 1);
  3084. this.state = State.COMMENT_START;
  3085. }
  3086. else if (this._consumeSequenceIfMatch(SEQUENCES.DOCTYPE, false)) {
  3087. // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
  3088. this.currentLocation = this.getCurrentLocation(SEQUENCES.DOCTYPE.length + 1);
  3089. this.state = State.DOCTYPE;
  3090. }
  3091. else if (this._consumeSequenceIfMatch(SEQUENCES.CDATA_START, true)) {
  3092. if (this.inForeignNode) {
  3093. this.state = State.CDATA_SECTION;
  3094. }
  3095. else {
  3096. this._err(ERR.cdataInHtmlContent);
  3097. this._createCommentToken(SEQUENCES.CDATA_START.length + 1);
  3098. this.currentToken.data = '[CDATA[';
  3099. this.state = State.BOGUS_COMMENT;
  3100. }
  3101. }
  3102. //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
  3103. //results are no longer valid and we will need to start over.
  3104. else if (!this._ensureHibernation()) {
  3105. this._err(ERR.incorrectlyOpenedComment);
  3106. this._createCommentToken(2);
  3107. this.state = State.BOGUS_COMMENT;
  3108. this._stateBogusComment(cp);
  3109. }
  3110. }
  3111. // Comment start state
  3112. //------------------------------------------------------------------
  3113. _stateCommentStart(cp) {
  3114. switch (cp) {
  3115. case CODE_POINTS.HYPHEN_MINUS: {
  3116. this.state = State.COMMENT_START_DASH;
  3117. break;
  3118. }
  3119. case CODE_POINTS.GREATER_THAN_SIGN: {
  3120. this._err(ERR.abruptClosingOfEmptyComment);
  3121. this.state = State.DATA;
  3122. const token = this.currentToken;
  3123. this.emitCurrentComment(token);
  3124. break;
  3125. }
  3126. default: {
  3127. this.state = State.COMMENT;
  3128. this._stateComment(cp);
  3129. }
  3130. }
  3131. }
  3132. // Comment start dash state
  3133. //------------------------------------------------------------------
  3134. _stateCommentStartDash(cp) {
  3135. const token = this.currentToken;
  3136. switch (cp) {
  3137. case CODE_POINTS.HYPHEN_MINUS: {
  3138. this.state = State.COMMENT_END;
  3139. break;
  3140. }
  3141. case CODE_POINTS.GREATER_THAN_SIGN: {
  3142. this._err(ERR.abruptClosingOfEmptyComment);
  3143. this.state = State.DATA;
  3144. this.emitCurrentComment(token);
  3145. break;
  3146. }
  3147. case CODE_POINTS.EOF: {
  3148. this._err(ERR.eofInComment);
  3149. this.emitCurrentComment(token);
  3150. this._emitEOFToken();
  3151. break;
  3152. }
  3153. default: {
  3154. token.data += '-';
  3155. this.state = State.COMMENT;
  3156. this._stateComment(cp);
  3157. }
  3158. }
  3159. }
  3160. // Comment state
  3161. //------------------------------------------------------------------
  3162. _stateComment(cp) {
  3163. const token = this.currentToken;
  3164. switch (cp) {
  3165. case CODE_POINTS.HYPHEN_MINUS: {
  3166. this.state = State.COMMENT_END_DASH;
  3167. break;
  3168. }
  3169. case CODE_POINTS.LESS_THAN_SIGN: {
  3170. token.data += '<';
  3171. this.state = State.COMMENT_LESS_THAN_SIGN;
  3172. break;
  3173. }
  3174. case CODE_POINTS.NULL: {
  3175. this._err(ERR.unexpectedNullCharacter);
  3176. token.data += REPLACEMENT_CHARACTER;
  3177. break;
  3178. }
  3179. case CODE_POINTS.EOF: {
  3180. this._err(ERR.eofInComment);
  3181. this.emitCurrentComment(token);
  3182. this._emitEOFToken();
  3183. break;
  3184. }
  3185. default: {
  3186. token.data += String.fromCodePoint(cp);
  3187. }
  3188. }
  3189. }
  3190. // Comment less-than sign state
  3191. //------------------------------------------------------------------
  3192. _stateCommentLessThanSign(cp) {
  3193. const token = this.currentToken;
  3194. switch (cp) {
  3195. case CODE_POINTS.EXCLAMATION_MARK: {
  3196. token.data += '!';
  3197. this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
  3198. break;
  3199. }
  3200. case CODE_POINTS.LESS_THAN_SIGN: {
  3201. token.data += '<';
  3202. break;
  3203. }
  3204. default: {
  3205. this.state = State.COMMENT;
  3206. this._stateComment(cp);
  3207. }
  3208. }
  3209. }
  3210. // Comment less-than sign bang state
  3211. //------------------------------------------------------------------
  3212. _stateCommentLessThanSignBang(cp) {
  3213. if (cp === CODE_POINTS.HYPHEN_MINUS) {
  3214. this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
  3215. }
  3216. else {
  3217. this.state = State.COMMENT;
  3218. this._stateComment(cp);
  3219. }
  3220. }
  3221. // Comment less-than sign bang dash state
  3222. //------------------------------------------------------------------
  3223. _stateCommentLessThanSignBangDash(cp) {
  3224. if (cp === CODE_POINTS.HYPHEN_MINUS) {
  3225. this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
  3226. }
  3227. else {
  3228. this.state = State.COMMENT_END_DASH;
  3229. this._stateCommentEndDash(cp);
  3230. }
  3231. }
  3232. // Comment less-than sign bang dash dash state
  3233. //------------------------------------------------------------------
  3234. _stateCommentLessThanSignBangDashDash(cp) {
  3235. if (cp !== CODE_POINTS.GREATER_THAN_SIGN && cp !== CODE_POINTS.EOF) {
  3236. this._err(ERR.nestedComment);
  3237. }
  3238. this.state = State.COMMENT_END;
  3239. this._stateCommentEnd(cp);
  3240. }
  3241. // Comment end dash state
  3242. //------------------------------------------------------------------
  3243. _stateCommentEndDash(cp) {
  3244. const token = this.currentToken;
  3245. switch (cp) {
  3246. case CODE_POINTS.HYPHEN_MINUS: {
  3247. this.state = State.COMMENT_END;
  3248. break;
  3249. }
  3250. case CODE_POINTS.EOF: {
  3251. this._err(ERR.eofInComment);
  3252. this.emitCurrentComment(token);
  3253. this._emitEOFToken();
  3254. break;
  3255. }
  3256. default: {
  3257. token.data += '-';
  3258. this.state = State.COMMENT;
  3259. this._stateComment(cp);
  3260. }
  3261. }
  3262. }
  3263. // Comment end state
  3264. //------------------------------------------------------------------
  3265. _stateCommentEnd(cp) {
  3266. const token = this.currentToken;
  3267. switch (cp) {
  3268. case CODE_POINTS.GREATER_THAN_SIGN: {
  3269. this.state = State.DATA;
  3270. this.emitCurrentComment(token);
  3271. break;
  3272. }
  3273. case CODE_POINTS.EXCLAMATION_MARK: {
  3274. this.state = State.COMMENT_END_BANG;
  3275. break;
  3276. }
  3277. case CODE_POINTS.HYPHEN_MINUS: {
  3278. token.data += '-';
  3279. break;
  3280. }
  3281. case CODE_POINTS.EOF: {
  3282. this._err(ERR.eofInComment);
  3283. this.emitCurrentComment(token);
  3284. this._emitEOFToken();
  3285. break;
  3286. }
  3287. default: {
  3288. token.data += '--';
  3289. this.state = State.COMMENT;
  3290. this._stateComment(cp);
  3291. }
  3292. }
  3293. }
  3294. // Comment end bang state
  3295. //------------------------------------------------------------------
  3296. _stateCommentEndBang(cp) {
  3297. const token = this.currentToken;
  3298. switch (cp) {
  3299. case CODE_POINTS.HYPHEN_MINUS: {
  3300. token.data += '--!';
  3301. this.state = State.COMMENT_END_DASH;
  3302. break;
  3303. }
  3304. case CODE_POINTS.GREATER_THAN_SIGN: {
  3305. this._err(ERR.incorrectlyClosedComment);
  3306. this.state = State.DATA;
  3307. this.emitCurrentComment(token);
  3308. break;
  3309. }
  3310. case CODE_POINTS.EOF: {
  3311. this._err(ERR.eofInComment);
  3312. this.emitCurrentComment(token);
  3313. this._emitEOFToken();
  3314. break;
  3315. }
  3316. default: {
  3317. token.data += '--!';
  3318. this.state = State.COMMENT;
  3319. this._stateComment(cp);
  3320. }
  3321. }
  3322. }
  3323. // DOCTYPE state
  3324. //------------------------------------------------------------------
  3325. _stateDoctype(cp) {
  3326. switch (cp) {
  3327. case CODE_POINTS.SPACE:
  3328. case CODE_POINTS.LINE_FEED:
  3329. case CODE_POINTS.TABULATION:
  3330. case CODE_POINTS.FORM_FEED: {
  3331. this.state = State.BEFORE_DOCTYPE_NAME;
  3332. break;
  3333. }
  3334. case CODE_POINTS.GREATER_THAN_SIGN: {
  3335. this.state = State.BEFORE_DOCTYPE_NAME;
  3336. this._stateBeforeDoctypeName(cp);
  3337. break;
  3338. }
  3339. case CODE_POINTS.EOF: {
  3340. this._err(ERR.eofInDoctype);
  3341. this._createDoctypeToken(null);
  3342. const token = this.currentToken;
  3343. token.forceQuirks = true;
  3344. this.emitCurrentDoctype(token);
  3345. this._emitEOFToken();
  3346. break;
  3347. }
  3348. default: {
  3349. this._err(ERR.missingWhitespaceBeforeDoctypeName);
  3350. this.state = State.BEFORE_DOCTYPE_NAME;
  3351. this._stateBeforeDoctypeName(cp);
  3352. }
  3353. }
  3354. }
  3355. // Before DOCTYPE name state
  3356. //------------------------------------------------------------------
  3357. _stateBeforeDoctypeName(cp) {
  3358. if (isAsciiUpper(cp)) {
  3359. this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
  3360. this.state = State.DOCTYPE_NAME;
  3361. }
  3362. else
  3363. switch (cp) {
  3364. case CODE_POINTS.SPACE:
  3365. case CODE_POINTS.LINE_FEED:
  3366. case CODE_POINTS.TABULATION:
  3367. case CODE_POINTS.FORM_FEED: {
  3368. // Ignore whitespace
  3369. break;
  3370. }
  3371. case CODE_POINTS.NULL: {
  3372. this._err(ERR.unexpectedNullCharacter);
  3373. this._createDoctypeToken(REPLACEMENT_CHARACTER);
  3374. this.state = State.DOCTYPE_NAME;
  3375. break;
  3376. }
  3377. case CODE_POINTS.GREATER_THAN_SIGN: {
  3378. this._err(ERR.missingDoctypeName);
  3379. this._createDoctypeToken(null);
  3380. const token = this.currentToken;
  3381. token.forceQuirks = true;
  3382. this.emitCurrentDoctype(token);
  3383. this.state = State.DATA;
  3384. break;
  3385. }
  3386. case CODE_POINTS.EOF: {
  3387. this._err(ERR.eofInDoctype);
  3388. this._createDoctypeToken(null);
  3389. const token = this.currentToken;
  3390. token.forceQuirks = true;
  3391. this.emitCurrentDoctype(token);
  3392. this._emitEOFToken();
  3393. break;
  3394. }
  3395. default: {
  3396. this._createDoctypeToken(String.fromCodePoint(cp));
  3397. this.state = State.DOCTYPE_NAME;
  3398. }
  3399. }
  3400. }
  3401. // DOCTYPE name state
  3402. //------------------------------------------------------------------
  3403. _stateDoctypeName(cp) {
  3404. const token = this.currentToken;
  3405. switch (cp) {
  3406. case CODE_POINTS.SPACE:
  3407. case CODE_POINTS.LINE_FEED:
  3408. case CODE_POINTS.TABULATION:
  3409. case CODE_POINTS.FORM_FEED: {
  3410. this.state = State.AFTER_DOCTYPE_NAME;
  3411. break;
  3412. }
  3413. case CODE_POINTS.GREATER_THAN_SIGN: {
  3414. this.state = State.DATA;
  3415. this.emitCurrentDoctype(token);
  3416. break;
  3417. }
  3418. case CODE_POINTS.NULL: {
  3419. this._err(ERR.unexpectedNullCharacter);
  3420. token.name += REPLACEMENT_CHARACTER;
  3421. break;
  3422. }
  3423. case CODE_POINTS.EOF: {
  3424. this._err(ERR.eofInDoctype);
  3425. token.forceQuirks = true;
  3426. this.emitCurrentDoctype(token);
  3427. this._emitEOFToken();
  3428. break;
  3429. }
  3430. default: {
  3431. token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
  3432. }
  3433. }
  3434. }
  3435. // After DOCTYPE name state
  3436. //------------------------------------------------------------------
  3437. _stateAfterDoctypeName(cp) {
  3438. const token = this.currentToken;
  3439. switch (cp) {
  3440. case CODE_POINTS.SPACE:
  3441. case CODE_POINTS.LINE_FEED:
  3442. case CODE_POINTS.TABULATION:
  3443. case CODE_POINTS.FORM_FEED: {
  3444. // Ignore whitespace
  3445. break;
  3446. }
  3447. case CODE_POINTS.GREATER_THAN_SIGN: {
  3448. this.state = State.DATA;
  3449. this.emitCurrentDoctype(token);
  3450. break;
  3451. }
  3452. case CODE_POINTS.EOF: {
  3453. this._err(ERR.eofInDoctype);
  3454. token.forceQuirks = true;
  3455. this.emitCurrentDoctype(token);
  3456. this._emitEOFToken();
  3457. break;
  3458. }
  3459. default: {
  3460. if (this._consumeSequenceIfMatch(SEQUENCES.PUBLIC, false)) {
  3461. this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
  3462. }
  3463. else if (this._consumeSequenceIfMatch(SEQUENCES.SYSTEM, false)) {
  3464. this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
  3465. }
  3466. //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
  3467. //results are no longer valid and we will need to start over.
  3468. else if (!this._ensureHibernation()) {
  3469. this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
  3470. token.forceQuirks = true;
  3471. this.state = State.BOGUS_DOCTYPE;
  3472. this._stateBogusDoctype(cp);
  3473. }
  3474. }
  3475. }
  3476. }
  3477. // After DOCTYPE public keyword state
  3478. //------------------------------------------------------------------
  3479. _stateAfterDoctypePublicKeyword(cp) {
  3480. const token = this.currentToken;
  3481. switch (cp) {
  3482. case CODE_POINTS.SPACE:
  3483. case CODE_POINTS.LINE_FEED:
  3484. case CODE_POINTS.TABULATION:
  3485. case CODE_POINTS.FORM_FEED: {
  3486. this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
  3487. break;
  3488. }
  3489. case CODE_POINTS.QUOTATION_MARK: {
  3490. this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
  3491. token.publicId = '';
  3492. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
  3493. break;
  3494. }
  3495. case CODE_POINTS.APOSTROPHE: {
  3496. this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
  3497. token.publicId = '';
  3498. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
  3499. break;
  3500. }
  3501. case CODE_POINTS.GREATER_THAN_SIGN: {
  3502. this._err(ERR.missingDoctypePublicIdentifier);
  3503. token.forceQuirks = true;
  3504. this.state = State.DATA;
  3505. this.emitCurrentDoctype(token);
  3506. break;
  3507. }
  3508. case CODE_POINTS.EOF: {
  3509. this._err(ERR.eofInDoctype);
  3510. token.forceQuirks = true;
  3511. this.emitCurrentDoctype(token);
  3512. this._emitEOFToken();
  3513. break;
  3514. }
  3515. default: {
  3516. this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
  3517. token.forceQuirks = true;
  3518. this.state = State.BOGUS_DOCTYPE;
  3519. this._stateBogusDoctype(cp);
  3520. }
  3521. }
  3522. }
  3523. // Before DOCTYPE public identifier state
  3524. //------------------------------------------------------------------
  3525. _stateBeforeDoctypePublicIdentifier(cp) {
  3526. const token = this.currentToken;
  3527. switch (cp) {
  3528. case CODE_POINTS.SPACE:
  3529. case CODE_POINTS.LINE_FEED:
  3530. case CODE_POINTS.TABULATION:
  3531. case CODE_POINTS.FORM_FEED: {
  3532. // Ignore whitespace
  3533. break;
  3534. }
  3535. case CODE_POINTS.QUOTATION_MARK: {
  3536. token.publicId = '';
  3537. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
  3538. break;
  3539. }
  3540. case CODE_POINTS.APOSTROPHE: {
  3541. token.publicId = '';
  3542. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
  3543. break;
  3544. }
  3545. case CODE_POINTS.GREATER_THAN_SIGN: {
  3546. this._err(ERR.missingDoctypePublicIdentifier);
  3547. token.forceQuirks = true;
  3548. this.state = State.DATA;
  3549. this.emitCurrentDoctype(token);
  3550. break;
  3551. }
  3552. case CODE_POINTS.EOF: {
  3553. this._err(ERR.eofInDoctype);
  3554. token.forceQuirks = true;
  3555. this.emitCurrentDoctype(token);
  3556. this._emitEOFToken();
  3557. break;
  3558. }
  3559. default: {
  3560. this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
  3561. token.forceQuirks = true;
  3562. this.state = State.BOGUS_DOCTYPE;
  3563. this._stateBogusDoctype(cp);
  3564. }
  3565. }
  3566. }
  3567. // DOCTYPE public identifier (double-quoted) state
  3568. //------------------------------------------------------------------
  3569. _stateDoctypePublicIdentifierDoubleQuoted(cp) {
  3570. const token = this.currentToken;
  3571. switch (cp) {
  3572. case CODE_POINTS.QUOTATION_MARK: {
  3573. this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
  3574. break;
  3575. }
  3576. case CODE_POINTS.NULL: {
  3577. this._err(ERR.unexpectedNullCharacter);
  3578. token.publicId += REPLACEMENT_CHARACTER;
  3579. break;
  3580. }
  3581. case CODE_POINTS.GREATER_THAN_SIGN: {
  3582. this._err(ERR.abruptDoctypePublicIdentifier);
  3583. token.forceQuirks = true;
  3584. this.emitCurrentDoctype(token);
  3585. this.state = State.DATA;
  3586. break;
  3587. }
  3588. case CODE_POINTS.EOF: {
  3589. this._err(ERR.eofInDoctype);
  3590. token.forceQuirks = true;
  3591. this.emitCurrentDoctype(token);
  3592. this._emitEOFToken();
  3593. break;
  3594. }
  3595. default: {
  3596. token.publicId += String.fromCodePoint(cp);
  3597. }
  3598. }
  3599. }
  3600. // DOCTYPE public identifier (single-quoted) state
  3601. //------------------------------------------------------------------
  3602. _stateDoctypePublicIdentifierSingleQuoted(cp) {
  3603. const token = this.currentToken;
  3604. switch (cp) {
  3605. case CODE_POINTS.APOSTROPHE: {
  3606. this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
  3607. break;
  3608. }
  3609. case CODE_POINTS.NULL: {
  3610. this._err(ERR.unexpectedNullCharacter);
  3611. token.publicId += REPLACEMENT_CHARACTER;
  3612. break;
  3613. }
  3614. case CODE_POINTS.GREATER_THAN_SIGN: {
  3615. this._err(ERR.abruptDoctypePublicIdentifier);
  3616. token.forceQuirks = true;
  3617. this.emitCurrentDoctype(token);
  3618. this.state = State.DATA;
  3619. break;
  3620. }
  3621. case CODE_POINTS.EOF: {
  3622. this._err(ERR.eofInDoctype);
  3623. token.forceQuirks = true;
  3624. this.emitCurrentDoctype(token);
  3625. this._emitEOFToken();
  3626. break;
  3627. }
  3628. default: {
  3629. token.publicId += String.fromCodePoint(cp);
  3630. }
  3631. }
  3632. }
  3633. // After DOCTYPE public identifier state
  3634. //------------------------------------------------------------------
  3635. _stateAfterDoctypePublicIdentifier(cp) {
  3636. const token = this.currentToken;
  3637. switch (cp) {
  3638. case CODE_POINTS.SPACE:
  3639. case CODE_POINTS.LINE_FEED:
  3640. case CODE_POINTS.TABULATION:
  3641. case CODE_POINTS.FORM_FEED: {
  3642. this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
  3643. break;
  3644. }
  3645. case CODE_POINTS.GREATER_THAN_SIGN: {
  3646. this.state = State.DATA;
  3647. this.emitCurrentDoctype(token);
  3648. break;
  3649. }
  3650. case CODE_POINTS.QUOTATION_MARK: {
  3651. this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
  3652. token.systemId = '';
  3653. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  3654. break;
  3655. }
  3656. case CODE_POINTS.APOSTROPHE: {
  3657. this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
  3658. token.systemId = '';
  3659. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  3660. break;
  3661. }
  3662. case CODE_POINTS.EOF: {
  3663. this._err(ERR.eofInDoctype);
  3664. token.forceQuirks = true;
  3665. this.emitCurrentDoctype(token);
  3666. this._emitEOFToken();
  3667. break;
  3668. }
  3669. default: {
  3670. this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  3671. token.forceQuirks = true;
  3672. this.state = State.BOGUS_DOCTYPE;
  3673. this._stateBogusDoctype(cp);
  3674. }
  3675. }
  3676. }
  3677. // Between DOCTYPE public and system identifiers state
  3678. //------------------------------------------------------------------
  3679. _stateBetweenDoctypePublicAndSystemIdentifiers(cp) {
  3680. const token = this.currentToken;
  3681. switch (cp) {
  3682. case CODE_POINTS.SPACE:
  3683. case CODE_POINTS.LINE_FEED:
  3684. case CODE_POINTS.TABULATION:
  3685. case CODE_POINTS.FORM_FEED: {
  3686. // Ignore whitespace
  3687. break;
  3688. }
  3689. case CODE_POINTS.GREATER_THAN_SIGN: {
  3690. this.emitCurrentDoctype(token);
  3691. this.state = State.DATA;
  3692. break;
  3693. }
  3694. case CODE_POINTS.QUOTATION_MARK: {
  3695. token.systemId = '';
  3696. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  3697. break;
  3698. }
  3699. case CODE_POINTS.APOSTROPHE: {
  3700. token.systemId = '';
  3701. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  3702. break;
  3703. }
  3704. case CODE_POINTS.EOF: {
  3705. this._err(ERR.eofInDoctype);
  3706. token.forceQuirks = true;
  3707. this.emitCurrentDoctype(token);
  3708. this._emitEOFToken();
  3709. break;
  3710. }
  3711. default: {
  3712. this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  3713. token.forceQuirks = true;
  3714. this.state = State.BOGUS_DOCTYPE;
  3715. this._stateBogusDoctype(cp);
  3716. }
  3717. }
  3718. }
  3719. // After DOCTYPE system keyword state
  3720. //------------------------------------------------------------------
  3721. _stateAfterDoctypeSystemKeyword(cp) {
  3722. const token = this.currentToken;
  3723. switch (cp) {
  3724. case CODE_POINTS.SPACE:
  3725. case CODE_POINTS.LINE_FEED:
  3726. case CODE_POINTS.TABULATION:
  3727. case CODE_POINTS.FORM_FEED: {
  3728. this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
  3729. break;
  3730. }
  3731. case CODE_POINTS.QUOTATION_MARK: {
  3732. this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
  3733. token.systemId = '';
  3734. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  3735. break;
  3736. }
  3737. case CODE_POINTS.APOSTROPHE: {
  3738. this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
  3739. token.systemId = '';
  3740. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  3741. break;
  3742. }
  3743. case CODE_POINTS.GREATER_THAN_SIGN: {
  3744. this._err(ERR.missingDoctypeSystemIdentifier);
  3745. token.forceQuirks = true;
  3746. this.state = State.DATA;
  3747. this.emitCurrentDoctype(token);
  3748. break;
  3749. }
  3750. case CODE_POINTS.EOF: {
  3751. this._err(ERR.eofInDoctype);
  3752. token.forceQuirks = true;
  3753. this.emitCurrentDoctype(token);
  3754. this._emitEOFToken();
  3755. break;
  3756. }
  3757. default: {
  3758. this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  3759. token.forceQuirks = true;
  3760. this.state = State.BOGUS_DOCTYPE;
  3761. this._stateBogusDoctype(cp);
  3762. }
  3763. }
  3764. }
  3765. // Before DOCTYPE system identifier state
  3766. //------------------------------------------------------------------
  3767. _stateBeforeDoctypeSystemIdentifier(cp) {
  3768. const token = this.currentToken;
  3769. switch (cp) {
  3770. case CODE_POINTS.SPACE:
  3771. case CODE_POINTS.LINE_FEED:
  3772. case CODE_POINTS.TABULATION:
  3773. case CODE_POINTS.FORM_FEED: {
  3774. // Ignore whitespace
  3775. break;
  3776. }
  3777. case CODE_POINTS.QUOTATION_MARK: {
  3778. token.systemId = '';
  3779. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  3780. break;
  3781. }
  3782. case CODE_POINTS.APOSTROPHE: {
  3783. token.systemId = '';
  3784. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  3785. break;
  3786. }
  3787. case CODE_POINTS.GREATER_THAN_SIGN: {
  3788. this._err(ERR.missingDoctypeSystemIdentifier);
  3789. token.forceQuirks = true;
  3790. this.state = State.DATA;
  3791. this.emitCurrentDoctype(token);
  3792. break;
  3793. }
  3794. case CODE_POINTS.EOF: {
  3795. this._err(ERR.eofInDoctype);
  3796. token.forceQuirks = true;
  3797. this.emitCurrentDoctype(token);
  3798. this._emitEOFToken();
  3799. break;
  3800. }
  3801. default: {
  3802. this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  3803. token.forceQuirks = true;
  3804. this.state = State.BOGUS_DOCTYPE;
  3805. this._stateBogusDoctype(cp);
  3806. }
  3807. }
  3808. }
  3809. // DOCTYPE system identifier (double-quoted) state
  3810. //------------------------------------------------------------------
  3811. _stateDoctypeSystemIdentifierDoubleQuoted(cp) {
  3812. const token = this.currentToken;
  3813. switch (cp) {
  3814. case CODE_POINTS.QUOTATION_MARK: {
  3815. this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
  3816. break;
  3817. }
  3818. case CODE_POINTS.NULL: {
  3819. this._err(ERR.unexpectedNullCharacter);
  3820. token.systemId += REPLACEMENT_CHARACTER;
  3821. break;
  3822. }
  3823. case CODE_POINTS.GREATER_THAN_SIGN: {
  3824. this._err(ERR.abruptDoctypeSystemIdentifier);
  3825. token.forceQuirks = true;
  3826. this.emitCurrentDoctype(token);
  3827. this.state = State.DATA;
  3828. break;
  3829. }
  3830. case CODE_POINTS.EOF: {
  3831. this._err(ERR.eofInDoctype);
  3832. token.forceQuirks = true;
  3833. this.emitCurrentDoctype(token);
  3834. this._emitEOFToken();
  3835. break;
  3836. }
  3837. default: {
  3838. token.systemId += String.fromCodePoint(cp);
  3839. }
  3840. }
  3841. }
  3842. // DOCTYPE system identifier (single-quoted) state
  3843. //------------------------------------------------------------------
  3844. _stateDoctypeSystemIdentifierSingleQuoted(cp) {
  3845. const token = this.currentToken;
  3846. switch (cp) {
  3847. case CODE_POINTS.APOSTROPHE: {
  3848. this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
  3849. break;
  3850. }
  3851. case CODE_POINTS.NULL: {
  3852. this._err(ERR.unexpectedNullCharacter);
  3853. token.systemId += REPLACEMENT_CHARACTER;
  3854. break;
  3855. }
  3856. case CODE_POINTS.GREATER_THAN_SIGN: {
  3857. this._err(ERR.abruptDoctypeSystemIdentifier);
  3858. token.forceQuirks = true;
  3859. this.emitCurrentDoctype(token);
  3860. this.state = State.DATA;
  3861. break;
  3862. }
  3863. case CODE_POINTS.EOF: {
  3864. this._err(ERR.eofInDoctype);
  3865. token.forceQuirks = true;
  3866. this.emitCurrentDoctype(token);
  3867. this._emitEOFToken();
  3868. break;
  3869. }
  3870. default: {
  3871. token.systemId += String.fromCodePoint(cp);
  3872. }
  3873. }
  3874. }
  3875. // After DOCTYPE system identifier state
  3876. //------------------------------------------------------------------
  3877. _stateAfterDoctypeSystemIdentifier(cp) {
  3878. const token = this.currentToken;
  3879. switch (cp) {
  3880. case CODE_POINTS.SPACE:
  3881. case CODE_POINTS.LINE_FEED:
  3882. case CODE_POINTS.TABULATION:
  3883. case CODE_POINTS.FORM_FEED: {
  3884. // Ignore whitespace
  3885. break;
  3886. }
  3887. case CODE_POINTS.GREATER_THAN_SIGN: {
  3888. this.emitCurrentDoctype(token);
  3889. this.state = State.DATA;
  3890. break;
  3891. }
  3892. case CODE_POINTS.EOF: {
  3893. this._err(ERR.eofInDoctype);
  3894. token.forceQuirks = true;
  3895. this.emitCurrentDoctype(token);
  3896. this._emitEOFToken();
  3897. break;
  3898. }
  3899. default: {
  3900. this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
  3901. this.state = State.BOGUS_DOCTYPE;
  3902. this._stateBogusDoctype(cp);
  3903. }
  3904. }
  3905. }
  3906. // Bogus DOCTYPE state
  3907. //------------------------------------------------------------------
  3908. _stateBogusDoctype(cp) {
  3909. const token = this.currentToken;
  3910. switch (cp) {
  3911. case CODE_POINTS.GREATER_THAN_SIGN: {
  3912. this.emitCurrentDoctype(token);
  3913. this.state = State.DATA;
  3914. break;
  3915. }
  3916. case CODE_POINTS.NULL: {
  3917. this._err(ERR.unexpectedNullCharacter);
  3918. break;
  3919. }
  3920. case CODE_POINTS.EOF: {
  3921. this.emitCurrentDoctype(token);
  3922. this._emitEOFToken();
  3923. break;
  3924. }
  3925. // Do nothing
  3926. }
  3927. }
  3928. // CDATA section state
  3929. //------------------------------------------------------------------
  3930. _stateCdataSection(cp) {
  3931. switch (cp) {
  3932. case CODE_POINTS.RIGHT_SQUARE_BRACKET: {
  3933. this.state = State.CDATA_SECTION_BRACKET;
  3934. break;
  3935. }
  3936. case CODE_POINTS.EOF: {
  3937. this._err(ERR.eofInCdata);
  3938. this._emitEOFToken();
  3939. break;
  3940. }
  3941. default: {
  3942. this._emitCodePoint(cp);
  3943. }
  3944. }
  3945. }
  3946. // CDATA section bracket state
  3947. //------------------------------------------------------------------
  3948. _stateCdataSectionBracket(cp) {
  3949. if (cp === CODE_POINTS.RIGHT_SQUARE_BRACKET) {
  3950. this.state = State.CDATA_SECTION_END;
  3951. }
  3952. else {
  3953. this._emitChars(']');
  3954. this.state = State.CDATA_SECTION;
  3955. this._stateCdataSection(cp);
  3956. }
  3957. }
  3958. // CDATA section end state
  3959. //------------------------------------------------------------------
  3960. _stateCdataSectionEnd(cp) {
  3961. switch (cp) {
  3962. case CODE_POINTS.GREATER_THAN_SIGN: {
  3963. this.state = State.DATA;
  3964. break;
  3965. }
  3966. case CODE_POINTS.RIGHT_SQUARE_BRACKET: {
  3967. this._emitChars(']');
  3968. break;
  3969. }
  3970. default: {
  3971. this._emitChars(']]');
  3972. this.state = State.CDATA_SECTION;
  3973. this._stateCdataSection(cp);
  3974. }
  3975. }
  3976. }
  3977. // Character reference state
  3978. //------------------------------------------------------------------
  3979. _stateCharacterReference() {
  3980. let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
  3981. if (length < 0) {
  3982. if (this.preprocessor.lastChunkWritten) {
  3983. length = this.entityDecoder.end();
  3984. }
  3985. else {
  3986. // Wait for the rest of the entity.
  3987. this.active = false;
  3988. // Mark the entire buffer as read.
  3989. this.preprocessor.pos = this.preprocessor.html.length - 1;
  3990. this.consumedAfterSnapshot = 0;
  3991. this.preprocessor.endOfChunkHit = true;
  3992. return;
  3993. }
  3994. }
  3995. if (length === 0) {
  3996. // This was not a valid entity. Go back to the beginning, and
  3997. // figure out what to do.
  3998. this.preprocessor.pos = this.entityStartPos;
  3999. this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
  4000. this.state =
  4001. !this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
  4002. ? State.AMBIGUOUS_AMPERSAND
  4003. : this.returnState;
  4004. }
  4005. else {
  4006. // We successfully parsed an entity. Switch to the return state.
  4007. this.state = this.returnState;
  4008. }
  4009. }
  4010. // Ambiguous ampersand state
  4011. //------------------------------------------------------------------
  4012. _stateAmbiguousAmpersand(cp) {
  4013. if (isAsciiAlphaNumeric(cp)) {
  4014. this._flushCodePointConsumedAsCharacterReference(cp);
  4015. }
  4016. else {
  4017. if (cp === CODE_POINTS.SEMICOLON) {
  4018. this._err(ERR.unknownNamedCharacterReference);
  4019. }
  4020. this.state = this.returnState;
  4021. this._callState(cp);
  4022. }
  4023. }
  4024. }
  4025. //Element utils
  4026. const IMPLICIT_END_TAG_REQUIRED = new Set([TAG_ID.DD, TAG_ID.DT, TAG_ID.LI, TAG_ID.OPTGROUP, TAG_ID.OPTION, TAG_ID.P, TAG_ID.RB, TAG_ID.RP, TAG_ID.RT, TAG_ID.RTC]);
  4027. const IMPLICIT_END_TAG_REQUIRED_THOROUGHLY = new Set([
  4028. ...IMPLICIT_END_TAG_REQUIRED,
  4029. TAG_ID.CAPTION,
  4030. TAG_ID.COLGROUP,
  4031. TAG_ID.TBODY,
  4032. TAG_ID.TD,
  4033. TAG_ID.TFOOT,
  4034. TAG_ID.TH,
  4035. TAG_ID.THEAD,
  4036. TAG_ID.TR,
  4037. ]);
  4038. const SCOPING_ELEMENTS_HTML = new Set([
  4039. TAG_ID.APPLET,
  4040. TAG_ID.CAPTION,
  4041. TAG_ID.HTML,
  4042. TAG_ID.MARQUEE,
  4043. TAG_ID.OBJECT,
  4044. TAG_ID.TABLE,
  4045. TAG_ID.TD,
  4046. TAG_ID.TEMPLATE,
  4047. TAG_ID.TH,
  4048. ]);
  4049. const SCOPING_ELEMENTS_HTML_LIST = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.OL, TAG_ID.UL]);
  4050. const SCOPING_ELEMENTS_HTML_BUTTON = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.BUTTON]);
  4051. const SCOPING_ELEMENTS_MATHML = new Set([TAG_ID.ANNOTATION_XML, TAG_ID.MI, TAG_ID.MN, TAG_ID.MO, TAG_ID.MS, TAG_ID.MTEXT]);
  4052. const SCOPING_ELEMENTS_SVG = new Set([TAG_ID.DESC, TAG_ID.FOREIGN_OBJECT, TAG_ID.TITLE]);
  4053. const TABLE_ROW_CONTEXT = new Set([TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML]);
  4054. const TABLE_BODY_CONTEXT = new Set([TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TEMPLATE, TAG_ID.HTML]);
  4055. const TABLE_CONTEXT = new Set([TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML]);
  4056. const TABLE_CELLS = new Set([TAG_ID.TD, TAG_ID.TH]);
  4057. //Stack of open elements
  4058. class OpenElementStack {
  4059. get currentTmplContentOrNode() {
  4060. return this._isInTemplate() ? this.treeAdapter.getTemplateContent(this.current) : this.current;
  4061. }
  4062. constructor(document, treeAdapter, handler) {
  4063. this.treeAdapter = treeAdapter;
  4064. this.handler = handler;
  4065. this.items = [];
  4066. this.tagIDs = [];
  4067. this.stackTop = -1;
  4068. this.tmplCount = 0;
  4069. this.currentTagId = TAG_ID.UNKNOWN;
  4070. this.current = document;
  4071. }
  4072. //Index of element
  4073. _indexOf(element) {
  4074. return this.items.lastIndexOf(element, this.stackTop);
  4075. }
  4076. //Update current element
  4077. _isInTemplate() {
  4078. return this.currentTagId === TAG_ID.TEMPLATE && this.treeAdapter.getNamespaceURI(this.current) === NS.HTML;
  4079. }
  4080. _updateCurrentElement() {
  4081. this.current = this.items[this.stackTop];
  4082. this.currentTagId = this.tagIDs[this.stackTop];
  4083. }
  4084. //Mutations
  4085. push(element, tagID) {
  4086. this.stackTop++;
  4087. this.items[this.stackTop] = element;
  4088. this.current = element;
  4089. this.tagIDs[this.stackTop] = tagID;
  4090. this.currentTagId = tagID;
  4091. if (this._isInTemplate()) {
  4092. this.tmplCount++;
  4093. }
  4094. this.handler.onItemPush(element, tagID, true);
  4095. }
  4096. pop() {
  4097. const popped = this.current;
  4098. if (this.tmplCount > 0 && this._isInTemplate()) {
  4099. this.tmplCount--;
  4100. }
  4101. this.stackTop--;
  4102. this._updateCurrentElement();
  4103. this.handler.onItemPop(popped, true);
  4104. }
  4105. replace(oldElement, newElement) {
  4106. const idx = this._indexOf(oldElement);
  4107. this.items[idx] = newElement;
  4108. if (idx === this.stackTop) {
  4109. this.current = newElement;
  4110. }
  4111. }
  4112. insertAfter(referenceElement, newElement, newElementID) {
  4113. const insertionIdx = this._indexOf(referenceElement) + 1;
  4114. this.items.splice(insertionIdx, 0, newElement);
  4115. this.tagIDs.splice(insertionIdx, 0, newElementID);
  4116. this.stackTop++;
  4117. if (insertionIdx === this.stackTop) {
  4118. this._updateCurrentElement();
  4119. }
  4120. this.handler.onItemPush(this.current, this.currentTagId, insertionIdx === this.stackTop);
  4121. }
  4122. popUntilTagNamePopped(tagName) {
  4123. let targetIdx = this.stackTop + 1;
  4124. do {
  4125. targetIdx = this.tagIDs.lastIndexOf(tagName, targetIdx - 1);
  4126. } while (targetIdx > 0 && this.treeAdapter.getNamespaceURI(this.items[targetIdx]) !== NS.HTML);
  4127. this.shortenToLength(targetIdx < 0 ? 0 : targetIdx);
  4128. }
  4129. shortenToLength(idx) {
  4130. while (this.stackTop >= idx) {
  4131. const popped = this.current;
  4132. if (this.tmplCount > 0 && this._isInTemplate()) {
  4133. this.tmplCount -= 1;
  4134. }
  4135. this.stackTop--;
  4136. this._updateCurrentElement();
  4137. this.handler.onItemPop(popped, this.stackTop < idx);
  4138. }
  4139. }
  4140. popUntilElementPopped(element) {
  4141. const idx = this._indexOf(element);
  4142. this.shortenToLength(idx < 0 ? 0 : idx);
  4143. }
  4144. popUntilPopped(tagNames, targetNS) {
  4145. const idx = this._indexOfTagNames(tagNames, targetNS);
  4146. this.shortenToLength(idx < 0 ? 0 : idx);
  4147. }
  4148. popUntilNumberedHeaderPopped() {
  4149. this.popUntilPopped(NUMBERED_HEADERS, NS.HTML);
  4150. }
  4151. popUntilTableCellPopped() {
  4152. this.popUntilPopped(TABLE_CELLS, NS.HTML);
  4153. }
  4154. popAllUpToHtmlElement() {
  4155. //NOTE: here we assume that the root <html> element is always first in the open element stack, so
  4156. //we perform this fast stack clean up.
  4157. this.tmplCount = 0;
  4158. this.shortenToLength(1);
  4159. }
  4160. _indexOfTagNames(tagNames, namespace) {
  4161. for (let i = this.stackTop; i >= 0; i--) {
  4162. if (tagNames.has(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
  4163. return i;
  4164. }
  4165. }
  4166. return -1;
  4167. }
  4168. clearBackTo(tagNames, targetNS) {
  4169. const idx = this._indexOfTagNames(tagNames, targetNS);
  4170. this.shortenToLength(idx + 1);
  4171. }
  4172. clearBackToTableContext() {
  4173. this.clearBackTo(TABLE_CONTEXT, NS.HTML);
  4174. }
  4175. clearBackToTableBodyContext() {
  4176. this.clearBackTo(TABLE_BODY_CONTEXT, NS.HTML);
  4177. }
  4178. clearBackToTableRowContext() {
  4179. this.clearBackTo(TABLE_ROW_CONTEXT, NS.HTML);
  4180. }
  4181. remove(element) {
  4182. const idx = this._indexOf(element);
  4183. if (idx >= 0) {
  4184. if (idx === this.stackTop) {
  4185. this.pop();
  4186. }
  4187. else {
  4188. this.items.splice(idx, 1);
  4189. this.tagIDs.splice(idx, 1);
  4190. this.stackTop--;
  4191. this._updateCurrentElement();
  4192. this.handler.onItemPop(element, false);
  4193. }
  4194. }
  4195. }
  4196. //Search
  4197. tryPeekProperlyNestedBodyElement() {
  4198. //Properly nested <body> element (should be second element in stack).
  4199. return this.stackTop >= 1 && this.tagIDs[1] === TAG_ID.BODY ? this.items[1] : null;
  4200. }
  4201. contains(element) {
  4202. return this._indexOf(element) > -1;
  4203. }
  4204. getCommonAncestor(element) {
  4205. const elementIdx = this._indexOf(element) - 1;
  4206. return elementIdx >= 0 ? this.items[elementIdx] : null;
  4207. }
  4208. isRootHtmlElementCurrent() {
  4209. return this.stackTop === 0 && this.tagIDs[0] === TAG_ID.HTML;
  4210. }
  4211. //Element in scope
  4212. hasInDynamicScope(tagName, htmlScope) {
  4213. for (let i = this.stackTop; i >= 0; i--) {
  4214. const tn = this.tagIDs[i];
  4215. switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
  4216. case NS.HTML: {
  4217. if (tn === tagName)
  4218. return true;
  4219. if (htmlScope.has(tn))
  4220. return false;
  4221. break;
  4222. }
  4223. case NS.SVG: {
  4224. if (SCOPING_ELEMENTS_SVG.has(tn))
  4225. return false;
  4226. break;
  4227. }
  4228. case NS.MATHML: {
  4229. if (SCOPING_ELEMENTS_MATHML.has(tn))
  4230. return false;
  4231. break;
  4232. }
  4233. }
  4234. }
  4235. return true;
  4236. }
  4237. hasInScope(tagName) {
  4238. return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML);
  4239. }
  4240. hasInListItemScope(tagName) {
  4241. return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_LIST);
  4242. }
  4243. hasInButtonScope(tagName) {
  4244. return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_BUTTON);
  4245. }
  4246. hasNumberedHeaderInScope() {
  4247. for (let i = this.stackTop; i >= 0; i--) {
  4248. const tn = this.tagIDs[i];
  4249. switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
  4250. case NS.HTML: {
  4251. if (NUMBERED_HEADERS.has(tn))
  4252. return true;
  4253. if (SCOPING_ELEMENTS_HTML.has(tn))
  4254. return false;
  4255. break;
  4256. }
  4257. case NS.SVG: {
  4258. if (SCOPING_ELEMENTS_SVG.has(tn))
  4259. return false;
  4260. break;
  4261. }
  4262. case NS.MATHML: {
  4263. if (SCOPING_ELEMENTS_MATHML.has(tn))
  4264. return false;
  4265. break;
  4266. }
  4267. }
  4268. }
  4269. return true;
  4270. }
  4271. hasInTableScope(tagName) {
  4272. for (let i = this.stackTop; i >= 0; i--) {
  4273. if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
  4274. continue;
  4275. }
  4276. switch (this.tagIDs[i]) {
  4277. case tagName: {
  4278. return true;
  4279. }
  4280. case TAG_ID.TABLE:
  4281. case TAG_ID.HTML: {
  4282. return false;
  4283. }
  4284. }
  4285. }
  4286. return true;
  4287. }
  4288. hasTableBodyContextInTableScope() {
  4289. for (let i = this.stackTop; i >= 0; i--) {
  4290. if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
  4291. continue;
  4292. }
  4293. switch (this.tagIDs[i]) {
  4294. case TAG_ID.TBODY:
  4295. case TAG_ID.THEAD:
  4296. case TAG_ID.TFOOT: {
  4297. return true;
  4298. }
  4299. case TAG_ID.TABLE:
  4300. case TAG_ID.HTML: {
  4301. return false;
  4302. }
  4303. }
  4304. }
  4305. return true;
  4306. }
  4307. hasInSelectScope(tagName) {
  4308. for (let i = this.stackTop; i >= 0; i--) {
  4309. if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
  4310. continue;
  4311. }
  4312. switch (this.tagIDs[i]) {
  4313. case tagName: {
  4314. return true;
  4315. }
  4316. case TAG_ID.OPTION:
  4317. case TAG_ID.OPTGROUP: {
  4318. break;
  4319. }
  4320. default: {
  4321. return false;
  4322. }
  4323. }
  4324. }
  4325. return true;
  4326. }
  4327. //Implied end tags
  4328. generateImpliedEndTags() {
  4329. while (IMPLICIT_END_TAG_REQUIRED.has(this.currentTagId)) {
  4330. this.pop();
  4331. }
  4332. }
  4333. generateImpliedEndTagsThoroughly() {
  4334. while (IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.has(this.currentTagId)) {
  4335. this.pop();
  4336. }
  4337. }
  4338. generateImpliedEndTagsWithExclusion(exclusionId) {
  4339. while (this.currentTagId !== exclusionId && IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.has(this.currentTagId)) {
  4340. this.pop();
  4341. }
  4342. }
  4343. }
  4344. //Const
  4345. const NOAH_ARK_CAPACITY = 3;
  4346. var EntryType;
  4347. (function (EntryType) {
  4348. EntryType[EntryType["Marker"] = 0] = "Marker";
  4349. EntryType[EntryType["Element"] = 1] = "Element";
  4350. })(EntryType || (EntryType = {}));
  4351. const MARKER = { type: EntryType.Marker };
  4352. //List of formatting elements
  /**
   * List of formatting entries used by the parser.
   *
   * Entries are stored newest-first: `pushElement` and `insertMarker` add at
   * index 0. An entry is either the shared `MARKER` object or
   * `{ type: EntryType.Element, element, token }`.
   */
  class FormattingElementList {
    /**
     * @param treeAdapter Adapter used to read tag names, namespaces and
     *   attribute lists from elements.
     */
    constructor(treeAdapter) {
      this.treeAdapter = treeAdapter;
      this.entries = [];
      // Entry used as the insertion point by `insertElementAfterBookmark`.
      // It is only read here; callers assign it.
      this.bookmark = null;
    }
    //Noah's Ark condition
    //OPTIMIZATION: at first we try to find possible candidates for exclusion using
    //lightweight heuristics without thorough attributes check.
    /**
     * Collects entries, from index 0 up to (not including) the first marker,
     * whose element has the same tag name, namespace and attribute count as
     * `newElement`.
     *
     * @param newElement Element about to be pushed.
     * @param neAttrs Attribute list of `newElement`.
     * @returns Array of `{ idx, attrs }` in ascending `idx` order.
     */
    _getNoahArkConditionCandidates(newElement, neAttrs) {
      const candidates = [];
      const neAttrsLength = neAttrs.length;
      const neTagName = this.treeAdapter.getTagName(newElement);
      const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
      for (let i = 0; i < this.entries.length; i++) {
        const entry = this.entries[i];
        if (entry.type === EntryType.Marker) {
          break;
        }
        const { element } = entry;
        if (
          this.treeAdapter.getTagName(element) === neTagName &&
          this.treeAdapter.getNamespaceURI(element) === neNamespaceURI
        ) {
          const elementAttrs = this.treeAdapter.getAttrList(element);
          if (elementAttrs.length === neAttrsLength) {
            candidates.push({ idx: i, attrs: elementAttrs });
          }
        }
      }
      return candidates;
    }
    /**
     * Removes existing entries equivalent to `newElement` (same tag name,
     * namespace and attributes) so that fewer than NOAH_ARK_CAPACITY of them
     * remain before the new element is pushed.
     *
     * @param newElement Element about to be pushed.
     */
    _ensureNoahArkCondition(newElement) {
      if (this.entries.length < NOAH_ARK_CAPACITY) {
        return;
      }
      const neAttrs = this.treeAdapter.getAttrList(newElement);
      const candidates = this._getNoahArkConditionCandidates(newElement, neAttrs);
      if (candidates.length < NOAH_ARK_CAPACITY) {
        return;
      }
      //NOTE: build attrs map for the new element, so we can perform fast lookups
      const neAttrsMap = new Map(neAttrs.map((neAttr) => [neAttr.name, neAttr.value]));
      let validCandidates = 0;
      // Candidate indices were captured before any removal. Every splice
      // shifts the later entries down by one, so compensate for the removals
      // already done; otherwise a second removal would hit the wrong entry.
      let removedCount = 0;
      //NOTE: remove bottommost candidates until the Noah's Ark condition is met
      for (const candidate of candidates) {
        // We know that `candidate.attrs.length === neAttrs.length`
        if (candidate.attrs.every((cAttr) => neAttrsMap.get(cAttr.name) === cAttr.value)) {
          validCandidates += 1;
          if (validCandidates >= NOAH_ARK_CAPACITY) {
            this.entries.splice(candidate.idx - removedCount, 1);
            removedCount += 1;
          }
        }
      }
    }
    //Mutations
    /** Adds the shared marker entry at the front of the list. */
    insertMarker() {
      this.entries.unshift(MARKER);
    }
    /**
     * Applies the Noah's Ark check, then adds an element entry at the front.
     *
     * @param element Element to record.
     * @param token Token stored alongside the element.
     */
    pushElement(element, token) {
      this._ensureNoahArkCondition(element);
      this.entries.unshift({
        type: EntryType.Element,
        element,
        token,
      });
    }
    /**
     * Inserts an element entry at the index currently occupied by
     * `this.bookmark`.
     *
     * NOTE(review): if the bookmark is not in the list, `indexOf` yields -1
     * and `splice` inserts before the last entry — callers are presumably
     * expected to set a valid bookmark first.
     */
    insertElementAfterBookmark(element, token) {
      const bookmarkIdx = this.entries.indexOf(this.bookmark);
      this.entries.splice(bookmarkIdx, 0, {
        type: EntryType.Element,
        element,
        token,
      });
    }
    /** Removes `entry` from the list; does nothing if it is not present. */
    removeEntry(entry) {
      const entryIndex = this.entries.indexOf(entry);
      if (entryIndex >= 0) {
        this.entries.splice(entryIndex, 1);
      }
    }
    /**
     * Clears the list of formatting elements up to the last marker.
     *
     * Removes everything from index 0 through the first marker found; when
     * there is no marker the whole list is emptied.
     *
     * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
     */
    clearToLastMarker() {
      const markerIdx = this.entries.indexOf(MARKER);
      if (markerIdx >= 0) {
        this.entries.splice(0, markerIdx + 1);
      } else {
        this.entries.length = 0;
      }
    }
    //Search
    /**
     * Finds the first element entry with the given tag name, searching from
     * index 0 and stopping at the first marker.
     *
     * @returns The entry, or `null` when a marker is reached first or nothing
     *   matches.
     */
    getElementEntryInScopeWithTagName(tagName) {
      const entry = this.entries.find(
        (candidate) =>
          candidate.type === EntryType.Marker || this.treeAdapter.getTagName(candidate.element) === tagName,
      );
      return entry && entry.type === EntryType.Element ? entry : null;
    }
    /**
     * @returns The element entry wrapping `element`, or `undefined` when
     *   there is none.
     */
    getElementEntry(element) {
      return this.entries.find((entry) => entry.type === EntryType.Element && entry.element === element);
    }
  }
  /**
   * Default tree adapter: builds and inspects a plain-object tree in which
   * every node has a `nodeName`, parents keep a `childNodes` array and
   * children keep a `parentNode` back-reference.
   */
  const defaultTreeAdapter = {
    //Node construction
    /** Creates an empty document node in no-quirks mode. */
    createDocument() {
      return {
        nodeName: '#document',
        mode: DOCUMENT_MODE.NO_QUIRKS,
        childNodes: [],
      };
    },
    /** Creates an empty document fragment node. */
    createDocumentFragment() {
      return {
        nodeName: '#document-fragment',
        childNodes: [],
      };
    },
    /** Creates a detached element; `attrs` is stored by reference. */
    createElement(tagName, namespaceURI, attrs) {
      return {
        nodeName: tagName,
        tagName,
        attrs,
        namespaceURI,
        childNodes: [],
        parentNode: null,
      };
    },
    /** Creates a detached comment node holding `data`. */
    createCommentNode(data) {
      return {
        nodeName: '#comment',
        data,
        parentNode: null,
      };
    },
    /** Creates a detached text node holding `value`. */
    createTextNode(value) {
      return {
        nodeName: '#text',
        value,
        parentNode: null,
      };
    },
    //Tree mutation
    /** Appends `newNode` as the last child of `parentNode`. */
    appendChild(parentNode, newNode) {
      parentNode.childNodes.push(newNode);
      newNode.parentNode = parentNode;
    },
    /**
     * Inserts `newNode` immediately before `referenceNode`.
     *
     * NOTE(review): when `referenceNode` is not a child of `parentNode`,
     * `indexOf` yields -1 and `splice` inserts before the last child.
     */
    insertBefore(parentNode, newNode, referenceNode) {
      const insertionIdx = parentNode.childNodes.indexOf(referenceNode);
      parentNode.childNodes.splice(insertionIdx, 0, newNode);
      newNode.parentNode = parentNode;
    },
    /** Stores `contentElement` as the `content` of a template element. */
    setTemplateContent(templateElement, contentElement) {
      templateElement.content = contentElement;
    },
    /** Returns the `content` stored on a template element. */
    getTemplateContent(templateElement) {
      return templateElement.content;
    },
    /**
     * Updates the document's existing doctype node, or appends a new one
     * when the document has none.
     */
    setDocumentType(document, name, publicId, systemId) {
      const doctypeNode = document.childNodes.find((node) => node.nodeName === '#documentType');
      if (doctypeNode) {
        doctypeNode.name = name;
        doctypeNode.publicId = publicId;
        doctypeNode.systemId = systemId;
      } else {
        const node = {
          nodeName: '#documentType',
          name,
          publicId,
          systemId,
          parentNode: null,
        };
        defaultTreeAdapter.appendChild(document, node);
      }
    },
    /** Stores the document mode on the document node. */
    setDocumentMode(document, mode) {
      document.mode = mode;
    },
    /** Returns the document mode stored on the document node. */
    getDocumentMode(document) {
      return document.mode;
    },
    /**
     * Removes `node` from its parent and clears its `parentNode`. A node
     * without a parent is left untouched.
     */
    detachNode(node) {
      const parent = node.parentNode;
      if (!parent) {
        return;
      }
      const idx = parent.childNodes.indexOf(node);
      // Only splice when the node really is listed by its parent:
      // `splice(-1, 1)` would otherwise remove the parent's last child.
      if (idx !== -1) {
        parent.childNodes.splice(idx, 1);
      }
      node.parentNode = null;
    },
    /**
     * Appends `text` to `parentNode`, merging into the last child when that
     * child is already a text node.
     */
    insertText(parentNode, text) {
      if (parentNode.childNodes.length > 0) {
        const prevNode = parentNode.childNodes[parentNode.childNodes.length - 1];
        if (defaultTreeAdapter.isTextNode(prevNode)) {
          prevNode.value += text;
          return;
        }
      }
      defaultTreeAdapter.appendChild(parentNode, defaultTreeAdapter.createTextNode(text));
    },
    /**
     * Inserts `text` before `referenceNode`, merging into the preceding
     * sibling when that sibling is a text node.
     */
    insertTextBefore(parentNode, text, referenceNode) {
      const prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
      if (prevNode && defaultTreeAdapter.isTextNode(prevNode)) {
        prevNode.value += text;
      } else {
        defaultTreeAdapter.insertBefore(parentNode, defaultTreeAdapter.createTextNode(text), referenceNode);
      }
    },
    /**
     * Copies onto `recipient` every attribute from `attrs` whose name the
     * recipient did not have when the call started.
     */
    adoptAttributes(recipient, attrs) {
      const recipientAttrNames = new Set(recipient.attrs.map((attr) => attr.name));
      for (const attr of attrs) {
        if (!recipientAttrNames.has(attr.name)) {
          recipient.attrs.push(attr);
        }
      }
    },
    //Tree traversing
    /** Returns the first child, or `undefined` when there are no children. */
    getFirstChild(node) {
      return node.childNodes[0];
    },
    /** Returns the live `childNodes` array of `node`. */
    getChildNodes(node) {
      return node.childNodes;
    },
    /** Returns the `parentNode` of `node`. */
    getParentNode(node) {
      return node.parentNode;
    },
    /** Returns the live attribute array of `element`. */
    getAttrList(element) {
      return element.attrs;
    },
    //Node data
    getTagName(element) {
      return element.tagName;
    },
    getNamespaceURI(element) {
      return element.namespaceURI;
    },
    getTextNodeContent(textNode) {
      return textNode.value;
    },
    getCommentNodeContent(commentNode) {
      return commentNode.data;
    },
    getDocumentTypeNodeName(doctypeNode) {
      return doctypeNode.name;
    },
    getDocumentTypeNodePublicId(doctypeNode) {
      return doctypeNode.publicId;
    },
    getDocumentTypeNodeSystemId(doctypeNode) {
      return doctypeNode.systemId;
    },
    //Node types
    isTextNode(node) {
      return node.nodeName === '#text';
    },
    isCommentNode(node) {
      return node.nodeName === '#comment';
    },
    isDocumentTypeNode(node) {
      return node.nodeName === '#documentType';
    },
    /** A node counts as an element when it has an own `tagName` property. */
    isElementNode(node) {
      return Object.prototype.hasOwnProperty.call(node, 'tagName');
    },
    // Source code location
    setNodeSourceCodeLocation(node, location) {
      node.sourceCodeLocation = location;
    },
    getNodeSourceCodeLocation(node) {
      return node.sourceCodeLocation;
    },
    /** Shallow-merges `endLocation` over the node's existing location. */
    updateNodeSourceCodeLocation(node, endLocation) {
      node.sourceCodeLocation = { ...node.sourceCodeLocation, ...endLocation };
    },
  };
  4626. //Const
  // Doctype name / system id accepted by `isConforming`.
  const VALID_DOCTYPE_NAME = 'html';
  const VALID_SYSTEM_ID = 'about:legacy-compat';
  // System id that selects quirks mode in `getDocumentMode`.
  const QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';
  // Lower-case public id prefixes that select quirks mode regardless of the
  // system id.
  const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
    '+//silmaril//dtd html pro v0r11 19970101//',
    '-//as//dtd html 3.0 aswedit + extensions//',
    '-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
    // IETF
    '-//ietf//dtd html 2.0 level 1//',
    '-//ietf//dtd html 2.0 level 2//',
    '-//ietf//dtd html 2.0 strict level 1//',
    '-//ietf//dtd html 2.0 strict level 2//',
    '-//ietf//dtd html 2.0 strict//',
    '-//ietf//dtd html 2.0//',
    '-//ietf//dtd html 2.1e//',
    '-//ietf//dtd html 3.0//',
    '-//ietf//dtd html 3.2 final//',
    '-//ietf//dtd html 3.2//',
    '-//ietf//dtd html 3//',
    '-//ietf//dtd html level 0//',
    '-//ietf//dtd html level 1//',
    '-//ietf//dtd html level 2//',
    '-//ietf//dtd html level 3//',
    '-//ietf//dtd html strict level 0//',
    '-//ietf//dtd html strict level 1//',
    '-//ietf//dtd html strict level 2//',
    '-//ietf//dtd html strict level 3//',
    '-//ietf//dtd html strict//',
    '-//ietf//dtd html//',
    // Vendors
    '-//metrius//dtd metrius presentational//',
    '-//microsoft//dtd internet explorer 2.0 html strict//',
    '-//microsoft//dtd internet explorer 2.0 html//',
    '-//microsoft//dtd internet explorer 2.0 tables//',
    '-//microsoft//dtd internet explorer 3.0 html strict//',
    '-//microsoft//dtd internet explorer 3.0 html//',
    '-//microsoft//dtd internet explorer 3.0 tables//',
    '-//netscape comm. corp.//dtd html//',
    '-//netscape comm. corp.//dtd strict html//',
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
    '-//sq//dtd html 2.0 hotmetal + extensions//',
    '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
    '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
    '-//spyglass//dtd html 2.0 extended//',
    '-//sun microsystems corp.//dtd hotjava html//',
    '-//sun microsystems corp.//dtd hotjava strict html//',
    // W3C
    '-//w3c//dtd html 3 1995-03-24//',
    '-//w3c//dtd html 3.2 draft//',
    '-//w3c//dtd html 3.2 final//',
    '-//w3c//dtd html 3.2//',
    '-//w3c//dtd html 3.2s draft//',
    '-//w3c//dtd html 4.0 frameset//',
    '-//w3c//dtd html 4.0 transitional//',
    '-//w3c//dtd html experimental 19960712//',
    '-//w3c//dtd html experimental 970421//',
    '-//w3c//dtd w3 html//',
    '-//w3o//dtd w3 html 3.0//',
    '-//webtechs//dtd mozilla html 2.0//',
    '-//webtechs//dtd mozilla html//',
  ];
  // Quirks prefixes used when the doctype has no system id: the list above
  // plus the HTML 4.01 frameset/transitional prefixes.
  const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = QUIRKS_MODE_PUBLIC_ID_PREFIXES.concat([
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
  ]);
  // Lower-case public ids that select quirks mode on an exact match.
  const QUIRKS_MODE_PUBLIC_IDS = new Set();
  QUIRKS_MODE_PUBLIC_IDS.add('-//w3o//dtd w3 html strict 3.0//en//');
  QUIRKS_MODE_PUBLIC_IDS.add('-/w3c/dtd html 4.0 transitional/en');
  QUIRKS_MODE_PUBLIC_IDS.add('html');
  // Public id prefixes that select limited-quirks mode.
  const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = [
    '-//w3c//dtd xhtml 1.0 frameset//',
    '-//w3c//dtd xhtml 1.0 transitional//',
  ];
  // Limited-quirks prefixes used when the doctype does have a system id.
  const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = LIMITED_QUIRKS_PUBLIC_ID_PREFIXES.concat([
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
  ]);
  4703. //Utils
  /**
   * Tells whether `publicId` starts with at least one of `prefixes`.
   *
   * @param {string} publicId Public identifier to test.
   * @param {string[]} prefixes Candidate prefixes.
   * @returns {boolean}
   */
  function hasPrefix(publicId, prefixes) {
    for (const prefix of prefixes) {
      if (publicId.startsWith(prefix)) {
        return true;
      }
    }
    return false;
  }
  4707. //API
  /**
   * Checks a doctype token against the accepted form: name equal to
   * VALID_DOCTYPE_NAME, a `null` public id, and a system id that is either
   * `null` or VALID_SYSTEM_ID.
   *
   * @param token Doctype token with `name`, `publicId` and `systemId`.
   * @returns {boolean}
   */
  function isConforming(token) {
    if (token.name !== VALID_DOCTYPE_NAME) {
      return false;
    }
    if (token.publicId !== null) {
      return false;
    }
    const { systemId } = token;
    return systemId === null || systemId === VALID_SYSTEM_ID;
  }
  /**
   * Derives the document mode from a doctype token.
   *
   * Checks run in this order: doctype name, system id (case-insensitive),
   * exact public id, quirks prefixes, limited-quirks prefixes. The prefix
   * lists used depend on whether the system id is `null`.
   *
   * @param token Doctype token with `name`, `publicId` and `systemId`.
   * @returns One of the DOCUMENT_MODE values.
   */
  function getDocumentMode(token) {
    if (token.name !== VALID_DOCTYPE_NAME) {
      return DOCUMENT_MODE.QUIRKS;
    }
    const systemId = token.systemId;
    if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) {
      return DOCUMENT_MODE.QUIRKS;
    }
    if (token.publicId === null) {
      return DOCUMENT_MODE.NO_QUIRKS;
    }
    const publicId = token.publicId.toLowerCase();
    if (QUIRKS_MODE_PUBLIC_IDS.has(publicId)) {
      return DOCUMENT_MODE.QUIRKS;
    }
    const hasNoSystemId = systemId === null;
    const quirksPrefixes = hasNoSystemId
      ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES
      : QUIRKS_MODE_PUBLIC_ID_PREFIXES;
    if (hasPrefix(publicId, quirksPrefixes)) {
      return DOCUMENT_MODE.QUIRKS;
    }
    const limitedQuirksPrefixes = hasNoSystemId
      ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES
      : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES;
    return hasPrefix(publicId, limitedQuirksPrefixes) ? DOCUMENT_MODE.LIMITED_QUIRKS : DOCUMENT_MODE.NO_QUIRKS;
  }
  4739. //MIME types
  // MIME types compared against an `encoding` attribute value in
  // `isHtmlIntegrationPoint`.
  const MIME_TYPES = {
    TEXT_HTML: 'text/html',
    APPLICATION_XML: 'application/xhtml+xml',
  };
  //Attributes
  // Lower-case MathML attribute name and its case-adjusted replacement.
  const DEFINITION_URL_ATTR = 'definitionurl';
  const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL';
  // Maps the lower-cased form of each SVG attribute name to its
  // mixed-case spelling.
  const SVG_ATTRS_ADJUSTMENT_MAP = new Map();
  for (const attr of [
    'attributeName',
    'attributeType',
    'baseFrequency',
    'baseProfile',
    'calcMode',
    'clipPathUnits',
    'diffuseConstant',
    'edgeMode',
    'filterUnits',
    'glyphRef',
    'gradientTransform',
    'gradientUnits',
    'kernelMatrix',
    'kernelUnitLength',
    'keyPoints',
    'keySplines',
    'keyTimes',
    'lengthAdjust',
    'limitingConeAngle',
    'markerHeight',
    'markerUnits',
    'markerWidth',
    'maskContentUnits',
    'maskUnits',
    'numOctaves',
    'pathLength',
    'patternContentUnits',
    'patternTransform',
    'patternUnits',
    'pointsAtX',
    'pointsAtY',
    'pointsAtZ',
    'preserveAlpha',
    'preserveAspectRatio',
    'primitiveUnits',
    'refX',
    'refY',
    'repeatCount',
    'repeatDur',
    'requiredExtensions',
    'requiredFeatures',
    'specularConstant',
    'specularExponent',
    'spreadMethod',
    'startOffset',
    'stdDeviation',
    'stitchTiles',
    'surfaceScale',
    'systemLanguage',
    'tableValues',
    'targetX',
    'targetY',
    'textLength',
    'viewBox',
    'viewTarget',
    'xChannelSelector',
    'yChannelSelector',
    'zoomAndPan',
  ]) {
    SVG_ATTRS_ADJUSTMENT_MAP.set(attr.toLowerCase(), attr);
  }
  // Maps a qualified attribute name as written in the source (for example
  // `xlink:href`) to its `{ prefix, name, namespace }` replacement.
  const XML_ATTRS_ADJUSTMENT_MAP = new Map(
    [
      ['xlink', 'actuate', NS.XLINK],
      ['xlink', 'arcrole', NS.XLINK],
      ['xlink', 'href', NS.XLINK],
      ['xlink', 'role', NS.XLINK],
      ['xlink', 'show', NS.XLINK],
      ['xlink', 'title', NS.XLINK],
      ['xlink', 'type', NS.XLINK],
      ['xml', 'lang', NS.XML],
      ['xml', 'space', NS.XML],
      // Bare `xmlns` has an empty prefix, so its key is the name alone.
      ['', 'xmlns', NS.XMLNS],
      ['xmlns', 'xlink', NS.XMLNS],
    ].map(([prefix, name, namespace]) => [
      prefix === '' ? name : `${prefix}:${name}`,
      { prefix, name, namespace },
    ]),
  );
  4820. //SVG tag names adjustment map
  // Maps the lower-cased form of each SVG tag name to its mixed-case
  // spelling.
  const SVG_TAG_NAMES_ADJUSTMENT_MAP = new Map();
  for (const tagName of [
    'altGlyph',
    'altGlyphDef',
    'altGlyphItem',
    'animateColor',
    'animateMotion',
    'animateTransform',
    'clipPath',
    'feBlend',
    'feColorMatrix',
    'feComponentTransfer',
    'feComposite',
    'feConvolveMatrix',
    'feDiffuseLighting',
    'feDisplacementMap',
    'feDistantLight',
    'feFlood',
    'feFuncA',
    'feFuncB',
    'feFuncG',
    'feFuncR',
    'feGaussianBlur',
    'feImage',
    'feMerge',
    'feMergeNode',
    'feMorphology',
    'feOffset',
    'fePointLight',
    'feSpecularLighting',
    'feSpotLight',
    'feTile',
    'feTurbulence',
    'foreignObject',
    'glyphRef',
    'linearGradient',
    'radialGradient',
    'textPath',
  ]) {
    SVG_TAG_NAMES_ADJUSTMENT_MAP.set(tagName.toLowerCase(), tagName);
  }
  //Tags that cause an exit from foreign content
  // Tag ids for which `causesExit` returns true regardless of attributes.
  const EXITS_FOREIGN_CONTENT = new Set();
  for (const tagId of [
    TAG_ID.B, TAG_ID.BIG, TAG_ID.BLOCKQUOTE, TAG_ID.BODY, TAG_ID.BR,
    TAG_ID.CENTER, TAG_ID.CODE,
    TAG_ID.DD, TAG_ID.DIV, TAG_ID.DL, TAG_ID.DT,
    TAG_ID.EM, TAG_ID.EMBED,
    TAG_ID.H1, TAG_ID.H2, TAG_ID.H3, TAG_ID.H4, TAG_ID.H5, TAG_ID.H6,
    TAG_ID.HEAD, TAG_ID.HR,
    TAG_ID.I, TAG_ID.IMG,
    TAG_ID.LI, TAG_ID.LISTING,
    TAG_ID.MENU, TAG_ID.META,
    TAG_ID.NOBR,
    TAG_ID.OL,
    TAG_ID.P, TAG_ID.PRE,
    TAG_ID.RUBY,
    TAG_ID.S, TAG_ID.SMALL, TAG_ID.SPAN, TAG_ID.STRONG, TAG_ID.STRIKE, TAG_ID.SUB, TAG_ID.SUP,
    TAG_ID.TABLE, TAG_ID.TT,
    TAG_ID.U, TAG_ID.UL,
    TAG_ID.VAR,
  ]) {
    EXITS_FOREIGN_CONTENT.add(tagId);
  }
  4906. //Check exit from foreign content
  /**
   * Tells whether a start tag token is one of the EXITS_FOREIGN_CONTENT
   * tags, or a `font` tag carrying a `color`, `size` or `face` attribute.
   *
   * @param startTagToken Token with `tagID` and `attrs`.
   * @returns {boolean}
   */
  function causesExit(startTagToken) {
    const { tagID } = startTagToken;
    if (tagID === TAG_ID.FONT) {
      const hasPresentationalAttr = startTagToken.attrs.some(
        (attr) => attr.name === ATTRS.COLOR || attr.name === ATTRS.SIZE || attr.name === ATTRS.FACE,
      );
      if (hasPresentationalAttr) {
        return true;
      }
    }
    return EXITS_FOREIGN_CONTENT.has(tagID);
  }
  4913. //Token adjustments
  /**
   * Renames the first attribute called DEFINITION_URL_ATTR on the token to
   * ADJUSTED_DEFINITION_URL_ATTR. The token is modified in place.
   *
   * @param token Token with an `attrs` array.
   */
  function adjustTokenMathMLAttrs(token) {
    const definitionUrlAttr = token.attrs.find((attr) => attr.name === DEFINITION_URL_ATTR);
    if (definitionUrlAttr) {
      definitionUrlAttr.name = ADJUSTED_DEFINITION_URL_ATTR;
    }
  }
  /**
   * Replaces the name of every attribute that has an entry in
   * SVG_ATTRS_ADJUSTMENT_MAP. The token is modified in place.
   *
   * @param token Token with an `attrs` array.
   */
  function adjustTokenSVGAttrs(token) {
    for (const attr of token.attrs) {
      const adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP.get(attr.name);
      if (adjustedAttrName === undefined || adjustedAttrName === null) {
        continue;
      }
      attr.name = adjustedAttrName;
    }
  }
  /**
   * For every attribute whose name has an entry in
   * XML_ATTRS_ADJUSTMENT_MAP, overwrites its `prefix`, `name` and
   * `namespace` with the mapped values. The token is modified in place.
   *
   * @param token Token with an `attrs` array.
   */
  function adjustTokenXMLAttrs(token) {
    for (const attr of token.attrs) {
      const adjustment = XML_ATTRS_ADJUSTMENT_MAP.get(attr.name);
      if (!adjustment) {
        continue;
      }
      const { prefix, name, namespace } = adjustment;
      attr.prefix = prefix;
      attr.name = name;
      attr.namespace = namespace;
    }
  }
  /**
   * When the token's tag name has an entry in SVG_TAG_NAMES_ADJUSTMENT_MAP,
   * replaces `tagName` with the mapped spelling and recomputes `tagID` from
   * it. The token is modified in place.
   *
   * @param token Token with `tagName` and `tagID`.
   */
  function adjustTokenSVGTagName(token) {
    const adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
    if (adjustedTagName === undefined || adjustedTagName === null) {
      return;
    }
    token.tagName = adjustedTagName;
    token.tagID = getTagID(token.tagName);
  }
  4947. //Integration points
  /**
   * True for the `mi`, `mo`, `mn`, `ms` and `mtext` tag ids when the
   * namespace is MathML.
   *
   * @param tn Tag id.
   * @param ns Namespace URI.
   * @returns {boolean}
   */
  function isMathMLTextIntegrationPoint(tn, ns) {
    if (ns !== NS.MATHML) {
      return false;
    }
    switch (tn) {
      case TAG_ID.MI:
      case TAG_ID.MO:
      case TAG_ID.MN:
      case TAG_ID.MS:
      case TAG_ID.MTEXT: {
        return true;
      }
      default: {
        return false;
      }
    }
  }
  /**
   * True for a MathML `annotation-xml` whose first `encoding` attribute is
   * (case-insensitively) one of the MIME_TYPES values, and for the SVG
   * `foreignObject`, `desc` and `title` tag ids.
   *
   * @param tn Tag id.
   * @param ns Namespace URI.
   * @param attrs Attribute list of the element.
   * @returns {boolean}
   */
  function isHtmlIntegrationPoint(tn, ns, attrs) {
    if (ns === NS.MATHML && tn === TAG_ID.ANNOTATION_XML) {
      const encodingAttr = attrs.find((attr) => attr.name === ATTRS.ENCODING);
      if (encodingAttr) {
        const encoding = encodingAttr.value.toLowerCase();
        return encoding === MIME_TYPES.TEXT_HTML || encoding === MIME_TYPES.APPLICATION_XML;
      }
    }
    if (ns !== NS.SVG) {
      return false;
    }
    return tn === TAG_ID.FOREIGN_OBJECT || tn === TAG_ID.DESC || tn === TAG_ID.TITLE;
  }
  /**
   * Combines the two integration point checks. With a falsy `foreignNS`
   * both checks apply; with `NS.HTML` only the HTML check applies; with
   * `NS.MATHML` only the MathML text check applies.
   *
   * @param tn Tag id.
   * @param ns Namespace URI of the element.
   * @param attrs Attribute list of the element.
   * @param foreignNS Optional namespace restricting which check applies.
   * @returns {boolean}
   */
  function isIntegrationPoint(tn, ns, attrs, foreignNS) {
    const anyNamespace = !foreignNS;
    if ((anyNamespace || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) {
      return true;
    }
    return (anyNamespace || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns);
  }
  4966. //Misc constants
  const HIDDEN_INPUT_TYPE = 'hidden';
  //Adoption agency loops iteration count
  const AA_OUTER_LOOP_ITER = 8;
  const AA_INNER_LOOP_ITER = 3;
  //Insertion modes
  // Built like a TypeScript enum: each name maps to its position in the
  // list below and each position maps back to the name.
  var InsertionMode;
  if (!InsertionMode) {
    InsertionMode = {};
  }
  [
    'INITIAL',
    'BEFORE_HTML',
    'BEFORE_HEAD',
    'IN_HEAD',
    'IN_HEAD_NO_SCRIPT',
    'AFTER_HEAD',
    'IN_BODY',
    'TEXT',
    'IN_TABLE',
    'IN_TABLE_TEXT',
    'IN_CAPTION',
    'IN_COLUMN_GROUP',
    'IN_TABLE_BODY',
    'IN_ROW',
    'IN_CELL',
    'IN_SELECT',
    'IN_SELECT_IN_TABLE',
    'IN_TEMPLATE',
    'AFTER_BODY',
    'IN_FRAMESET',
    'AFTER_FRAMESET',
    'AFTER_AFTER_BODY',
    'AFTER_AFTER_FRAMESET',
  ].forEach((modeName, modeValue) => {
    InsertionMode[modeName] = modeValue;
    InsertionMode[modeValue] = modeName;
  });
  // Location reported by `_err` for tokens that carry no location.
  const BASE_LOC = {
    startLine: -1,
    startCol: -1,
    startOffset: -1,
    endLine: -1,
    endCol: -1,
    endOffset: -1,
  };
  // Tag ids of table, tbody, tfoot, thead and tr.
  const TABLE_STRUCTURE_TAGS = new Set();
  for (const tagId of [TAG_ID.TABLE, TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TR]) {
    TABLE_STRUCTURE_TAGS.add(tagId);
  }
  // Defaults merged underneath the options passed to the Parser.
  const defaultParserOptions = {
    scriptingEnabled: true,
    sourceCodeLocationInfo: false,
    treeAdapter: defaultTreeAdapter,
    onParseError: null,
  };
  5013. //Parser
  5014. class Parser {
  5015. constructor(options, document,
  5016. /** @internal */
  5017. fragmentContext = null,
  5018. /** @internal */
  5019. scriptHandler = null) {
  5020. this.fragmentContext = fragmentContext;
  5021. this.scriptHandler = scriptHandler;
  5022. this.currentToken = null;
  5023. this.stopped = false;
  5024. /** @internal */
  5025. this.insertionMode = InsertionMode.INITIAL;
  5026. /** @internal */
  5027. this.originalInsertionMode = InsertionMode.INITIAL;
  5028. /** @internal */
  5029. this.headElement = null;
  5030. /** @internal */
  5031. this.formElement = null;
  5032. /** Indicates that the current node is not an element in the HTML namespace */
  5033. this.currentNotInHTML = false;
  5034. /**
  5035. * The template insertion mode stack is maintained from the left.
  5036. * Ie. the topmost element will always have index 0.
  5037. *
  5038. * @internal
  5039. */
  5040. this.tmplInsertionModeStack = [];
  5041. /** @internal */
  5042. this.pendingCharacterTokens = [];
  5043. /** @internal */
  5044. this.hasNonWhitespacePendingCharacterToken = false;
  5045. /** @internal */
  5046. this.framesetOk = true;
  5047. /** @internal */
  5048. this.skipNextNewLine = false;
  5049. /** @internal */
  5050. this.fosterParentingEnabled = false;
  5051. this.options = {
  5052. ...defaultParserOptions,
  5053. ...options,
  5054. };
  5055. this.treeAdapter = this.options.treeAdapter;
  5056. this.onParseError = this.options.onParseError;
  5057. // Always enable location info if we report parse errors.
  5058. if (this.onParseError) {
  5059. this.options.sourceCodeLocationInfo = true;
  5060. }
  5061. this.document = document !== null && document !== void 0 ? document : this.treeAdapter.createDocument();
  5062. this.tokenizer = new Tokenizer(this.options, this);
  5063. this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
  5064. this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : TAG_ID.UNKNOWN;
  5065. this._setContextModes(fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : this.document, this.fragmentContextID);
  5066. this.openElements = new OpenElementStack(this.document, this.treeAdapter, this);
  5067. }
  5068. // API
  5069. static parse(html, options) {
  5070. const parser = new this(options);
  5071. parser.tokenizer.write(html, true);
  5072. return parser.document;
  5073. }
  5074. static getFragmentParser(fragmentContext, options) {
  5075. const opts = {
  5076. ...defaultParserOptions,
  5077. ...options,
  5078. };
  5079. //NOTE: use a <template> element as the fragment context if no context element was provided,
  5080. //so we will parse in a "forgiving" manner
  5081. fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : (fragmentContext = opts.treeAdapter.createElement(TAG_NAMES.TEMPLATE, NS.HTML, []));
  5082. //NOTE: create a fake element which will be used as the `document` for fragment parsing.
  5083. //This is important for jsdom, where a new `document` cannot be created. This led to
  5084. //fragment parsing messing with the main `document`.
  5085. const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);
  5086. const parser = new this(opts, documentMock, fragmentContext);
  5087. if (parser.fragmentContextID === TAG_ID.TEMPLATE) {
  5088. parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
  5089. }
  5090. parser._initTokenizerForFragmentParsing();
  5091. parser._insertFakeRootElement();
  5092. parser._resetInsertionMode();
  5093. parser._findFormInFragmentContext();
  5094. return parser;
  5095. }
  5096. getFragment() {
  5097. const rootElement = this.treeAdapter.getFirstChild(this.document);
  5098. const fragment = this.treeAdapter.createDocumentFragment();
  5099. this._adoptNodes(rootElement, fragment);
  5100. return fragment;
  5101. }
  5102. //Errors
  5103. /** @internal */
  5104. _err(token, code, beforeToken) {
  5105. var _a;
  5106. if (!this.onParseError)
  5107. return;
  5108. const loc = (_a = token.location) !== null && _a !== void 0 ? _a : BASE_LOC;
  5109. const err = {
  5110. code,
  5111. startLine: loc.startLine,
  5112. startCol: loc.startCol,
  5113. startOffset: loc.startOffset,
  5114. endLine: beforeToken ? loc.startLine : loc.endLine,
  5115. endCol: beforeToken ? loc.startCol : loc.endCol,
  5116. endOffset: beforeToken ? loc.startOffset : loc.endOffset,
  5117. };
  5118. this.onParseError(err);
  5119. }
  5120. //Stack events
  5121. /** @internal */
  5122. onItemPush(node, tid, isTop) {
  5123. var _a, _b;
  5124. (_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node);
  5125. if (isTop && this.openElements.stackTop > 0)
  5126. this._setContextModes(node, tid);
  5127. }
  5128. /** @internal */
  5129. onItemPop(node, isTop) {
  5130. var _a, _b;
  5131. if (this.options.sourceCodeLocationInfo) {
  5132. this._setEndLocation(node, this.currentToken);
  5133. }
  5134. (_b = (_a = this.treeAdapter).onItemPop) === null || _b === void 0 ? void 0 : _b.call(_a, node, this.openElements.current);
  5135. if (isTop) {
  5136. let current;
  5137. let currentTagId;
  5138. if (this.openElements.stackTop === 0 && this.fragmentContext) {
  5139. current = this.fragmentContext;
  5140. currentTagId = this.fragmentContextID;
  5141. }
  5142. else {
  5143. ({ current, currentTagId } = this.openElements);
  5144. }
  5145. this._setContextModes(current, currentTagId);
  5146. }
  5147. }
  5148. _setContextModes(current, tid) {
  5149. const isHTML = current === this.document || this.treeAdapter.getNamespaceURI(current) === NS.HTML;
  5150. this.currentNotInHTML = !isHTML;
  5151. this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current);
  5152. }
  5153. /** @protected */
  5154. _switchToTextParsing(currentToken, nextTokenizerState) {
  5155. this._insertElement(currentToken, NS.HTML);
  5156. this.tokenizer.state = nextTokenizerState;
  5157. this.originalInsertionMode = this.insertionMode;
  5158. this.insertionMode = InsertionMode.TEXT;
  5159. }
  5160. switchToPlaintextParsing() {
  5161. this.insertionMode = InsertionMode.TEXT;
  5162. this.originalInsertionMode = InsertionMode.IN_BODY;
  5163. this.tokenizer.state = TokenizerMode.PLAINTEXT;
  5164. }
  5165. //Fragment parsing
  5166. /** @protected */
  5167. _getAdjustedCurrentElement() {
  5168. return this.openElements.stackTop === 0 && this.fragmentContext
  5169. ? this.fragmentContext
  5170. : this.openElements.current;
  5171. }
  5172. /** @protected */
  5173. _findFormInFragmentContext() {
  5174. let node = this.fragmentContext;
  5175. while (node) {
  5176. if (this.treeAdapter.getTagName(node) === TAG_NAMES.FORM) {
  5177. this.formElement = node;
  5178. break;
  5179. }
  5180. node = this.treeAdapter.getParentNode(node);
  5181. }
  5182. }
  5183. _initTokenizerForFragmentParsing() {
  5184. if (!this.fragmentContext || this.treeAdapter.getNamespaceURI(this.fragmentContext) !== NS.HTML) {
  5185. return;
  5186. }
  5187. switch (this.fragmentContextID) {
  5188. case TAG_ID.TITLE:
  5189. case TAG_ID.TEXTAREA: {
  5190. this.tokenizer.state = TokenizerMode.RCDATA;
  5191. break;
  5192. }
  5193. case TAG_ID.STYLE:
  5194. case TAG_ID.XMP:
  5195. case TAG_ID.IFRAME:
  5196. case TAG_ID.NOEMBED:
  5197. case TAG_ID.NOFRAMES:
  5198. case TAG_ID.NOSCRIPT: {
  5199. this.tokenizer.state = TokenizerMode.RAWTEXT;
  5200. break;
  5201. }
  5202. case TAG_ID.SCRIPT: {
  5203. this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
  5204. break;
  5205. }
  5206. case TAG_ID.PLAINTEXT: {
  5207. this.tokenizer.state = TokenizerMode.PLAINTEXT;
  5208. break;
  5209. }
  5210. // Do nothing
  5211. }
  5212. }
  5213. //Tree mutation
  5214. /** @protected */
  5215. _setDocumentType(token) {
  5216. const name = token.name || '';
  5217. const publicId = token.publicId || '';
  5218. const systemId = token.systemId || '';
  5219. this.treeAdapter.setDocumentType(this.document, name, publicId, systemId);
  5220. if (token.location) {
  5221. const documentChildren = this.treeAdapter.getChildNodes(this.document);
  5222. const docTypeNode = documentChildren.find((node) => this.treeAdapter.isDocumentTypeNode(node));
  5223. if (docTypeNode) {
  5224. this.treeAdapter.setNodeSourceCodeLocation(docTypeNode, token.location);
  5225. }
  5226. }
  5227. }
  5228. /** @protected */
  5229. _attachElementToTree(element, location) {
  5230. if (this.options.sourceCodeLocationInfo) {
  5231. const loc = location && {
  5232. ...location,
  5233. startTag: location,
  5234. };
  5235. this.treeAdapter.setNodeSourceCodeLocation(element, loc);
  5236. }
  5237. if (this._shouldFosterParentOnInsertion()) {
  5238. this._fosterParentElement(element);
  5239. }
  5240. else {
  5241. const parent = this.openElements.currentTmplContentOrNode;
  5242. this.treeAdapter.appendChild(parent, element);
  5243. }
  5244. }
  5245. /**
  5246. * For self-closing tags. Add an element to the tree, but skip adding it
  5247. * to the stack.
  5248. */
  5249. /** @protected */
  5250. _appendElement(token, namespaceURI) {
  5251. const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
  5252. this._attachElementToTree(element, token.location);
  5253. }
  5254. /** @protected */
  5255. _insertElement(token, namespaceURI) {
  5256. const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
  5257. this._attachElementToTree(element, token.location);
  5258. this.openElements.push(element, token.tagID);
  5259. }
  5260. /** @protected */
  5261. _insertFakeElement(tagName, tagID) {
  5262. const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
  5263. this._attachElementToTree(element, null);
  5264. this.openElements.push(element, tagID);
  5265. }
  5266. /** @protected */
  5267. _insertTemplate(token) {
  5268. const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
  5269. const content = this.treeAdapter.createDocumentFragment();
  5270. this.treeAdapter.setTemplateContent(tmpl, content);
  5271. this._attachElementToTree(tmpl, token.location);
  5272. this.openElements.push(tmpl, token.tagID);
  5273. if (this.options.sourceCodeLocationInfo)
  5274. this.treeAdapter.setNodeSourceCodeLocation(content, null);
  5275. }
  5276. /** @protected */
  5277. _insertFakeRootElement() {
  5278. const element = this.treeAdapter.createElement(TAG_NAMES.HTML, NS.HTML, []);
  5279. if (this.options.sourceCodeLocationInfo)
  5280. this.treeAdapter.setNodeSourceCodeLocation(element, null);
  5281. this.treeAdapter.appendChild(this.openElements.current, element);
  5282. this.openElements.push(element, TAG_ID.HTML);
  5283. }
  5284. /** @protected */
  5285. _appendCommentNode(token, parent) {
  5286. const commentNode = this.treeAdapter.createCommentNode(token.data);
  5287. this.treeAdapter.appendChild(parent, commentNode);
  5288. if (this.options.sourceCodeLocationInfo) {
  5289. this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
  5290. }
  5291. }
  5292. /** @protected */
  5293. _insertCharacters(token) {
  5294. let parent;
  5295. let beforeElement;
  5296. if (this._shouldFosterParentOnInsertion()) {
  5297. ({ parent, beforeElement } = this._findFosterParentingLocation());
  5298. if (beforeElement) {
  5299. this.treeAdapter.insertTextBefore(parent, token.chars, beforeElement);
  5300. }
  5301. else {
  5302. this.treeAdapter.insertText(parent, token.chars);
  5303. }
  5304. }
  5305. else {
  5306. parent = this.openElements.currentTmplContentOrNode;
  5307. this.treeAdapter.insertText(parent, token.chars);
  5308. }
  5309. if (!token.location)
  5310. return;
  5311. const siblings = this.treeAdapter.getChildNodes(parent);
  5312. const textNodeIdx = beforeElement ? siblings.lastIndexOf(beforeElement) : siblings.length;
  5313. const textNode = siblings[textNodeIdx - 1];
  5314. //NOTE: if we have a location assigned by another token, then just update the end position
  5315. const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
  5316. if (tnLoc) {
  5317. const { endLine, endCol, endOffset } = token.location;
  5318. this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
  5319. }
  5320. else if (this.options.sourceCodeLocationInfo) {
  5321. this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
  5322. }
  5323. }
  5324. /** @protected */
  5325. _adoptNodes(donor, recipient) {
  5326. for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
  5327. this.treeAdapter.detachNode(child);
  5328. this.treeAdapter.appendChild(recipient, child);
  5329. }
  5330. }
  5331. /** @protected */
  5332. _setEndLocation(element, closingToken) {
  5333. if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
  5334. const ctLoc = closingToken.location;
  5335. const tn = this.treeAdapter.getTagName(element);
  5336. const endLoc =
  5337. // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
  5338. // tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
  5339. closingToken.type === TokenType.END_TAG && tn === closingToken.tagName
  5340. ? {
  5341. endTag: { ...ctLoc },
  5342. endLine: ctLoc.endLine,
  5343. endCol: ctLoc.endCol,
  5344. endOffset: ctLoc.endOffset,
  5345. }
  5346. : {
  5347. endLine: ctLoc.startLine,
  5348. endCol: ctLoc.startCol,
  5349. endOffset: ctLoc.startOffset,
  5350. };
  5351. this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
  5352. }
  5353. }
  5354. //Token processing
  5355. shouldProcessStartTagTokenInForeignContent(token) {
  5356. // Check that neither current === document, or ns === NS.HTML
  5357. if (!this.currentNotInHTML)
  5358. return false;
  5359. let current;
  5360. let currentTagId;
  5361. if (this.openElements.stackTop === 0 && this.fragmentContext) {
  5362. current = this.fragmentContext;
  5363. currentTagId = this.fragmentContextID;
  5364. }
  5365. else {
  5366. ({ current, currentTagId } = this.openElements);
  5367. }
  5368. if (token.tagID === TAG_ID.SVG &&
  5369. this.treeAdapter.getTagName(current) === TAG_NAMES.ANNOTATION_XML &&
  5370. this.treeAdapter.getNamespaceURI(current) === NS.MATHML) {
  5371. return false;
  5372. }
  5373. return (
  5374. // Check that `current` is not an integration point for HTML or MathML elements.
  5375. this.tokenizer.inForeignNode ||
  5376. // If it _is_ an integration point, then we might have to check that it is not an HTML
  5377. // integration point.
  5378. ((token.tagID === TAG_ID.MGLYPH || token.tagID === TAG_ID.MALIGNMARK) &&
  5379. !this._isIntegrationPoint(currentTagId, current, NS.HTML)));
  5380. }
  5381. /** @protected */
  5382. _processToken(token) {
  5383. switch (token.type) {
  5384. case TokenType.CHARACTER: {
  5385. this.onCharacter(token);
  5386. break;
  5387. }
  5388. case TokenType.NULL_CHARACTER: {
  5389. this.onNullCharacter(token);
  5390. break;
  5391. }
  5392. case TokenType.COMMENT: {
  5393. this.onComment(token);
  5394. break;
  5395. }
  5396. case TokenType.DOCTYPE: {
  5397. this.onDoctype(token);
  5398. break;
  5399. }
  5400. case TokenType.START_TAG: {
  5401. this._processStartTag(token);
  5402. break;
  5403. }
  5404. case TokenType.END_TAG: {
  5405. this.onEndTag(token);
  5406. break;
  5407. }
  5408. case TokenType.EOF: {
  5409. this.onEof(token);
  5410. break;
  5411. }
  5412. case TokenType.WHITESPACE_CHARACTER: {
  5413. this.onWhitespaceCharacter(token);
  5414. break;
  5415. }
  5416. }
  5417. }
  5418. //Integration points
  5419. /** @protected */
  5420. _isIntegrationPoint(tid, element, foreignNS) {
  5421. const ns = this.treeAdapter.getNamespaceURI(element);
  5422. const attrs = this.treeAdapter.getAttrList(element);
  5423. return isIntegrationPoint(tid, ns, attrs, foreignNS);
  5424. }
  5425. //Active formatting elements reconstruction
  5426. /** @protected */
  5427. _reconstructActiveFormattingElements() {
  5428. const listLength = this.activeFormattingElements.entries.length;
  5429. if (listLength) {
  5430. const endIndex = this.activeFormattingElements.entries.findIndex((entry) => entry.type === EntryType.Marker || this.openElements.contains(entry.element));
  5431. const unopenIdx = endIndex < 0 ? listLength - 1 : endIndex - 1;
  5432. for (let i = unopenIdx; i >= 0; i--) {
  5433. const entry = this.activeFormattingElements.entries[i];
  5434. this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element));
  5435. entry.element = this.openElements.current;
  5436. }
  5437. }
  5438. }
  5439. //Close elements
  5440. /** @protected */
  5441. _closeTableCell() {
  5442. this.openElements.generateImpliedEndTags();
  5443. this.openElements.popUntilTableCellPopped();
  5444. this.activeFormattingElements.clearToLastMarker();
  5445. this.insertionMode = InsertionMode.IN_ROW;
  5446. }
  5447. /** @protected */
  5448. _closePElement() {
  5449. this.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.P);
  5450. this.openElements.popUntilTagNamePopped(TAG_ID.P);
  5451. }
  5452. //Insertion modes
  5453. /** @protected */
  5454. _resetInsertionMode() {
  5455. for (let i = this.openElements.stackTop; i >= 0; i--) {
  5456. //Insertion mode reset map
  5457. switch (i === 0 && this.fragmentContext ? this.fragmentContextID : this.openElements.tagIDs[i]) {
  5458. case TAG_ID.TR: {
  5459. this.insertionMode = InsertionMode.IN_ROW;
  5460. return;
  5461. }
  5462. case TAG_ID.TBODY:
  5463. case TAG_ID.THEAD:
  5464. case TAG_ID.TFOOT: {
  5465. this.insertionMode = InsertionMode.IN_TABLE_BODY;
  5466. return;
  5467. }
  5468. case TAG_ID.CAPTION: {
  5469. this.insertionMode = InsertionMode.IN_CAPTION;
  5470. return;
  5471. }
  5472. case TAG_ID.COLGROUP: {
  5473. this.insertionMode = InsertionMode.IN_COLUMN_GROUP;
  5474. return;
  5475. }
  5476. case TAG_ID.TABLE: {
  5477. this.insertionMode = InsertionMode.IN_TABLE;
  5478. return;
  5479. }
  5480. case TAG_ID.BODY: {
  5481. this.insertionMode = InsertionMode.IN_BODY;
  5482. return;
  5483. }
  5484. case TAG_ID.FRAMESET: {
  5485. this.insertionMode = InsertionMode.IN_FRAMESET;
  5486. return;
  5487. }
  5488. case TAG_ID.SELECT: {
  5489. this._resetInsertionModeForSelect(i);
  5490. return;
  5491. }
  5492. case TAG_ID.TEMPLATE: {
  5493. this.insertionMode = this.tmplInsertionModeStack[0];
  5494. return;
  5495. }
  5496. case TAG_ID.HTML: {
  5497. this.insertionMode = this.headElement ? InsertionMode.AFTER_HEAD : InsertionMode.BEFORE_HEAD;
  5498. return;
  5499. }
  5500. case TAG_ID.TD:
  5501. case TAG_ID.TH: {
  5502. if (i > 0) {
  5503. this.insertionMode = InsertionMode.IN_CELL;
  5504. return;
  5505. }
  5506. break;
  5507. }
  5508. case TAG_ID.HEAD: {
  5509. if (i > 0) {
  5510. this.insertionMode = InsertionMode.IN_HEAD;
  5511. return;
  5512. }
  5513. break;
  5514. }
  5515. }
  5516. }
  5517. this.insertionMode = InsertionMode.IN_BODY;
  5518. }
  5519. /** @protected */
  5520. _resetInsertionModeForSelect(selectIdx) {
  5521. if (selectIdx > 0) {
  5522. for (let i = selectIdx - 1; i > 0; i--) {
  5523. const tn = this.openElements.tagIDs[i];
  5524. if (tn === TAG_ID.TEMPLATE) {
  5525. break;
  5526. }
  5527. else if (tn === TAG_ID.TABLE) {
  5528. this.insertionMode = InsertionMode.IN_SELECT_IN_TABLE;
  5529. return;
  5530. }
  5531. }
  5532. }
  5533. this.insertionMode = InsertionMode.IN_SELECT;
  5534. }
  5535. //Foster parenting
  5536. /** @protected */
  5537. _isElementCausesFosterParenting(tn) {
  5538. return TABLE_STRUCTURE_TAGS.has(tn);
  5539. }
  5540. /** @protected */
  5541. _shouldFosterParentOnInsertion() {
  5542. return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId);
  5543. }
  5544. /** @protected */
  5545. _findFosterParentingLocation() {
  5546. for (let i = this.openElements.stackTop; i >= 0; i--) {
  5547. const openElement = this.openElements.items[i];
  5548. switch (this.openElements.tagIDs[i]) {
  5549. case TAG_ID.TEMPLATE: {
  5550. if (this.treeAdapter.getNamespaceURI(openElement) === NS.HTML) {
  5551. return { parent: this.treeAdapter.getTemplateContent(openElement), beforeElement: null };
  5552. }
  5553. break;
  5554. }
  5555. case TAG_ID.TABLE: {
  5556. const parent = this.treeAdapter.getParentNode(openElement);
  5557. if (parent) {
  5558. return { parent, beforeElement: openElement };
  5559. }
  5560. return { parent: this.openElements.items[i - 1], beforeElement: null };
  5561. }
  5562. // Do nothing
  5563. }
  5564. }
  5565. return { parent: this.openElements.items[0], beforeElement: null };
  5566. }
  5567. /** @protected */
  5568. _fosterParentElement(element) {
  5569. const location = this._findFosterParentingLocation();
  5570. if (location.beforeElement) {
  5571. this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
  5572. }
  5573. else {
  5574. this.treeAdapter.appendChild(location.parent, element);
  5575. }
  5576. }
  5577. //Special elements
  5578. /** @protected */
  5579. _isSpecialElement(element, id) {
  5580. const ns = this.treeAdapter.getNamespaceURI(element);
  5581. return SPECIAL_ELEMENTS[ns].has(id);
  5582. }
  5583. /** @internal */
  5584. onCharacter(token) {
  5585. this.skipNextNewLine = false;
  5586. if (this.tokenizer.inForeignNode) {
  5587. characterInForeignContent(this, token);
  5588. return;
  5589. }
  5590. switch (this.insertionMode) {
  5591. case InsertionMode.INITIAL: {
  5592. tokenInInitialMode(this, token);
  5593. break;
  5594. }
  5595. case InsertionMode.BEFORE_HTML: {
  5596. tokenBeforeHtml(this, token);
  5597. break;
  5598. }
  5599. case InsertionMode.BEFORE_HEAD: {
  5600. tokenBeforeHead(this, token);
  5601. break;
  5602. }
  5603. case InsertionMode.IN_HEAD: {
  5604. tokenInHead(this, token);
  5605. break;
  5606. }
  5607. case InsertionMode.IN_HEAD_NO_SCRIPT: {
  5608. tokenInHeadNoScript(this, token);
  5609. break;
  5610. }
  5611. case InsertionMode.AFTER_HEAD: {
  5612. tokenAfterHead(this, token);
  5613. break;
  5614. }
  5615. case InsertionMode.IN_BODY:
  5616. case InsertionMode.IN_CAPTION:
  5617. case InsertionMode.IN_CELL:
  5618. case InsertionMode.IN_TEMPLATE: {
  5619. characterInBody(this, token);
  5620. break;
  5621. }
  5622. case InsertionMode.TEXT:
  5623. case InsertionMode.IN_SELECT:
  5624. case InsertionMode.IN_SELECT_IN_TABLE: {
  5625. this._insertCharacters(token);
  5626. break;
  5627. }
  5628. case InsertionMode.IN_TABLE:
  5629. case InsertionMode.IN_TABLE_BODY:
  5630. case InsertionMode.IN_ROW: {
  5631. characterInTable(this, token);
  5632. break;
  5633. }
  5634. case InsertionMode.IN_TABLE_TEXT: {
  5635. characterInTableText(this, token);
  5636. break;
  5637. }
  5638. case InsertionMode.IN_COLUMN_GROUP: {
  5639. tokenInColumnGroup(this, token);
  5640. break;
  5641. }
  5642. case InsertionMode.AFTER_BODY: {
  5643. tokenAfterBody(this, token);
  5644. break;
  5645. }
  5646. case InsertionMode.AFTER_AFTER_BODY: {
  5647. tokenAfterAfterBody(this, token);
  5648. break;
  5649. }
  5650. // Do nothing
  5651. }
  5652. }
  5653. /** @internal */
  5654. onNullCharacter(token) {
  5655. this.skipNextNewLine = false;
  5656. if (this.tokenizer.inForeignNode) {
  5657. nullCharacterInForeignContent(this, token);
  5658. return;
  5659. }
  5660. switch (this.insertionMode) {
  5661. case InsertionMode.INITIAL: {
  5662. tokenInInitialMode(this, token);
  5663. break;
  5664. }
  5665. case InsertionMode.BEFORE_HTML: {
  5666. tokenBeforeHtml(this, token);
  5667. break;
  5668. }
  5669. case InsertionMode.BEFORE_HEAD: {
  5670. tokenBeforeHead(this, token);
  5671. break;
  5672. }
  5673. case InsertionMode.IN_HEAD: {
  5674. tokenInHead(this, token);
  5675. break;
  5676. }
  5677. case InsertionMode.IN_HEAD_NO_SCRIPT: {
  5678. tokenInHeadNoScript(this, token);
  5679. break;
  5680. }
  5681. case InsertionMode.AFTER_HEAD: {
  5682. tokenAfterHead(this, token);
  5683. break;
  5684. }
  5685. case InsertionMode.TEXT: {
  5686. this._insertCharacters(token);
  5687. break;
  5688. }
  5689. case InsertionMode.IN_TABLE:
  5690. case InsertionMode.IN_TABLE_BODY:
  5691. case InsertionMode.IN_ROW: {
  5692. characterInTable(this, token);
  5693. break;
  5694. }
  5695. case InsertionMode.IN_COLUMN_GROUP: {
  5696. tokenInColumnGroup(this, token);
  5697. break;
  5698. }
  5699. case InsertionMode.AFTER_BODY: {
  5700. tokenAfterBody(this, token);
  5701. break;
  5702. }
  5703. case InsertionMode.AFTER_AFTER_BODY: {
  5704. tokenAfterAfterBody(this, token);
  5705. break;
  5706. }
  5707. // Do nothing
  5708. }
  5709. }
  5710. /** @internal */
  5711. onComment(token) {
  5712. this.skipNextNewLine = false;
  5713. if (this.currentNotInHTML) {
  5714. appendComment(this, token);
  5715. return;
  5716. }
  5717. switch (this.insertionMode) {
  5718. case InsertionMode.INITIAL:
  5719. case InsertionMode.BEFORE_HTML:
  5720. case InsertionMode.BEFORE_HEAD:
  5721. case InsertionMode.IN_HEAD:
  5722. case InsertionMode.IN_HEAD_NO_SCRIPT:
  5723. case InsertionMode.AFTER_HEAD:
  5724. case InsertionMode.IN_BODY:
  5725. case InsertionMode.IN_TABLE:
  5726. case InsertionMode.IN_CAPTION:
  5727. case InsertionMode.IN_COLUMN_GROUP:
  5728. case InsertionMode.IN_TABLE_BODY:
  5729. case InsertionMode.IN_ROW:
  5730. case InsertionMode.IN_CELL:
  5731. case InsertionMode.IN_SELECT:
  5732. case InsertionMode.IN_SELECT_IN_TABLE:
  5733. case InsertionMode.IN_TEMPLATE:
  5734. case InsertionMode.IN_FRAMESET:
  5735. case InsertionMode.AFTER_FRAMESET: {
  5736. appendComment(this, token);
  5737. break;
  5738. }
  5739. case InsertionMode.IN_TABLE_TEXT: {
  5740. tokenInTableText(this, token);
  5741. break;
  5742. }
  5743. case InsertionMode.AFTER_BODY: {
  5744. appendCommentToRootHtmlElement(this, token);
  5745. break;
  5746. }
  5747. case InsertionMode.AFTER_AFTER_BODY:
  5748. case InsertionMode.AFTER_AFTER_FRAMESET: {
  5749. appendCommentToDocument(this, token);
  5750. break;
  5751. }
  5752. // Do nothing
  5753. }
  5754. }
  5755. /** @internal */
  5756. onDoctype(token) {
  5757. this.skipNextNewLine = false;
  5758. switch (this.insertionMode) {
  5759. case InsertionMode.INITIAL: {
  5760. doctypeInInitialMode(this, token);
  5761. break;
  5762. }
  5763. case InsertionMode.BEFORE_HEAD:
  5764. case InsertionMode.IN_HEAD:
  5765. case InsertionMode.IN_HEAD_NO_SCRIPT:
  5766. case InsertionMode.AFTER_HEAD: {
  5767. this._err(token, ERR.misplacedDoctype);
  5768. break;
  5769. }
  5770. case InsertionMode.IN_TABLE_TEXT: {
  5771. tokenInTableText(this, token);
  5772. break;
  5773. }
  5774. // Do nothing
  5775. }
  5776. }
  5777. /** @internal */
  5778. onStartTag(token) {
  5779. this.skipNextNewLine = false;
  5780. this.currentToken = token;
  5781. this._processStartTag(token);
  5782. if (token.selfClosing && !token.ackSelfClosing) {
  5783. this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
  5784. }
  5785. }
  5786. /**
  5787. * Processes a given start tag.
  5788. *
  5789. * `onStartTag` checks if a self-closing tag was recognized. When a token
  5790. * is moved inbetween multiple insertion modes, this check for self-closing
  5791. * could lead to false positives. To avoid this, `_processStartTag` is used
  5792. * for nested calls.
  5793. *
  5794. * @param token The token to process.
  5795. * @protected
  5796. */
  5797. _processStartTag(token) {
  5798. if (this.shouldProcessStartTagTokenInForeignContent(token)) {
  5799. startTagInForeignContent(this, token);
  5800. }
  5801. else {
  5802. this._startTagOutsideForeignContent(token);
  5803. }
  5804. }
  5805. /** @protected */
  5806. _startTagOutsideForeignContent(token) {
  5807. switch (this.insertionMode) {
  5808. case InsertionMode.INITIAL: {
  5809. tokenInInitialMode(this, token);
  5810. break;
  5811. }
  5812. case InsertionMode.BEFORE_HTML: {
  5813. startTagBeforeHtml(this, token);
  5814. break;
  5815. }
  5816. case InsertionMode.BEFORE_HEAD: {
  5817. startTagBeforeHead(this, token);
  5818. break;
  5819. }
  5820. case InsertionMode.IN_HEAD: {
  5821. startTagInHead(this, token);
  5822. break;
  5823. }
  5824. case InsertionMode.IN_HEAD_NO_SCRIPT: {
  5825. startTagInHeadNoScript(this, token);
  5826. break;
  5827. }
  5828. case InsertionMode.AFTER_HEAD: {
  5829. startTagAfterHead(this, token);
  5830. break;
  5831. }
  5832. case InsertionMode.IN_BODY: {
  5833. startTagInBody(this, token);
  5834. break;
  5835. }
  5836. case InsertionMode.IN_TABLE: {
  5837. startTagInTable(this, token);
  5838. break;
  5839. }
  5840. case InsertionMode.IN_TABLE_TEXT: {
  5841. tokenInTableText(this, token);
  5842. break;
  5843. }
  5844. case InsertionMode.IN_CAPTION: {
  5845. startTagInCaption(this, token);
  5846. break;
  5847. }
  5848. case InsertionMode.IN_COLUMN_GROUP: {
  5849. startTagInColumnGroup(this, token);
  5850. break;
  5851. }
  5852. case InsertionMode.IN_TABLE_BODY: {
  5853. startTagInTableBody(this, token);
  5854. break;
  5855. }
  5856. case InsertionMode.IN_ROW: {
  5857. startTagInRow(this, token);
  5858. break;
  5859. }
  5860. case InsertionMode.IN_CELL: {
  5861. startTagInCell(this, token);
  5862. break;
  5863. }
  5864. case InsertionMode.IN_SELECT: {
  5865. startTagInSelect(this, token);
  5866. break;
  5867. }
  5868. case InsertionMode.IN_SELECT_IN_TABLE: {
  5869. startTagInSelectInTable(this, token);
  5870. break;
  5871. }
  5872. case InsertionMode.IN_TEMPLATE: {
  5873. startTagInTemplate(this, token);
  5874. break;
  5875. }
  5876. case InsertionMode.AFTER_BODY: {
  5877. startTagAfterBody(this, token);
  5878. break;
  5879. }
  5880. case InsertionMode.IN_FRAMESET: {
  5881. startTagInFrameset(this, token);
  5882. break;
  5883. }
  5884. case InsertionMode.AFTER_FRAMESET: {
  5885. startTagAfterFrameset(this, token);
  5886. break;
  5887. }
  5888. case InsertionMode.AFTER_AFTER_BODY: {
  5889. startTagAfterAfterBody(this, token);
  5890. break;
  5891. }
  5892. case InsertionMode.AFTER_AFTER_FRAMESET: {
  5893. startTagAfterAfterFrameset(this, token);
  5894. break;
  5895. }
  5896. // Do nothing
  5897. }
  5898. }
  5899. /** @internal */
  5900. onEndTag(token) {
  5901. this.skipNextNewLine = false;
  5902. this.currentToken = token;
  5903. if (this.currentNotInHTML) {
  5904. endTagInForeignContent(this, token);
  5905. }
  5906. else {
  5907. this._endTagOutsideForeignContent(token);
  5908. }
  5909. }
  5910. /** @protected */
  5911. _endTagOutsideForeignContent(token) {
  5912. switch (this.insertionMode) {
  5913. case InsertionMode.INITIAL: {
  5914. tokenInInitialMode(this, token);
  5915. break;
  5916. }
  5917. case InsertionMode.BEFORE_HTML: {
  5918. endTagBeforeHtml(this, token);
  5919. break;
  5920. }
  5921. case InsertionMode.BEFORE_HEAD: {
  5922. endTagBeforeHead(this, token);
  5923. break;
  5924. }
  5925. case InsertionMode.IN_HEAD: {
  5926. endTagInHead(this, token);
  5927. break;
  5928. }
  5929. case InsertionMode.IN_HEAD_NO_SCRIPT: {
  5930. endTagInHeadNoScript(this, token);
  5931. break;
  5932. }
  5933. case InsertionMode.AFTER_HEAD: {
  5934. endTagAfterHead(this, token);
  5935. break;
  5936. }
  5937. case InsertionMode.IN_BODY: {
  5938. endTagInBody(this, token);
  5939. break;
  5940. }
  5941. case InsertionMode.TEXT: {
  5942. endTagInText(this, token);
  5943. break;
  5944. }
  5945. case InsertionMode.IN_TABLE: {
  5946. endTagInTable(this, token);
  5947. break;
  5948. }
  5949. case InsertionMode.IN_TABLE_TEXT: {
  5950. tokenInTableText(this, token);
  5951. break;
  5952. }
  5953. case InsertionMode.IN_CAPTION: {
  5954. endTagInCaption(this, token);
  5955. break;
  5956. }
  5957. case InsertionMode.IN_COLUMN_GROUP: {
  5958. endTagInColumnGroup(this, token);
  5959. break;
  5960. }
  5961. case InsertionMode.IN_TABLE_BODY: {
  5962. endTagInTableBody(this, token);
  5963. break;
  5964. }
  5965. case InsertionMode.IN_ROW: {
  5966. endTagInRow(this, token);
  5967. break;
  5968. }
  5969. case InsertionMode.IN_CELL: {
  5970. endTagInCell(this, token);
  5971. break;
  5972. }
  5973. case InsertionMode.IN_SELECT: {
  5974. endTagInSelect(this, token);
  5975. break;
  5976. }
  5977. case InsertionMode.IN_SELECT_IN_TABLE: {
  5978. endTagInSelectInTable(this, token);
  5979. break;
  5980. }
  5981. case InsertionMode.IN_TEMPLATE: {
  5982. endTagInTemplate(this, token);
  5983. break;
  5984. }
  5985. case InsertionMode.AFTER_BODY: {
  5986. endTagAfterBody(this, token);
  5987. break;
  5988. }
  5989. case InsertionMode.IN_FRAMESET: {
  5990. endTagInFrameset(this, token);
  5991. break;
  5992. }
  5993. case InsertionMode.AFTER_FRAMESET: {
  5994. endTagAfterFrameset(this, token);
  5995. break;
  5996. }
  5997. case InsertionMode.AFTER_AFTER_BODY: {
  5998. tokenAfterAfterBody(this, token);
  5999. break;
  6000. }
  6001. // Do nothing
  6002. }
  6003. }
  6004. /** @internal */
  6005. onEof(token) {
  6006. switch (this.insertionMode) {
  6007. case InsertionMode.INITIAL: {
  6008. tokenInInitialMode(this, token);
  6009. break;
  6010. }
  6011. case InsertionMode.BEFORE_HTML: {
  6012. tokenBeforeHtml(this, token);
  6013. break;
  6014. }
  6015. case InsertionMode.BEFORE_HEAD: {
  6016. tokenBeforeHead(this, token);
  6017. break;
  6018. }
  6019. case InsertionMode.IN_HEAD: {
  6020. tokenInHead(this, token);
  6021. break;
  6022. }
  6023. case InsertionMode.IN_HEAD_NO_SCRIPT: {
  6024. tokenInHeadNoScript(this, token);
  6025. break;
  6026. }
  6027. case InsertionMode.AFTER_HEAD: {
  6028. tokenAfterHead(this, token);
  6029. break;
  6030. }
  6031. case InsertionMode.IN_BODY:
  6032. case InsertionMode.IN_TABLE:
  6033. case InsertionMode.IN_CAPTION:
  6034. case InsertionMode.IN_COLUMN_GROUP:
  6035. case InsertionMode.IN_TABLE_BODY:
  6036. case InsertionMode.IN_ROW:
  6037. case InsertionMode.IN_CELL:
  6038. case InsertionMode.IN_SELECT:
  6039. case InsertionMode.IN_SELECT_IN_TABLE: {
  6040. eofInBody(this, token);
  6041. break;
  6042. }
  6043. case InsertionMode.TEXT: {
  6044. eofInText(this, token);
  6045. break;
  6046. }
  6047. case InsertionMode.IN_TABLE_TEXT: {
  6048. tokenInTableText(this, token);
  6049. break;
  6050. }
  6051. case InsertionMode.IN_TEMPLATE: {
  6052. eofInTemplate(this, token);
  6053. break;
  6054. }
  6055. case InsertionMode.AFTER_BODY:
  6056. case InsertionMode.IN_FRAMESET:
  6057. case InsertionMode.AFTER_FRAMESET:
  6058. case InsertionMode.AFTER_AFTER_BODY:
  6059. case InsertionMode.AFTER_AFTER_FRAMESET: {
  6060. stopParsing(this, token);
  6061. break;
  6062. }
  6063. // Do nothing
  6064. }
  6065. }
  6066. /** @internal */
  6067. onWhitespaceCharacter(token) {
  6068. if (this.skipNextNewLine) {
  6069. this.skipNextNewLine = false;
  6070. if (token.chars.charCodeAt(0) === CODE_POINTS.LINE_FEED) {
  6071. if (token.chars.length === 1) {
  6072. return;
  6073. }
  6074. token.chars = token.chars.substr(1);
  6075. }
  6076. }
  6077. if (this.tokenizer.inForeignNode) {
  6078. this._insertCharacters(token);
  6079. return;
  6080. }
  6081. switch (this.insertionMode) {
  6082. case InsertionMode.IN_HEAD:
  6083. case InsertionMode.IN_HEAD_NO_SCRIPT:
  6084. case InsertionMode.AFTER_HEAD:
  6085. case InsertionMode.TEXT:
  6086. case InsertionMode.IN_COLUMN_GROUP:
  6087. case InsertionMode.IN_SELECT:
  6088. case InsertionMode.IN_SELECT_IN_TABLE:
  6089. case InsertionMode.IN_FRAMESET:
  6090. case InsertionMode.AFTER_FRAMESET: {
  6091. this._insertCharacters(token);
  6092. break;
  6093. }
  6094. case InsertionMode.IN_BODY:
  6095. case InsertionMode.IN_CAPTION:
  6096. case InsertionMode.IN_CELL:
  6097. case InsertionMode.IN_TEMPLATE:
  6098. case InsertionMode.AFTER_BODY:
  6099. case InsertionMode.AFTER_AFTER_BODY:
  6100. case InsertionMode.AFTER_AFTER_FRAMESET: {
  6101. whitespaceCharacterInBody(this, token);
  6102. break;
  6103. }
  6104. case InsertionMode.IN_TABLE:
  6105. case InsertionMode.IN_TABLE_BODY:
  6106. case InsertionMode.IN_ROW: {
  6107. characterInTable(this, token);
  6108. break;
  6109. }
  6110. case InsertionMode.IN_TABLE_TEXT: {
  6111. whitespaceCharacterInTableText(this, token);
  6112. break;
  6113. }
  6114. // Do nothing
  6115. }
  6116. }
  6117. }
  6118. //Adoption agency algorithm
  6119. //(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency)
  6120. //------------------------------------------------------------------
  // Steps 5-8 of the algorithm.
  // Looks up the active formatting entry in scope for the token's tag name.
  // - No entry: the token is handled as a generic end tag and the (falsy)
  //   lookup result is returned.
  // - Entry whose element is not on the stack of open elements: the entry is
  //   removed from the list and null is returned.
  // - Entry whose tag is not in scope on the stack: null is returned.
  // - Otherwise the entry itself is returned.
  function aaObtainFormattingElementEntry(p, token) {
    const entry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
    if (!entry) {
      genericEndTagInBody(p, token);
      return entry;
    }
    if (!p.openElements.contains(entry.element)) {
      p.activeFormattingElements.removeEntry(entry);
      return null;
    }
    return p.openElements.hasInScope(token.tagID) ? entry : null;
  }
  // Steps 9 and 10 of the algorithm.
  // Scans the stack of open elements from the top down to the formatting
  // element and remembers the last special element seen, i.e. the one closest
  // to the formatting element. When there is none, the stack is shortened to
  // the formatting element's index (0 if it was not found), the entry is
  // removed from the active formatting elements, and null is returned.
  function aaObtainFurthestBlock(p, formattingElementEntry) {
    let candidate = null;
    let pos = p.openElements.stackTop;
    while (pos >= 0) {
      const node = p.openElements.items[pos];
      if (node === formattingElementEntry.element) {
        break;
      }
      if (p._isSpecialElement(node, p.openElements.tagIDs[pos])) {
        candidate = node;
      }
      pos--;
    }
    if (candidate) {
      return candidate;
    }
    p.openElements.shortenToLength(pos < 0 ? 0 : pos);
    p.activeFormattingElements.removeEntry(formattingElementEntry);
    return candidate;
  }
  // Step 13 of the algorithm.
  // Walks the stack of open elements from the furthest block's common
  // ancestor towards the formatting element. For every node on the way:
  // - with no active formatting entry: the node is removed from the stack;
  // - with an entry, once the iteration counter has reached
  //   AA_INNER_LOOP_ITER: the entry and the node are both removed;
  // - otherwise: the node is recreated from its entry and the previously
  //   processed node is re-parented under the new element.
  // Returns the last node that was re-parented into (the furthest block
  // itself when nothing was recreated).
  function aaInnerLoop(p, furthestBlock, formattingElement) {
    let lastNode = furthestBlock;
    let node = p.openElements.getCommonAncestor(furthestBlock);
    let iteration = 0;
    while (node !== formattingElement) {
      // NOTE: fetch the next node first; the current one may be removed from
      // the stack below.
      const upcoming = p.openElements.getCommonAncestor(node);
      const entry = p.activeFormattingElements.getElementEntry(node);
      if (!entry) {
        p.openElements.remove(node);
      }
      else if (iteration >= AA_INNER_LOOP_ITER) {
        p.activeFormattingElements.removeEntry(entry);
        p.openElements.remove(node);
      }
      else {
        const recreated = aaRecreateElementFromEntry(p, entry);
        if (lastNode === furthestBlock) {
          p.activeFormattingElements.bookmark = entry;
        }
        p.treeAdapter.detachNode(lastNode);
        p.treeAdapter.appendChild(recreated, lastNode);
        lastNode = recreated;
      }
      iteration++;
      node = upcoming;
    }
    return lastNode;
  }
  // Step 13.7 of the algorithm.
  // Creates a new element from the entry's token, in the namespace of the
  // entry's current element, swaps it in for that element on the stack of
  // open elements, and points the entry at the new element.
  function aaRecreateElementFromEntry(p, elementEntry) {
    const { token, element: previous } = elementEntry;
    const namespace = p.treeAdapter.getNamespaceURI(previous);
    const recreated = p.treeAdapter.createElement(token.tagName, namespace, token.attrs);
    p.openElements.replace(previous, recreated);
    elementEntry.element = recreated;
    return recreated;
  }
  // Step 14 of the algorithm.
  // Places `lastElement` relative to the common ancestor: foster-parented when
  // the ancestor's tag causes foster parenting, appended to the template
  // content when the ancestor is an HTML-namespace template, and appended to
  // the ancestor itself otherwise.
  function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) {
    const ancestorTagId = getTagID(p.treeAdapter.getTagName(commonAncestor));
    if (p._isElementCausesFosterParenting(ancestorTagId)) {
      p._fosterParentElement(lastElement);
      return;
    }
    const ancestorNS = p.treeAdapter.getNamespaceURI(commonAncestor);
    const isHtmlTemplate = ancestorTagId === TAG_ID.TEMPLATE && ancestorNS === NS.HTML;
    const target = isHtmlTemplate ? p.treeAdapter.getTemplateContent(commonAncestor) : commonAncestor;
    p.treeAdapter.appendChild(target, lastElement);
  }
  6208. //Steps 15-19 of the algorithm
  6209. function aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry) {
  6210. const ns = p.treeAdapter.getNamespaceURI(formattingElementEntry.element);
  6211. const { token } = formattingElementEntry;
  6212. const newElement = p.treeAdapter.createElement(token.tagName, ns, token.attrs);
  6213. p._adoptNodes(furthestBlock, newElement);
  6214. p.treeAdapter.appendChild(furthestBlock, newElement);
  6215. p.activeFormattingElements.insertElementAfterBookmark(newElement, token);
  6216. p.activeFormattingElements.removeEntry(formattingElementEntry);
  6217. p.openElements.remove(formattingElementEntry.element);
  6218. p.openElements.insertAfter(furthestBlock, newElement, token.tagID);
  6219. }
  6220. //Algorithm entry point
  6221. function callAdoptionAgency(p, token) {
  6222. for (let i = 0; i < AA_OUTER_LOOP_ITER; i++) {
  6223. const formattingElementEntry = aaObtainFormattingElementEntry(p, token);
  6224. if (!formattingElementEntry) {
  6225. break;
  6226. }
  6227. const furthestBlock = aaObtainFurthestBlock(p, formattingElementEntry);
  6228. if (!furthestBlock) {
  6229. break;
  6230. }
  6231. p.activeFormattingElements.bookmark = formattingElementEntry;
  6232. const lastElement = aaInnerLoop(p, furthestBlock, formattingElementEntry.element);
  6233. const commonAncestor = p.openElements.getCommonAncestor(formattingElementEntry.element);
  6234. p.treeAdapter.detachNode(lastElement);
  6235. if (commonAncestor)
  6236. aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
  6237. aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry);
  6238. }
  6239. }
  6240. //Generic token handlers
  6241. //------------------------------------------------------------------
  6242. function appendComment(p, token) {
  6243. p._appendCommentNode(token, p.openElements.currentTmplContentOrNode);
  6244. }
  6245. function appendCommentToRootHtmlElement(p, token) {
  6246. p._appendCommentNode(token, p.openElements.items[0]);
  6247. }
  6248. function appendCommentToDocument(p, token) {
  6249. p._appendCommentNode(token, p.document);
  6250. }
  6251. function stopParsing(p, token) {
  6252. p.stopped = true;
  6253. // NOTE: Set end locations for elements that remain on the open element stack.
  6254. if (token.location) {
  6255. // NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
  6256. // This is a problem, as we might overwrite their end position here.
  6257. const target = p.fragmentContext ? 0 : 2;
  6258. for (let i = p.openElements.stackTop; i >= target; i--) {
  6259. p._setEndLocation(p.openElements.items[i], token);
  6260. }
  6261. // Handle `html` and `body`
  6262. if (!p.fragmentContext && p.openElements.stackTop >= 0) {
  6263. const htmlElement = p.openElements.items[0];
  6264. const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
  6265. if (htmlLocation && !htmlLocation.endTag) {
  6266. p._setEndLocation(htmlElement, token);
  6267. if (p.openElements.stackTop >= 1) {
  6268. const bodyElement = p.openElements.items[1];
  6269. const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
  6270. if (bodyLocation && !bodyLocation.endTag) {
  6271. p._setEndLocation(bodyElement, token);
  6272. }
  6273. }
  6274. }
  6275. }
  6276. }
  6277. }
  6278. // The "initial" insertion mode
  6279. //------------------------------------------------------------------
  6280. function doctypeInInitialMode(p, token) {
  6281. p._setDocumentType(token);
  6282. const mode = token.forceQuirks ? DOCUMENT_MODE.QUIRKS : getDocumentMode(token);
  6283. if (!isConforming(token)) {
  6284. p._err(token, ERR.nonConformingDoctype);
  6285. }
  6286. p.treeAdapter.setDocumentMode(p.document, mode);
  6287. p.insertionMode = InsertionMode.BEFORE_HTML;
  6288. }
  6289. function tokenInInitialMode(p, token) {
  6290. p._err(token, ERR.missingDoctype, true);
  6291. p.treeAdapter.setDocumentMode(p.document, DOCUMENT_MODE.QUIRKS);
  6292. p.insertionMode = InsertionMode.BEFORE_HTML;
  6293. p._processToken(token);
  6294. }
  6295. // The "before html" insertion mode
  6296. //------------------------------------------------------------------
  6297. function startTagBeforeHtml(p, token) {
  6298. if (token.tagID === TAG_ID.HTML) {
  6299. p._insertElement(token, NS.HTML);
  6300. p.insertionMode = InsertionMode.BEFORE_HEAD;
  6301. }
  6302. else {
  6303. tokenBeforeHtml(p, token);
  6304. }
  6305. }
  6306. function endTagBeforeHtml(p, token) {
  6307. const tn = token.tagID;
  6308. if (tn === TAG_ID.HTML || tn === TAG_ID.HEAD || tn === TAG_ID.BODY || tn === TAG_ID.BR) {
  6309. tokenBeforeHtml(p, token);
  6310. }
  6311. }
  6312. function tokenBeforeHtml(p, token) {
  6313. p._insertFakeRootElement();
  6314. p.insertionMode = InsertionMode.BEFORE_HEAD;
  6315. p._processToken(token);
  6316. }
  6317. // The "before head" insertion mode
  6318. //------------------------------------------------------------------
  6319. function startTagBeforeHead(p, token) {
  6320. switch (token.tagID) {
  6321. case TAG_ID.HTML: {
  6322. startTagInBody(p, token);
  6323. break;
  6324. }
  6325. case TAG_ID.HEAD: {
  6326. p._insertElement(token, NS.HTML);
  6327. p.headElement = p.openElements.current;
  6328. p.insertionMode = InsertionMode.IN_HEAD;
  6329. break;
  6330. }
  6331. default: {
  6332. tokenBeforeHead(p, token);
  6333. }
  6334. }
  6335. }
  6336. function endTagBeforeHead(p, token) {
  6337. const tn = token.tagID;
  6338. if (tn === TAG_ID.HEAD || tn === TAG_ID.BODY || tn === TAG_ID.HTML || tn === TAG_ID.BR) {
  6339. tokenBeforeHead(p, token);
  6340. }
  6341. else {
  6342. p._err(token, ERR.endTagWithoutMatchingOpenElement);
  6343. }
  6344. }
  6345. function tokenBeforeHead(p, token) {
  6346. p._insertFakeElement(TAG_NAMES.HEAD, TAG_ID.HEAD);
  6347. p.headElement = p.openElements.current;
  6348. p.insertionMode = InsertionMode.IN_HEAD;
  6349. p._processToken(token);
  6350. }
  6351. // The "in head" insertion mode
  6352. //------------------------------------------------------------------
  6353. function startTagInHead(p, token) {
  6354. switch (token.tagID) {
  6355. case TAG_ID.HTML: {
  6356. startTagInBody(p, token);
  6357. break;
  6358. }
  6359. case TAG_ID.BASE:
  6360. case TAG_ID.BASEFONT:
  6361. case TAG_ID.BGSOUND:
  6362. case TAG_ID.LINK:
  6363. case TAG_ID.META: {
  6364. p._appendElement(token, NS.HTML);
  6365. token.ackSelfClosing = true;
  6366. break;
  6367. }
  6368. case TAG_ID.TITLE: {
  6369. p._switchToTextParsing(token, TokenizerMode.RCDATA);
  6370. break;
  6371. }
  6372. case TAG_ID.NOSCRIPT: {
  6373. if (p.options.scriptingEnabled) {
  6374. p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
  6375. }
  6376. else {
  6377. p._insertElement(token, NS.HTML);
  6378. p.insertionMode = InsertionMode.IN_HEAD_NO_SCRIPT;
  6379. }
  6380. break;
  6381. }
  6382. case TAG_ID.NOFRAMES:
  6383. case TAG_ID.STYLE: {
  6384. p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
  6385. break;
  6386. }
  6387. case TAG_ID.SCRIPT: {
  6388. p._switchToTextParsing(token, TokenizerMode.SCRIPT_DATA);
  6389. break;
  6390. }
  6391. case TAG_ID.TEMPLATE: {
  6392. p._insertTemplate(token);
  6393. p.activeFormattingElements.insertMarker();
  6394. p.framesetOk = false;
  6395. p.insertionMode = InsertionMode.IN_TEMPLATE;
  6396. p.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
  6397. break;
  6398. }
  6399. case TAG_ID.HEAD: {
  6400. p._err(token, ERR.misplacedStartTagForHeadElement);
  6401. break;
  6402. }
  6403. default: {
  6404. tokenInHead(p, token);
  6405. }
  6406. }
  6407. }
  6408. function endTagInHead(p, token) {
  6409. switch (token.tagID) {
  6410. case TAG_ID.HEAD: {
  6411. p.openElements.pop();
  6412. p.insertionMode = InsertionMode.AFTER_HEAD;
  6413. break;
  6414. }
  6415. case TAG_ID.BODY:
  6416. case TAG_ID.BR:
  6417. case TAG_ID.HTML: {
  6418. tokenInHead(p, token);
  6419. break;
  6420. }
  6421. case TAG_ID.TEMPLATE: {
  6422. templateEndTagInHead(p, token);
  6423. break;
  6424. }
  6425. default: {
  6426. p._err(token, ERR.endTagWithoutMatchingOpenElement);
  6427. }
  6428. }
  6429. }
  6430. function templateEndTagInHead(p, token) {
  6431. if (p.openElements.tmplCount > 0) {
  6432. p.openElements.generateImpliedEndTagsThoroughly();
  6433. if (p.openElements.currentTagId !== TAG_ID.TEMPLATE) {
  6434. p._err(token, ERR.closingOfElementWithOpenChildElements);
  6435. }
  6436. p.openElements.popUntilTagNamePopped(TAG_ID.TEMPLATE);
  6437. p.activeFormattingElements.clearToLastMarker();
  6438. p.tmplInsertionModeStack.shift();
  6439. p._resetInsertionMode();
  6440. }
  6441. else {
  6442. p._err(token, ERR.endTagWithoutMatchingOpenElement);
  6443. }
  6444. }
  6445. function tokenInHead(p, token) {
  6446. p.openElements.pop();
  6447. p.insertionMode = InsertionMode.AFTER_HEAD;
  6448. p._processToken(token);
  6449. }
  6450. // The "in head no script" insertion mode
  6451. //------------------------------------------------------------------
  6452. function startTagInHeadNoScript(p, token) {
  6453. switch (token.tagID) {
  6454. case TAG_ID.HTML: {
  6455. startTagInBody(p, token);
  6456. break;
  6457. }
  6458. case TAG_ID.BASEFONT:
  6459. case TAG_ID.BGSOUND:
  6460. case TAG_ID.HEAD:
  6461. case TAG_ID.LINK:
  6462. case TAG_ID.META:
  6463. case TAG_ID.NOFRAMES:
  6464. case TAG_ID.STYLE: {
  6465. startTagInHead(p, token);
  6466. break;
  6467. }
  6468. case TAG_ID.NOSCRIPT: {
  6469. p._err(token, ERR.nestedNoscriptInHead);
  6470. break;
  6471. }
  6472. default: {
  6473. tokenInHeadNoScript(p, token);
  6474. }
  6475. }
  6476. }
  6477. function endTagInHeadNoScript(p, token) {
  6478. switch (token.tagID) {
  6479. case TAG_ID.NOSCRIPT: {
  6480. p.openElements.pop();
  6481. p.insertionMode = InsertionMode.IN_HEAD;
  6482. break;
  6483. }
  6484. case TAG_ID.BR: {
  6485. tokenInHeadNoScript(p, token);
  6486. break;
  6487. }
  6488. default: {
  6489. p._err(token, ERR.endTagWithoutMatchingOpenElement);
  6490. }
  6491. }
  6492. }
  6493. function tokenInHeadNoScript(p, token) {
  6494. const errCode = token.type === TokenType.EOF ? ERR.openElementsLeftAfterEof : ERR.disallowedContentInNoscriptInHead;
  6495. p._err(token, errCode);
  6496. p.openElements.pop();
  6497. p.insertionMode = InsertionMode.IN_HEAD;
  6498. p._processToken(token);
  6499. }
  6500. // The "after head" insertion mode
  6501. //------------------------------------------------------------------
  6502. function startTagAfterHead(p, token) {
  6503. switch (token.tagID) {
  6504. case TAG_ID.HTML: {
  6505. startTagInBody(p, token);
  6506. break;
  6507. }
  6508. case TAG_ID.BODY: {
  6509. p._insertElement(token, NS.HTML);
  6510. p.framesetOk = false;
  6511. p.insertionMode = InsertionMode.IN_BODY;
  6512. break;
  6513. }
  6514. case TAG_ID.FRAMESET: {
  6515. p._insertElement(token, NS.HTML);
  6516. p.insertionMode = InsertionMode.IN_FRAMESET;
  6517. break;
  6518. }
  6519. case TAG_ID.BASE:
  6520. case TAG_ID.BASEFONT:
  6521. case TAG_ID.BGSOUND:
  6522. case TAG_ID.LINK:
  6523. case TAG_ID.META:
  6524. case TAG_ID.NOFRAMES:
  6525. case TAG_ID.SCRIPT:
  6526. case TAG_ID.STYLE:
  6527. case TAG_ID.TEMPLATE:
  6528. case TAG_ID.TITLE: {
  6529. p._err(token, ERR.abandonedHeadElementChild);
  6530. p.openElements.push(p.headElement, TAG_ID.HEAD);
  6531. startTagInHead(p, token);
  6532. p.openElements.remove(p.headElement);
  6533. break;
  6534. }
  6535. case TAG_ID.HEAD: {
  6536. p._err(token, ERR.misplacedStartTagForHeadElement);
  6537. break;
  6538. }
  6539. default: {
  6540. tokenAfterHead(p, token);
  6541. }
  6542. }
  6543. }
  6544. function endTagAfterHead(p, token) {
  6545. switch (token.tagID) {
  6546. case TAG_ID.BODY:
  6547. case TAG_ID.HTML:
  6548. case TAG_ID.BR: {
  6549. tokenAfterHead(p, token);
  6550. break;
  6551. }
  6552. case TAG_ID.TEMPLATE: {
  6553. templateEndTagInHead(p, token);
  6554. break;
  6555. }
  6556. default: {
  6557. p._err(token, ERR.endTagWithoutMatchingOpenElement);
  6558. }
  6559. }
  6560. }
  6561. function tokenAfterHead(p, token) {
  6562. p._insertFakeElement(TAG_NAMES.BODY, TAG_ID.BODY);
  6563. p.insertionMode = InsertionMode.IN_BODY;
  6564. modeInBody(p, token);
  6565. }
  6566. // The "in body" insertion mode
  6567. //------------------------------------------------------------------
  6568. function modeInBody(p, token) {
  6569. switch (token.type) {
  6570. case TokenType.CHARACTER: {
  6571. characterInBody(p, token);
  6572. break;
  6573. }
  6574. case TokenType.WHITESPACE_CHARACTER: {
  6575. whitespaceCharacterInBody(p, token);
  6576. break;
  6577. }
  6578. case TokenType.COMMENT: {
  6579. appendComment(p, token);
  6580. break;
  6581. }
  6582. case TokenType.START_TAG: {
  6583. startTagInBody(p, token);
  6584. break;
  6585. }
  6586. case TokenType.END_TAG: {
  6587. endTagInBody(p, token);
  6588. break;
  6589. }
  6590. case TokenType.EOF: {
  6591. eofInBody(p, token);
  6592. break;
  6593. }
  6594. // Do nothing
  6595. }
  6596. }
  6597. function whitespaceCharacterInBody(p, token) {
  6598. p._reconstructActiveFormattingElements();
  6599. p._insertCharacters(token);
  6600. }
  6601. function characterInBody(p, token) {
  6602. p._reconstructActiveFormattingElements();
  6603. p._insertCharacters(token);
  6604. p.framesetOk = false;
  6605. }
  6606. function htmlStartTagInBody(p, token) {
  6607. if (p.openElements.tmplCount === 0) {
  6608. p.treeAdapter.adoptAttributes(p.openElements.items[0], token.attrs);
  6609. }
  6610. }
  6611. function bodyStartTagInBody(p, token) {
  6612. const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
  6613. if (bodyElement && p.openElements.tmplCount === 0) {
  6614. p.framesetOk = false;
  6615. p.treeAdapter.adoptAttributes(bodyElement, token.attrs);
  6616. }
  6617. }
  6618. function framesetStartTagInBody(p, token) {
  6619. const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
  6620. if (p.framesetOk && bodyElement) {
  6621. p.treeAdapter.detachNode(bodyElement);
  6622. p.openElements.popAllUpToHtmlElement();
  6623. p._insertElement(token, NS.HTML);
  6624. p.insertionMode = InsertionMode.IN_FRAMESET;
  6625. }
  6626. }
  6627. function addressStartTagInBody(p, token) {
  6628. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6629. p._closePElement();
  6630. }
  6631. p._insertElement(token, NS.HTML);
  6632. }
  6633. function numberedHeaderStartTagInBody(p, token) {
  6634. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6635. p._closePElement();
  6636. }
  6637. if (NUMBERED_HEADERS.has(p.openElements.currentTagId)) {
  6638. p.openElements.pop();
  6639. }
  6640. p._insertElement(token, NS.HTML);
  6641. }
  6642. function preStartTagInBody(p, token) {
  6643. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6644. p._closePElement();
  6645. }
  6646. p._insertElement(token, NS.HTML);
  6647. //NOTE: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move
  6648. //on to the next one. (Newlines at the start of pre blocks are ignored as an authoring convenience.)
  6649. p.skipNextNewLine = true;
  6650. p.framesetOk = false;
  6651. }
  6652. function formStartTagInBody(p, token) {
  6653. const inTemplate = p.openElements.tmplCount > 0;
  6654. if (!p.formElement || inTemplate) {
  6655. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6656. p._closePElement();
  6657. }
  6658. p._insertElement(token, NS.HTML);
  6659. if (!inTemplate) {
  6660. p.formElement = p.openElements.current;
  6661. }
  6662. }
  6663. }
  6664. function listItemStartTagInBody(p, token) {
  6665. p.framesetOk = false;
  6666. const tn = token.tagID;
  6667. for (let i = p.openElements.stackTop; i >= 0; i--) {
  6668. const elementId = p.openElements.tagIDs[i];
  6669. if ((tn === TAG_ID.LI && elementId === TAG_ID.LI) ||
  6670. ((tn === TAG_ID.DD || tn === TAG_ID.DT) && (elementId === TAG_ID.DD || elementId === TAG_ID.DT))) {
  6671. p.openElements.generateImpliedEndTagsWithExclusion(elementId);
  6672. p.openElements.popUntilTagNamePopped(elementId);
  6673. break;
  6674. }
  6675. if (elementId !== TAG_ID.ADDRESS &&
  6676. elementId !== TAG_ID.DIV &&
  6677. elementId !== TAG_ID.P &&
  6678. p._isSpecialElement(p.openElements.items[i], elementId)) {
  6679. break;
  6680. }
  6681. }
  6682. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6683. p._closePElement();
  6684. }
  6685. p._insertElement(token, NS.HTML);
  6686. }
  6687. function plaintextStartTagInBody(p, token) {
  6688. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6689. p._closePElement();
  6690. }
  6691. p._insertElement(token, NS.HTML);
  6692. p.tokenizer.state = TokenizerMode.PLAINTEXT;
  6693. }
  6694. function buttonStartTagInBody(p, token) {
  6695. if (p.openElements.hasInScope(TAG_ID.BUTTON)) {
  6696. p.openElements.generateImpliedEndTags();
  6697. p.openElements.popUntilTagNamePopped(TAG_ID.BUTTON);
  6698. }
  6699. p._reconstructActiveFormattingElements();
  6700. p._insertElement(token, NS.HTML);
  6701. p.framesetOk = false;
  6702. }
  6703. function aStartTagInBody(p, token) {
  6704. const activeElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(TAG_NAMES.A);
  6705. if (activeElementEntry) {
  6706. callAdoptionAgency(p, token);
  6707. p.openElements.remove(activeElementEntry.element);
  6708. p.activeFormattingElements.removeEntry(activeElementEntry);
  6709. }
  6710. p._reconstructActiveFormattingElements();
  6711. p._insertElement(token, NS.HTML);
  6712. p.activeFormattingElements.pushElement(p.openElements.current, token);
  6713. }
  6714. function bStartTagInBody(p, token) {
  6715. p._reconstructActiveFormattingElements();
  6716. p._insertElement(token, NS.HTML);
  6717. p.activeFormattingElements.pushElement(p.openElements.current, token);
  6718. }
  6719. function nobrStartTagInBody(p, token) {
  6720. p._reconstructActiveFormattingElements();
  6721. if (p.openElements.hasInScope(TAG_ID.NOBR)) {
  6722. callAdoptionAgency(p, token);
  6723. p._reconstructActiveFormattingElements();
  6724. }
  6725. p._insertElement(token, NS.HTML);
  6726. p.activeFormattingElements.pushElement(p.openElements.current, token);
  6727. }
  6728. function appletStartTagInBody(p, token) {
  6729. p._reconstructActiveFormattingElements();
  6730. p._insertElement(token, NS.HTML);
  6731. p.activeFormattingElements.insertMarker();
  6732. p.framesetOk = false;
  6733. }
  6734. function tableStartTagInBody(p, token) {
  6735. if (p.treeAdapter.getDocumentMode(p.document) !== DOCUMENT_MODE.QUIRKS && p.openElements.hasInButtonScope(TAG_ID.P)) {
  6736. p._closePElement();
  6737. }
  6738. p._insertElement(token, NS.HTML);
  6739. p.framesetOk = false;
  6740. p.insertionMode = InsertionMode.IN_TABLE;
  6741. }
  6742. function areaStartTagInBody(p, token) {
  6743. p._reconstructActiveFormattingElements();
  6744. p._appendElement(token, NS.HTML);
  6745. p.framesetOk = false;
  6746. token.ackSelfClosing = true;
  6747. }
  6748. function isHiddenInput(token) {
  6749. const inputType = getTokenAttr(token, ATTRS.TYPE);
  6750. return inputType != null && inputType.toLowerCase() === HIDDEN_INPUT_TYPE;
  6751. }
  6752. function inputStartTagInBody(p, token) {
  6753. p._reconstructActiveFormattingElements();
  6754. p._appendElement(token, NS.HTML);
  6755. if (!isHiddenInput(token)) {
  6756. p.framesetOk = false;
  6757. }
  6758. token.ackSelfClosing = true;
  6759. }
  6760. function paramStartTagInBody(p, token) {
  6761. p._appendElement(token, NS.HTML);
  6762. token.ackSelfClosing = true;
  6763. }
  6764. function hrStartTagInBody(p, token) {
  6765. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6766. p._closePElement();
  6767. }
  6768. p._appendElement(token, NS.HTML);
  6769. p.framesetOk = false;
  6770. token.ackSelfClosing = true;
  6771. }
  6772. function imageStartTagInBody(p, token) {
  6773. token.tagName = TAG_NAMES.IMG;
  6774. token.tagID = TAG_ID.IMG;
  6775. areaStartTagInBody(p, token);
  6776. }
  6777. function textareaStartTagInBody(p, token) {
  6778. p._insertElement(token, NS.HTML);
  6779. //NOTE: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move
  6780. //on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
  6781. p.skipNextNewLine = true;
  6782. p.tokenizer.state = TokenizerMode.RCDATA;
  6783. p.originalInsertionMode = p.insertionMode;
  6784. p.framesetOk = false;
  6785. p.insertionMode = InsertionMode.TEXT;
  6786. }
  6787. function xmpStartTagInBody(p, token) {
  6788. if (p.openElements.hasInButtonScope(TAG_ID.P)) {
  6789. p._closePElement();
  6790. }
  6791. p._reconstructActiveFormattingElements();
  6792. p.framesetOk = false;
  6793. p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
  6794. }
  6795. function iframeStartTagInBody(p, token) {
  6796. p.framesetOk = false;
  6797. p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
  6798. }
  6799. //NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse
  6800. //<noembed>/<noframes> as rawtext.
  6801. function rawTextStartTagInBody(p, token) {
  6802. p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
  6803. }
  6804. function selectStartTagInBody(p, token) {
  6805. p._reconstructActiveFormattingElements();
  6806. p._insertElement(token, NS.HTML);
  6807. p.framesetOk = false;
  6808. p.insertionMode =
  6809. p.insertionMode === InsertionMode.IN_TABLE ||
  6810. p.insertionMode === InsertionMode.IN_CAPTION ||
  6811. p.insertionMode === InsertionMode.IN_TABLE_BODY ||
  6812. p.insertionMode === InsertionMode.IN_ROW ||
  6813. p.insertionMode === InsertionMode.IN_CELL
  6814. ? InsertionMode.IN_SELECT_IN_TABLE
  6815. : InsertionMode.IN_SELECT;
  6816. }
  6817. function optgroupStartTagInBody(p, token) {
  6818. if (p.openElements.currentTagId === TAG_ID.OPTION) {
  6819. p.openElements.pop();
  6820. }
  6821. p._reconstructActiveFormattingElements();
  6822. p._insertElement(token, NS.HTML);
  6823. }
  6824. function rbStartTagInBody(p, token) {
  6825. if (p.openElements.hasInScope(TAG_ID.RUBY)) {
  6826. p.openElements.generateImpliedEndTags();
  6827. }
  6828. p._insertElement(token, NS.HTML);
  6829. }
  6830. function rtStartTagInBody(p, token) {
  6831. if (p.openElements.hasInScope(TAG_ID.RUBY)) {
  6832. p.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.RTC);
  6833. }
  6834. p._insertElement(token, NS.HTML);
  6835. }
  6836. function mathStartTagInBody(p, token) {
  6837. p._reconstructActiveFormattingElements();
  6838. adjustTokenMathMLAttrs(token);
  6839. adjustTokenXMLAttrs(token);
  6840. if (token.selfClosing) {
  6841. p._appendElement(token, NS.MATHML);
  6842. }
  6843. else {
  6844. p._insertElement(token, NS.MATHML);
  6845. }
  6846. token.ackSelfClosing = true;
  6847. }
  6848. function svgStartTagInBody(p, token) {
  6849. p._reconstructActiveFormattingElements();
  6850. adjustTokenSVGAttrs(token);
  6851. adjustTokenXMLAttrs(token);
  6852. if (token.selfClosing) {
  6853. p._appendElement(token, NS.SVG);
  6854. }
  6855. else {
  6856. p._insertElement(token, NS.SVG);
  6857. }
  6858. token.ackSelfClosing = true;
  6859. }
  6860. function genericStartTagInBody(p, token) {
  6861. p._reconstructActiveFormattingElements();
  6862. p._insertElement(token, NS.HTML);
  6863. }
  6864. function startTagInBody(p, token) {
  6865. switch (token.tagID) {
  6866. case TAG_ID.I:
  6867. case TAG_ID.S:
  6868. case TAG_ID.B:
  6869. case TAG_ID.U:
  6870. case TAG_ID.EM:
  6871. case TAG_ID.TT:
  6872. case TAG_ID.BIG:
  6873. case TAG_ID.CODE:
  6874. case TAG_ID.FONT:
  6875. case TAG_ID.SMALL:
  6876. case TAG_ID.STRIKE:
  6877. case TAG_ID.STRONG: {
  6878. bStartTagInBody(p, token);
  6879. break;
  6880. }
  6881. case TAG_ID.A: {
  6882. aStartTagInBody(p, token);
  6883. break;
  6884. }
  6885. case TAG_ID.H1:
  6886. case TAG_ID.H2:
  6887. case TAG_ID.H3:
  6888. case TAG_ID.H4:
  6889. case TAG_ID.H5:
  6890. case TAG_ID.H6: {
  6891. numberedHeaderStartTagInBody(p, token);
  6892. break;
  6893. }
  6894. case TAG_ID.P:
  6895. case TAG_ID.DL:
  6896. case TAG_ID.OL:
  6897. case TAG_ID.UL:
  6898. case TAG_ID.DIV:
  6899. case TAG_ID.DIR:
  6900. case TAG_ID.NAV:
  6901. case TAG_ID.MAIN:
  6902. case TAG_ID.MENU:
  6903. case TAG_ID.ASIDE:
  6904. case TAG_ID.CENTER:
  6905. case TAG_ID.FIGURE:
  6906. case TAG_ID.FOOTER:
  6907. case TAG_ID.HEADER:
  6908. case TAG_ID.HGROUP:
  6909. case TAG_ID.DIALOG:
  6910. case TAG_ID.DETAILS:
  6911. case TAG_ID.ADDRESS:
  6912. case TAG_ID.ARTICLE:
  6913. case TAG_ID.SEARCH:
  6914. case TAG_ID.SECTION:
  6915. case TAG_ID.SUMMARY:
  6916. case TAG_ID.FIELDSET:
  6917. case TAG_ID.BLOCKQUOTE:
  6918. case TAG_ID.FIGCAPTION: {
  6919. addressStartTagInBody(p, token);
  6920. break;
  6921. }
  6922. case TAG_ID.LI:
  6923. case TAG_ID.DD:
  6924. case TAG_ID.DT: {
  6925. listItemStartTagInBody(p, token);
  6926. break;
  6927. }
  6928. case TAG_ID.BR:
  6929. case TAG_ID.IMG:
  6930. case TAG_ID.WBR:
  6931. case TAG_ID.AREA:
  6932. case TAG_ID.EMBED:
  6933. case TAG_ID.KEYGEN: {
  6934. areaStartTagInBody(p, token);
  6935. break;
  6936. }
  6937. case TAG_ID.HR: {
  6938. hrStartTagInBody(p, token);
  6939. break;
  6940. }
  6941. case TAG_ID.RB:
  6942. case TAG_ID.RTC: {
  6943. rbStartTagInBody(p, token);
  6944. break;
  6945. }
  6946. case TAG_ID.RT:
  6947. case TAG_ID.RP: {
  6948. rtStartTagInBody(p, token);
  6949. break;
  6950. }
  6951. case TAG_ID.PRE:
  6952. case TAG_ID.LISTING: {
  6953. preStartTagInBody(p, token);
  6954. break;
  6955. }
  6956. case TAG_ID.XMP: {
  6957. xmpStartTagInBody(p, token);
  6958. break;
  6959. }
  6960. case TAG_ID.SVG: {
  6961. svgStartTagInBody(p, token);
  6962. break;
  6963. }
  6964. case TAG_ID.HTML: {
  6965. htmlStartTagInBody(p, token);
  6966. break;
  6967. }
  6968. case TAG_ID.BASE:
  6969. case TAG_ID.LINK:
  6970. case TAG_ID.META:
  6971. case TAG_ID.STYLE:
  6972. case TAG_ID.TITLE:
  6973. case TAG_ID.SCRIPT:
  6974. case TAG_ID.BGSOUND:
  6975. case TAG_ID.BASEFONT:
  6976. case TAG_ID.TEMPLATE: {
  6977. startTagInHead(p, token);
  6978. break;
  6979. }
  6980. case TAG_ID.BODY: {
  6981. bodyStartTagInBody(p, token);
  6982. break;
  6983. }
  6984. case TAG_ID.FORM: {
  6985. formStartTagInBody(p, token);
  6986. break;
  6987. }
  6988. case TAG_ID.NOBR: {
  6989. nobrStartTagInBody(p, token);
  6990. break;
  6991. }
  6992. case TAG_ID.MATH: {
  6993. mathStartTagInBody(p, token);
  6994. break;
  6995. }
  6996. case TAG_ID.TABLE: {
  6997. tableStartTagInBody(p, token);
  6998. break;
  6999. }
  7000. case TAG_ID.INPUT: {
  7001. inputStartTagInBody(p, token);
  7002. break;
  7003. }
  7004. case TAG_ID.PARAM:
  7005. case TAG_ID.TRACK:
  7006. case TAG_ID.SOURCE: {
  7007. paramStartTagInBody(p, token);
  7008. break;
  7009. }
  7010. case TAG_ID.IMAGE: {
  7011. imageStartTagInBody(p, token);
  7012. break;
  7013. }
  7014. case TAG_ID.BUTTON: {
  7015. buttonStartTagInBody(p, token);
  7016. break;
  7017. }
  7018. case TAG_ID.APPLET:
  7019. case TAG_ID.OBJECT:
  7020. case TAG_ID.MARQUEE: {
  7021. appletStartTagInBody(p, token);
  7022. break;
  7023. }
  7024. case TAG_ID.IFRAME: {
  7025. iframeStartTagInBody(p, token);
  7026. break;
  7027. }
  7028. case TAG_ID.SELECT: {
  7029. selectStartTagInBody(p, token);
  7030. break;
  7031. }
  7032. case TAG_ID.OPTION:
  7033. case TAG_ID.OPTGROUP: {
  7034. optgroupStartTagInBody(p, token);
  7035. break;
  7036. }
  7037. case TAG_ID.NOEMBED:
  7038. case TAG_ID.NOFRAMES: {
  7039. rawTextStartTagInBody(p, token);
  7040. break;
  7041. }
  7042. case TAG_ID.FRAMESET: {
  7043. framesetStartTagInBody(p, token);
  7044. break;
  7045. }
  7046. case TAG_ID.TEXTAREA: {
  7047. textareaStartTagInBody(p, token);
  7048. break;
  7049. }
  7050. case TAG_ID.NOSCRIPT: {
  7051. if (p.options.scriptingEnabled) {
  7052. rawTextStartTagInBody(p, token);
  7053. }
  7054. else {
  7055. genericStartTagInBody(p, token);
  7056. }
  7057. break;
  7058. }
  7059. case TAG_ID.PLAINTEXT: {
  7060. plaintextStartTagInBody(p, token);
  7061. break;
  7062. }
  7063. case TAG_ID.COL:
  7064. case TAG_ID.TH:
  7065. case TAG_ID.TD:
  7066. case TAG_ID.TR:
  7067. case TAG_ID.HEAD:
  7068. case TAG_ID.FRAME:
  7069. case TAG_ID.TBODY:
  7070. case TAG_ID.TFOOT:
  7071. case TAG_ID.THEAD:
  7072. case TAG_ID.CAPTION:
  7073. case TAG_ID.COLGROUP: {
  7074. // Ignore token
  7075. break;
  7076. }
  7077. default: {
  7078. genericStartTagInBody(p, token);
  7079. }
  7080. }
  7081. }
  7082. function bodyEndTagInBody(p, token) {
  7083. if (p.openElements.hasInScope(TAG_ID.BODY)) {
  7084. p.insertionMode = InsertionMode.AFTER_BODY;
  7085. //NOTE: <body> is never popped from the stack, so we need to updated
  7086. //the end location explicitly.
  7087. if (p.options.sourceCodeLocationInfo) {
  7088. const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
  7089. if (bodyElement) {
  7090. p._setEndLocation(bodyElement, token);
  7091. }
  7092. }
  7093. }
  7094. }
  7095. function htmlEndTagInBody(p, token) {
  7096. if (p.openElements.hasInScope(TAG_ID.BODY)) {
  7097. p.insertionMode = InsertionMode.AFTER_BODY;
  7098. endTagAfterBody(p, token);
  7099. }
  7100. }
  7101. function addressEndTagInBody(p, token) {
  7102. const tn = token.tagID;
  7103. if (p.openElements.hasInScope(tn)) {
  7104. p.openElements.generateImpliedEndTags();
  7105. p.openElements.popUntilTagNamePopped(tn);
  7106. }
  7107. }
  7108. function formEndTagInBody(p) {
  7109. const inTemplate = p.openElements.tmplCount > 0;
  7110. const { formElement } = p;
  7111. if (!inTemplate) {
  7112. p.formElement = null;
  7113. }
  7114. if ((formElement || inTemplate) && p.openElements.hasInScope(TAG_ID.FORM)) {
  7115. p.openElements.generateImpliedEndTags();
  7116. if (inTemplate) {
  7117. p.openElements.popUntilTagNamePopped(TAG_ID.FORM);
  7118. }
  7119. else if (formElement) {
  7120. p.openElements.remove(formElement);
  7121. }
  7122. }
  7123. }
  7124. function pEndTagInBody(p) {
  7125. if (!p.openElements.hasInButtonScope(TAG_ID.P)) {
  7126. p._insertFakeElement(TAG_NAMES.P, TAG_ID.P);
  7127. }
  7128. p._closePElement();
  7129. }
  7130. function liEndTagInBody(p) {
  7131. if (p.openElements.hasInListItemScope(TAG_ID.LI)) {
  7132. p.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.LI);
  7133. p.openElements.popUntilTagNamePopped(TAG_ID.LI);
  7134. }
  7135. }
  7136. function ddEndTagInBody(p, token) {
  7137. const tn = token.tagID;
  7138. if (p.openElements.hasInScope(tn)) {
  7139. p.openElements.generateImpliedEndTagsWithExclusion(tn);
  7140. p.openElements.popUntilTagNamePopped(tn);
  7141. }
  7142. }
  7143. function numberedHeaderEndTagInBody(p) {
  7144. if (p.openElements.hasNumberedHeaderInScope()) {
  7145. p.openElements.generateImpliedEndTags();
  7146. p.openElements.popUntilNumberedHeaderPopped();
  7147. }
  7148. }
  7149. function appletEndTagInBody(p, token) {
  7150. const tn = token.tagID;
  7151. if (p.openElements.hasInScope(tn)) {
  7152. p.openElements.generateImpliedEndTags();
  7153. p.openElements.popUntilTagNamePopped(tn);
  7154. p.activeFormattingElements.clearToLastMarker();
  7155. }
  7156. }
  7157. function brEndTagInBody(p) {
  7158. p._reconstructActiveFormattingElements();
  7159. p._insertFakeElement(TAG_NAMES.BR, TAG_ID.BR);
  7160. p.openElements.pop();
  7161. p.framesetOk = false;
  7162. }
  7163. function genericEndTagInBody(p, token) {
  7164. const tn = token.tagName;
  7165. const tid = token.tagID;
  7166. for (let i = p.openElements.stackTop; i > 0; i--) {
  7167. const element = p.openElements.items[i];
  7168. const elementId = p.openElements.tagIDs[i];
  7169. // Compare the tag name here, as the tag might not be a known tag with an ID.
  7170. if (tid === elementId && (tid !== TAG_ID.UNKNOWN || p.treeAdapter.getTagName(element) === tn)) {
  7171. p.openElements.generateImpliedEndTagsWithExclusion(tid);
  7172. if (p.openElements.stackTop >= i)
  7173. p.openElements.shortenToLength(i);
  7174. break;
  7175. }
  7176. if (p._isSpecialElement(element, elementId)) {
  7177. break;
  7178. }
  7179. }
  7180. }
  7181. function endTagInBody(p, token) {
  7182. switch (token.tagID) {
  7183. case TAG_ID.A:
  7184. case TAG_ID.B:
  7185. case TAG_ID.I:
  7186. case TAG_ID.S:
  7187. case TAG_ID.U:
  7188. case TAG_ID.EM:
  7189. case TAG_ID.TT:
  7190. case TAG_ID.BIG:
  7191. case TAG_ID.CODE:
  7192. case TAG_ID.FONT:
  7193. case TAG_ID.NOBR:
  7194. case TAG_ID.SMALL:
  7195. case TAG_ID.STRIKE:
  7196. case TAG_ID.STRONG: {
  7197. callAdoptionAgency(p, token);
  7198. break;
  7199. }
  7200. case TAG_ID.P: {
  7201. pEndTagInBody(p);
  7202. break;
  7203. }
  7204. case TAG_ID.DL:
  7205. case TAG_ID.UL:
  7206. case TAG_ID.OL:
  7207. case TAG_ID.DIR:
  7208. case TAG_ID.DIV:
  7209. case TAG_ID.NAV:
  7210. case TAG_ID.PRE:
  7211. case TAG_ID.MAIN:
  7212. case TAG_ID.MENU:
  7213. case TAG_ID.ASIDE:
  7214. case TAG_ID.BUTTON:
  7215. case TAG_ID.CENTER:
  7216. case TAG_ID.FIGURE:
  7217. case TAG_ID.FOOTER:
  7218. case TAG_ID.HEADER:
  7219. case TAG_ID.HGROUP:
  7220. case TAG_ID.DIALOG:
  7221. case TAG_ID.ADDRESS:
  7222. case TAG_ID.ARTICLE:
  7223. case TAG_ID.DETAILS:
  7224. case TAG_ID.SEARCH:
  7225. case TAG_ID.SECTION:
  7226. case TAG_ID.SUMMARY:
  7227. case TAG_ID.LISTING:
  7228. case TAG_ID.FIELDSET:
  7229. case TAG_ID.BLOCKQUOTE:
  7230. case TAG_ID.FIGCAPTION: {
  7231. addressEndTagInBody(p, token);
  7232. break;
  7233. }
  7234. case TAG_ID.LI: {
  7235. liEndTagInBody(p);
  7236. break;
  7237. }
  7238. case TAG_ID.DD:
  7239. case TAG_ID.DT: {
  7240. ddEndTagInBody(p, token);
  7241. break;
  7242. }
  7243. case TAG_ID.H1:
  7244. case TAG_ID.H2:
  7245. case TAG_ID.H3:
  7246. case TAG_ID.H4:
  7247. case TAG_ID.H5:
  7248. case TAG_ID.H6: {
  7249. numberedHeaderEndTagInBody(p);
  7250. break;
  7251. }
  7252. case TAG_ID.BR: {
  7253. brEndTagInBody(p);
  7254. break;
  7255. }
  7256. case TAG_ID.BODY: {
  7257. bodyEndTagInBody(p, token);
  7258. break;
  7259. }
  7260. case TAG_ID.HTML: {
  7261. htmlEndTagInBody(p, token);
  7262. break;
  7263. }
  7264. case TAG_ID.FORM: {
  7265. formEndTagInBody(p);
  7266. break;
  7267. }
  7268. case TAG_ID.APPLET:
  7269. case TAG_ID.OBJECT:
  7270. case TAG_ID.MARQUEE: {
  7271. appletEndTagInBody(p, token);
  7272. break;
  7273. }
  7274. case TAG_ID.TEMPLATE: {
  7275. templateEndTagInHead(p, token);
  7276. break;
  7277. }
  7278. default: {
  7279. genericEndTagInBody(p, token);
  7280. }
  7281. }
  7282. }
  7283. function eofInBody(p, token) {
  7284. if (p.tmplInsertionModeStack.length > 0) {
  7285. eofInTemplate(p, token);
  7286. }
  7287. else {
  7288. stopParsing(p, token);
  7289. }
  7290. }
  7291. // The "text" insertion mode
  7292. //------------------------------------------------------------------
  7293. function endTagInText(p, token) {
  7294. var _a;
  7295. if (token.tagID === TAG_ID.SCRIPT) {
  7296. (_a = p.scriptHandler) === null || _a === void 0 ? void 0 : _a.call(p, p.openElements.current);
  7297. }
  7298. p.openElements.pop();
  7299. p.insertionMode = p.originalInsertionMode;
  7300. }
  7301. function eofInText(p, token) {
  7302. p._err(token, ERR.eofInElementThatCanContainOnlyText);
  7303. p.openElements.pop();
  7304. p.insertionMode = p.originalInsertionMode;
  7305. p.onEof(token);
  7306. }
  7307. // The "in table" insertion mode
  7308. //------------------------------------------------------------------
  7309. function characterInTable(p, token) {
  7310. if (TABLE_STRUCTURE_TAGS.has(p.openElements.currentTagId)) {
  7311. p.pendingCharacterTokens.length = 0;
  7312. p.hasNonWhitespacePendingCharacterToken = false;
  7313. p.originalInsertionMode = p.insertionMode;
  7314. p.insertionMode = InsertionMode.IN_TABLE_TEXT;
  7315. switch (token.type) {
  7316. case TokenType.CHARACTER: {
  7317. characterInTableText(p, token);
  7318. break;
  7319. }
  7320. case TokenType.WHITESPACE_CHARACTER: {
  7321. whitespaceCharacterInTableText(p, token);
  7322. break;
  7323. }
  7324. // Ignore null
  7325. }
  7326. }
  7327. else {
  7328. tokenInTable(p, token);
  7329. }
  7330. }
  7331. function captionStartTagInTable(p, token) {
  7332. p.openElements.clearBackToTableContext();
  7333. p.activeFormattingElements.insertMarker();
  7334. p._insertElement(token, NS.HTML);
  7335. p.insertionMode = InsertionMode.IN_CAPTION;
  7336. }
  7337. function colgroupStartTagInTable(p, token) {
  7338. p.openElements.clearBackToTableContext();
  7339. p._insertElement(token, NS.HTML);
  7340. p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
  7341. }
  7342. function colStartTagInTable(p, token) {
  7343. p.openElements.clearBackToTableContext();
  7344. p._insertFakeElement(TAG_NAMES.COLGROUP, TAG_ID.COLGROUP);
  7345. p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
  7346. startTagInColumnGroup(p, token);
  7347. }
  7348. function tbodyStartTagInTable(p, token) {
  7349. p.openElements.clearBackToTableContext();
  7350. p._insertElement(token, NS.HTML);
  7351. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7352. }
  7353. function tdStartTagInTable(p, token) {
  7354. p.openElements.clearBackToTableContext();
  7355. p._insertFakeElement(TAG_NAMES.TBODY, TAG_ID.TBODY);
  7356. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7357. startTagInTableBody(p, token);
  7358. }
  7359. function tableStartTagInTable(p, token) {
  7360. if (p.openElements.hasInTableScope(TAG_ID.TABLE)) {
  7361. p.openElements.popUntilTagNamePopped(TAG_ID.TABLE);
  7362. p._resetInsertionMode();
  7363. p._processStartTag(token);
  7364. }
  7365. }
  7366. function inputStartTagInTable(p, token) {
  7367. if (isHiddenInput(token)) {
  7368. p._appendElement(token, NS.HTML);
  7369. }
  7370. else {
  7371. tokenInTable(p, token);
  7372. }
  7373. token.ackSelfClosing = true;
  7374. }
  7375. function formStartTagInTable(p, token) {
  7376. if (!p.formElement && p.openElements.tmplCount === 0) {
  7377. p._insertElement(token, NS.HTML);
  7378. p.formElement = p.openElements.current;
  7379. p.openElements.pop();
  7380. }
  7381. }
  7382. function startTagInTable(p, token) {
  7383. switch (token.tagID) {
  7384. case TAG_ID.TD:
  7385. case TAG_ID.TH:
  7386. case TAG_ID.TR: {
  7387. tdStartTagInTable(p, token);
  7388. break;
  7389. }
  7390. case TAG_ID.STYLE:
  7391. case TAG_ID.SCRIPT:
  7392. case TAG_ID.TEMPLATE: {
  7393. startTagInHead(p, token);
  7394. break;
  7395. }
  7396. case TAG_ID.COL: {
  7397. colStartTagInTable(p, token);
  7398. break;
  7399. }
  7400. case TAG_ID.FORM: {
  7401. formStartTagInTable(p, token);
  7402. break;
  7403. }
  7404. case TAG_ID.TABLE: {
  7405. tableStartTagInTable(p, token);
  7406. break;
  7407. }
  7408. case TAG_ID.TBODY:
  7409. case TAG_ID.TFOOT:
  7410. case TAG_ID.THEAD: {
  7411. tbodyStartTagInTable(p, token);
  7412. break;
  7413. }
  7414. case TAG_ID.INPUT: {
  7415. inputStartTagInTable(p, token);
  7416. break;
  7417. }
  7418. case TAG_ID.CAPTION: {
  7419. captionStartTagInTable(p, token);
  7420. break;
  7421. }
  7422. case TAG_ID.COLGROUP: {
  7423. colgroupStartTagInTable(p, token);
  7424. break;
  7425. }
  7426. default: {
  7427. tokenInTable(p, token);
  7428. }
  7429. }
  7430. }
  7431. function endTagInTable(p, token) {
  7432. switch (token.tagID) {
  7433. case TAG_ID.TABLE: {
  7434. if (p.openElements.hasInTableScope(TAG_ID.TABLE)) {
  7435. p.openElements.popUntilTagNamePopped(TAG_ID.TABLE);
  7436. p._resetInsertionMode();
  7437. }
  7438. break;
  7439. }
  7440. case TAG_ID.TEMPLATE: {
  7441. templateEndTagInHead(p, token);
  7442. break;
  7443. }
  7444. case TAG_ID.BODY:
  7445. case TAG_ID.CAPTION:
  7446. case TAG_ID.COL:
  7447. case TAG_ID.COLGROUP:
  7448. case TAG_ID.HTML:
  7449. case TAG_ID.TBODY:
  7450. case TAG_ID.TD:
  7451. case TAG_ID.TFOOT:
  7452. case TAG_ID.TH:
  7453. case TAG_ID.THEAD:
  7454. case TAG_ID.TR: {
  7455. // Ignore token
  7456. break;
  7457. }
  7458. default: {
  7459. tokenInTable(p, token);
  7460. }
  7461. }
  7462. }
  7463. function tokenInTable(p, token) {
  7464. const savedFosterParentingState = p.fosterParentingEnabled;
  7465. p.fosterParentingEnabled = true;
  7466. // Process token in `In Body` mode
  7467. modeInBody(p, token);
  7468. p.fosterParentingEnabled = savedFosterParentingState;
  7469. }
  7470. // The "in table text" insertion mode
  7471. //------------------------------------------------------------------
  7472. function whitespaceCharacterInTableText(p, token) {
  7473. p.pendingCharacterTokens.push(token);
  7474. }
  7475. function characterInTableText(p, token) {
  7476. p.pendingCharacterTokens.push(token);
  7477. p.hasNonWhitespacePendingCharacterToken = true;
  7478. }
  7479. function tokenInTableText(p, token) {
  7480. let i = 0;
  7481. if (p.hasNonWhitespacePendingCharacterToken) {
  7482. for (; i < p.pendingCharacterTokens.length; i++) {
  7483. tokenInTable(p, p.pendingCharacterTokens[i]);
  7484. }
  7485. }
  7486. else {
  7487. for (; i < p.pendingCharacterTokens.length; i++) {
  7488. p._insertCharacters(p.pendingCharacterTokens[i]);
  7489. }
  7490. }
  7491. p.insertionMode = p.originalInsertionMode;
  7492. p._processToken(token);
  7493. }
  7494. // The "in caption" insertion mode
  7495. //------------------------------------------------------------------
  7496. const TABLE_VOID_ELEMENTS = new Set([TAG_ID.CAPTION, TAG_ID.COL, TAG_ID.COLGROUP, TAG_ID.TBODY, TAG_ID.TD, TAG_ID.TFOOT, TAG_ID.TH, TAG_ID.THEAD, TAG_ID.TR]);
  7497. function startTagInCaption(p, token) {
  7498. const tn = token.tagID;
  7499. if (TABLE_VOID_ELEMENTS.has(tn)) {
  7500. if (p.openElements.hasInTableScope(TAG_ID.CAPTION)) {
  7501. p.openElements.generateImpliedEndTags();
  7502. p.openElements.popUntilTagNamePopped(TAG_ID.CAPTION);
  7503. p.activeFormattingElements.clearToLastMarker();
  7504. p.insertionMode = InsertionMode.IN_TABLE;
  7505. startTagInTable(p, token);
  7506. }
  7507. }
  7508. else {
  7509. startTagInBody(p, token);
  7510. }
  7511. }
  7512. function endTagInCaption(p, token) {
  7513. const tn = token.tagID;
  7514. switch (tn) {
  7515. case TAG_ID.CAPTION:
  7516. case TAG_ID.TABLE: {
  7517. if (p.openElements.hasInTableScope(TAG_ID.CAPTION)) {
  7518. p.openElements.generateImpliedEndTags();
  7519. p.openElements.popUntilTagNamePopped(TAG_ID.CAPTION);
  7520. p.activeFormattingElements.clearToLastMarker();
  7521. p.insertionMode = InsertionMode.IN_TABLE;
  7522. if (tn === TAG_ID.TABLE) {
  7523. endTagInTable(p, token);
  7524. }
  7525. }
  7526. break;
  7527. }
  7528. case TAG_ID.BODY:
  7529. case TAG_ID.COL:
  7530. case TAG_ID.COLGROUP:
  7531. case TAG_ID.HTML:
  7532. case TAG_ID.TBODY:
  7533. case TAG_ID.TD:
  7534. case TAG_ID.TFOOT:
  7535. case TAG_ID.TH:
  7536. case TAG_ID.THEAD:
  7537. case TAG_ID.TR: {
  7538. // Ignore token
  7539. break;
  7540. }
  7541. default: {
  7542. endTagInBody(p, token);
  7543. }
  7544. }
  7545. }
  7546. // The "in column group" insertion mode
  7547. //------------------------------------------------------------------
  7548. function startTagInColumnGroup(p, token) {
  7549. switch (token.tagID) {
  7550. case TAG_ID.HTML: {
  7551. startTagInBody(p, token);
  7552. break;
  7553. }
  7554. case TAG_ID.COL: {
  7555. p._appendElement(token, NS.HTML);
  7556. token.ackSelfClosing = true;
  7557. break;
  7558. }
  7559. case TAG_ID.TEMPLATE: {
  7560. startTagInHead(p, token);
  7561. break;
  7562. }
  7563. default: {
  7564. tokenInColumnGroup(p, token);
  7565. }
  7566. }
  7567. }
  7568. function endTagInColumnGroup(p, token) {
  7569. switch (token.tagID) {
  7570. case TAG_ID.COLGROUP: {
  7571. if (p.openElements.currentTagId === TAG_ID.COLGROUP) {
  7572. p.openElements.pop();
  7573. p.insertionMode = InsertionMode.IN_TABLE;
  7574. }
  7575. break;
  7576. }
  7577. case TAG_ID.TEMPLATE: {
  7578. templateEndTagInHead(p, token);
  7579. break;
  7580. }
  7581. case TAG_ID.COL: {
  7582. // Ignore token
  7583. break;
  7584. }
  7585. default: {
  7586. tokenInColumnGroup(p, token);
  7587. }
  7588. }
  7589. }
  7590. function tokenInColumnGroup(p, token) {
  7591. if (p.openElements.currentTagId === TAG_ID.COLGROUP) {
  7592. p.openElements.pop();
  7593. p.insertionMode = InsertionMode.IN_TABLE;
  7594. p._processToken(token);
  7595. }
  7596. }
  7597. // The "in table body" insertion mode
  7598. //------------------------------------------------------------------
  7599. function startTagInTableBody(p, token) {
  7600. switch (token.tagID) {
  7601. case TAG_ID.TR: {
  7602. p.openElements.clearBackToTableBodyContext();
  7603. p._insertElement(token, NS.HTML);
  7604. p.insertionMode = InsertionMode.IN_ROW;
  7605. break;
  7606. }
  7607. case TAG_ID.TH:
  7608. case TAG_ID.TD: {
  7609. p.openElements.clearBackToTableBodyContext();
  7610. p._insertFakeElement(TAG_NAMES.TR, TAG_ID.TR);
  7611. p.insertionMode = InsertionMode.IN_ROW;
  7612. startTagInRow(p, token);
  7613. break;
  7614. }
  7615. case TAG_ID.CAPTION:
  7616. case TAG_ID.COL:
  7617. case TAG_ID.COLGROUP:
  7618. case TAG_ID.TBODY:
  7619. case TAG_ID.TFOOT:
  7620. case TAG_ID.THEAD: {
  7621. if (p.openElements.hasTableBodyContextInTableScope()) {
  7622. p.openElements.clearBackToTableBodyContext();
  7623. p.openElements.pop();
  7624. p.insertionMode = InsertionMode.IN_TABLE;
  7625. startTagInTable(p, token);
  7626. }
  7627. break;
  7628. }
  7629. default: {
  7630. startTagInTable(p, token);
  7631. }
  7632. }
  7633. }
  7634. function endTagInTableBody(p, token) {
  7635. const tn = token.tagID;
  7636. switch (token.tagID) {
  7637. case TAG_ID.TBODY:
  7638. case TAG_ID.TFOOT:
  7639. case TAG_ID.THEAD: {
  7640. if (p.openElements.hasInTableScope(tn)) {
  7641. p.openElements.clearBackToTableBodyContext();
  7642. p.openElements.pop();
  7643. p.insertionMode = InsertionMode.IN_TABLE;
  7644. }
  7645. break;
  7646. }
  7647. case TAG_ID.TABLE: {
  7648. if (p.openElements.hasTableBodyContextInTableScope()) {
  7649. p.openElements.clearBackToTableBodyContext();
  7650. p.openElements.pop();
  7651. p.insertionMode = InsertionMode.IN_TABLE;
  7652. endTagInTable(p, token);
  7653. }
  7654. break;
  7655. }
  7656. case TAG_ID.BODY:
  7657. case TAG_ID.CAPTION:
  7658. case TAG_ID.COL:
  7659. case TAG_ID.COLGROUP:
  7660. case TAG_ID.HTML:
  7661. case TAG_ID.TD:
  7662. case TAG_ID.TH:
  7663. case TAG_ID.TR: {
  7664. // Ignore token
  7665. break;
  7666. }
  7667. default: {
  7668. endTagInTable(p, token);
  7669. }
  7670. }
  7671. }
  7672. // The "in row" insertion mode
  7673. //------------------------------------------------------------------
  7674. function startTagInRow(p, token) {
  7675. switch (token.tagID) {
  7676. case TAG_ID.TH:
  7677. case TAG_ID.TD: {
  7678. p.openElements.clearBackToTableRowContext();
  7679. p._insertElement(token, NS.HTML);
  7680. p.insertionMode = InsertionMode.IN_CELL;
  7681. p.activeFormattingElements.insertMarker();
  7682. break;
  7683. }
  7684. case TAG_ID.CAPTION:
  7685. case TAG_ID.COL:
  7686. case TAG_ID.COLGROUP:
  7687. case TAG_ID.TBODY:
  7688. case TAG_ID.TFOOT:
  7689. case TAG_ID.THEAD:
  7690. case TAG_ID.TR: {
  7691. if (p.openElements.hasInTableScope(TAG_ID.TR)) {
  7692. p.openElements.clearBackToTableRowContext();
  7693. p.openElements.pop();
  7694. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7695. startTagInTableBody(p, token);
  7696. }
  7697. break;
  7698. }
  7699. default: {
  7700. startTagInTable(p, token);
  7701. }
  7702. }
  7703. }
  7704. function endTagInRow(p, token) {
  7705. switch (token.tagID) {
  7706. case TAG_ID.TR: {
  7707. if (p.openElements.hasInTableScope(TAG_ID.TR)) {
  7708. p.openElements.clearBackToTableRowContext();
  7709. p.openElements.pop();
  7710. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7711. }
  7712. break;
  7713. }
  7714. case TAG_ID.TABLE: {
  7715. if (p.openElements.hasInTableScope(TAG_ID.TR)) {
  7716. p.openElements.clearBackToTableRowContext();
  7717. p.openElements.pop();
  7718. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7719. endTagInTableBody(p, token);
  7720. }
  7721. break;
  7722. }
  7723. case TAG_ID.TBODY:
  7724. case TAG_ID.TFOOT:
  7725. case TAG_ID.THEAD: {
  7726. if (p.openElements.hasInTableScope(token.tagID) || p.openElements.hasInTableScope(TAG_ID.TR)) {
  7727. p.openElements.clearBackToTableRowContext();
  7728. p.openElements.pop();
  7729. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7730. endTagInTableBody(p, token);
  7731. }
  7732. break;
  7733. }
  7734. case TAG_ID.BODY:
  7735. case TAG_ID.CAPTION:
  7736. case TAG_ID.COL:
  7737. case TAG_ID.COLGROUP:
  7738. case TAG_ID.HTML:
  7739. case TAG_ID.TD:
  7740. case TAG_ID.TH: {
  7741. // Ignore end tag
  7742. break;
  7743. }
  7744. default: {
  7745. endTagInTable(p, token);
  7746. }
  7747. }
  7748. }
  7749. // The "in cell" insertion mode
  7750. //------------------------------------------------------------------
  7751. function startTagInCell(p, token) {
  7752. const tn = token.tagID;
  7753. if (TABLE_VOID_ELEMENTS.has(tn)) {
  7754. if (p.openElements.hasInTableScope(TAG_ID.TD) || p.openElements.hasInTableScope(TAG_ID.TH)) {
  7755. p._closeTableCell();
  7756. startTagInRow(p, token);
  7757. }
  7758. }
  7759. else {
  7760. startTagInBody(p, token);
  7761. }
  7762. }
  7763. function endTagInCell(p, token) {
  7764. const tn = token.tagID;
  7765. switch (tn) {
  7766. case TAG_ID.TD:
  7767. case TAG_ID.TH: {
  7768. if (p.openElements.hasInTableScope(tn)) {
  7769. p.openElements.generateImpliedEndTags();
  7770. p.openElements.popUntilTagNamePopped(tn);
  7771. p.activeFormattingElements.clearToLastMarker();
  7772. p.insertionMode = InsertionMode.IN_ROW;
  7773. }
  7774. break;
  7775. }
  7776. case TAG_ID.TABLE:
  7777. case TAG_ID.TBODY:
  7778. case TAG_ID.TFOOT:
  7779. case TAG_ID.THEAD:
  7780. case TAG_ID.TR: {
  7781. if (p.openElements.hasInTableScope(tn)) {
  7782. p._closeTableCell();
  7783. endTagInRow(p, token);
  7784. }
  7785. break;
  7786. }
  7787. case TAG_ID.BODY:
  7788. case TAG_ID.CAPTION:
  7789. case TAG_ID.COL:
  7790. case TAG_ID.COLGROUP:
  7791. case TAG_ID.HTML: {
  7792. // Ignore token
  7793. break;
  7794. }
  7795. default: {
  7796. endTagInBody(p, token);
  7797. }
  7798. }
  7799. }
  7800. // The "in select" insertion mode
  7801. //------------------------------------------------------------------
  7802. function startTagInSelect(p, token) {
  7803. switch (token.tagID) {
  7804. case TAG_ID.HTML: {
  7805. startTagInBody(p, token);
  7806. break;
  7807. }
  7808. case TAG_ID.OPTION: {
  7809. if (p.openElements.currentTagId === TAG_ID.OPTION) {
  7810. p.openElements.pop();
  7811. }
  7812. p._insertElement(token, NS.HTML);
  7813. break;
  7814. }
  7815. case TAG_ID.OPTGROUP: {
  7816. if (p.openElements.currentTagId === TAG_ID.OPTION) {
  7817. p.openElements.pop();
  7818. }
  7819. if (p.openElements.currentTagId === TAG_ID.OPTGROUP) {
  7820. p.openElements.pop();
  7821. }
  7822. p._insertElement(token, NS.HTML);
  7823. break;
  7824. }
  7825. case TAG_ID.HR: {
  7826. if (p.openElements.currentTagId === TAG_ID.OPTION) {
  7827. p.openElements.pop();
  7828. }
  7829. if (p.openElements.currentTagId === TAG_ID.OPTGROUP) {
  7830. p.openElements.pop();
  7831. }
  7832. p._appendElement(token, NS.HTML);
  7833. token.ackSelfClosing = true;
  7834. break;
  7835. }
  7836. case TAG_ID.INPUT:
  7837. case TAG_ID.KEYGEN:
  7838. case TAG_ID.TEXTAREA:
  7839. case TAG_ID.SELECT: {
  7840. if (p.openElements.hasInSelectScope(TAG_ID.SELECT)) {
  7841. p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
  7842. p._resetInsertionMode();
  7843. if (token.tagID !== TAG_ID.SELECT) {
  7844. p._processStartTag(token);
  7845. }
  7846. }
  7847. break;
  7848. }
  7849. case TAG_ID.SCRIPT:
  7850. case TAG_ID.TEMPLATE: {
  7851. startTagInHead(p, token);
  7852. break;
  7853. }
  7854. // Do nothing
  7855. }
  7856. }
  7857. function endTagInSelect(p, token) {
  7858. switch (token.tagID) {
  7859. case TAG_ID.OPTGROUP: {
  7860. if (p.openElements.stackTop > 0 &&
  7861. p.openElements.currentTagId === TAG_ID.OPTION &&
  7862. p.openElements.tagIDs[p.openElements.stackTop - 1] === TAG_ID.OPTGROUP) {
  7863. p.openElements.pop();
  7864. }
  7865. if (p.openElements.currentTagId === TAG_ID.OPTGROUP) {
  7866. p.openElements.pop();
  7867. }
  7868. break;
  7869. }
  7870. case TAG_ID.OPTION: {
  7871. if (p.openElements.currentTagId === TAG_ID.OPTION) {
  7872. p.openElements.pop();
  7873. }
  7874. break;
  7875. }
  7876. case TAG_ID.SELECT: {
  7877. if (p.openElements.hasInSelectScope(TAG_ID.SELECT)) {
  7878. p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
  7879. p._resetInsertionMode();
  7880. }
  7881. break;
  7882. }
  7883. case TAG_ID.TEMPLATE: {
  7884. templateEndTagInHead(p, token);
  7885. break;
  7886. }
  7887. // Do nothing
  7888. }
  7889. }
  7890. // The "in select in table" insertion mode
  7891. //------------------------------------------------------------------
  7892. function startTagInSelectInTable(p, token) {
  7893. const tn = token.tagID;
  7894. if (tn === TAG_ID.CAPTION ||
  7895. tn === TAG_ID.TABLE ||
  7896. tn === TAG_ID.TBODY ||
  7897. tn === TAG_ID.TFOOT ||
  7898. tn === TAG_ID.THEAD ||
  7899. tn === TAG_ID.TR ||
  7900. tn === TAG_ID.TD ||
  7901. tn === TAG_ID.TH) {
  7902. p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
  7903. p._resetInsertionMode();
  7904. p._processStartTag(token);
  7905. }
  7906. else {
  7907. startTagInSelect(p, token);
  7908. }
  7909. }
  7910. function endTagInSelectInTable(p, token) {
  7911. const tn = token.tagID;
  7912. if (tn === TAG_ID.CAPTION ||
  7913. tn === TAG_ID.TABLE ||
  7914. tn === TAG_ID.TBODY ||
  7915. tn === TAG_ID.TFOOT ||
  7916. tn === TAG_ID.THEAD ||
  7917. tn === TAG_ID.TR ||
  7918. tn === TAG_ID.TD ||
  7919. tn === TAG_ID.TH) {
  7920. if (p.openElements.hasInTableScope(tn)) {
  7921. p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
  7922. p._resetInsertionMode();
  7923. p.onEndTag(token);
  7924. }
  7925. }
  7926. else {
  7927. endTagInSelect(p, token);
  7928. }
  7929. }
  7930. // The "in template" insertion mode
  7931. //------------------------------------------------------------------
  7932. function startTagInTemplate(p, token) {
  7933. switch (token.tagID) {
  7934. // First, handle tags that can start without a mode change
  7935. case TAG_ID.BASE:
  7936. case TAG_ID.BASEFONT:
  7937. case TAG_ID.BGSOUND:
  7938. case TAG_ID.LINK:
  7939. case TAG_ID.META:
  7940. case TAG_ID.NOFRAMES:
  7941. case TAG_ID.SCRIPT:
  7942. case TAG_ID.STYLE:
  7943. case TAG_ID.TEMPLATE:
  7944. case TAG_ID.TITLE: {
  7945. startTagInHead(p, token);
  7946. break;
  7947. }
  7948. // Re-process the token in the appropriate mode
  7949. case TAG_ID.CAPTION:
  7950. case TAG_ID.COLGROUP:
  7951. case TAG_ID.TBODY:
  7952. case TAG_ID.TFOOT:
  7953. case TAG_ID.THEAD: {
  7954. p.tmplInsertionModeStack[0] = InsertionMode.IN_TABLE;
  7955. p.insertionMode = InsertionMode.IN_TABLE;
  7956. startTagInTable(p, token);
  7957. break;
  7958. }
  7959. case TAG_ID.COL: {
  7960. p.tmplInsertionModeStack[0] = InsertionMode.IN_COLUMN_GROUP;
  7961. p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
  7962. startTagInColumnGroup(p, token);
  7963. break;
  7964. }
  7965. case TAG_ID.TR: {
  7966. p.tmplInsertionModeStack[0] = InsertionMode.IN_TABLE_BODY;
  7967. p.insertionMode = InsertionMode.IN_TABLE_BODY;
  7968. startTagInTableBody(p, token);
  7969. break;
  7970. }
  7971. case TAG_ID.TD:
  7972. case TAG_ID.TH: {
  7973. p.tmplInsertionModeStack[0] = InsertionMode.IN_ROW;
  7974. p.insertionMode = InsertionMode.IN_ROW;
  7975. startTagInRow(p, token);
  7976. break;
  7977. }
  7978. default: {
  7979. p.tmplInsertionModeStack[0] = InsertionMode.IN_BODY;
  7980. p.insertionMode = InsertionMode.IN_BODY;
  7981. startTagInBody(p, token);
  7982. }
  7983. }
  7984. }
  7985. function endTagInTemplate(p, token) {
  7986. if (token.tagID === TAG_ID.TEMPLATE) {
  7987. templateEndTagInHead(p, token);
  7988. }
  7989. }
  7990. function eofInTemplate(p, token) {
  7991. if (p.openElements.tmplCount > 0) {
  7992. p.openElements.popUntilTagNamePopped(TAG_ID.TEMPLATE);
  7993. p.activeFormattingElements.clearToLastMarker();
  7994. p.tmplInsertionModeStack.shift();
  7995. p._resetInsertionMode();
  7996. p.onEof(token);
  7997. }
  7998. else {
  7999. stopParsing(p, token);
  8000. }
  8001. }
  8002. // The "after body" insertion mode
  8003. //------------------------------------------------------------------
  8004. function startTagAfterBody(p, token) {
  8005. if (token.tagID === TAG_ID.HTML) {
  8006. startTagInBody(p, token);
  8007. }
  8008. else {
  8009. tokenAfterBody(p, token);
  8010. }
  8011. }
  8012. function endTagAfterBody(p, token) {
  8013. var _a;
  8014. if (token.tagID === TAG_ID.HTML) {
  8015. if (!p.fragmentContext) {
  8016. p.insertionMode = InsertionMode.AFTER_AFTER_BODY;
  8017. }
  8018. //NOTE: <html> is never popped from the stack, so we need to updated
  8019. //the end location explicitly.
  8020. if (p.options.sourceCodeLocationInfo && p.openElements.tagIDs[0] === TAG_ID.HTML) {
  8021. p._setEndLocation(p.openElements.items[0], token);
  8022. // Update the body element, if it doesn't have an end tag
  8023. const bodyElement = p.openElements.items[1];
  8024. if (bodyElement && !((_a = p.treeAdapter.getNodeSourceCodeLocation(bodyElement)) === null || _a === void 0 ? void 0 : _a.endTag)) {
  8025. p._setEndLocation(bodyElement, token);
  8026. }
  8027. }
  8028. }
  8029. else {
  8030. tokenAfterBody(p, token);
  8031. }
  8032. }
  8033. function tokenAfterBody(p, token) {
  8034. p.insertionMode = InsertionMode.IN_BODY;
  8035. modeInBody(p, token);
  8036. }
  8037. // The "in frameset" insertion mode
  8038. //------------------------------------------------------------------
  8039. function startTagInFrameset(p, token) {
  8040. switch (token.tagID) {
  8041. case TAG_ID.HTML: {
  8042. startTagInBody(p, token);
  8043. break;
  8044. }
  8045. case TAG_ID.FRAMESET: {
  8046. p._insertElement(token, NS.HTML);
  8047. break;
  8048. }
  8049. case TAG_ID.FRAME: {
  8050. p._appendElement(token, NS.HTML);
  8051. token.ackSelfClosing = true;
  8052. break;
  8053. }
  8054. case TAG_ID.NOFRAMES: {
  8055. startTagInHead(p, token);
  8056. break;
  8057. }
  8058. // Do nothing
  8059. }
  8060. }
  8061. function endTagInFrameset(p, token) {
  8062. if (token.tagID === TAG_ID.FRAMESET && !p.openElements.isRootHtmlElementCurrent()) {
  8063. p.openElements.pop();
  8064. if (!p.fragmentContext && p.openElements.currentTagId !== TAG_ID.FRAMESET) {
  8065. p.insertionMode = InsertionMode.AFTER_FRAMESET;
  8066. }
  8067. }
  8068. }
  8069. // The "after frameset" insertion mode
  8070. //------------------------------------------------------------------
  8071. function startTagAfterFrameset(p, token) {
  8072. switch (token.tagID) {
  8073. case TAG_ID.HTML: {
  8074. startTagInBody(p, token);
  8075. break;
  8076. }
  8077. case TAG_ID.NOFRAMES: {
  8078. startTagInHead(p, token);
  8079. break;
  8080. }
  8081. // Do nothing
  8082. }
  8083. }
  8084. function endTagAfterFrameset(p, token) {
  8085. if (token.tagID === TAG_ID.HTML) {
  8086. p.insertionMode = InsertionMode.AFTER_AFTER_FRAMESET;
  8087. }
  8088. }
  8089. // The "after after body" insertion mode
  8090. //------------------------------------------------------------------
  8091. function startTagAfterAfterBody(p, token) {
  8092. if (token.tagID === TAG_ID.HTML) {
  8093. startTagInBody(p, token);
  8094. }
  8095. else {
  8096. tokenAfterAfterBody(p, token);
  8097. }
  8098. }
  8099. function tokenAfterAfterBody(p, token) {
  8100. p.insertionMode = InsertionMode.IN_BODY;
  8101. modeInBody(p, token);
  8102. }
  8103. // The "after after frameset" insertion mode
  8104. //------------------------------------------------------------------
  8105. function startTagAfterAfterFrameset(p, token) {
  8106. switch (token.tagID) {
  8107. case TAG_ID.HTML: {
  8108. startTagInBody(p, token);
  8109. break;
  8110. }
  8111. case TAG_ID.NOFRAMES: {
  8112. startTagInHead(p, token);
  8113. break;
  8114. }
  8115. // Do nothing
  8116. }
  8117. }
  8118. // The rules for parsing tokens in foreign content
  8119. //------------------------------------------------------------------
  8120. function nullCharacterInForeignContent(p, token) {
  8121. token.chars = REPLACEMENT_CHARACTER;
  8122. p._insertCharacters(token);
  8123. }
  8124. function characterInForeignContent(p, token) {
  8125. p._insertCharacters(token);
  8126. p.framesetOk = false;
  8127. }
  8128. function popUntilHtmlOrIntegrationPoint(p) {
  8129. while (p.treeAdapter.getNamespaceURI(p.openElements.current) !== NS.HTML &&
  8130. !p._isIntegrationPoint(p.openElements.currentTagId, p.openElements.current)) {
  8131. p.openElements.pop();
  8132. }
  8133. }
  8134. function startTagInForeignContent(p, token) {
  8135. if (causesExit(token)) {
  8136. popUntilHtmlOrIntegrationPoint(p);
  8137. p._startTagOutsideForeignContent(token);
  8138. }
  8139. else {
  8140. const current = p._getAdjustedCurrentElement();
  8141. const currentNs = p.treeAdapter.getNamespaceURI(current);
  8142. if (currentNs === NS.MATHML) {
  8143. adjustTokenMathMLAttrs(token);
  8144. }
  8145. else if (currentNs === NS.SVG) {
  8146. adjustTokenSVGTagName(token);
  8147. adjustTokenSVGAttrs(token);
  8148. }
  8149. adjustTokenXMLAttrs(token);
  8150. if (token.selfClosing) {
  8151. p._appendElement(token, currentNs);
  8152. }
  8153. else {
  8154. p._insertElement(token, currentNs);
  8155. }
  8156. token.ackSelfClosing = true;
  8157. }
  8158. }
  8159. function endTagInForeignContent(p, token) {
  8160. if (token.tagID === TAG_ID.P || token.tagID === TAG_ID.BR) {
  8161. popUntilHtmlOrIntegrationPoint(p);
  8162. p._endTagOutsideForeignContent(token);
  8163. return;
  8164. }
  8165. for (let i = p.openElements.stackTop; i > 0; i--) {
  8166. const element = p.openElements.items[i];
  8167. if (p.treeAdapter.getNamespaceURI(element) === NS.HTML) {
  8168. p._endTagOutsideForeignContent(token);
  8169. break;
  8170. }
  8171. const tagName = p.treeAdapter.getTagName(element);
  8172. if (tagName.toLowerCase() === token.tagName) {
  8173. //NOTE: update the token tag name for `_setEndLocation`.
  8174. token.tagName = tagName;
  8175. p.openElements.shortenToLength(i);
  8176. break;
  8177. }
  8178. }
  8179. }
  8180. // Shorthands
  8181. /**
  8182. * Parses an HTML string.
  8183. *
  8184. * @param html Input HTML string.
  8185. * @param options Parsing options.
  8186. * @returns Document
  8187. *
  8188. * @example
  8189. *
  8190. * ```js
  8191. * const parse5 = require('parse5');
  8192. *
  8193. * const document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
  8194. *
  8195. * console.log(document.childNodes[1].tagName); //> 'html'
  8196. *```
  8197. */
  8198. function parse(html, options) {
  8199. return Parser.parse(html, options);
  8200. }
  8201. export { ERR as ErrorCodes, Parser, Tokenizer, TokenizerMode, defaultTreeAdapter, parse };