| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- "use strict";
- const parse5 = require("parse5");
- const sax = require("sax");
- const attributes = require("../living/attributes");
- const DocumentType = require("../living/generated/DocumentType");
- const JSDOMParse5Adapter = require("./parse5-adapter-parsing");
- const { HTML_NS } = require("../living/helpers/namespaces");
// Horrible monkey-patch to implement https://github.com/inikulin/parse5/issues/237
// parse5's stack of open elements is wrapped so that an element is told when it is pushed onto,
// or popped off, that stack. Elements opt in by defining the corresponding hook method.
const OpenElementStack = require("parse5/lib/parser/open_element_stack");

const originalPop = OpenElementStack.prototype.pop;
const originalPush = OpenElementStack.prototype.push;

OpenElementStack.prototype.pop = function (...popArgs) {
  // Grab the top element first: once the original pop has run it is no longer the stack top.
  const leaving = this.items[this.stackTop];
  Reflect.apply(originalPop, this, popArgs);

  if (leaving._poppedOffStackOfOpenElements) {
    leaving._poppedOffStackOfOpenElements();
  }
};

OpenElementStack.prototype.push = function (...pushArgs) {
  Reflect.apply(originalPush, this, pushArgs);

  // Read the top element only after the original push, so it is the one that was just added.
  const entering = this.items[this.stackTop];
  if (entering._pushedOnStackOfOpenElements) {
    entering._pushedOnStackOfOpenElements();
  }
};
- module.exports = class HTMLToDOM {
- constructor(parsingMode) {
- this.parser = parsingMode === "xml" ? sax : parse5;
- }
- appendToNode(html, node) {
- html = String(html);
- return this._doParse(html, true, node);
- }
- appendToDocument(html, documentImpl) {
- html = String(html);
- return this._doParse(html, false, documentImpl, documentImpl._parseOptions);
- }
- _doParse(...args) {
- return this.parser === parse5 ? this._parseWithParse5(...args) : this._parseWithSax(...args);
- }
- _parseWithParse5(html, isFragment, contextNode, options = {}) {
- const adapter = new JSDOMParse5Adapter(contextNode._ownerDocument || contextNode);
- options.treeAdapter = adapter;
- if (isFragment) {
- const fragment = this.parser.parseFragment(contextNode, html, options);
- if (contextNode._templateContents) {
- contextNode._templateContents.appendChild(fragment);
- } else {
- contextNode.appendChild(fragment);
- }
- } else {
- this.parser.parse(html, options);
- }
- return contextNode;
- }
- _parseWithSax(html, isFragment, contextNode) {
- const SaxParser = this.parser.parser;
- const parser = new SaxParser(/* strict = */true, { xmlns: true, strictEntities: true });
- parser.noscript = false;
- parser.looseCase = "toString";
- const openStack = [contextNode];
- parser.ontext = text => {
- setChildForSax(openStack[openStack.length - 1], {
- type: "text",
- data: text
- });
- };
- parser.oncdata = cdata => {
- setChildForSax(openStack[openStack.length - 1], {
- type: "cdata",
- data: cdata
- });
- };
- parser.onopentag = arg => {
- const attrs = Object.keys(arg.attributes).map(key => {
- const rawAttribute = arg.attributes[key];
- let { prefix } = rawAttribute;
- let localName = rawAttribute.local;
- if (prefix === "xmlns" && localName === "") {
- // intended weirdness in node-sax, see https://github.com/isaacs/sax-js/issues/165
- localName = prefix;
- prefix = null;
- }
- if (prefix === "") {
- prefix = null;
- }
- const namespace = rawAttribute.uri === "" ? null : rawAttribute.uri;
- return { name: rawAttribute.name, value: rawAttribute.value, prefix, localName, namespace };
- });
- const tag = {
- type: "tag",
- name: arg.local,
- prefix: arg.prefix,
- namespace: arg.uri,
- attributes: attrs
- };
- if (arg.local === "script" && arg.uri === HTML_NS) {
- openStack.push(tag);
- } else {
- const elem = setChildForSax(openStack[openStack.length - 1], tag);
- openStack.push(elem);
- }
- };
- parser.onclosetag = () => {
- const elem = openStack.pop();
- if (elem.constructor.name === "Object") { // we have an empty script tag
- setChildForSax(openStack[openStack.length - 1], elem);
- }
- };
- parser.onscript = scriptText => {
- const tag = openStack.pop();
- tag.children = [{ type: "text", data: scriptText }];
- const elem = setChildForSax(openStack[openStack.length - 1], tag);
- openStack.push(elem);
- };
- parser.oncomment = comment => {
- setChildForSax(openStack[openStack.length - 1], {
- type: "comment",
- data: comment
- });
- };
- parser.onprocessinginstruction = pi => {
- setChildForSax(openStack[openStack.length - 1], {
- type: "directive",
- name: "?" + pi.name,
- data: "?" + pi.name + " " + pi.body + "?"
- });
- };
- parser.ondoctype = dt => {
- setChildForSax(openStack[openStack.length - 1], {
- type: "directive",
- name: "!doctype",
- data: "!doctype " + dt
- });
- const entityMatcher = /<!ENTITY ([^ ]+) "([^"]+)">/g;
- let result;
- while ((result = entityMatcher.exec(dt))) {
- const [, name, value] = result;
- if (!(name in parser.ENTITIES)) {
- parser.ENTITIES[name] = value;
- }
- }
- };
- parser.onerror = err => {
- throw err;
- };
- parser.write(html).close();
- }
- };
/**
 * Builds the DOM node described by `node` and attaches it under `parentImpl`.
 *
 * @param {object} parentImpl - Parent node. Its `_ownerDocument`, or the parent itself when it
 *   has none, is used to create the new node.
 * @param {object} node - Parser-level description: `type` plus, depending on the type, `name`,
 *   `namespace`, `prefix`, `data`, `attributes` and `children`.
 * @returns {object|null} The created (or reused) node, or null when the description does not
 *   produce a node.
 */
function setChildForSax(parentImpl, node) {
  const ownerDoc = (parentImpl && parentImpl._ownerDocument) || parentImpl;
  const { type } = node;

  let created;
  let reusesTemplateContents = false;

  if (type === "tag" || type === "script" || type === "style") {
    created = ownerDoc._createElementWithCorrectElementInterface(node.name, node.namespace);
    created._prefix = node.prefix || null;
    created._namespaceURI = node.namespace || null;
  } else if (type === "root") {
    // Inside a <template>, the virtual root is skipped and children go to the parent's
    // _templateContents instead.
    if (parentImpl.tagName === "TEMPLATE" && parentImpl._namespaceURI === HTML_NS) {
      created = parentImpl._templateContents;
      reusesTemplateContents = true;
    }
  } else if (type === "text") {
    // The parser has already decoded entities, so the data is used verbatim.
    created = ownerDoc.createTextNode(node.data);
  } else if (type === "cdata") {
    created = ownerDoc.createCDATASection(node.data);
  } else if (type === "comment") {
    created = ownerDoc.createComment(node.data);
  } else if (type === "directive") {
    const isProcessingInstruction = node.name[0] === "?" && node.name.toLowerCase() !== "?xml";
    if (isProcessingInstruction) {
      // Drop the leading "?target " and the trailing "?" to leave only the instruction body.
      const body = node.data.slice(node.name.length + 1, -1);
      created = ownerDoc.createProcessingInstruction(node.name.substring(1), body);
    } else if (node.name.toLowerCase() === "!doctype") {
      created = parseDocType(ownerDoc, "<" + node.data + ">");
    }
  }

  if (!created) {
    return null;
  }

  if (node.attributes) {
    for (const { localName, value, prefix, namespace } of node.attributes) {
      attributes.setAttributeValue(created, localName, value, prefix, namespace);
    }
  }

  // Children are attached before the new node itself is inserted into its parent.
  if (node.children) {
    for (const childDescription of node.children) {
      setChildForSax(created, childDescription);
    }
  }

  if (reusesTemplateContents) {
    return created;
  }

  // A parent with _templateContents is a <template> whose content is being set.
  const destination = parentImpl._templateContents || parentImpl;
  destination.appendChild(created);

  return created;
}
const HTML5_DOCTYPE = /<!doctype html>/i;
const PUBLIC_DOCTYPE = /<!doctype\s+([^\s]+)\s+public\s+"([^"]+)"\s+"([^"]+)"/i;
const SYSTEM_DOCTYPE = /<!doctype\s+([^\s]+)\s+system\s+"([^"]+)"/i;
// Name-only form: no external id, optionally followed by an internal subset ("[ ... ]").
const CUSTOM_NAME_DOCTYPE = /<!doctype\s+([^\s>\[]+)/i;

/**
 * Creates a DocumentType node from the text of a doctype declaration.
 *
 * The patterns are tried from most to least specific: the bare HTML5 doctype, a PUBLIC id
 * followed by a system id, a SYSTEM id, and finally a declaration that only names the root
 * element. The last case keeps the declared name for doctypes such as `<!DOCTYPE note>` or
 * `<!DOCTYPE note [<!ENTITY ...>]>` instead of reporting them as "html".
 *
 * @param {object} doc - Owner document for the new node.
 * @param {string} html - Declaration text, e.g. `<!doctype html>`.
 * @returns {object} The node returned by createDocumentTypeInternal.
 */
function parseDocType(doc, html) {
  if (HTML5_DOCTYPE.test(html)) {
    return createDocumentTypeInternal(doc, "html", "", "");
  }

  const publicPieces = PUBLIC_DOCTYPE.exec(html);
  if (publicPieces) {
    return createDocumentTypeInternal(doc, publicPieces[1], publicPieces[2], publicPieces[3]);
  }

  const systemPieces = SYSTEM_DOCTYPE.exec(html);
  if (systemPieces) {
    return createDocumentTypeInternal(doc, systemPieces[1], "", systemPieces[2]);
  }

  const namePieces = CUSTOM_NAME_DOCTYPE.exec(html);
  if (namePieces) {
    return createDocumentTypeInternal(doc, namePieces[1], "", "");
  }

  // Shouldn't get here (the parser shouldn't let us know about invalid doctypes), but our logic likely isn't
  // real-world perfect, so let's fallback.
  return createDocumentTypeInternal(doc, "html", "", "");
}
/**
 * Creates a DocumentType implementation object through DocumentType.createImpl.
 *
 * @param {object} ownerDocument - Document the doctype belongs to.
 * @param {string} name - Doctype name.
 * @param {string} publicId - Public identifier ("" when absent).
 * @param {string} systemId - System identifier ("" when absent).
 * @returns {object} Whatever DocumentType.createImpl returns.
 */
function createDocumentTypeInternal(ownerDocument, name, publicId, systemId) {
  const constructorArgs = [];
  const privateData = { ownerDocument, name, publicId, systemId };

  return DocumentType.createImpl(constructorArgs, privateData);
}
|