import { NodeType } from '@common/html/enums/node-type.enum';
import { SimpleDomDocument, SimpleDomNode, SimpleDomRegisteredElement } from '@common/html/simple-dom/node';
import { Parser } from 'htmlparser2';

/**
 * Builds the minimal HTML skeleton document.
 * Delegates to parseFromString with the 'text/html' type and no extra options.
 * @returns A document whose markup is an html element wrapping an empty body element.
 */
function createEmptyHtmlDoc(): SimpleDomDocument {
  const skeletonMarkup = '<html><body></body></html>';
  const emptyDoc = parseFromString(skeletonMarkup, 'text/html');
  return emptyDoc;
}

/**
 * Normalizes an HTML document so that its root is an html element containing a body element.
 *
 * - An empty document is replaced by a fresh `<html><body></body></html>` document.
 * - A document whose root element is already `html` is returned untouched.
 * - A document whose root element is `body` has that body's children moved into a fresh skeleton.
 * - Anything else has all of its top-level nodes moved into the body of a fresh skeleton.
 *
 * Leading comments and whitespace-only text nodes are skipped when looking for the root element, so
 * markup such as a newline or comment in front of `<html>` is not wrapped in a second html/body pair.
 *
 * NOTE(review): the replacement document comes from createEmptyHtmlDoc(), which parses without any options,
 * so elements/void tags registered on the original document are presumably not carried over - confirm.
 * @param doc The HTML document to normalize.
 * @returns The original document when it is already rooted at html, otherwise a new normalized document.
 */
function normalizeHtml(doc: SimpleDomDocument): SimpleDomDocument {
  // Nothing was parsed, so hand back the bare skeleton
  if (!doc.firstChild) {
    return createEmptyHtmlDoc();
  }

  // The root is the first top-level node that is neither a comment nor whitespace-only text
  const firstSignificant = [...doc.childNodes].find((child) => {
    if (child.nodeName === '#comment') {
      return false;
    }

    return child.nodeName !== '#text' || /\S/.test(child.nodeValue ?? '');
  });

  // When every top-level node is ignorable, fall back to the first child so the content is still wrapped
  const root = firstSignificant ?? doc.firstChild;
  const rootName = root.nodeName.toLowerCase();

  // Already rooted at an html element, so there is nothing to do
  if (rootName === 'html') {
    return doc;
  }

  // A body root contributes only its children; any other root means every top-level node is wrapped
  const wrapper = createEmptyHtmlDoc();
  const adopted = rootName === 'body' ? root.childNodes : doc.childNodes;
  wrapper.querySelector('body').append(...adopted);

  return wrapper;
}

/**
 * Options for parsing an HTML or XML string into a SimpleDomDocument.
 * Every option is optional; see each member for what happens when it is omitted.
 */
export interface SimpleDomParseOptions {
  /**
   * Whether or not to normalize the parsed document by wrapping the content in <html><body></body></html> nodes if not already.
   * When omitted, normalization is applied only when the type is `text/html`.
   * An explicit `true` applies it for either type and an explicit `false` disables it.
   */
  normalizeHTML?: boolean;
  /**
   * Called for an element once its opening tag has been read and its attributes have been assigned.
   * Not called for comment, text or CDATA nodes.
   */
  onNodeStart?: (node: SimpleDomNode) => void;
  /**
   * Called for an element when its closing tag is reached, i.e. after all of its children were parsed.
   * Also called for each comment, text and CDATA node right after it has been appended.
   */
  onNode?: (node: SimpleDomNode) => void;
  /** Called for an element when its closing tag is reached. This is called after onNode and only for elements. */
  onNodeEnd?: (node: SimpleDomNode) => void;
  /**
   * When enabled the document is validated as xml and will throw an error when invalid. Defaults to false.
   * The value is forwarded to the underlying parser as its `strictMode` option.
   */
  strictMode?: boolean;
  /**
   * A map of custom implementations to register with the document.
   * Maps element name to registered element.
   * The elements are registered before the document is parsed so that the custom implementations are available during parsing.
   */
  registerElements?: Dictionary<SimpleDomRegisteredElement>;
  /**
   * A list of tags that should be serialized as void tags.
   * A non-empty list is registered with the document before parsing; an empty list is ignored.
   */
  registerVoidTags?: string[];
}

/**
 * Parses an html or xml string into a SimpleDomDocument.
 *
 * Custom elements and void tags supplied through `options` are registered on the document before parsing.
 * The whole markup is then handed to an htmlparser2 `Parser` in a single call and nodes are appended to the
 * document as the parser reports them.
 *
 * Callback order: `onNodeStart` fires for an element after its attributes were assigned, `onNode` followed by
 * `onNodeEnd` fire when the element closes. Comment, text and CDATA nodes only trigger `onNode`.
 *
 * The result is normalized (see normalizeHtml) when `options.normalizeHTML` is `true`, or when it is
 * omitted and the type is 'text/html'. An explicit `false` always skips normalization.
 * @param markup The markup to parse.
 * @param type The type of markup to parse. Either 'text/html' or 'text/xml'.
 * @param options Additional options for parsing.
 * @returns The parsed document, or the normalized replacement document when normalization applies.
 */
export function parseFromString(markup: string, type: 'text/html' | 'text/xml', options?: SimpleDomParseOptions): SimpleDomDocument {
  const doc = new SimpleDomDocument(null, '#document', NodeType.DOCUMENT_NODE);
  doc.contentType = type;

  // Custom elements must be known to the document before any node is created
  if (options?.registerElements) {
    for (const [elementName, registration] of Object.entries(options.registerElements)) {
      doc.registerElement(elementName, registration);
    }
  }

  // Only register void tags when a non-empty list was supplied
  const voidTags = options?.registerVoidTags;
  if (voidTags && voidTags.length > 0) {
    doc.registerVoidTags(voidTags);
  }

  // The node new children are appended to; moves down on an open tag and back up on a close tag
  let current: SimpleDomNode = doc;
  // True between the parser's cdata start/end events so text is stored as a CDATA section
  let insideCData = false;

  const parser = new Parser({
    // <tagName>
    onopentag(tagName: string, attributes: Dictionary<string>) {
      // Keep the node returned by appendChild as the new insertion point
      current = current.appendChild(new SimpleDomNode(doc, tagName, NodeType.ELEMENT_NODE));
      current.attributes = Object.entries(attributes).map(([attrName, attrValue]) => ({
        nodeName: attrName,
        nodeType: NodeType.ATTRIBUTE_NODE,
        nodeValue: attrValue
      }));
      options?.onNodeStart?.(current);
    },

    // #comment
    oncomment(data: string) {
      const commentNode = current.appendChild(new SimpleDomNode(doc, '#comment', NodeType.COMMENT_NODE));
      commentNode.nodeValue = data ?? '';
      options?.onNode?.(commentNode);
    },

    // #text
    // NOTE(review): htmlparser2 may report one run of text through several ontext calls (e.g. around decoded
    // entities), which would create several adjacent text nodes here - confirm whether merging is wanted.
    ontext(text: string) {
      const textNode = insideCData
        ? current.appendChild(new SimpleDomNode(doc, '#cdata-section', NodeType.CDATA_SECTION_NODE))
        : current.appendChild(new SimpleDomNode(doc, '#text', NodeType.TEXT_NODE));

      textNode.nodeValue = text ?? '';
      options?.onNode?.(textNode);
    },

    // #cdata
    oncdatastart() { insideCData = true; },
    oncdataend() { insideCData = false; },

    // </tagName>
    onclosetag() {
      // Capture the parent first because onNode might remove the current node, setting its parentNode to null
      const parent = current.parentNode;
      options?.onNode?.(current);
      options?.onNodeEnd?.(current);
      current = parent;
    }
  }, {
    // Maintain the original names
    lowerCaseTags: false,
    lowerCaseAttributeNames: false,
    // Always decode entities because htmlparser2 says it can cause an issue otherwise. https://github.com/fb55/htmlparser2/issues/105
    decodeEntities: true,
    // These options differ when xmlMode is enabled or disabled but we want them always to be true
    recognizeCDATA: true,
    recognizeSelfClosing: true,
    // Strict mode causes the parser to throw an error when the document is invalid
    // NOTE(review): strictMode is not a documented option of stock htmlparser2 - presumably provided by a patched build; confirm.
    strictMode: options?.strictMode ?? false,
    xmlMode: type === 'text/xml'
  });

  // Parse the markup in one go
  parser.end(markup);

  // An explicit normalizeHTML wins; when omitted only html documents are normalized
  const shouldNormalize = options?.normalizeHTML ?? (type === 'text/html');
  return shouldNormalize ? normalizeHtml(doc) : doc;
}

/**
 * Parses a markup string into a document fragment.
 * The markup is parsed with parseFromString while forcing the normalizeHTML option to false, so the content
 * is not wrapped in an html and body tag. All top-level nodes of the parsed document are then moved into a
 * fragment created by that document.
 * @param markup The markup to parse.
 * @param type The type of markup to parse. Either 'text/html' or 'text/xml'.
 * @param options Additional options for parsing. Any normalizeHTML value given here is overridden with false.
 * @returns The document fragment holding the parsed top-level nodes.
 */
export function parseFragmentFromString(markup: string, type: 'text/html' | 'text/xml', options?: SimpleDomParseOptions): SimpleDomNode {
  // Caller options first so that the forced normalizeHTML value takes precedence
  const fragmentOptions: SimpleDomParseOptions = { ...options, normalizeHTML: false };
  const sourceDoc = parseFromString(markup, type, fragmentOptions);

  const fragment = sourceDoc.createDocumentFragment();
  const topLevelNodes = [...sourceDoc.childNodes];
  fragment.append(...topLevelNodes);

  return fragment;
}
