import { CfHtmlParser } from '@common/clipboard/cf-html/cf-html-parser';
import { GoogleDocsNormalizer } from '@common/clipboard/google-docs/google-docs-normalizer';
import { GoogleSheetsNormalizer } from '@common/clipboard/google-sheets/google-sheets-normalizer';
import { HtmlNormalizer } from '@common/clipboard/html/html-normalizer';
import { MsHtmlNormalizer } from '@common/clipboard/ms-html/ms-html-normalizer';
import { HtmlPasteNormalizerData } from '@common/clipboard/types/html-paste-normalizer-data.type';
import { HtmlPasteNormalizerOptions } from '@common/clipboard/types/html-paste-normalizer-options.type';
import { HtmlPasteNormalizer } from '@common/clipboard/types/html-paste-normalizer.type';
import { parseFromString } from '@common/html/simple-dom/parse';

// This regex is only referenced by the commented-out replace call further down; it is not needed when using SimpleDom.
// const selfClosingTagRegex = /<\s*([a-zA-Z][a-zA-Z0-9-]*:[a-zA-Z][a-zA-Z0-9-]*)([^>]*)\/>/g;

/**
 * Turns raw clipboard HTML into normalized markup.
 * Covers the CF_HTML, MS_HTML (from Microsoft Word), Excel, and Google Doc formats.
 */
export class PasteNormalizer {
  /**
   * Parses the pasted HTML into a document, runs every matching normalizer over it in a fixed order,
   * and returns the resulting body markup.
   *
   * @param html The raw HTML string taken from the clipboard.
   * @param options Optional settings handed unchanged to each normalizer that runs.
   * @returns The `innerHTML` of the `body` element of the normalized document.
   */
  normalize(html: string, options?: HtmlPasteNormalizerOptions): string {
    const cfHtml = new CfHtmlParser();

    // Step 1: turn the clipboard string into paste data.
    // CF_HTML input is handled by its dedicated parser; anything else is parsed directly with SimpleDom.
    //
    // Background for the non CF_HTML branch: a spec compliant DOMParser turns self closing (void) custom tags
    // in the raw HTML into open tags without a closing tag, which effectively closes them at the end of their
    // parent and therefore changes the markup, e.g.
    //   <p>foo</p><MadCap:menuProxy /><p>bar</p>
    // becomes
    //   <p>foo</p><MadCap:menuProxy><p>bar</p></MadCap:menuProxy>
    // The workaround was to rewrite self closing tags as an explicit open/close pair with selfClosingTagRegex.
    // SimpleDom does not have this problem, so the replace stays commented out:
    // html = html.replace(selfClosingTagRegex, '<$1$2></$1>');
    const pasteData: HtmlPasteNormalizerData = cfHtml.isCfHtml(html)
      ? cfHtml.parseFromString(html)
      : {
          css: null,
          // Parsed as text/html because clipboard data from other sources is usually html and must be processed as such
          doc: parseFromString(html, 'text/html') as unknown as Document,
          html
        };

    // Step 2: apply the normalizers in this order; each one replaces the document with its own result.
    const pipeline: HtmlPasteNormalizer[] = [
      new MsHtmlNormalizer(),
      new GoogleDocsNormalizer(),
      new GoogleSheetsNormalizer(),
      new HtmlNormalizer()
    ];

    for (const step of pipeline) {
      // The match is tested against the original input string, not against the parsed paste data.
      if (!step.matches(html)) {
        continue;
      }

      pasteData.doc = step.normalize(pasteData, options);
    }

    // Step 3: hand back only the body markup.
    // NOTE(review): querySelector returns null when nothing matches, which would make this line throw if the
    // document ever lacks a body - confirm that the parser and all normalizers always produce one.
    const body = pasteData.doc.querySelector('body');
    return body.innerHTML;
  }
}
