import { descendants } from '@common/html/util/dom';

/**
 * Cleans up whitespace in an HTML string, including Microsoft Word specific spacing markup.
 *
 * The following steps are applied in order:
 * 1. Safari whitespace-only spans are unwrapped (two passes, so one level of nesting is handled).
 * 2. A run of line breaks inside an otherwise whitespace-only `mso-spacerun:yes` span is removed,
 *    and `mso-spacerun:yes` spans that are empty afterwards are dropped.
 * 3. A `letter-spacing` span that contains only line breaks gets a single space as its content.
 * 4. A space directly preceding a closing tag, or directly preceding an empty Word `<o:p></o:p>`
 *    element, is replaced with a non-breaking space (`\u00A0`) so it survives later processing.
 * 5. Whitespace between two tags that contains at least one `\r` or `\n` is collapsed into one space.
 *
 * @param htmlString The HTML string in which spacing should be normalized.
 * @returns The HTML string with spacing normalized.
 */
export function normalizeSpacing(htmlString: string): string {
  // Second pass picks up spans that only became whitespace-only after the first pass.
  const firstPass = normalizeSafariSpaceSpans(htmlString);
  const withoutSafariSpans = normalizeSafariSpaceSpans(firstPass);

  // Line breaks must leave the "spacerun spans" first, otherwise the final
  // collapsing step below would swallow their whole whitespace content.
  const spacerunWithoutBreaks = withoutSafariSpans.replace(
    /(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]*?)[\r\n]+([^\S\r\n]*<\/span>)/g,
    '$1$2'
  );
  const withoutEmptySpacerun = spacerunWithoutBreaks.replace(/<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g, '');

  // A letter-spacing span holding nothing but line breaks stands for a single space.
  const letterSpacingFixed = withoutEmptySpacerun.replace(
    /(<span\s+style=['"]letter-spacing:[^'"]+?['"]>)[\r\n]+(<\/span>)/g,
    '$1 $2'
  );

  // Protect trailing spaces (before a closing tag or an empty <o:p>) with a non-breaking space.
  const closingTagProtected = letterSpacingFixed.replace(/ <\//g, '\u00A0</');
  const emptyParagraphProtected = closingTagProtected.replace(/ <o:p><\/o:p>/g, '\u00A0<o:p></o:p>');

  // Any inter-tag whitespace that includes a \r or \n becomes exactly one space.
  return emptyParagraphProtected.replace(/>([^\S\r\n]*[\r\n]\s*)</g, '> <');
}

/**
 * Unwraps the whitespace-only spans Safari produces when content is pasted from Word
 * (`<span class="Apple-converted-space"> </span>`, as well as attribute-less `<span> </span>`).
 *
 * A span holding exactly one whitespace character becomes a plain space. A longer run is replaced
 * by a string of the same length that alternates `\u00A0` (non-breaking space) and a regular space,
 * starting with `\u00A0`, so the spacing doesn't get lost in later processing.
 *
 * @param htmlString The HTML string in which spacing should be normalized.
 * @returns The HTML string with the matching spans replaced by their normalized whitespace.
 */
function normalizeSafariSpaceSpans(htmlString: string) {
  const whitespaceOnlySpan = /<span(?: class="Apple-converted-space"|)>(\s+)<\/span>/g;

  return htmlString.replace(whitespaceOnlySpan, (_wholeSpan, whitespace) => {
    const count = whitespace.length;

    if (count === 1) {
      return ' ';
    }

    // Build more than enough "nbsp + space" pairs, then trim to the original run length.
    return '\u00A0 '.repeat(count).slice(0, count);
  });
}

/**
 * Normalizes spacing in special Microsoft "spacerun spans" (e.g. `<span style='mso-spacerun:yes'>\s+</span>`).
 *
 * Every element visited by `descendants` whose `style` attribute contains the substring `spacerun`
 * has its whole text content replaced by a string of the same length that alternates `\u00A0`
 * (non-breaking space) and a regular space, starting with `\u00A0`, so the spacing doesn't get
 * lost in later processing. The document is modified in place.
 *
 * @param doc The document in which spacing should be normalized.
 */
export function normalizeSpacerunSpans(doc: Document): void {
  descendants(doc.documentElement, (element: HTMLElement) => {
    const style = element.getAttribute('style');
    if (!style?.includes('spacerun')) {
      return;
    }

    // `textContent` is typed `string | null`, so the optional chain has to guard the property
    // access itself; a `?? 0` placed after `.length` can never take effect.
    const textLength = element.textContent?.length ?? 0;

    // Build more than enough "nbsp + space" pairs, then trim to the original text length.
    element.textContent = '\u00A0 '.repeat(textLength).slice(0, textLength);
  }, { depthFirst: true }); // NOTE(review): traversal-order option of the project helper — semantics not visible here.
}
