import { flattenNode, hasAttributeStyle, unwrapNode } from '@common/html/util/dom';

/**
 * Normalizes page breaks in HTML by unwrapping the Microsoft page breaks to be direct children of the body element.
 * If a tagName is provided, each page break is replaced with a new element created with that tagName.
 * An element with `mso-special-character: line-break` and either `page-break-after: always` or
 * `page-break-before: always` is considered a Microsoft page break.
 * If a page break element is not a direct child of the body element, it is unwrapped, making it a child of the
 * body element.
 * e.g.
 * ```html
 * <b>
 *   <span>
 *     <br style="page-break-after: always; mso-special-character: line-break">
 *   </span>
 * </b>
 * ```
 * will become
 * ```html
 * <br style="page-break-after: always; mso-special-character: line-break">
 * ```
 */
export class MsPageBreakNormalizer {
  /**
   * Normalizes the HTML. The document is modified in place; nothing is returned.
   * @param doc The HTML doc to normalize. A document without a `<body>` element is left untouched.
   * @param options Optionally provide a tag name to replace the page breaks with.
   */
  normalize(doc: Document, options?: { tagName: string }): void {
    const body = doc.querySelector('body');

    // querySelector returns null when the document has no <body>; there is nothing to normalize then
    if (!body) {
      return;
    }

    // Flatten the body so its nodes can be traversed linearly, unwrapping every page break found along the way
    flattenNode(body).forEach((element: HTMLElement) => {
      // Optional chaining is kept because the flattened list may contain nodes without a `style`
      // (presumably text/comment nodes). getPropertyValue yields '' for declarations that are not set.
      const style = element.style;
      const forcesPageBreak =
        style?.getPropertyValue('page-break-after') === 'always' ||
        style?.getPropertyValue('page-break-before') === 'always';

      // Only elements that force a page break AND carry the Microsoft line-break marker qualify
      if (!forcesPageBreak || !hasAttributeStyle(element, 'mso-special-character', 'line-break')) {
        return;
      }

      // Unwrap the page break so it is no longer nested below the body's direct children
      unwrapNode(element, body);

      // Replace the page break with a different tag if one was provided
      if (options?.tagName) {
        element.replaceWith(doc.createElement(options.tagName));
      }
    });
  }
}
