import { descendants } from '@common/html/util/dom';

/**
 * Normalizes the HTML by removing attributes that are not whitelisted.
 */
export class HtmlAttributeNormalizer {
  /**
   * Removes every attribute that is not whitelisted from the nodes visited under
   * `doc.documentElement`.
   *
   * The whitelist is a dictionary of tag-name patterns to attribute-name patterns.
   * Both the key and the value are regex source strings. Each one is matched
   * case-insensitively and must match the WHOLE tag name / attribute name, so
   * `'a'` only applies to anchor tags (not to `table` or `span`) and `'class'`
   * does not keep a `classname` attribute. Use `.*` explicitly for partial matches,
   * e.g. `'data-.*'`.
   *
   * An attribute is kept when at least one entry whose key matches the node's tag
   * name has a value that matches the attribute name.
   *
   * e.g.
   * ```typescript
   * {
   *  '.*': 'style|class', // Keep the style and class attributes for all tags
   *  'form|input': ['name', 'value', 'madcap:.+'].join('|'), // Keep the name, value, and all madcap attributes for form and input tags
   *  'a': '.*' // Keep all attributes for anchor tags
   * }
   * ```
   *
   * @param doc The HTML doc to normalize. It is modified in place.
   * @param whitelist The whitelist of attributes to keep. When omitted (or empty),
   * nothing is whitelisted and every attribute is removed.
   * @throws SyntaxError if a whitelist key or value is not a valid regular expression.
   */
  normalize(doc: Document, whitelist?: Dictionary<string>): void {
    // Compile the whitelist once up front rather than once per node and per attribute.
    const rules = Object.entries(whitelist ?? {}).map(([tagPattern, attributePattern]) => ({
      tag: HtmlAttributeNormalizer.toFullMatchRegExp(tagPattern),
      attribute: HtmlAttributeNormalizer.toFullMatchRegExp(attributePattern)
    }));

    descendants(doc.documentElement, (node: Element) => {
      // Gather the attribute patterns of the whitelist entries that match the node's tag name
      const allowed = rules
        .filter(rule => rule.tag.test(node.nodeName))
        .map(rule => rule.attribute);

      // Remove any attributes that are not whitelisted.
      // The optional call skips nodes that do not expose getAttributeNames
      // (presumably non-element nodes handed over by the traversal - confirm against `descendants`).
      node.getAttributeNames?.()?.forEach(name => {
        const whitelisted = allowed.some(pattern => pattern.test(name));

        if (!whitelisted) {
          node.removeAttribute(name);
        }
      });
    });
  }

  /**
   * Compiles a whitelist pattern into a case-insensitive regex that must match the entire input.
   * The non-capturing group keeps top-level alternations (`a|b`) inside the anchors.
   */
  private static toFullMatchRegExp(pattern: string): RegExp {
    return new RegExp(`^(?:${pattern})$`, 'i');
  }
}
