// Used to compare two HTML texts for differences -- they are considered equal if they normalize to the same text.
// That is, certain differences are ignored and treated as the same:
// 1. &nbsp; and space (' ') differences
// 2. &quot; and single quote (') differences
// 3. semicolons (';') inside HTML tag attributes
// 4. &amp; and ampersand ('&') differences inside HTML tags
/**
 * Normalizes an HTML string so that fragments differing only in ignorable
 * details produce the same text.
 *
 * The input is parsed with `DOMParser` and re-serialized through
 * `body.innerHTML`, after which:
 * - every `&nbsp;` becomes a plain space,
 * - every `&quot;` becomes a single quote (`'`),
 * - inside opening tags, `&amp;` becomes `&` and all semicolons are removed,
 * - leading and trailing whitespace is trimmed.
 *
 * Requires a global `DOMParser`.
 *
 * @param htmlText - the HTML text to normalize
 * @returns the normalized HTML text
 */
export const normalizeHtml = (htmlText: string): string => {
    const doc = new DOMParser().parseFromString(htmlText, 'text/html');

    return doc.body.innerHTML
        .replace(/&nbsp;/g, ' ')
        .replace(/&quot;/g, '\'')
        .replace(
            // match opening tags and normalize their attributes
            /<[a-zA-Z0-9]+(?:\s[^>]*)?>/g,
            tag => tag
                // Decode `&amp;` BEFORE stripping semicolons; once the ';' is gone
                // the entity can no longer be recognized.
                .replace(/&amp;/g, '&')
                // semicolons inside a tag are ignored when comparing attributes
                .replace(/;/g, '')
        )
        .trim();
}

/**
 * Reports whether two optional HTML strings are considered equal.
 *
 * Strictly identical arguments (including both being `undefined`) are equal.
 * If exactly one argument is `undefined`, the result is `false`. Otherwise the
 * two strings are compared after each is passed through `normalizeHtml`.
 *
 * @param newHtmlText - the new HTML text
 * @param originalHtmlText - the original HTML text
 * @returns `true` when the two inputs are considered equal
 */
export const htmlIsEqual = (newHtmlText?: string, originalHtmlText?: string): boolean => {
    // Fast path: same string value, or both missing.
    if (newHtmlText === originalHtmlText) {
        return true;
    }

    // A missing side never matches a present one, even an empty string.
    if (newHtmlText === undefined || originalHtmlText === undefined) {
        return false;
    }

    const normalizedOriginal = normalizeHtml(originalHtmlText);
    const normalizedNew = normalizeHtml(newHtmlText);
    return normalizedOriginal === normalizedNew;
}