/// <reference types="matrixrequirements-type-declarations" />
import { ICleanup } from "../../ProjectSettings";
import { globalMatrix, matrixSession } from "../../globals";

export type { IHTMLClean, IHTMLReplacement, IHTMLWhiteList };
export { HTMLCleaner };

/**
 * Complete rule set used by HTMLCleaner for client-side cleaning.
 */
interface IHTMLClean {
    /** Tag-level removal rules. */
    blackLists: {
        /** Selectors of elements that are removed together with their content. */
        removeTags: string[];
        /** Selectors of elements that are unwrapped: the element goes, its content stays. */
        keepOnlyInside: string[];
        /** Selectors of elements that are removed when their inner HTML is empty. */
        removeEmpty: string[];
    };
    /** Per-tag lists of what is allowed to survive cleaning. */
    whiteLists: {
        /** Inline style declarations (written without spaces, e.g. "font-weight:bold"). */
        styles: IHTMLWhiteList[];
        /** CSS class names. */
        classes: IHTMLWhiteList[];
        /** Plain attributes (other than class, style and data-*). */
        properties: IHTMLWhiteList[];
        /** data-* attribute names, without the "data-" prefix. */
        data: IHTMLWhiteList[];
    };
    /** Tag replacements applied for every strict level except StrictDoc. */
    tagReplacements: IHTMLReplacement[];
    /** Tag replacements applied when cleaning with the StrictDoc level. */
    tagReplacementsDoc: IHTMLReplacement[];
}
/**
 * One tag replacement rule: every element matching `what` is swapped for the
 * element built from `with`, keeping the original inner HTML.
 */
interface IHTMLReplacement {
    /** HTML used to create the replacement element, e.g. "<p class='h1'>". */
    with: string;
    /** Selector (usually a tag name such as "H1") of the elements to replace. */
    what: string;
}
/**
 * Whitelist entry: the values that may be kept on elements of one tag.
 */
interface IHTMLWhiteList {
    /** Values (styles, classes, attribute names or data names) allowed for that tag. */
    allowed: string[];
    /**
     * Tag the entry applies to, compared against the element's `tagName`
     * property (upper case for HTML elements). The style and class cleaners
     * additionally accept "*" as "any tag".
     */
    tagName: string;
}

/**
 * Cleans an HTML fragment (for example content pasted from Word) according to
 * a cleaning level, and optionally converts a small wiki syntax into HTML.
 *
 * The instance keeps the current state of the fragment in `text`; most public
 * methods transform `text` in place.
 */
class HTMLCleaner {
    /** When set, removeTags() loads the input as plain text (escaping "<") instead of HTML. */
    private autoEscape: boolean;
    /** The HTML being cleaned; updated in place by the cleaning steps. */
    text: string;
    /** Client-side cleaning rules (black lists, white lists and tag replacements). */
    cleanConfig: IHTMLClean = {
        blackLists: {
            removeTags: [
                "HEAD",
                "FOOT",
                "STYLE",
                "SCRIPT",
                "IFRAME",
                "applet",
                "embed",
                "noframes",
                "noscript",
                "meta",
                "link",
                "template",
            ],
            keepOnlyInside: [
                "font",
                "a:not([href])",
                // u: word..
                "u",
            ],
            removeEmpty: ["p", "a"],
        },
        whiteLists: {
            styles: [
                {
                    tagName: "SPAN",
                    allowed: [
                        "font-weight:bold",
                        "font-style:italic",
                        "text-decoration:underline",
                        "text-align:right",
                        "text-align:left",
                        "text-align:center",
                        "margin-left:25px",
                        "margin-left:50px",
                        "margin-left:75px",
                        "margin-left:100px",
                        "margin-left:125px",
                        "line-height:1",
                        "line-height:1.2",
                        "line-height:1.4",
                        "line-height:1.6",
                        "line-height:1.8",
                        "line-height:2",
                        "line-height:3",
                    ],
                },
            ],
            classes: [
                {
                    tagName: "SPAN",
                    allowed: [
                        "captionDetails",
                        "captionFix",
                        "captionPre",
                        "captionFig",
                        "captionPost",
                        "captionText",
                        "referenceDetails",
                        "referenceFix",
                        "referencePre",
                        "referenceFig",
                        "referencePost",
                        "referenceText",
                    ],
                },
                { tagName: "P", allowed: ["h1", "h2", "h3", "h4", "caption"] },
            ],
            properties: [
                { tagName: "IMG", allowed: ["src"] },
                { tagName: "A", allowed: ["href", "target"] },
                { tagName: "SPAN", allowed: ["contenteditable"] },
                { tagName: "TD", allowed: ["colspan", "rowspan"] },
                { tagName: "TH", allowed: ["colspan", "rowspan"] },
            ],
            data: [{ tagName: "SPAN", allowed: ["mid"] }],
        },
        tagReplacements: [
            { what: "H1", with: "<p class='h1'>" },
            { what: "H2", with: "<p class='h2'>" },
            { what: "H3", with: "<p class='h3'>" },
            { what: "H4", with: "<p class='h4'>" },
            { what: "H5", with: "<p class='h5'>" },
            { what: "H6", with: "<p class='h6'>" },
            { what: "H7", with: "<p style='font-weight:bold'>" },
            { what: "H8", with: "<p style='font-weight:bold'>" },
            { what: "I", with: "<span style='font-style:italic'>" },
            { what: "EM", with: "<span style='font-style:italic'>" },
            { what: "B", with: "<span style='font-weight:bold'>" },
            { what: "STRONG", with: "<span style='font-weight:bold'>" },
        ],
        tagReplacementsDoc: [
            { what: "H6", with: "<p style='font-weight:bold'>" },
            { what: "H7", with: "<p style='font-weight:bold'>" },
            { what: "H8", with: "<p style='font-weight:bold'>" },
            { what: "H5", with: "<h6>" },
            { what: "H4", with: "<h5>" },
            { what: "H3", with: "<h4>" },
            { what: "H2", with: "<h3>" },
            { what: "H1", with: "<h2>" },
            { what: "I", with: "<span style='font-style:italic'>" },
            { what: "EM", with: "<span style='font-style:italic'>" },
            { what: "B", with: "<span style='font-weight:bold'>" },
            { what: "STRONG", with: "<span style='font-weight:bold'>" },
        ],
    };

    /**
     * @param htmlCode the HTML (or text) to clean
     * @param autoEscape if true, removeTags() treats the input as text rather than HTML
     */
    constructor(htmlCode: string, autoEscape?: boolean) {
        // only ever used as a truthy flag, so undefined is normalised to false
        this.autoEscape = !!autoEscape;
        this.text = htmlCode;
    }

    /**
     * Cleans `text` according to the requested level and returns the result.
     *
     * - Server: only applyServerCleaning() is run.
     * - Basic / BasicSafety: control characters are replaced and black-listed
     *   tags are removed (BasicSafety keeps head and meta).
     * - PurifyOnly: control characters are replaced and DOMPurify is run.
     * - Soft: as above plus whitespace / Word-noise clean-up.
     * - any other level: additionally applies Word list conversion, the white
     *   lists and the tag replacements.
     *
     * @param cleaningLevel how aggressively to clean
     * @param keepMatrix if true, line feeds are kept and the white-list based
     *        steps (styles, classes, properties, data, image size, tag replacement) are skipped
     * @returns the cleaned HTML (also stored in `text`)
     */
    getClean(cleaningLevel?: HTMLCleaner.CleanLevel, keepMatrix?: boolean) {
        if (cleaningLevel == HTMLCleaner.CleanLevel.Server) {
            // use the server whitelists
            this.applyServerCleaning();
            return this.text;
        }
        // remove dangerous stuff (MATRIX-936 weird characters kill xml / doc generation) copy paste word
        if (keepMatrix) {
            // keep \n (\u000A) for table formatting...
            // eslint-disable-next-line no-control-regex
            this.text = this.text.replace(/[\u0001-\u0009]/g, " ");
            // eslint-disable-next-line no-control-regex
            this.text = this.text.replace(/[\u000B-\u001F]/g, " ");
        } else {
            // eslint-disable-next-line no-control-regex
            this.text = this.text.replace(/[\u0001-\u001F]/g, " ");
        }

        // remove some stuff which should definitely not be in like a <head>
        if (cleaningLevel == HTMLCleaner.CleanLevel.BasicSafety) {
            // meta and head need to stay to link word docs
            this.cleanConfig.blackLists.removeTags = this.cleanConfig.blackLists.removeTags.filter(function (tag) {
                const lower = tag.toLowerCase();
                // the predicate must be returned, otherwise every tag is dropped from the black list
                return lower != "head" && lower != "meta";
            });
        }
        if (cleaningLevel != HTMLCleaner.CleanLevel.PurifyOnly) {
            this.removeTags();
        }

        if (cleaningLevel == HTMLCleaner.CleanLevel.Basic || cleaningLevel == HTMLCleaner.CleanLevel.BasicSafety) {
            return this.text;
        }

        // always sanitize
        this.text = DOMPurify.sanitize(this.text);

        if (cleaningLevel == HTMLCleaner.CleanLevel.PurifyOnly) {
            return this.text;
        }

        /*********** mostly code maintenance ***************/
        this.replaceNoCount("\t", "");
        this.replaceNoCount("  ", " ");
        this.replaceNoCount(" \n", "\n");
        this.replaceNoCount("\t\n", "\n");
        this.replaceNoCount("\n\n", "\n");
        this.replaceNoCount("  ", " ");

        // remove word specific noise
        let tagStripper = new RegExp("<(/)*(v:|\\?xml:|st1:|o:)(.*?)>", "gi");
        this.text = this.text.replace(tagStripper, "");

        this.replaceNoCount("<>", " ");

        const isSoft = cleaningLevel == HTMLCleaner.CleanLevel.Soft;

        if (!isSoft) {
            // handle word lists (needs to happen before unknown classes and comments are removed)
            this.handleWordLists();

            // remove remaining comments
            this.text = this.text.replace(/<!--[^>]*-->/g, "");

            /*********** this might change the formatting ***************/

            // some things might not be in but we want to keep the inside <font>
            this.removeInsideTags();

            // clean inline style, classes, properties etc. according to some white lists
            if (!keepMatrix) {
                this.cleanInlineStyle();
                this.cleanClasses();
                this.cleanProperties();
            }
        }
        if (!keepMatrix) {
            this.setMaxImageSize();
            this.cleanData();
        }
        this.replaceNoCount("&nbsp;&nbsp;", " ");
        this.replaceNoCount("&nbsp; ", " ");
        this.replaceNoCount(" &nbsp;", " ");

        if (!isSoft && !keepMatrix) {
            // @ts-ignore TODO: MATRIX-6934: nullStrictCheck should be fixed for next line
            this.replaceTags(cleaningLevel);
        }

        this.replaceNoCount("> &nbsp;<", ">&nbsp;<");
        this.replaceNoCount(">&nbsp; <", ">&nbsp;<");
        // not good this will actually remove sometimes needed spaces ... this.replaceNoCount("> <", "><");

        this.replaceNoCount("> \n", ">\n");

        // repeat the whitespace normalisation until a full pass changes nothing
        let changes = 0;
        do {
            changes = 0;
            changes += this.replaceCount("  ", " ");
            changes += this.replaceCount(" >", ">");
            changes += this.replaceCount("\t", "");
            changes += this.replaceCount("  ", " ");
            changes += this.replaceCount("&nbsp;\n", "\n");
            changes += this.replaceCount(" \n", "\n");
            changes += this.replaceCount("\n\n", "\n");
            changes += this.replaceCount("</li></ul><ul><li>", "</li><li>"); // make real lists
        } while (changes > 0);

        this.removeUseLessStuff();

        return this.text;
    }

    /**
     * Converts wiki markup in `text` to HTML, but only if the local storage
     * entry "wiki" is set to "on".
     *
     * @returns the (possibly converted) text
     */
    replaceWiki() {
        if (localStorage.getItem("wiki") != "on") {
            return this.text;
        }

        this.applyWiki();

        return this.text;
    }

    /**
     * Looks up the server cleanup rules: first from the item configuration,
     * then from the customer setting "htmlCleanup".
     *
     * @returns the rules, or null if none exist or their `cleanup` flag is falsy
     */
    private getCleanupSettings(): ICleanup {
        let cleanup = globalMatrix.ItemConfig.getCleanupRules();
        if (!cleanup) {
            cleanup = matrixSession.getCustomerSettingJSON("htmlCleanup");
        }
        if (!cleanup || !cleanup.cleanup) {
            // @ts-ignore TODO: MATRIX-6934: nullStrictCheck should be fixed for next line
            return null;
        }
        return cleanup;
    }

    /** True if `attr` is white-listed for all tags, for `tagName`, or is an enforced attribute of `tagName`. */
    private isAttributeAllowed(cleaning: ICleanup, tagName: string, attr: string): boolean {
        return !!(
            (cleaning.attributes[":all"] && cleaning.attributes[":all"].indexOf(attr) != -1) ||
            (cleaning.attributes[tagName] && cleaning.attributes[tagName].indexOf(attr) != -1) ||
            (cleaning.enforcedAttributes[tagName] && cleaning.enforcedAttributes[tagName].indexOf(attr) != -1)
        );
    }

    /**
     * Checks the value of `attr` on `node` against every protocol rule for this
     * tag / attribute pair and returns the protocol found for each rule that
     * rejects it ("unspecified" when the attribute has no value).
     */
    private findUnsupportedProtocols(cleaning: ICleanup, node: Element, tagName: string, attr: string): string[] {
        const unsupported: string[] = [];
        if (!cleaning.protocolAttributes) {
            return unsupported;
        }
        for (let protocolAttribute of cleaning.protocolAttributes.filter(
            (p) => p.element == tagName && p.attribute == attr,
        )) {
            let supportedProtocols = protocolAttribute.protocols;
            let actualValue = node.getAttribute(attr);
            // the protocol is whatever precedes the first ":" of the value
            let actualProtocol = actualValue ? actualValue.split(":")[0] : "unspecified";
            if (supportedProtocols.indexOf(actualProtocol) == -1) {
                unsupported.push(actualProtocol);
            }
        }
        return unsupported;
    }

    /**
     * Checks `text` against the server cleanup rules without modifying it.
     *
     * @returns a list of problems as HTML snippets; empty if there are no rules or no problems
     */
    public checkServerCleaning() {
        let that = this;
        let cleaning = this.getCleanupSettings();
        if (!cleaning) {
            // the server does not care
            return [];
        }
        let results: string[] = [];

        let code = $("<div>").html(this.text);
        $("*", code).each(function (idx, node) {
            let name = node.nodeName.toLowerCase();
            if (cleaning.tags.indexOf(name) == -1) {
                results.push(`<b>${name}</b> is not a supported tag`);
                return;
            }

            // check attributes
            let attrs = node.getAttributeNames();
            for (let attIdx = 0; attIdx < attrs.length; attIdx++) {
                let attr = attrs[attIdx].toLowerCase();
                if (!that.isAttributeAllowed(cleaning, name, attr)) {
                    results.push(`<b>${attr}</b> is not a supported attribute of <b>${name}</b>`);
                    continue;
                }
                for (let actualProtocol of that.findUnsupportedProtocols(cleaning, node, name, attr)) {
                    results.push(
                        `<b>${attr}</b> of <b>${name}</b> uses an unsupported protocol: <b>${actualProtocol}</b> `,
                    );
                }
            }
        });

        return results;
    }

    /**
     * Applies the server cleanup rules to `text`: sanitizes with the allowed
     * tag list, removes black-listed tags and strips attributes that are not
     * white-listed or that use an unsupported protocol.
     *
     * @returns the resulting text (unchanged if there are no server rules)
     */
    public applyServerCleaning() {
        let that = this;
        let cleaning = this.getCleanupSettings();
        if (!cleaning) {
            // the server does not care
            return this.text;
        }

        this.text = DOMPurify.sanitize(this.text, { ALLOWED_TAGS: cleaning.tags });

        let code = $("<div>").html(this.text);
        $.each(this.cleanConfig.blackLists.removeTags, function (csidx: number, tag: string) {
            $(tag, code).each(function (idx, cb) {
                $(cb).remove();
            });
        });

        $("*", code).each(function (idx, node) {
            let name = node.nodeName.toLowerCase();

            // getAttributeNames() returns a copy, so removing while iterating is safe
            let attrs = node.getAttributeNames();
            for (let attIdx = 0; attIdx < attrs.length; attIdx++) {
                let attr = attrs[attIdx].toLowerCase();
                if (
                    !that.isAttributeAllowed(cleaning, name, attr) ||
                    that.findUnsupportedProtocols(cleaning, node, name, attr).length > 0
                ) {
                    node.removeAttribute(attr);
                }
            }
        });

        this.text = code.html();
        return this.text;
    }

    /** Runs all wiki conversions: code blocks first, then the three list kinds, then tables. */
    private applyWiki() {
        this.replaceCode();
        this.replaceList("*", "<ul>", "</ul>");
        this.replaceList("#", "<ol>", "</ol>");
        this.replaceList("?", "<ol type='A'>", "</ol>");
        this.replaceTable();
    }

    /**
     * Wiki code blocks: top-level nodes between a node whose text starts with
     * "{{{" and one whose text starts with "}}}" are merged into one <pre>,
     * one line per node. The marker nodes themselves are dropped.
     */
    private replaceCode() {
        let content = $("<div>").html(this.text);

        let newHTML = "";

        let inCode = false;
        let firstCodeLine = false;

        $.each(content[0].childNodes, function (childIdx, child) {
            // NOTE(review): for text nodes textContent is the unescaped text, so entities such
            // as &lt; are re-inserted as raw "<" — confirm this is acceptable for wiki input
            let html = child.outerHTML ? child.outerHTML : child.textContent;
            let text = child.textContent;

            if (!inCode && text.indexOf("{{{") == 0) {
                newHTML += "<pre>";
                inCode = true;
                firstCodeLine = true;
            } else if (inCode && text.indexOf("}}}") == 0) {
                newHTML += "</pre>";
                inCode = false;
            } else if (inCode) {
                // separate code lines by a line feed, but not before the first one
                if (!firstCodeLine) {
                    newHTML += "\n";
                } else {
                    firstCodeLine = false;
                }

                newHTML += text;
            } else {
                newHTML += html;
            }
        });
        // in case the code block did not end in the text
        if (inCode) {
            newHTML += "</pre>";
        }

        this.text = newHTML;
    }

    /**
     * Wiki tables: consecutive top-level nodes whose text starts with "|" and
     * contains at least two "|" become rows of one bordered table. Lines
     * starting with "||" (and containing at least two "||") are header rows
     * whose cells are rendered bold. <pre> nodes are never table lines.
     */
    private replaceTable() {
        let content = $("<div>").html(this.text);

        let newHTML = "";

        let inTable = false;

        $.each(content[0].childNodes, function (childIdx, child) {
            let html = child.outerHTML ? child.outerHTML : child.textContent;
            let text = child.textContent;

            let isTableLine = text.indexOf("|") == 0 && text.split("|").length > 2; // at least 2 | one of them in first pos
            let isTableLineHeader = text.indexOf("||") == 0 && text.split("||").length > 2; // at least 2 || one of them in first pos
            if (child.nodeName == "PRE") {
                isTableLine = false;
                isTableLineHeader = false;
            }

            // open / close the table; the node itself is emitted exactly once below
            if (inTable && !isTableLine) {
                newHTML += "</table>";
                inTable = false;
            } else if (!inTable && isTableLine) {
                newHTML += "<table class='table table-bordered'>";
                inTable = true;
            }

            if (isTableLineHeader) {
                let parts = text.split("||");
                newHTML += "<tr>";
                $.each(parts, function (idx, part) {
                    // skip what is before the first separator and an empty cell after the last one
                    if (idx != 0 && (idx != parts.length - 1 || part != "")) {
                        newHTML += "<td><b>" + part + "</b></td>";
                    }
                });
                newHTML += "</tr>";
            } else if (isTableLine) {
                let parts = text.split("|");
                newHTML += "<tr>";
                $.each(parts, function (idx, part) {
                    if (idx != 0 && (idx != parts.length - 1 || part != "")) {
                        newHTML += "<td>" + part + "</td>";
                    }
                });
                newHTML += "</tr>";
            } else {
                newHTML += html;
            }
        });
        // in case the table did not end in the text
        if (inTable) {
            newHTML += "</table>";
        }

        this.text = newHTML;
    }

    /**
     * Wiki lists: top-level nodes whose text starts with one or more `bullet`
     * characters become list items; the number of leading bullets is the
     * nesting depth. <pre> nodes are copied unchanged and end any open list.
     *
     * @param bullet the single character marking a list line
     * @param ul opening tag of the list
     * @param eul closing tag of the list
     */
    private replaceList(bullet: string, ul: string, eul: string) {
        let that = this;

        let content = $("<div>").html(this.text);

        let listLevel = 0;
        let newHTML = "";

        $.each(content[0].childNodes, function (childIdx, child) {
            let html = child.outerHTML ? child.outerHTML : child.textContent;
            let text = child.textContent;
            let inner = child.innerHTML ? child.innerHTML : child.textContent;

            let lineListLevel = that.getListLevel(text, bullet);
            if (child.nodeName == "PRE") {
                newHTML += html;
                lineListLevel = 0;
            } else if (lineListLevel > 0) {
                // insert ul's
                for (let ill = 0; ill < lineListLevel - listLevel; ill++) {
                    newHTML += ul;
                }
                // or close
                for (let ill = 0; ill < listLevel - lineListLevel; ill++) {
                    newHTML += eul;
                }

                // ignore the <p> and remove first lineListLevel bullets
                for (let llidx = 0; llidx < lineListLevel; llidx++) {
                    inner = inner.replace(bullet, "");
                }
                newHTML += "<li>" + inner + "</li>";
            } else {
                // insert end ul's
                for (let ill = 0; ill < listLevel - lineListLevel; ill++) {
                    newHTML += eul;
                }
                newHTML += html;
            }

            // store current list level
            listLevel = lineListLevel;
        });
        // in case the list did not end in the text
        for (let ill = 0; ill < listLevel; ill++) {
            newHTML += eul;
        }

        this.text = newHTML;
    }

    /** @returns the number of consecutive `bullet` characters at the start of `text` (0 for empty text) */
    private getListLevel(text: string, bullet: string): number {
        if (!text) return 0;
        let idx = 0;
        while (text.length > idx && text[idx] == bullet) {
            idx++;
        }
        return idx;
    }

    /** @returns the current state of the text */
    public getText() {
        return this.text;
    }

    /** Adds max-width / max-height inline styles to every image. */
    private setMaxImageSize() {
        let code = $("<div>").html(this.text);
        $("img", code).each(function (idx, img) {
            $(img).css("max-width", "604px"); // 16 cm
            $(img).css("max-height", "800px"); // 22 cm
        });

        this.text = code.html();
    }

    /**
     * Replaces every occurrence of `what` and reports whether there was one.
     *
     * @returns 1 if `what` occurred in the text before replacing, otherwise 0
     */
    private replaceCount(what: string, wth: string): number {
        let replaced = this.text.indexOf(what) != -1;
        this.replaceNoCount(what, wth);

        return replaced ? 1 : 0;
    }

    /**
     * Replaces every occurrence of the literal string `what` by `wth`.
     * Both arguments are taken literally (no regular expression or "$" pattern handling).
     */
    private replaceNoCount(what: string, wth: string) {
        if (what === "") {
            // nothing sensible to search for
            return;
        }
        this.text = this.text.split(what).join(wth);
    }

    /**
     * Replaces tags according to `tagReplacementsDoc` (StrictDoc level) or
     * `tagReplacements` (any other level), keeping the inner HTML.
     *
     * @returns the number of replaced elements
     */
    public replaceTags(cleaningLevel: HTMLCleaner.CleanLevel): number {
        let code = $("<div>").html(this.text);
        let rules =
            cleaningLevel == HTMLCleaner.CleanLevel.StrictDoc
                ? this.cleanConfig.tagReplacementsDoc
                : this.cleanConfig.tagReplacements;

        let count = 0;
        $.each(rules, function (tgidx, tr) {
            // repeat until a pass finds nothing: inner matches are rebuilt from HTML by the outer replacement
            let replaced = false;
            do {
                replaced = false;
                $(tr.what, code).each(function (idx, x) {
                    let y = $(tr.with).html($(x).html());
                    $(x).replaceWith(y);
                    replaced = true;
                    count++;
                });
            } while (replaced);
        });
        this.text = code.html();
        return count;
    }

    /** Removes all black-listed tags (`blackLists.removeTags`) including their content. */
    public removeTags() {
        let code = this.autoEscape ? $("<div>").text(this.text) : $("<div>").html(this.text); // text escapes <
        $.each(this.cleanConfig.blackLists.removeTags, function (csidx: number, tag: string) {
            $(tag, code).each(function (idx, cb) {
                $(cb).remove();
            });
        });
        this.text = code.html();
    }

    /** Unwraps the elements listed in `blackLists.keepOnlyInside`: the tag is removed, its content stays. */
    public removeInsideTags() {
        let code = $("<div>").html(this.text);

        $.each(this.cleanConfig.blackLists.keepOnlyInside, function (csidx: number, tag: string) {
            $(tag, code).contents().unwrap();
        });
        this.text = code.html();
    }

    /**
     * Cleans inline styles: allow only stuff which can be created with the editor.
     * Besides the white-listed declarations, SPANs keep their color and
     * background-color and IMGs keep their width and height.
     */
    public cleanInlineStyle() {
        let that = this;
        let code = $("<div>").html(this.text);
        $("*", code).each(function (idx, node) {
            let style = $(node).attr("style");
            if (!style) {
                return;
            }
            let tagName = $(node).prop("tagName");
            let newStyle: string[] = [];
            let sns = style.replace(/ /g, ""); // note this could cause problems with font names with spaces, but these are removed anyway

            // NOTE(review): this is a substring test, so e.g. "line-height:1" also matches
            // inside "line-height:1.5" — confirm whether an exact match was intended
            $.each(that.cleanConfig.whiteLists.styles, function (csidx: number, conf: IHTMLWhiteList) {
                if (conf.tagName === "*" || conf.tagName === tagName) {
                    $.each(conf.allowed, function (id, ks) {
                        if (sns.indexOf(ks) !== -1) {
                            newStyle.push(ks);
                        }
                    });
                }
            });

            // padded with ";" so a declaration can be recognised by its leading ";"
            let padded = ";" + sns + ";";
            let keepDeclaration = function (pattern: RegExp) {
                let found = padded.match(pattern);
                if (found && found.length > 0) {
                    // drop the leading ";"
                    newStyle.push(found[found.length - 1].substring(1));
                }
            };
            // background and foreground colors
            if (tagName === "SPAN") {
                keepDeclaration(/;background-color[^;]+/);
                keepDeclaration(/;color[^;]+/);
            }
            // width and height of images
            if (tagName === "IMG") {
                keepDeclaration(/;width[^;]+/);
                keepDeclaration(/;height[^;]+/);
            }

            if (newStyle.length > 0) {
                $(node).attr("style", newStyle.join(";"));
            } else {
                $(node).removeAttr("style");
            }
        });
        this.text = code.html();
    }

    /**
     * Cleans classes: only white-listed classes are kept; tables always get
     * the classes "table" and "table-bordered".
     */
    public cleanClasses() {
        let that = this;

        let code = $("<div>").html(this.text);
        $("*", code).each(function (idx, node) {
            let tagName = $(node).prop("tagName");
            let newClasses: string[] = [];
            let cs = $(node).attr("class");
            if (cs) {
                let classes = cs.split(" ");

                $.each(that.cleanConfig.whiteLists.classes, function (csidx: number, conf: IHTMLWhiteList) {
                    if (conf.tagName === "*" || conf.tagName === tagName) {
                        $.each(conf.allowed, function (id, ks) {
                            if (classes.indexOf(ks) !== -1) {
                                newClasses.push(ks);
                            }
                        });
                    }
                });
            }
            // always make nice bordered tables with full width
            if (tagName === "TABLE") {
                newClasses.push("table");
                newClasses.push("table-bordered");
            }

            $(node).removeAttr("class");
            if (newClasses.length > 0) {
                $(node).attr("class", newClasses.join(" "));
            }
        });
        this.text = code.html();
    }

    /**
     * Removes every attribute of `node` which is not in `whiteList`.
     * class, style and data-* attributes are never touched here.
     */
    private removeProps(node: JQuery, whiteList: string[]) {
        let attributes = node[0].attributes;
        // walk backwards because the attribute collection shrinks while removing
        let i = attributes.length;
        while (i--) {
            let attr = attributes[i];
            if (
                whiteList.indexOf(attr.name) == -1 &&
                attr.name.indexOf("data-") !== 0 &&
                attr.name != "class" &&
                attr.name != "style"
            ) {
                node.removeAttr(attr.name);
            }
        }
    }

    /** Cleans properties: only white-listed attributes survive; tags without a rule lose all of them. */
    public cleanProperties() {
        let that = this;

        let code = $("<div>").html(this.text);
        $("*", code).each(function (idx, node) {
            let hasRule = false;
            $.each(that.cleanConfig.whiteLists.properties, function (csidx: number, conf: IHTMLWhiteList) {
                if (conf.tagName === $(node).prop("tagName")) {
                    hasRule = true;
                    that.removeProps($(node), conf.allowed);
                }
            });
            if (!hasRule) {
                that.removeProps($(node), []);
            }
        });
        this.text = code.html();
    }

    /** Removes all data-* attributes which are not white-listed for their tag. */
    public cleanData() {
        let that = this;

        let code = $("<div>").html(this.text);
        $("*", code).each(function (idx, node) {
            // note: the jquery data accessor does not work for stuff like data-vr-xxxx, it will return the name vrXxxx instead of vr-xxxx
            // collect first, remove afterwards, so the attribute list is not changed while iterating
            let toRemove: string[] = [];
            $.each(node.attributes, function (key: number, attr: Attr) {
                if (attr.name.indexOf("data-") === 0) {
                    let attrName = attr.name.replace("data-", "");
                    let isWhiteListed = false;

                    $.each(that.cleanConfig.whiteLists.data, function (csidx: number, conf: IHTMLWhiteList) {
                        if (conf.tagName === $(node).prop("tagName") && conf.allowed.indexOf(attrName) !== -1) {
                            isWhiteListed = true;
                        }
                    });
                    if (!isWhiteListed) {
                        toRemove.push(attrName);
                    }
                }
            });
            $.each(toRemove, function (ridx, remove) {
                node.removeAttribute("data-" + remove);
            });
        });
        this.text = code.html();
    }

    /**
     * Repeatedly removes markup without effect (spans without style or class,
     * empty p / a, divs wrapping a single div / p / ul, empty divs followed by
     * a div) until a pass changes nothing. Each pass stops at the first step
     * that changed something and starts over.
     */
    public removeUseLessStuff() {
        let keepGoing = true;
        while (keepGoing) {
            let code = $("<div>").html(this.text);
            keepGoing = this.removeSpansWithoutStyle(code);
            keepGoing = keepGoing || this.removeEmpty(code);
            keepGoing = keepGoing || this.unwrapEmptyStuff(code, "div", "DIV");
            keepGoing = keepGoing || this.unwrapEmptyStuff(code, "div", "P");
            // removing the div with a span inside is bad, the div makes a paragraph
            // keepGoing = keepGoing || this.unwrapEmptyStuff( code, "div","SPAN");
            keepGoing = keepGoing || this.unwrapEmptyStuff(code, "div", "UL");
            this.text = code.html();
            keepGoing = keepGoing || this.removeDivDiv();
        }
    }

    /**
     * Converts lists pasted from Word into real <ul>/<li> lists.
     *
     * Word exports list items as paragraphs, e.g.
     *   <p class="MsoListParagraphCxSpFirst" style="...">
     *     <!--[if !supportLists]--><span style="...">·</span><!--[endif]-->first item</p>
     *   <p class="MsoListParagraphCxSpMiddle" style="...;mso-list:l0 level1 lfo1">...</p>
     *   <p class="MsoListParagraphCxSpLast" style="...">...</p>
     * A list starts at a class ending in "CxSpFirst"; following siblings with a
     * class ending in "CxSpMiddle" / "CxSpLast" are added to it. The nesting
     * level is read from "level1" .. "level6" in the style attribute.
     */
    public handleWordLists() {
        // replace the funny points (the bullet characters Word puts into conditional comments)
        this.text = this.text.replace(/<!--\[if !supportLists]-->.*?<!--\[endif]-->/g, "");
        let code = $("<div>").html(this.text);
        $.each($("[class$='CxSpFirst']", code), function (idx, li) {
            let next = $(li).next();
            let lstart = $("<ul>").append($("<li>").append($(li).html()));
            $(li).replaceWith(lstart);
            let level = 0;
            // lists which are currently open; one entry is added per indent step
            let levelStack: JQuery[] = [lstart];
            while (next && next.length > 0) {
                // remember the following sibling before `next` is possibly removed
                let x = next.next();
                if (next.is("[class$='CxSpMiddle']") || next.is("[class$='CxSpLast']")) {
                    let nextLevel = level;
                    let nextStyle = next.attr("style");
                    if (!nextStyle || nextStyle.indexOf("level1") != -1) nextLevel = 0;
                    else if (nextStyle.indexOf("level2") != -1) nextLevel = 1;
                    else if (nextStyle.indexOf("level3") != -1) nextLevel = 2;
                    else if (nextStyle.indexOf("level4") != -1) nextLevel = 3;
                    else if (nextStyle.indexOf("level5") != -1) nextLevel = 4;
                    else if (nextStyle.indexOf("level6") != -1) nextLevel = 5;

                    if (nextLevel === level) {
                        lstart.append($("<li>").append($(next).html()));
                    } else if (nextLevel > level) {
                        // indent
                        let newStart = $("<ul>").append($("<li>").append($(next).html()));
                        levelStack.push(newStart);
                        lstart.append(newStart);
                        lstart = newStart;
                    } else {
                        // outdent
                        if (nextLevel < levelStack.length) {
                            lstart = levelStack[nextLevel]; // go back to good level and remove other lists
                            levelStack.splice(nextLevel + 1, 100);
                        } else {
                            // doesn't make sense really
                            // NOTE(review): this list is never attached to `code`, so items
                            // appended to it do not show up in the result — confirm intent
                            lstart = $("<ul>").append($("<li>").append($(li).html()));
                            levelStack.push(lstart);
                        }
                        lstart.append($("<li>").append($(next).html()));
                    }

                    level = nextLevel;
                    $(next).remove();
                }
                next = x;
            }
        });
        this.text = code.html();
    }

    // <span>a<span> -> a
    // <span>a<span>b</span><span> -> b
    // <span>a<span style='c'>b</span><span> -> <span style='c'>b</span>

    /**
     * Replaces spans which have neither a style, a class nor a "data"
     * attribute by their content.
     *
     * @returns true if at least one span was replaced
     */
    private removeSpansWithoutStyle(node: JQuery) {
        let replaced = false;
        $.each($("span", node), function (idx, span) {
            // NOTE(review): attr("data") reads an attribute literally named "data",
            // not data-* attributes — confirm that spans with only data-* should go
            let hasMeta = $(span).attr("style") || $(span).attr("class") || $(span).attr("data");
            if (!hasMeta) {
                replaced = true;
                $(span).replaceWith($(span).html());
            }
        });
        return replaced;
    }

    /**
     * Removes elements listed in `blackLists.removeEmpty` whose inner HTML is empty.
     *
     * @returns true if at least one element was removed
     */
    private removeEmpty(node: JQuery) {
        let replaced = false;
        $.each(this.cleanConfig.blackLists.removeEmpty, function (idx, re) {
            $.each($(re, node), function (idx, p) {
                if ($(p).html() === "") {
                    replaced = true;
                    $(p).remove();
                }
            });
        });
        return replaced;
    }

    /**
     * Unwraps the single child of `outer` elements which have no style, class
     * or "data" attribute and contain nothing but one element with the tag
     * name `inner`.
     *
     * @param node root to search in
     * @param outer selector of the wrapping elements
     * @param inner tag name (upper case) the only child must have
     * @returns true if at least one element was unwrapped
     */
    private unwrapEmptyStuff(node: JQuery, outer: string, inner: string) {
        let unwraped = false;
        $(outer, node).each(function (idx, out) {
            let hasMeta = $(out).attr("style") || $(out).attr("class") || $(out).attr("data");
            if (
                !hasMeta &&
                out.children.length == 1 &&
                (<HTMLElement>out.children[0]).tagName === inner &&
                // no text besides the single child element
                out.innerHTML == (<HTMLElement>out.children[0]).outerHTML
            ) {
                unwraped = true;
                $(out).contents().unwrap();
            }
        });
        return unwraped;
    }

    /**
     * Removes empty divs which are directly followed by another div.
     *
     * @returns true if the text changed
     */
    private removeDivDiv() {
        let before = this.text;
        this.text = this.text.replace(/<div><\/div><div>/g, "<div>");

        return before != this.text;
    }
}

namespace HTMLCleaner {
    /**
     * Cleaning levels understood by HTMLCleaner.getClean().
     * The numeric values are positional and must stay stable.
     */
    export enum CleanLevel {
        /** Replace control characters and run DOMPurify only. */
        PurifyOnly = 0,
        /** Replace control characters and remove black-listed tags. */
        Basic = 1,
        /** Sanitize and normalise whitespace, without white lists or tag replacement. */
        Soft = 2,
        /** Full cleaning using the white lists and the default tag replacements. */
        Strict = 3,
        /** Like Basic, but head and meta tags are kept. */
        BasicSafety = 4,
        /** Like Strict, but using the document tag replacements. */
        StrictDoc = 5,
        /** Server like cleaning -> remove all but a whitelist. */
        Server = 6,
    }
}

/*

known tags
----------
HR, A, SPAN, BR, TABLE, IMG, P, DIV, TR,TD, UL, OL, LI, blockquote, PRE


data
----
SPAN  data-mid="1489062317559"

*/
