diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md deleted file mode 100644 index f512eec..0000000 --- a/ATTRIBUTIONS.md +++ /dev/null @@ -1,5 +0,0 @@ -This repo includes OSS code from: - -### StackOverflow - -Tag whitelisting is inspired by this answer: https://stackoverflow.com/a/28533511 diff --git a/HtmlSanitizer.js b/HtmlSanitizer.js index b696c23..5ce0c11 100644 --- a/HtmlSanitizer.js +++ b/HtmlSanitizer.js @@ -1,35 +1,31 @@ -//JavaScript HTML Sanitizer, (c) Alexander Yumashev, Jitbit Software. +/* + * JavaScript HTML Sanitizer, (c) Alexander Yumashev, Jitbit Software. + * homepage https://github.com/jitbit/HtmlSanitizer + * License: GNU GPL v3 https://github.com/jitbit/HtmlSanitizer/blob/master/LICENSE + */ -//homepage https://github.com/jitbit/HtmlSanitizer +var HtmlSanitizer = (function () { -//License: GNU GPL v3 https://github.com/jitbit/HtmlSanitizer/blob/master/LICENSE - -console.log('Sanitizer loading'); - -var HtmlSanitizer = new (function () { - - var tagWhitelist_ = { + var _tagWhitelist = { 'A': true, 'ABBR': true, 'B': true, 'BLOCKQUOTE': true, 'BODY': true, 'BR': true, 'CENTER': true, 'CODE': true, 'DIV': true, 'EM': true, 'FONT': true, 'H1': true, 'H2': true, 'H3': true, 'H4': true, 'H5': true, 'H6': true, 'HR': true, 'I': true, 'IMG': true, 'LABEL': true, 'LI': true, 'OL': true, 'P': true, 'PRE': true, 'SMALL': true, 'SOURCE': true, 'SPAN': true, 'STRONG': true, 'TABLE': true, 'TBODY': true, 'TR': true, 'TD': true, 'TH': true, 'THEAD': true, 'UL': true, 'U': true, 'VIDEO': true }; + var _contentTagWhiteList = { 'FORM': true }; //tags that will be converted to DIVs + var _attributeWhitelist = { 'align': true, 'color': true, 'controls': true, 'height': true, 'href': true, 'src': true, 'style': true, 'target': true, 'title': true, 'type': true, 'width': true }; + var _cssWhitelist = { 'color': true, 'background-color': true, 'font-size': true, 'text-align': true, 'text-decoration': true, 'font-weight': true }; + var _schemaWhiteList = [ 'http:', 'https:', 'data:', 'm-files:', 'file:', 'ftp:' ]; //which "protocols" are allowed in "href", "src" etc + var _uriAttributes = { 'href': true, 'action': true, 'src': true }; + var _uriContainsWhiteList = [ ]; - var contentTagWhiteList_ = { 'FORM': true }; //tags that will be converted to DIVs - - var attributeWhitelist_ = { 'align': true, 'color': true, 'controls': true, 'height': true, 'href': true, 'src': true, 'style': true, 'target': true, 'title': true, 'type': true, 'width': true }; - - var cssWhitelist_ = { 'color': true, 'background-color': true, 'font-size': true, 'text-align': true, 'text-decoration': true, 'font-weight': true }; + var SanitizeHtml = function(input) { - var schemaWhiteList_ = [ 'http:', 'https:', 'data:', 'm-files:', 'file:', 'ftp:' ]; //which "protocols" are allowed in "href", "src" etc - - var uriAttributes_ = { 'href': true, 'action': true }; - - this.SanitizeHtml = function(input) { input = input.trim(); - if (input == "") return ""; //to save performance and not create iframe - //firefox "bogus node" workaround - if (input == "
") return ""; + //to save performance and not create iframe and firefox "bogus node" workaround + if (input == "" || input == "
") { + return "" + }; var iframe = document.createElement('iframe'); if (iframe['sandbox'] === undefined) { @@ -40,13 +36,18 @@ var HtmlSanitizer = new (function () { iframe.style.display = 'none'; document.body.appendChild(iframe); // necessary so the iframe contains a document var iframedoc = iframe.contentDocument || iframe.contentWindow.document; - if (iframedoc.body == null) iframedoc.write(""); // null in IE + if (iframedoc.body == null){ + iframedoc.write(""); // null in IE + } iframedoc.body.innerHTML = input; function makeSanitizedCopy(node) { + + var newNode; + if (node.nodeType == Node.TEXT_NODE) { - var newNode = node.cloneNode(true); - } else if (node.nodeType == Node.ELEMENT_NODE && (tagWhitelist_[node.tagName] || contentTagWhiteList_[node.tagName])) { + newNode = node.cloneNode(true); + } else if (node.nodeType == Node.ELEMENT_NODE && (_tagWhitelist[node.tagName] || _contentTagWhiteList[node.tagName])) { //remove useless empty spans (lots of those when pasting from MS Outlook) if ((node.tagName == "SPAN" || node.tagName == "B" || node.tagName == "I" || node.tagName == "U") @@ -54,25 +55,28 @@ var HtmlSanitizer = new (function () { return document.createDocumentFragment(); } - if (contentTagWhiteList_[node.tagName]) + if (_contentTagWhiteList[node.tagName]) { newNode = iframedoc.createElement('DIV'); //convert to DIV - else + } else { newNode = iframedoc.createElement(node.tagName); + } for (var i = 0; i < node.attributes.length; i++) { var attr = node.attributes[i]; - if (attributeWhitelist_[attr.name]) { + if (_attributeWhitelist[attr.name]) { if (attr.name == "style") { for (s = 0; s < node.style.length; s++) { var styleName = node.style[s]; - if (cssWhitelist_[styleName]) + if (_cssWhitelist[styleName]){ newNode.style.setProperty(styleName, node.style.getPropertyValue(styleName)); + } } } else { - if (uriAttributes_[attr.name]) { //if this is a "uri" attribute, that can have "javascript:" or something - if (attr.value.indexOf(":") > -1 && !startsWithAny(attr.value, schemaWhiteList_)) + if (_uriAttributes[attr.name]) { //if this is a "uri" attribute, that can have "javascript:" or something + if (attr.value.indexOf(":") > -1 && !URIstartsWithAndContains(attr.value)){ continue; + } } newNode.setAttribute(attr.name, attr.value); } @@ -86,26 +90,43 @@ var HtmlSanitizer = new (function () { newNode = document.createDocumentFragment(); } return newNode; - }; + } var resultElement = makeSanitizedCopy(iframedoc.body); document.body.removeChild(iframe); return resultElement.innerHTML .replace(/]*>(\S)/g, "
\n$1") .replace(/div>
\n