Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Issue #565

- Upgrades `htmlparser2` to the new major version, `^8.0.0`.

## 2.7.1 (2022-07-20)

- Protocol-relative URLs are properly supported for script tags. Thanks to [paweljq](https://github.com/paweljq).
Expand Down
71 changes: 35 additions & 36 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -210,52 +210,52 @@ function sanitizeHtml(html, options, _recursing) {
initializeState();

const parser = new htmlparser.Parser({
onopentag: function(name, attribs) {
onopentag: function(tagname, attributes) {
// If `enforceHtmlBoundary` is `true` and this has found the opening
// `html` tag, reset the state.
if (options.enforceHtmlBoundary && name === 'html') {
if (options.enforceHtmlBoundary && tagname === 'html') {
initializeState();
}

if (skipText) {
skipTextDepth++;
return;
}
const frame = new Frame(name, attribs);
const frame = new Frame(tagname, attributes);
stack.push(frame);

let skip = false;
const hasText = !!frame.text;
let transformedTag;
if (has(transformTagsMap, name)) {
transformedTag = transformTagsMap[name](name, attribs);
if (has(transformTagsMap, tagname)) {
transformedTag = transformTagsMap[tagname](tagname, attributes);

frame.attribs = attribs = transformedTag.attribs;
frame.attribs = attributes = transformedTag.attribs;

if (transformedTag.text !== undefined) {
frame.innerText = transformedTag.text;
}

if (name !== transformedTag.tagName) {
frame.name = name = transformedTag.tagName;
if (tagname !== transformedTag.tagName) {
frame.name = tagname = transformedTag.tagName;
transformMap[depth] = transformedTag.tagName;
}
}
if (transformTagsAll) {
transformedTag = transformTagsAll(name, attribs);
transformedTag = transformTagsAll(tagname, attributes);

frame.attribs = attribs = transformedTag.attribs;
if (name !== transformedTag.tagName) {
frame.name = name = transformedTag.tagName;
frame.attribs = attributes = transformedTag.attribs;
if (tagname !== transformedTag.tagName) {
frame.name = tagname = transformedTag.tagName;
transformMap[depth] = transformedTag.tagName;
}
}

if ((options.allowedTags && options.allowedTags.indexOf(name) === -1) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) {
if ((options.allowedTags && options.allowedTags.indexOf(tagname) === -1) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) {
skip = true;
skipMap[depth] = true;
if (options.disallowedTagsMode === 'discard') {
if (nonTextTagsArray.indexOf(name) !== -1) {
if (nonTextTagsArray.indexOf(tagname) !== -1) {
skipText = true;
skipTextDepth = 1;
}
Expand All @@ -271,16 +271,16 @@ function sanitizeHtml(html, options, _recursing) {
tempResult = result;
result = '';
}
result += '<' + name;
result += '<' + tagname;

if (name === 'script') {
if (tagname === 'script') {
if (options.allowedScriptHostnames || options.allowedScriptDomains) {
frame.innerText = '';
}
}

if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
each(attribs, function(value, a) {
if (!allowedAttributesMap || has(allowedAttributesMap, tagname) || allowedAttributesMap['*']) {
each(attributes, function(value, a) {
if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
// This prevents part of an attribute name in the output from being
// interpreted as the end of an attribute, or end of a tag.
Expand All @@ -291,13 +291,13 @@ function sanitizeHtml(html, options, _recursing) {
// as necessary if there are specific values defined.
let passedAllowedAttributesMapCheck = false;
if (!allowedAttributesMap ||
(has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
(has(allowedAttributesMap, tagname) && allowedAttributesMap[tagname].indexOf(a) !== -1) ||
(allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
(has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
(has(allowedAttributesGlobMap, tagname) && allowedAttributesGlobMap[tagname].test(a)) ||
(allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
passedAllowedAttributesMapCheck = true;
} else if (allowedAttributesMap && allowedAttributesMap[name]) {
for (const o of allowedAttributesMap[name]) {
} else if (allowedAttributesMap && allowedAttributesMap[tagname]) {
for (const o of allowedAttributesMap[tagname]) {
if (isPlainObject(o) && o.name && (o.name === a)) {
passedAllowedAttributesMapCheck = true;
let newValue = '';
Expand All @@ -323,13 +323,13 @@ function sanitizeHtml(html, options, _recursing) {
}
if (passedAllowedAttributesMapCheck) {
if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
if (naughtyHref(name, value)) {
if (naughtyHref(tagname, value)) {
delete frame.attribs[a];
return;
}
}

if (name === 'script' && a === 'src') {
if (tagname === 'script' && a === 'src') {

let allowed = true;

Expand All @@ -355,7 +355,7 @@ function sanitizeHtml(html, options, _recursing) {
}
}

if (name === 'iframe' && a === 'src') {
if (tagname === 'iframe' && a === 'src') {
let allowed = true;
try {
const parsed = parseUrl(value);
Expand Down Expand Up @@ -411,10 +411,10 @@ function sanitizeHtml(html, options, _recursing) {
}
}
if (a === 'class') {
const allowedSpecificClasses = allowedClassesMap[name];
const allowedSpecificClasses = allowedClassesMap[tagname];
const allowedWildcardClasses = allowedClassesMap['*'];
const allowedSpecificClassesGlob = allowedClassesGlobMap[name];
const allowedSpecificClassesRegex = allowedClassesRegexMap[name];
const allowedSpecificClassesGlob = allowedClassesGlobMap[tagname];
const allowedSpecificClassesRegex = allowedClassesRegexMap[tagname];
const allowedWildcardClassesGlob = allowedClassesGlobMap['*'];
const allowedClassesGlobs = [
allowedSpecificClassesGlob,
Expand All @@ -436,7 +436,7 @@ function sanitizeHtml(html, options, _recursing) {
}
if (a === 'style') {
try {
const abstractSyntaxTree = postcssParse(name + ' {' + value + '}');
const abstractSyntaxTree = postcssParse(tagname + ' {' + value + '}');
const filteredAST = filterCss(abstractSyntaxTree, options.allowedStyles);

value = stringifyStyleAttributes(filteredAST);
Expand All @@ -459,7 +459,7 @@ function sanitizeHtml(html, options, _recursing) {
}
});
}
if (options.selfClosing.indexOf(name) !== -1) {
if (options.selfClosing.indexOf(tagname) !== -1) {
result += ' />';
} else {
result += '>';
Expand Down Expand Up @@ -505,8 +505,7 @@ function sanitizeHtml(html, options, _recursing) {
frame.text += text;
}
},
onclosetag: function(name) {

onclosetag: function(tagname) {
if (skipText) {
skipTextDepth--;
if (!skipTextDepth) {
Expand All @@ -521,7 +520,7 @@ function sanitizeHtml(html, options, _recursing) {
// Do not crash on bad markup
return;
}
skipText = options.enforceHtmlBoundary ? name === 'html' : false;
skipText = options.enforceHtmlBoundary ? tagname === 'html' : false;
depth--;
const skip = skipMap[depth];
if (skip) {
Expand All @@ -535,7 +534,7 @@ function sanitizeHtml(html, options, _recursing) {
}

if (transformMap[depth]) {
name = transformMap[depth];
tagname = transformMap[depth];
delete transformMap[depth];
}

Expand All @@ -547,7 +546,7 @@ function sanitizeHtml(html, options, _recursing) {
frame.updateParentNodeMediaChildren();
frame.updateParentNodeText();

if (options.selfClosing.indexOf(name) !== -1) {
if (options.selfClosing.indexOf(tagname) !== -1) {
// Already output />
if (skip) {
result = tempResult;
Expand All @@ -556,7 +555,7 @@ function sanitizeHtml(html, options, _recursing) {
return;
}

result += '</' + name + '>';
result += '</' + tagname + '>';
if (skip) {
result = tempResult + escapeHtml(result);
tempResult = '';
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"dependencies": {
"deepmerge": "^4.2.2",
"escape-string-regexp": "^4.0.0",
"htmlparser2": "^6.0.0",
"htmlparser2": "^8.0.0",
"is-plain-object": "^5.0.0",
"parse-srcset": "^1.0.2",
"postcss": "^8.3.11"
Expand Down
7 changes: 0 additions & 7 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,6 @@ describe('sanitizeHtml', function() {
it('should dump character codes 1-32 before testing scheme', function() {
assert.equal(sanitizeHtml('<a href="java\0&#14;\t\r\n script:alert(\'foo\')">Hax</a>'), '<a>Hax</a>');
});
it('should dump character codes 1-32 even when escaped with padding rather than trailing ;', function() {
assert.equal(sanitizeHtml('<a href="java&#0000001script:alert(\'foo\')">Hax</a>'), '<a>Hax</a>');
// This one is weird, but the browser does not interpret it
// as a scheme, so we're OK. That character is 65535, not null. I
// think it's a limitation of the entities module
assert.equal(sanitizeHtml('<a href="java&#0000000script:alert(\'foo\')">Hax</a>'), '<a href="java�script:alert(\'foo\')">Hax</a>');
});
it('should still like nice schemes', function() {
assert.equal(sanitizeHtml('<a href="http://google.com/">Hi</a>'), '<a href="http://google.com/">Hi</a>');
});
Expand Down