Skip to content

Commit 8d1461d

Browse files
committed
url: forbid certain confusable characters from being introduced by toASCII
The legacy url.parse() function attempts to convert Unicode domains (IDNs) into their ASCII/Punycode form through the use of the toASCII function. However, toASCII can introduce various characters that at best invalidate the parsed URL, and at worst cause hostname spoofing: url.parse('http://bad.c℀.good.com/').href === 'http://bad.ca/c.good.com/' (from [1]) While changes to the legacy URL parser are discouraged in general, the security implications here outweigh the desire for strict compatibility. This is since this commit only changes behavior when non-ASCII characters appear in the hostname, an unusual situation for most use cases. Additionally, despite the availability of the WHATWG URL API, url.parse remain widely deployed in the Node.js ecosystem, as exemplified by the recent un-deprecation of the legacy API. This change is similar in spirit to CPython 3.8's change [2] fixing bpo-36216 [3] aka CVE-2019-9636, which also occurred despite potential compatibility concerns. [1]: https://hackerone.com/reports/678487 [2]: python/cpython@16e6f7d [3]: https://bugs.python.org/issue36216
1 parent 7c8a608 commit 8d1461d

File tree

3 files changed

+62
-5
lines changed

3 files changed

+62
-5
lines changed

doc/api/errors.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,10 +1677,10 @@ An invalid URI was passed.
16771677
<a id="ERR_INVALID_URL"></a>
16781678
### `ERR_INVALID_URL`
16791679

1680-
An invalid URL was passed to the [WHATWG][WHATWG URL API]
1681-
[`URL` constructor][`new URL(input)`] to be parsed. The thrown error object
1682-
typically has an additional property `'input'` that contains the URL that failed
1683-
to parse.
1680+
An invalid URL was passed to the [WHATWG][WHATWG URL API] [`URL`
1681+
constructor][`new URL(input)`] or the legacy [`url.parse()`][] to be parsed.
1682+
The thrown error object typically has an additional property `'input'` that
1683+
contains the URL that failed to parse.
16841684

16851685
<a id="ERR_INVALID_URL_SCHEME"></a>
16861686
### `ERR_INVALID_URL_SCHEME`
@@ -2824,6 +2824,7 @@ The native call from `process.cpuUsage` could not be processed.
28242824
[`stream.write()`]: stream.md#stream_writable_write_chunk_encoding_callback
28252825
[`subprocess.kill()`]: child_process.md#child_process_subprocess_kill_signal
28262826
[`subprocess.send()`]: child_process.md#child_process_subprocess_send_message_sendhandle_options_callback
2827+
[`url.parse()`]: url.md#url_url_parse_urlstring_parsequerystring_slashesdenotehost
28272828
[`util.getSystemErrorName(error.errno)`]: util.md#util_util_getsystemerrorname_err
28282829
[`zlib`]: zlib.md
28292830
[crypto digest algorithm]: crypto.md#crypto_crypto_gethashes

lib/url.js

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ const { toASCII } = require('internal/idna');
3434
const { encodeStr, hexTable } = require('internal/querystring');
3535

3636
const {
37-
ERR_INVALID_ARG_TYPE
37+
ERR_INVALID_ARG_TYPE,
38+
ERR_INVALID_URL,
3839
} = require('internal/errors').codes;
3940
const { validateString } = require('internal/validators');
4041

@@ -167,6 +168,20 @@ function isIpv6Hostname(hostname) {
167168
);
168169
}
169170

171+
// This prevents some common spoofing bugs due to our use of IDNA toASCII. For
172+
// compatibility, the set of forbidden characters is the _intersection_ of
173+
// "forbidden host code point" in the WHATWG URL Standard and the characters in
174+
// the host parsing loop in Url.prototype.parse, with the following additions:
175+
//
176+
// - ':' since this could cause a "protocol spoofing" bug
177+
// - '@' since this could cause parts of the hostname to be confused with auth
178+
// - '[' and ']' since this could cause a non-IPv6 hostname to be interpreted
179+
// as IPv6 by isIpv6Hostname above
180+
//
181+
// All four of these characters are also included in "forbidden host code
182+
// point". See https://url.spec.whatwg.org/#forbidden-host-code-point
183+
const forbiddenHostChars = /[\t\n\r #%/:<>?@[\\\]^|]/;
184+
170185
Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
171186
validateString(url, 'url');
172187

@@ -398,6 +413,17 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
398413
// Use lenient mode (`true`) to try to support even non-compliant
399414
// URLs.
400415
this.hostname = toASCII(this.hostname, true);
416+
417+
if (forbiddenHostChars.test(this.hostname)) {
418+
// These forbidden characters could not have entered this.hostname
419+
// through the URL string directly since getHostname should have
420+
// filtered them out. Thus, they must have creeped in due to IDNA
421+
// toASCII conversion. This is probably a sign of potential hostname
422+
// spoofing. Rather than moving the non-host part to the pathname as
423+
// we've done in getHostname, throw an exception to convey its
424+
// severity.
425+
throw new ERR_INVALID_URL(url);
426+
}
401427
}
402428

403429
const p = this.port ? ':' + this.port : '';

test/parallel/test-url-parse-invalid-input.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,33 @@ assert.throws(() => { url.parse('http://%E0%A4%A@fail'); },
3636
// JS engine errors do not have the `code` property.
3737
return e.code === undefined;
3838
});
39+
40+
if (common.hasIntl) {
41+
// An array of Unicode code points whose Unicode NFKD contains a "bad
42+
// character".
43+
const badIDNA = (() => {
44+
const BAD_CHARS = '#%/:?@[\\]^|';
45+
const out = [];
46+
for (let i = 0x80; i < 0x110000; i++) {
47+
const cp = String.fromCodePoint(i);
48+
for (const badChar of BAD_CHARS) {
49+
if (cp.normalize('NFKD').includes(badChar)) {
50+
out.push(cp);
51+
}
52+
}
53+
}
54+
return out;
55+
})();
56+
57+
// The generation logic above should at a minimum produce these two
58+
// characters.
59+
assert(badIDNA.includes('℀'));
60+
assert(badIDNA.includes('@'));
61+
62+
for (const badCodePoint of badIDNA) {
63+
const badURL = `http://fail${badCodePoint}fail.com/`;
64+
assert.throws(() => { url.parse(badURL); },
65+
(e) => e.code === 'ERR_INVALID_URL',
66+
`parsing ${badURL}`);
67+
}
68+
}

0 commit comments

Comments
 (0)