55#include " node_i18n.h"
66#include " util-inl.h"
77
8+ #include < algorithm>
89#include < cmath>
910#include < cstdio>
1011#include < numeric>
@@ -58,7 +59,7 @@ class URLHost {
5859 public:
5960 ~URLHost ();
6061
61- void ParseIPv4Host (const char * input, size_t length, bool * is_ipv4 );
62+ void ParseIPv4Host (const char * input, size_t length);
6263 void ParseIPv6Host (const char * input, size_t length);
6364 void ParseOpaqueHost (const char * input, size_t length);
6465 void ParseHost (const char * input,
@@ -165,6 +166,9 @@ enum url_cb_args {
165166// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166167CHAR_TEST (8 , IsASCIITabOrNewline, (ch == ' \t ' || ch == ' \n ' || ch == ' \r ' ))
167168
169+ // https://infra.spec.whatwg.org/#c0-control
170+ CHAR_TEST (8 , IsC0Control, (ch >= ' \0 ' && ch <= ' \x1f ' ))
171+
168172// https://infra.spec.whatwg.org/#c0-control-or-space
169173CHAR_TEST (8 , IsC0ControlOrSpace, (ch >= ' \0 ' && ch <= ' ' ))
170174
@@ -190,12 +194,18 @@ T ASCIILowercase(T ch) {
190194}
191195
192196// https://url.spec.whatwg.org/#forbidden-host-code-point
193- CHAR_TEST (8 , IsForbiddenHostCodePoint,
194- ch == ' \0 ' || ch == ' \t ' || ch == ' \n ' || ch == ' \r ' ||
195- ch == ' ' || ch == ' #' || ch == ' %' || ch == ' /' ||
196- ch == ' :' || ch == ' ?' || ch == ' @' || ch == ' [' ||
197- ch == ' <' || ch == ' >' || ch == ' \\ ' || ch == ' ]' ||
198- ch == ' ^' || ch == ' |' )
197+ CHAR_TEST (8 ,
198+ IsForbiddenHostCodePoint,
199+ ch == ' \0 ' || ch == ' \t ' || ch == ' \n ' || ch == ' \r ' || ch == ' ' ||
200+ ch == ' #' || ch == ' /' || ch == ' :' || ch == ' ?' || ch == ' @' ||
201+ ch == ' [' || ch == ' <' || ch == ' >' || ch == ' \\ ' || ch == ' ]' ||
202+ ch == ' ^' || ch == ' |' )
203+
204+ // https://url.spec.whatwg.org/#forbidden-domain-code-point
205+ CHAR_TEST (8 ,
206+ IsForbiddenDomainCodePoint,
207+ IsForbiddenHostCodePoint (ch) || IsC0Control(ch) || ch == '%' ||
208+ ch == '\x7f')
199209
200210// https://url.spec.whatwg.org/#windows-drive-letter
201211TWO_CHAR_STRING_TEST(8 , IsWindowsDriveLetter,
@@ -359,18 +369,21 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
359369 type_ = HostType::H_IPV6;
360370}
361371
362- int64_t ParseNumber (const char * start, const char * end) {
372+ // https://url.spec.whatwg.org/#ipv4-number-parser
373+ int64_t ParseIPv4Number (const char * start, const char * end) {
374+ if (end - start == 0 ) return -1 ;
375+
363376 unsigned R = 10 ;
364377 if (end - start >= 2 && start[0 ] == ' 0' && (start[1 ] | 0x20 ) == ' x' ) {
365378 start += 2 ;
366379 R = 16 ;
367- }
368- if (end - start == 0 ) {
369- return 0 ;
370- } else if (R == 10 && end - start > 1 && start[0 ] == ' 0' ) {
380+ } else if (end - start >= 2 && start[0 ] == ' 0' ) {
371381 start++;
372382 R = 8 ;
373383 }
384+
385+ if (end - start == 0 ) return 0 ;
386+
374387 const char * p = start;
375388
376389 while (p < end) {
@@ -394,9 +407,33 @@ int64_t ParseNumber(const char* start, const char* end) {
394407 return strtoll (start, nullptr , R);
395408}
396409
397- void URLHost::ParseIPv4Host (const char * input, size_t length, bool * is_ipv4) {
410+ // https://url.spec.whatwg.org/#ends-in-a-number-checker
411+ bool EndsInANumber (const std::string& input) {
412+ std::vector<std::string> parts = SplitString (input, ' .' , false );
413+
414+ if (parts.empty ()) return false ;
415+
416+ if (parts.back ().empty ()) {
417+ if (parts.size () == 1 ) return false ;
418+ parts.pop_back ();
419+ }
420+
421+ const std::string& last = parts.back ();
422+
423+ // If last is non-empty and contains only ASCII digits, then return true
424+ if (!last.empty () && std::all_of (last.begin (), last.end (), ::isdigit)) {
425+ return true ;
426+ }
427+
428+ const char * last_str = last.c_str ();
429+ int64_t num = ParseIPv4Number (last_str, last_str + last.size ());
430+ if (num >= 0 ) return true ;
431+
432+ return false ;
433+ }
434+
435+ void URLHost::ParseIPv4Host (const char * input, size_t length) {
398436 CHECK_EQ (type_, HostType::H_FAILED);
399- *is_ipv4 = false ;
400437 const char * pointer = input;
401438 const char * mark = input;
402439 const char * end = pointer + length;
@@ -414,7 +451,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
414451 if (++parts > static_cast <int >(arraysize (numbers))) return ;
415452 if (pointer == mark)
416453 return ;
417- int64_t n = ParseNumber (mark, pointer);
454+ int64_t n = ParseIPv4Number (mark, pointer);
418455 if (n < 0 )
419456 return ;
420457
@@ -429,7 +466,6 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
429466 pointer++;
430467 }
431468 CHECK_GT (parts, 0 );
432- *is_ipv4 = true ;
433469
434470 // If any but the last item in numbers is greater than 255, return failure.
435471 // If the last item in numbers is greater than or equal to
@@ -457,7 +493,7 @@ void URLHost::ParseOpaqueHost(const char* input, size_t length) {
457493 output.reserve (length);
458494 for (size_t i = 0 ; i < length; i++) {
459495 const char ch = input[i];
460- if (ch != ' % ' && IsForbiddenHostCodePoint (ch)) {
496+ if (IsForbiddenHostCodePoint (ch)) {
461497 return ;
462498 } else {
463499 AppendOrEscape (&output, ch, C0_CONTROL_ENCODE_SET);
@@ -496,16 +532,15 @@ void URLHost::ParseHost(const char* input,
496532 // If any of the following characters are still present, we have to fail
497533 for (size_t n = 0 ; n < decoded.size (); n++) {
498534 const char ch = decoded[n];
499- if (IsForbiddenHostCodePoint (ch)) {
535+ if (IsForbiddenDomainCodePoint (ch)) {
500536 return ;
501537 }
502538 }
503539
504- // Check to see if it's an IPv4 IP address
505- bool is_ipv4;
506- ParseIPv4Host (decoded.c_str (), decoded.length (), &is_ipv4);
507- if (is_ipv4)
508- return ;
540+ // If domain ends in a number, then return the result of IPv4 parsing domain
541+ if (EndsInANumber (decoded)) {
542+ return ParseIPv4Host (decoded.c_str (), decoded.length ());
543+ }
509544
510545 // If the unicode flag is set, run the result through punycode ToUnicode
511546 if (unicode && !ToUnicode (decoded, &decoded))
0 commit comments