@@ -77,54 +77,72 @@ bool InitializeICUDirectory(const std::string& path) {
7777 }
7878}
7979
80- static int32_t ToUnicode (MaybeStackBuffer<char >* buf,
81- const char * input,
82- size_t length) {
80+ int32_t ToUnicode (MaybeStackBuffer<char >* buf,
81+ const char * input,
82+ size_t length) {
8383 UErrorCode status = U_ZERO_ERROR;
84- uint32_t options = UIDNA_DEFAULT;
85- options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
84+ uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
8685 UIDNA* uidna = uidna_openUTS46 (options, &status);
8786 if (U_FAILURE (status))
8887 return -1 ;
8988 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
9089
9190 int32_t len = uidna_nameToUnicodeUTF8 (uidna,
9291 input, length,
93- **buf, buf->length (),
92+ **buf, buf->capacity (),
9493 &info,
9594 &status);
9695
96+ // Do not check info.errors like we do with ToASCII since ToUnicode always
97+ // returns a string, despite any possible errors that may have occurred.
98+
9799 if (status == U_BUFFER_OVERFLOW_ERROR) {
98100 status = U_ZERO_ERROR;
99101 buf->AllocateSufficientStorage (len);
100102 len = uidna_nameToUnicodeUTF8 (uidna,
101103 input, length,
102- **buf, buf->length (),
104+ **buf, buf->capacity (),
103105 &info,
104106 &status);
105107 }
106108
107- if (U_FAILURE (status))
109+ // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
110+ // string, regardless of whether an error occurred.
111+
112+ if (U_FAILURE (status)) {
108113 len = -1 ;
114+ buf->SetLength (0 );
115+ } else {
116+ buf->SetLength (len);
117+ }
109118
110119 uidna_close (uidna);
111120 return len;
112121}
113122
114- static int32_t ToASCII (MaybeStackBuffer<char >* buf,
115- const char * input,
116- size_t length) {
123+ int32_t ToASCII (MaybeStackBuffer<char >* buf,
124+ const char * input,
125+ size_t length,
126+ enum idna_mode mode) {
117127 UErrorCode status = U_ZERO_ERROR;
118- uint32_t options = UIDNA_DEFAULT;
119- options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
128+ uint32_t options = // CheckHyphens = false; handled later
129+ UIDNA_CHECK_BIDI | // CheckBidi = true
130+ UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
131+ UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
132+ if (mode == IDNA_STRICT) {
133+ options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
134+ // VerifyDnsLength = beStrict;
135+ // handled later
136+ }
137+
120138 UIDNA* uidna = uidna_openUTS46 (options, &status);
121139 if (U_FAILURE (status))
122140 return -1 ;
123141 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
124142
125143 int32_t len = uidna_nameToASCII_UTF8 (uidna,
126144 input, length,
127- **buf, buf->length (),
145+ **buf, buf->capacity (),
128146 &info,
129147 &status);
130148
@@ -133,13 +151,45 @@ static int32_t ToASCII(MaybeStackBuffer<char>* buf,
133151 buf->AllocateSufficientStorage (len);
134152 len = uidna_nameToASCII_UTF8 (uidna,
135153 input, length,
136- **buf, buf->length (),
154+ **buf, buf->capacity (),
137155 &info,
138156 &status);
139157 }
140158
141- if (U_FAILURE (status))
159+ // In UTS #46, which specifies ToASCII, certain error conditions are
160+ // configurable through options, and the WHATWG URL Standard elects to
161+ // disable some of them to accommodate real-world use cases.
162+ // Unfortunately, ICU4C's IDNA module does not support disabling some of
163+ // these checks through `options` above, and thus continues to report
164+ // unnecessary errors. To work around this, we clear the corresponding
165+ // bits in info.errors after the call, before deciding whether to
166+ // return an error from this function.
167+
168+ // CheckHyphens = false
169+ // (Specified in the current UTS #46 draft rev. 18.)
170+ // Refs:
171+ // - https://github.com/whatwg/url/issues/53
172+ // - https://github.com/whatwg/url/pull/309
173+ // - http://www.unicode.org/review/pri317/
174+ // - http://www.unicode.org/reports/tr46/tr46-18.html
175+ // - https://www.icann.org/news/announcement-2000-01-07-en
176+ info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
177+ info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
178+ info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
179+
180+ if (mode != IDNA_STRICT) {
181+ // VerifyDnsLength = beStrict
182+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
183+ info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
184+ info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
185+ }
186+
187+ if (U_FAILURE (status) || (mode != IDNA_LENIENT && info.errors != 0 )) {
142188 len = -1 ;
189+ buf->SetLength (0 );
190+ } else {
191+ buf->SetLength (len);
192+ }
143193
144194 uidna_close (uidna);
145195 return len;
@@ -169,8 +219,12 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
169219 CHECK_GE (args.Length (), 1 );
170220 CHECK (args[0 ]->IsString ());
171221 Utf8Value val (env->isolate (), args[0 ]);
222+ // Optional second argument: a truthy value selects IDNA_LENIENT mode.
223+ bool lenient = args[1 ]->BooleanValue (env->context ()).FromJust ();
224+ enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
225+
172226 MaybeStackBuffer<char > buf;
173- int32_t len = ToASCII (&buf, *val, val.length ());
227+ int32_t len = ToASCII (&buf, *val, val.length (), mode );
174228
175229 if (len < 0 ) {
176230 return env->ThrowError (" Cannot convert name to ASCII" );
0 commit comments