@@ -145,6 +145,7 @@ static Immutable from(String scheme, String host, int port, String pathQuery)
145
145
/**
146
146
* Get a URI path parameter. Multiple parameters and in-segment parameters are ignored, and only
147
147
* the last trailing parameter is returned.
148
+ *
148
149
* @return The last path parameter or null
149
150
*/
150
151
String getParam ();
@@ -526,6 +527,79 @@ private enum State
526
527
.with ("%u002e%u002e" , Boolean .TRUE )
527
528
.build ();
528
529
530
+ private static final boolean [] __unreservedPctEncodedSubDelims ;
531
+
532
+ private static boolean isHexDigit (char c )
533
+ {
534
+ return (((c >= 'a' ) && (c <= 'f' )) || // ALPHA (lower)
535
+ ((c >= 'A' ) && (c <= 'F' )) || // ALPHA (upper)
536
+ ((c >= '0' ) && (c <= '9' )));
537
+ }
538
+
539
+ private static boolean isUnreserved (char c )
540
+ {
541
+ return (((c >= 'a' ) && (c <= 'z' )) || // ALPHA (lower)
542
+ ((c >= 'A' ) && (c <= 'Z' )) || // ALPHA (upper)
543
+ ((c >= '0' ) && (c <= '9' )) || // DIGIT
544
+ (c == '-' ) || (c == '.' ) || (c == '_' ) || (c == '~' ));
545
+ }
546
+
547
+ private static boolean isSubDelim (char c )
548
+ {
549
+ return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' ;
550
+ }
551
+
552
+ static boolean isUnreservedPctEncodedOrSubDelim (char c )
553
+ {
554
+ return c < __unreservedPctEncodedSubDelims .length && __unreservedPctEncodedSubDelims [c ];
555
+ }
556
+
557
static
{
    // Build the lookup table of characters that are allowed as
    // unreserved / pct-encoded / sub-delims, following the rules of
    // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
    //
    // ABNF
    //   path          = path-abempty    ; begins with "/" or is empty
    //                 / path-absolute   ; begins with "/" but not "//"
    //                 / path-noscheme   ; begins with a non-colon segment
    //                 / path-rootless   ; begins with a segment
    //                 / path-empty      ; zero characters
    //   path-abempty  = *( "/" segment )
    //   path-absolute = "/" [ segment-nz *( "/" segment ) ]
    //   path-noscheme = segment-nz-nc *( "/" segment )
    //   path-rootless = segment-nz *( "/" segment )
    //   path-empty    = 0<pchar>
    //
    //   segment       = *pchar
    //   segment-nz    = 1*pchar
    //   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
    //                 ; non-zero-length segment without any colon ":"
    //   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    //   pct-encoded   = "%" HEXDIG HEXDIG
    //
    //   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    //   reserved      = gen-delims / sub-delims
    //   gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    //   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
    //                 / "*" / "+" / "," / ";" / "="
    //
    //   authority     = [ userinfo "@" ] host [ ":" port ]
    //   userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
    //   host          = IP-literal / IPv4address / reg-name
    //   port          = *DIGIT
    //
    //   reg-name      = *( unreserved / pct-encoded / sub-delims )
    //
    // Only US-ASCII is considered, per https://datatracker.ietf.org/doc/html/rfc3986#section-2,
    // hence a table of 128 entries.
    boolean[] allowed = new boolean[128];
    for (char c = 0; c < allowed.length; c++)
    {
        // '%' is admitted here as the lead character of a pct-encoded triplet.
        allowed[c] = c == '%' || isSubDelim(c) || isUnreserved(c);
    }
    __unreservedPctEncodedSubDelims = allowed;
}
602
+
529
603
private String _scheme ;
530
604
private String _user ;
531
605
private String _host ;
@@ -980,7 +1054,7 @@ private void parse(State state, final String uri)
980
1054
int mark = 0 ; // the start of the current section being parsed
981
1055
int pathMark = 0 ; // the start of the path section
982
1056
int segment = 0 ; // the start of the current segment within the path
983
- boolean encodedPath = false ; // set to true if the path contains % encoded characters
1057
+ boolean encoded = false ; // set to true if the path contains % encoded characters
984
1058
boolean encodedUtf16 = false ; // Is the current encoding for UTF16?
985
1059
int encodedCharacters = 0 ; // partial state of parsing a % encoded character<x>
986
1060
int encodedValue = 0 ; // the partial encoded value
@@ -1025,7 +1099,7 @@ private void parse(State state, final String uri)
1025
1099
state = State .ASTERISK ;
1026
1100
break ;
1027
1101
case '%' :
1028
- encodedPath = true ;
1102
+ encoded = true ;
1029
1103
encodedCharacters = 2 ;
1030
1104
encodedValue = 0 ;
1031
1105
mark = pathMark = segment = i ;
@@ -1078,7 +1152,7 @@ private void parse(State state, final String uri)
1078
1152
break ;
1079
1153
case '%' :
1080
1154
// must have been in an encoded path
1081
- encodedPath = true ;
1155
+ encoded = true ;
1082
1156
encodedCharacters = 2 ;
1083
1157
encodedValue = 0 ;
1084
1158
state = State .PATH ;
@@ -1128,7 +1202,10 @@ private void parse(State state, final String uri)
1128
1202
switch (c )
1129
1203
{
1130
1204
case '/' :
1205
+ if (encodedCharacters > 0 )
1206
+ throw new IllegalArgumentException ("Bad authority" );
1131
1207
_host = uri .substring (mark , i );
1208
+ encoded = false ;
1132
1209
pathMark = mark = i ;
1133
1210
segment = mark + 1 ;
1134
1211
state = State .PATH ;
@@ -1143,12 +1220,35 @@ private void parse(State state, final String uri)
1143
1220
if (_user != null )
1144
1221
throw new IllegalArgumentException ("Bad authority" );
1145
1222
_user = uri .substring (mark , i );
1223
+ _violations .add (Violation .USER_INFO );
1146
1224
mark = i + 1 ;
1147
1225
break ;
1148
1226
case '[' :
1227
+ if (i != mark )
1228
+ throw new IllegalArgumentException ("Bad authority" );
1149
1229
state = State .IPV6 ;
1150
1230
break ;
1231
+ case '%' :
1232
+ if (encodedCharacters > 0 )
1233
+ throw new IllegalArgumentException ("Bad authority" );
1234
+ encodedCharacters = 2 ;
1235
+ encoded = true ;
1236
+ break ;
1237
+ case '#' :
1238
+ case ';' :
1239
+ throw new IllegalArgumentException ("Bad authority" );
1240
+
1151
1241
default :
1242
+ if (encodedCharacters > 0 )
1243
+ {
1244
+ if (!isHexDigit (c ))
1245
+ throw new IllegalArgumentException ("Bad authority" );
1246
+ encodedCharacters --;
1247
+ }
1248
+ else if (!isUnreservedPctEncodedOrSubDelim (c ))
1249
+ {
1250
+ throw new IllegalArgumentException ("Bad authority" );
1251
+ }
1152
1252
break ;
1153
1253
}
1154
1254
break ;
@@ -1173,7 +1273,11 @@ private void parse(State state, final String uri)
1173
1273
state = State .PATH ;
1174
1274
}
1175
1275
break ;
1276
+ case ':' :
1277
+ break ;
1176
1278
default :
1279
+ if (!isHexDigit (c ))
1280
+ throw new IllegalArgumentException ("Bad authority" );
1177
1281
break ;
1178
1282
}
1179
1283
break ;
@@ -1186,6 +1290,7 @@ private void parse(State state, final String uri)
1186
1290
throw new IllegalArgumentException ("Bad authority" );
1187
1291
// It wasn't a port, but a password!
1188
1292
_user = _host + ":" + uri .substring (mark , i );
1293
+ _violations .add (Violation .USER_INFO );
1189
1294
mark = i + 1 ;
1190
1295
state = State .HOST ;
1191
1296
}
@@ -1261,7 +1366,7 @@ else if (c == '/')
1261
1366
dot |= segment == i ;
1262
1367
break ;
1263
1368
case '%' :
1264
- encodedPath = true ;
1369
+ encoded = true ;
1265
1370
encodedUtf16 = false ;
1266
1371
encodedCharacters = 2 ;
1267
1372
encodedValue = 0 ;
@@ -1289,7 +1394,7 @@ else if (c == '/')
1289
1394
state = State .FRAGMENT ;
1290
1395
break ;
1291
1396
case '/' :
1292
- encodedPath = true ;
1397
+ encoded = true ;
1293
1398
segment = i + 1 ;
1294
1399
state = State .PATH ;
1295
1400
break ;
@@ -1368,7 +1473,7 @@ else if (c == '/')
1368
1473
throw new IllegalStateException (state .toString ());
1369
1474
}
1370
1475
1371
- if (!encodedPath && !dot )
1476
+ if (!encoded && !dot )
1372
1477
{
1373
1478
if (_param == null )
1374
1479
_decodedPath = _path ;
0 commit comments