Skip to content

Commit 7d91ca0

Browse files
committed
Implement new approach for URI normalization (fixes #287)
1 parent 1162ff7 commit 7d91ca0

File tree

2 files changed

+53
-1
lines changed

2 files changed

+53
-1
lines changed

src/Util/UrlEncoder.php

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,53 @@ public static function unescapeAndEncode($uri)
4848
{
4949
$decoded = html_entity_decode($uri);
5050

51-
return strtr(rawurlencode(rawurldecode($decoded)), self::$dontEncode);
51+
return self::encode(self::decode($decoded));
52+
}
53+
54+
/**
 * Decode a percent-encoded URI
 *
 * Each `%XX` escape is replaced by the raw byte it denotes, except for
 * escapes that appear as keys of self::$dontEncode: those stay
 * percent-encoded, with their hex digits normalized to uppercase.
 *
 * @param string $uri
 *
 * @return string
 */
private static function decode($uri)
{
    // The pattern is pure ASCII, so it is matched bytewise (no "u" modifier).
    // With "u", PCRE rejects a subject that is not valid UTF-8 and
    // preg_replace_callback() returns null instead of a string.
    return preg_replace_callback('/%([0-9a-f]{2})/i', function ($matches) {
        // Convert percent-encoded codes to uppercase
        $upper = strtoupper($matches[0]);

        // Keep excluded characters as-is
        if (array_key_exists($upper, self::$dontEncode)) {
            return $upper;
        }

        // Otherwise, return the raw byte for this hex value; consecutive
        // escapes of a multi-byte character end up adjacent in the output
        return chr(hexdec($matches[1]));
    }, $uri);
}
75+
76+
/**
 * Encode a URI, preserving already-encoded and excluded characters
 *
 * Existing `%XX` sequences and characters that appear as values of
 * self::$dontEncode are copied through unchanged; everything else is
 * passed through rawurlencode().
 *
 * @param string $uri
 *
 * @return string
 */
private static function encode($uri)
{
    // Matched bytewise on purpose (no "u" modifier): decode() may produce
    // bytes that are not valid UTF-8 (e.g. from "%FF"), and with "u" PCRE
    // would reject the subject and make preg_replace_callback() return null.
    // rawurlencode() works per byte, so multi-byte characters still encode
    // to the same "%XX%YY..." sequence.
    // The "s" modifier lets "." match line feeds too, so they are encoded
    // rather than silently passed through.
    return preg_replace_callback('/(%[0-9a-f]{2})|./is', function ($matches) {
        // Keep already-encoded characters as-is (group 1 only exists when
        // the first alternative matched)
        if (count($matches) > 1) {
            return $matches[0];
        }

        // Keep excluded characters as-is
        if (in_array($matches[0], self::$dontEncode, true)) {
            return $matches[0];
        }

        // Otherwise, encode the byte
        return rawurlencode($matches[0]);
    }, $uri);
}
53100
}

tests/unit/Util/UrlEncoderTest.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ public function unescapeAndEncodeTestProvider()
5151
['<', '%3C'],
5252
['>', '%3E'],
5353
['?', '?'],
54+
['https://en.wikipedia.org/wiki/Markdown#CommonMark', 'https://en.wikipedia.org/wiki/Markdown#CommonMark'],
55+
['https://img.shields.io/badge/help-%23hoaproject-ff0066.svg', 'https://img.shields.io/badge/help-%23hoaproject-ff0066.svg'],
56+
['http://example.com/a%62%63%2fd%3Fe', 'http://example.com/abc%2Fd%3Fe'],
57+
['http://ko.wikipedia.org/wiki/위키백과:대문', 'http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8'],
58+
['http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8', 'http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8'],
5459
];
5560
}
5661
}

0 commit comments

Comments
 (0)