@@ -111,8 +111,7 @@ const fn uniform_block(b: u8) -> usize {
111111
112112// A byte-wise range-check on an entire word/block,
113113// ensuring all bytes in the word satisfy
114- // `33 <= x <= 126 && x != '>' && x != '<'`
115- // IMPORTANT: it false negatives if the block contains '?'
114+ // `33 <= x <= 126`
116115#[ inline]
117116fn match_uri_char_8_swar ( block : ByteBlock ) -> usize {
118117 // 33 <= x <= 126
@@ -126,36 +125,7 @@ fn match_uri_char_8_swar(block: ByteBlock) -> usize {
126125 let lt = x. wrapping_sub ( BM ) & !x; // <= m
127126 let gt = x. wrapping_add ( BN ) | x; // >= n
128127
129- // XOR checks to catch '<' & '>' for correctness
130- //
131- // XOR can be thought of as a "distance function"
132- // (somewhat extrapolating from the `xor(x, x) = 0` identity and ∀ x != y: xor(x, y) != 0`
133- // (each u8 "xor key" providing a unique total ordering of u8)
134- // '<' and '>' have a "xor distance" of 2 (`xor('<', '>') = 2`)
135- // xor(x, '>') <= 2 => {'>', '?', '<'}
136- // xor(x, '<') <= 2 => {'<', '=', '>'}
137- //
138- // We assume P('=') > P('?'),
139- // given well/commonly-formatted URLs with querystrings contain
140- // a single '?' but possibly many '='
141- //
142- // Thus it's preferable/near-optimal to "xor distance" on '>',
143- // since we'll slowpath at most one block per URL
144- //
145- // Some rust code to sanity check this yourself:
146- // ```rs
147- // fn xordist(x: u8, n: u8) -> Vec<(char, u8)> {
148- // (0..=255).into_iter().map(|c| (c as char, c ^ x)).filter(|(_c, y)| *y <= n).collect()
149- // }
150- // (xordist(b'<', 2), xordist(b'>', 2))
151- // ```
152- const B3 : usize = uniform_block ( 3 ) ; // (dist <= 2) + 1 to wrap
153- const BGT : usize = uniform_block ( b'>' ) ;
154-
155- let xgt = x ^ BGT ;
156- let ltgtq = xgt. wrapping_sub ( B3 ) & !xgt;
157-
158- offsetnz ( ( ltgtq | lt | gt) & M128 )
128+ offsetnz ( ( lt | gt) & M128 )
159129}
160130
161131// A byte-wise range-check on an entire word/block,
@@ -228,11 +198,6 @@ fn test_is_uri_block() {
228198 for b in 0 ..33_u8 {
229199 assert ! ( !is_uri_block( [ b; BLOCK_SIZE ] ) , "b={}" , b) ;
230200 }
231- // 33..127 => true if b not in { '<', '?', '>' }
232- let falsy = |b| b"<?>" . contains ( & b) ;
233- for b in 33 ..127_u8 {
234- assert_eq ! ( is_uri_block( [ b; BLOCK_SIZE ] ) , !falsy( b) , "b={}" , b) ;
235- }
236201 // 127..=255 => false
237202 for b in 127 ..=255_u8 {
238203 assert ! ( !is_uri_block( [ b; BLOCK_SIZE ] ) , "b={}" , b) ;
0 commit comments