Skip to content

Commit 4e8b1a1

Browse files
committed
fix(swar): allow caret characters when validating uri
1 parent 575e040 commit 4e8b1a1

File tree

1 file changed

+2
-37
lines changed

1 file changed

+2
-37
lines changed

src/simd/swar.rs

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,7 @@ const fn uniform_block(b: u8) -> usize {
111111

112112
// A byte-wise range-check on an entire word/block,
113113
// ensuring all bytes in the word satisfy
114-
// `33 <= x <= 126 && x != '>' && x != '<'`
115-
// IMPORTANT: it yields false negatives if the block contains '?'
114+
// `33 <= x <= 126`
116115
#[inline]
117116
fn match_uri_char_8_swar(block: ByteBlock) -> usize {
118117
// 33 <= x <= 126
@@ -126,36 +125,7 @@ fn match_uri_char_8_swar(block: ByteBlock) -> usize {
126125
let lt = x.wrapping_sub(BM) & !x; // <= m
127126
let gt = x.wrapping_add(BN) | x; // >= n
128127

129-
// XOR checks to catch '<' & '>' for correctness
130-
//
131-
// XOR can be thought of as a "distance function"
132-
// (somewhat extrapolating from the `xor(x, x) = 0` identity and `∀ x != y: xor(x, y) != 0`)
133-
// (each u8 "xor key" providing a unique total ordering of u8)
134-
// '<' and '>' have a "xor distance" of 2 (`xor('<', '>') = 2`)
135-
// xor(x, '>') <= 2 => {'>', '?', '<'}
136-
// xor(x, '<') <= 2 => {'<', '=', '>'}
137-
//
138-
// We assume P('=') > P('?'),
139-
// given well/commonly-formatted URLs with querystrings contain
140-
// a single '?' but possibly many '='
141-
//
142-
// Thus it's preferable/near-optimal to "xor distance" on '>',
143-
// since we'll slowpath at most one block per URL
144-
//
145-
// Some rust code to sanity check this yourself:
146-
// ```rs
147-
// fn xordist(x: u8, n: u8) -> Vec<(char, u8)> {
148-
// (0..=255).into_iter().map(|c| (c as char, c ^ x)).filter(|(_c, y)| *y <= n).collect()
149-
// }
150-
// (xordist(b'<', 2), xordist(b'>', 2))
151-
// ```
152-
const B3: usize = uniform_block(3); // (dist <= 2) + 1 to wrap
153-
const BGT: usize = uniform_block(b'>');
154-
155-
let xgt = x ^ BGT;
156-
let ltgtq = xgt.wrapping_sub(B3) & !xgt;
157-
158-
offsetnz((ltgtq | lt | gt) & M128)
128+
offsetnz((lt | gt) & M128)
159129
}
160130

161131
// A byte-wise range-check on an entire word/block,
@@ -228,11 +198,6 @@ fn test_is_uri_block() {
228198
for b in 0..33_u8 {
229199
assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b);
230200
}
231-
// 33..127 => true if b not in { '<', '?', '>' }
232-
let falsy = |b| b"<?>".contains(&b);
233-
for b in 33..127_u8 {
234-
assert_eq!(is_uri_block([b; BLOCK_SIZE]), !falsy(b), "b={}", b);
235-
}
236201
// 127..=255 => false
237202
for b in 127..=255_u8 {
238203
assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b);

0 commit comments

Comments
 (0)