Skip to content

Commit 67f67c1

Browse files
authored
perf(es/parser): Optimize byte_search! (#11323)
**Description:** Align the implementation with oxc. For swc, this PR reduces the number of `bump_bytes` calls.
1 parent 391f1e3 commit 67f67c1

File tree

3 files changed

+70
-50
lines changed

3 files changed

+70
-50
lines changed

.changeset/perfect-kids-sleep.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_core: patch
3+
swc_ecma_parser: patch
4+
---
5+
6+
perf(es/parser): Optimize `byte_search!`

crates/swc_ecma_parser/src/lexer/mod.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,7 @@ impl<'a> Lexer<'a> {
754754
let slice_start = self.cur_pos();
755755

756756
let had_line_break_before_last = self.had_line_break_before_last();
757+
let mut had_line_break = false;
757758

758759
byte_search! {
759760
lexer: self,
@@ -768,8 +769,8 @@ impl<'a> Lexer<'a> {
768769
let bytes = current_slice.as_bytes();
769770
let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
770771
if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
771-
self.state_mut().mark_had_line_break();
772-
self.input_mut().bump_bytes(2);
772+
had_line_break = true;
773+
pos_offset += 2;
773774
}
774775
}
775776
true
@@ -818,13 +819,16 @@ impl<'a> Lexer<'a> {
818819
}
819820
}
820821

822+
if had_line_break {
823+
self.state_mut().mark_had_line_break();
824+
}
821825
return;
822826
}
823827

824828
true
825829
}
826830
_ => {
827-
self.state_mut().mark_had_line_break();
831+
had_line_break = true;
828832
true
829833
},
830834
}
@@ -836,6 +840,10 @@ impl<'a> Lexer<'a> {
836840
return;
837841
}
838842
};
843+
844+
if had_line_break {
845+
self.state_mut().mark_had_line_break();
846+
}
839847
}
840848

841849
/// Ensure that ident cannot directly follow numbers.

crates/swc_ecma_parser/src/lexer/search.rs

Lines changed: 53 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -81,60 +81,66 @@ macro_rules! byte_search {
8181
handle_eof: $eof_handler:expr $(,)?
8282
) => {{
8383
$table.use_table();
84-
loop {
85-
// Open a new scope so the immutable borrow (slice/bytes) ends before we
86-
// call `bump_bytes`, which requires `&mut`.
87-
let (found_idx, $byte) = {
88-
let slice = $lexer.input().as_str();
89-
if slice.is_empty() {
90-
$eof_handler
91-
}
92-
let bytes = slice.as_bytes();
93-
let mut idx = 0usize;
94-
let len = bytes.len();
95-
let mut found: Option<(usize, u8)> = None;
96-
while idx < len {
97-
let end = (idx + $crate::lexer::search::SEARCH_BATCH_SIZE).min(len);
98-
let mut i = idx;
99-
while i < end {
100-
let b = bytes[i];
101-
if $table.matches(b) {
102-
found = Some((i, b));
103-
break;
84+
let mut $pos = 0;
85+
let bytes = $lexer.input().as_str().as_bytes();
86+
let len = bytes.len();
87+
let bytes = bytes.as_ptr();
88+
89+
let $byte = 'outer: loop {
90+
let batch_end = $pos + $crate::lexer::search::SEARCH_BATCH_SIZE;
91+
let $byte = if batch_end < len {
92+
// Safety: `batch_end < len`
93+
let batch = unsafe {
94+
std::slice::from_raw_parts(
95+
bytes.add($pos),
96+
$crate::lexer::search::SEARCH_BATCH_SIZE,
97+
)
98+
};
99+
'inner: loop {
100+
for (i, &byte) in batch.iter().enumerate() {
101+
if $table.matches(byte) {
102+
// We find a matched byte, jump out to check with continue_if
103+
$pos += i;
104+
break 'inner byte;
104105
}
105-
i += 1;
106-
}
107-
if found.is_some() {
108-
break;
109106
}
110-
idx = end;
111-
}
112-
match found {
113-
Some((i, b)) => (Some(i), b),
114-
None => (None, 0),
115-
}
116-
}; // immutable borrow ends here
117107

118-
match found_idx {
119-
Some(i) => {
120-
// Check if we should continue searching
121-
let $pos = i; // Index within current slice
122-
if $should_continue {
123-
// Continue searching from next position
124-
$lexer.input_mut().bump_bytes(i + 1);
125-
continue;
126-
} else {
127-
$lexer.input_mut().bump_bytes(i);
128-
break $byte;
129-
}
108+
// We don't find a matched byte in this batch,
109+
// So continue to try the next batch/remaining
110+
$pos = batch_end;
111+
continue 'outer;
130112
}
131-
None => {
132-
// Consume remainder then run handler.
133-
let len = $lexer.input().as_str().len();
113+
} else {
114+
'inner: loop {
115+
// The remaining is shorter than batch size.
116+
let remaining =
117+
unsafe { std::slice::from_raw_parts(bytes.add($pos), len - $pos) };
118+
for (i, &byte) in remaining.iter().enumerate() {
119+
if $table.matches(byte) {
120+
// We find a matched byte, jump out to check with continue_if
121+
$pos += i;
122+
break 'inner byte;
123+
}
124+
}
125+
126+
// We don't find a matched byte in the remaining,
127+
// which also means we have reached the end of the input.
134128
$lexer.input_mut().bump_bytes(len);
135129
$eof_handler
136130
}
131+
};
132+
133+
// Check if we should continue searching
134+
if $should_continue {
135+
// Continue searching from next position
136+
$pos += 1;
137+
continue;
137138
}
138-
}
139+
140+
break $byte;
141+
};
142+
143+
$lexer.input_mut().bump_bytes($pos);
144+
$byte
139145
}};
140146
}

0 commit comments

Comments
 (0)