@@ -81,60 +81,66 @@ macro_rules! byte_search {
8181 handle_eof: $eof_handler: expr $( , ) ?
8282 ) => { {
8383 $table. use_table( ) ;
84- loop {
85- // Open a new scope so the immutable borrow (slice/bytes) ends before we
86- // call `bump_bytes`, which requires `&mut`.
87- let ( found_idx, $byte) = {
88- let slice = $lexer. input( ) . as_str( ) ;
89- if slice. is_empty( ) {
90- $eof_handler
91- }
92- let bytes = slice. as_bytes( ) ;
93- let mut idx = 0usize ;
94- let len = bytes. len( ) ;
95- let mut found: Option <( usize , u8 ) > = None ;
96- while idx < len {
97- let end = ( idx + $crate:: lexer:: search:: SEARCH_BATCH_SIZE ) . min( len) ;
98- let mut i = idx;
99- while i < end {
100- let b = bytes[ i] ;
101- if $table. matches( b) {
102- found = Some ( ( i, b) ) ;
103- break ;
84+ let mut $pos = 0 ;
85+ let bytes = $lexer. input( ) . as_str( ) . as_bytes( ) ;
86+ let len = bytes. len( ) ;
87+ let bytes = bytes. as_ptr( ) ;
88+
89+ let $byte = ' outer: loop {
90+ let batch_end = $pos + $crate:: lexer:: search:: SEARCH_BATCH_SIZE ;
91+ let $byte = if batch_end < len {
92+ // SAFETY: `batch_end < len`, so the `SEARCH_BATCH_SIZE` bytes starting at `$pos` all lie within the input.
93+ let batch = unsafe {
94+ std:: slice:: from_raw_parts(
95+ bytes. add( $pos) ,
96+ $crate:: lexer:: search:: SEARCH_BATCH_SIZE ,
97+ )
98+ } ;
99+ ' inner: loop {
100+ for ( i, & byte) in batch. iter( ) . enumerate( ) {
101+ if $table. matches( byte) {
102+ // Found a matching byte; break out so the `continue_if` condition can be checked.
103+ $pos += i;
104+ break ' inner byte;
104105 }
105- i += 1 ;
106- }
107- if found. is_some( ) {
108- break ;
109106 }
110- idx = end;
111- }
112- match found {
113- Some ( ( i, b) ) => ( Some ( i) , b) ,
114- None => ( None , 0 ) ,
115- }
116- } ; // immutable borrow ends here
117107
118- match found_idx {
119- Some ( i) => {
120- // Check if we should continue searching
121- let $pos = i; // Index within current slice
122- if $should_continue {
123- // Continue searching from next position
124- $lexer. input_mut( ) . bump_bytes( i + 1 ) ;
125- continue ;
126- } else {
127- $lexer. input_mut( ) . bump_bytes( i) ;
128- break $byte;
129- }
108+ // No matching byte was found in this batch,
109+ // so continue with the next batch (or the remaining bytes).
110+ $pos = batch_end;
111+ continue ' outer;
130112 }
131- None => {
132- // Consume remainder then run handler.
133- let len = $lexer. input( ) . as_str( ) . len( ) ;
113+ } else {
114+ ' inner: loop {
115+ // The remaining input is at most one batch long (`batch_end >= len`).
116+ let remaining =
117+ unsafe { std:: slice:: from_raw_parts( bytes. add( $pos) , len - $pos) } ;
118+ for ( i, & byte) in remaining. iter( ) . enumerate( ) {
119+ if $table. matches( byte) {
120+ // Found a matching byte; break out so the `continue_if` condition can be checked.
121+ $pos += i;
122+ break ' inner byte;
123+ }
124+ }
125+
126+ // No matching byte was found in the remaining bytes,
127+ // which also means we have reached the end of the input.
134128 $lexer. input_mut( ) . bump_bytes( len) ;
135129 $eof_handler
136130 }
131+ } ;
132+
133+ // Check if we should continue searching
134+ if $should_continue {
135+ // Continue searching from next position
136+ $pos += 1 ;
137+ continue ;
137138 }
138- }
139+
140+ break $byte;
141+ } ;
142+
143+ $lexer. input_mut( ) . bump_bytes( $pos) ;
144+ $byte
139145 } } ;
140146}
0 commit comments