1- // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
1+ // Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
22// file at the top-level directory of this distribution and at
33// http://rust-lang.org/COPYRIGHT.
44//
88// option. This file may not be copied, modified, or distributed
99// except according to those terms.
1010
11- //! This is an Earley-like parser, without support for in-grammar nonterminals,
12- //! only by calling out to the main rust parser for named nonterminals (which it
13- //! commits to fully when it hits one in a grammar). This means that there are no
14- //! completer or predictor rules, and therefore no need to store one column per
15- //! token: instead, there's a set of current Earley items and a set of next
16- //! ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
17- //! pathological cases, is worse than traditional Earley parsing, but it's an
18- //! easier fit for Macro-by-Example-style rules, and I think the overhead is
19- //! lower. (In order to prevent the pathological case, we'd need to lazily
20- //! construct the resulting `NamedMatch`es at the very end. It'd be a pain,
21- //! and require more memory to keep around old items, but it would also save
22- //! overhead)
11+ //! This is an NFA-based parser, which calls out to the main rust parser for named nonterminals
12+ //! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
13+ //! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
14+ //! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
15+ //! fit for Macro-by-Example-style rules.
16+ //!
17+ //! (In order to prevent the pathological case, we'd need to lazily construct the resulting
18+ //! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
19+ //! items, but it would also save overhead)
20+ //!
21+ //! We don't say this parser uses the Earley algorithm, because that description would be
22+ //! The macro parser restricts itself to the features of finite state automata. Earley parsers
23+ //! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
2324//!
2425//! Quick intro to how the parser works:
2526//!
2627//! A 'position' is a dot in the middle of a matcher, usually written as
2728//! `·`. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
2829//!
2930//! The parser walks through the input one token at a time, maintaining a list
30- //! of items consistent with the current position in the input string: `cur_eis `.
31+ //! of threads consistent with the current position in the input string: `cur_items `.
3132//!
32- //! As it processes them, it fills up `eof_eis ` with items that would be valid if
33- //! the macro invocation is now over, `bb_eis ` with items that are waiting on
34- //! a Rust nonterminal like `$e:expr`, and `next_eis ` with items that are waiting
33+ //! As it processes them, it fills up `eof_items ` with threads that would be valid if
34+ //! the macro invocation is now over, `bb_items ` with threads that are waiting on
35+ //! a Rust nonterminal like `$e:expr`, and `next_items ` with threads that are waiting
3536//! on a particular token. Most of the logic concerns moving the · through the
36- //! repetitions indicated by Kleene stars. It only advances or calls out to the
37- //! real Rust parser when no `cur_eis` items remain
37+ //! repetitions indicated by Kleene stars. The rules for moving the · without
38+ //! consuming any input are called epsilon transitions. It only advances or calls
39+ //! out to the real Rust parser when no `cur_items` threads remain.
3840//!
3941//! Example:
4042//!
4143//! ```text, ignore
4244//! Start parsing a a a a b against [· a $( a )* a b].
4345//!
4446//! Remaining input: a a a a b
45- //! next_eis : [· a $( a )* a b]
47+ //! next : [· a $( a )* a b]
4648//!
4749//! - - - Advance over an a. - - -
4850//!
5456//! - - - Advance over an a. - - -
5557//!
5658//! Remaining input: a a b
57- //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
58- //! Finish/Repeat (first item)
59+ //! cur: [a $( a · )* a b] [a $( a )* a · b]
60+ //! Follow epsilon transition: Finish/Repeat (first item)
5961//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
6062//!
6163//! - - - Advance over an a. - - - (this looks exactly like the last step)
6264//!
6365//! Remaining input: a b
64- //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
65- //! Finish/Repeat (first item)
66+ //! cur: [a $( a · )* a b] [a $( a )* a · b]
67+ //! Follow epsilon transition: Finish/Repeat (first item)
6668//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
6769//!
6870//! - - - Advance over an a. - - - (this looks exactly like the last step)
6971//!
7072//! Remaining input: b
71- //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
72- //! Finish/Repeat (first item)
73- //! next: [a $( a )* · a b] [a $( · a )* a b]
73+ //! cur: [a $( a · )* a b] [a $( a )* a · b]
74+ //! Follow epsilon transition: Finish/Repeat (first item)
75+ //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
7476//!
7577//! - - - Advance over a b. - - -
7678//!
@@ -289,94 +291,94 @@ fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> {
289291}
290292
291293fn inner_parse_loop ( sess : & ParseSess ,
292- cur_eis : & mut SmallVector < Box < MatcherPos > > ,
293- next_eis : & mut Vec < Box < MatcherPos > > ,
294- eof_eis : & mut SmallVector < Box < MatcherPos > > ,
295- bb_eis : & mut SmallVector < Box < MatcherPos > > ,
294+ cur_items : & mut SmallVector < Box < MatcherPos > > ,
295+ next_items : & mut Vec < Box < MatcherPos > > ,
296+ eof_items : & mut SmallVector < Box < MatcherPos > > ,
297+ bb_items : & mut SmallVector < Box < MatcherPos > > ,
296298 token : & Token ,
297299 span : syntax_pos:: Span )
298300 -> ParseResult < ( ) > {
299- while let Some ( mut ei ) = cur_eis . pop ( ) {
301+ while let Some ( mut item ) = cur_items . pop ( ) {
300302 // When unzipped trees end, remove them
301- while ei . idx >= ei . top_elts . len ( ) {
302- match ei . stack . pop ( ) {
303+ while item . idx >= item . top_elts . len ( ) {
304+ match item . stack . pop ( ) {
303305 Some ( MatcherTtFrame { elts, idx } ) => {
304- ei . top_elts = elts;
305- ei . idx = idx + 1 ;
306+ item . top_elts = elts;
307+ item . idx = idx + 1 ;
306308 }
307309 None => break
308310 }
309311 }
310312
311- let idx = ei . idx ;
312- let len = ei . top_elts . len ( ) ;
313+ let idx = item . idx ;
314+ let len = item . top_elts . len ( ) ;
313315
314316 // at end of sequence
315317 if idx >= len {
316318 // We are repeating iff there is a parent
317- if ei . up . is_some ( ) {
319+ if item . up . is_some ( ) {
318320 // Disregarding the separator, add the "up" case to the tokens that should be
319321 // examined.
320322 // (remove this condition to make trailing seps ok)
321323 if idx == len {
322- let mut new_pos = ei . up . clone ( ) . unwrap ( ) ;
324+ let mut new_pos = item . up . clone ( ) . unwrap ( ) ;
323325
324326 // update matches (the MBE "parse tree") by appending
325327 // each tree as a subtree.
326328
327329 // Only touch the binders we have actually bound
328- for idx in ei . match_lo ..ei . match_hi {
329- let sub = ei . matches [ idx] . clone ( ) ;
330- new_pos. push_match ( idx, MatchedSeq ( sub, Span { lo : ei . sp_lo , ..span } ) ) ;
330+ for idx in item . match_lo ..item . match_hi {
331+ let sub = item . matches [ idx] . clone ( ) ;
332+ new_pos. push_match ( idx, MatchedSeq ( sub, Span { lo : item . sp_lo , ..span } ) ) ;
331333 }
332334
333- new_pos. match_cur = ei . match_hi ;
335+ new_pos. match_cur = item . match_hi ;
334336 new_pos. idx += 1 ;
335- cur_eis . push ( new_pos) ;
337+ cur_items . push ( new_pos) ;
336338 }
337339
338340 // Check if we need a separator
339- if idx == len && ei . sep . is_some ( ) {
341+ if idx == len && item . sep . is_some ( ) {
340342 // We have a separator, and it is the current token.
341- if ei . sep . as_ref ( ) . map ( |sep| token_name_eq ( token, sep) ) . unwrap_or ( false ) {
342- ei . idx += 1 ;
343- next_eis . push ( ei ) ;
343+ if item . sep . as_ref ( ) . map ( |sep| token_name_eq ( token, sep) ) . unwrap_or ( false ) {
344+ item . idx += 1 ;
345+ next_items . push ( item ) ;
344346 }
345347 } else { // we don't need a separator
346- ei . match_cur = ei . match_lo ;
347- ei . idx = 0 ;
348- cur_eis . push ( ei ) ;
348+ item . match_cur = item . match_lo ;
349+ item . idx = 0 ;
350+ cur_items . push ( item ) ;
349351 }
350352 } else {
351353 // We aren't repeating, so we must be potentially at the end of the input.
352- eof_eis . push ( ei ) ;
354+ eof_items . push ( item ) ;
353355 }
354356 } else {
355- match ei . top_elts . get_tt ( idx) {
357+ match item . top_elts . get_tt ( idx) {
356358 /* need to descend into sequence */
357359 TokenTree :: Sequence ( sp, seq) => {
358360 if seq. op == quoted:: KleeneOp :: ZeroOrMore {
359361 // Examine the case where there are 0 matches of this sequence
360- let mut new_ei = ei . clone ( ) ;
361- new_ei . match_cur += seq. num_captures ;
362- new_ei . idx += 1 ;
363- for idx in ei . match_cur ..ei . match_cur + seq. num_captures {
364- new_ei . push_match ( idx, MatchedSeq ( Rc :: new ( vec ! [ ] ) , sp) ) ;
362+ let mut new_item = item . clone ( ) ;
363+ new_item . match_cur += seq. num_captures ;
364+ new_item . idx += 1 ;
365+ for idx in item . match_cur ..item . match_cur + seq. num_captures {
366+ new_item . push_match ( idx, MatchedSeq ( Rc :: new ( vec ! [ ] ) , sp) ) ;
365367 }
366- cur_eis . push ( new_ei ) ;
368+ cur_items . push ( new_item ) ;
367369 }
368370
369371 // Examine the case where there is at least one match of this sequence
370- let matches = create_matches ( ei . matches . len ( ) ) ;
371- cur_eis . push ( Box :: new ( MatcherPos {
372+ let matches = create_matches ( item . matches . len ( ) ) ;
373+ cur_items . push ( Box :: new ( MatcherPos {
372374 stack : vec ! [ ] ,
373375 sep : seq. separator . clone ( ) ,
374376 idx : 0 ,
375377 matches : matches,
376- match_lo : ei . match_cur ,
377- match_cur : ei . match_cur ,
378- match_hi : ei . match_cur + seq. num_captures ,
379- up : Some ( ei ) ,
378+ match_lo : item . match_cur ,
379+ match_cur : item . match_cur ,
380+ match_hi : item . match_cur + seq. num_captures ,
381+ up : Some ( item ) ,
380382 sp_lo : sp. lo ,
381383 top_elts : Tt ( TokenTree :: Sequence ( sp, seq) ) ,
382384 } ) ) ;
@@ -390,22 +392,22 @@ fn inner_parse_loop(sess: &ParseSess,
390392 // Built-in nonterminals never start with these tokens,
391393 // so we can eliminate them from consideration.
392394 if may_begin_with ( & * id. name . as_str ( ) , token) {
393- bb_eis . push ( ei ) ;
395+ bb_items . push ( item ) ;
394396 }
395397 }
396398 seq @ TokenTree :: Delimited ( ..) | seq @ TokenTree :: Token ( _, DocComment ( ..) ) => {
397- let lower_elts = mem:: replace ( & mut ei . top_elts , Tt ( seq) ) ;
398- let idx = ei . idx ;
399- ei . stack . push ( MatcherTtFrame {
399+ let lower_elts = mem:: replace ( & mut item . top_elts , Tt ( seq) ) ;
400+ let idx = item . idx ;
401+ item . stack . push ( MatcherTtFrame {
400402 elts : lower_elts,
401403 idx : idx,
402404 } ) ;
403- ei . idx = 0 ;
404- cur_eis . push ( ei ) ;
405+ item . idx = 0 ;
406+ cur_items . push ( item ) ;
405407 }
406408 TokenTree :: Token ( _, ref t) if token_name_eq ( t, token) => {
407- ei . idx += 1 ;
408- next_eis . push ( ei ) ;
409+ item . idx += 1 ;
410+ next_items . push ( item ) ;
409411 }
410412 TokenTree :: Token ( ..) | TokenTree :: MetaVar ( ..) => { }
411413 }
@@ -422,38 +424,38 @@ pub fn parse(sess: &ParseSess,
422424 recurse_into_modules : bool )
423425 -> NamedParseResult {
424426 let mut parser = Parser :: new ( sess, tts, directory, recurse_into_modules, true ) ;
425- let mut cur_eis = SmallVector :: one ( initial_matcher_pos ( ms. to_owned ( ) , parser. span . lo ) ) ;
426- let mut next_eis = Vec :: new ( ) ; // or proceed normally
427+ let mut cur_items = SmallVector :: one ( initial_matcher_pos ( ms. to_owned ( ) , parser. span . lo ) ) ;
428+ let mut next_items = Vec :: new ( ) ; // or proceed normally
427429
428430 loop {
429- let mut bb_eis = SmallVector :: new ( ) ; // black-box parsed by parser.rs
430- let mut eof_eis = SmallVector :: new ( ) ;
431- assert ! ( next_eis . is_empty( ) ) ;
431+ let mut bb_items = SmallVector :: new ( ) ; // black-box parsed by parser.rs
432+ let mut eof_items = SmallVector :: new ( ) ;
433+ assert ! ( next_items . is_empty( ) ) ;
432434
433- match inner_parse_loop ( sess, & mut cur_eis , & mut next_eis , & mut eof_eis , & mut bb_eis ,
435+ match inner_parse_loop ( sess, & mut cur_items , & mut next_items , & mut eof_items , & mut bb_items ,
434436 & parser. token , parser. span ) {
435437 Success ( _) => { } ,
436438 Failure ( sp, tok) => return Failure ( sp, tok) ,
437439 Error ( sp, msg) => return Error ( sp, msg) ,
438440 }
439441
440- // inner parse loop handled all cur_eis , so it's empty
441- assert ! ( cur_eis . is_empty( ) ) ;
442+ // inner parse loop handled all cur_items , so it's empty
443+ assert ! ( cur_items . is_empty( ) ) ;
442444
443445 /* error messages here could be improved with links to orig. rules */
444446 if token_name_eq ( & parser. token , & token:: Eof ) {
445- if eof_eis . len ( ) == 1 {
446- let matches = eof_eis [ 0 ] . matches . iter_mut ( ) . map ( |mut dv| {
447+ if eof_items . len ( ) == 1 {
448+ let matches = eof_items [ 0 ] . matches . iter_mut ( ) . map ( |mut dv| {
447449 Rc :: make_mut ( dv) . pop ( ) . unwrap ( )
448450 } ) ;
449451 return nameize ( sess, ms, matches) ;
450- } else if eof_eis . len ( ) > 1 {
452+ } else if eof_items . len ( ) > 1 {
451453 return Error ( parser. span , "ambiguity: multiple successful parses" . to_string ( ) ) ;
452454 } else {
453455 return Failure ( parser. span , token:: Eof ) ;
454456 }
455- } else if ( !bb_eis . is_empty ( ) && !next_eis . is_empty ( ) ) || bb_eis . len ( ) > 1 {
456- let nts = bb_eis . iter ( ) . map ( |ei | match ei . top_elts . get_tt ( ei . idx ) {
457+ } else if ( !bb_items . is_empty ( ) && !next_items . is_empty ( ) ) || bb_items . len ( ) > 1 {
458+ let nts = bb_items . iter ( ) . map ( |item | match item . top_elts . get_tt ( item . idx ) {
457459 TokenTree :: MetaVarDecl ( _, bind, name) => {
458460 format ! ( "{} ('{}')" , name, bind)
459461 }
@@ -462,33 +464,33 @@ pub fn parse(sess: &ParseSess,
462464
463465 return Error ( parser. span , format ! (
464466 "local ambiguity: multiple parsing options: {}" ,
465- match next_eis . len( ) {
467+ match next_items . len( ) {
466468 0 => format!( "built-in NTs {}." , nts) ,
467469 1 => format!( "built-in NTs {} or 1 other option." , nts) ,
468470 n => format!( "built-in NTs {} or {} other options." , nts, n) ,
469471 }
470472 ) ) ;
471- } else if bb_eis . is_empty ( ) && next_eis . is_empty ( ) {
473+ } else if bb_items . is_empty ( ) && next_items . is_empty ( ) {
472474 return Failure ( parser. span , parser. token ) ;
473- } else if !next_eis . is_empty ( ) {
475+ } else if !next_items . is_empty ( ) {
474476 /* Now process the next token */
475- cur_eis . extend ( next_eis . drain ( ..) ) ;
477+ cur_items . extend ( next_items . drain ( ..) ) ;
476478 parser. bump ( ) ;
477- } else /* bb_eis .len() == 1 */ {
478- let mut ei = bb_eis . pop ( ) . unwrap ( ) ;
479- if let TokenTree :: MetaVarDecl ( span, _, ident) = ei . top_elts . get_tt ( ei . idx ) {
480- let match_cur = ei . match_cur ;
481- ei . push_match ( match_cur,
479+ } else /* bb_items .len() == 1 */ {
480+ let mut item = bb_items . pop ( ) . unwrap ( ) ;
481+ if let TokenTree :: MetaVarDecl ( span, _, ident) = item . top_elts . get_tt ( item . idx ) {
482+ let match_cur = item . match_cur ;
483+ item . push_match ( match_cur,
482484 MatchedNonterminal ( Rc :: new ( parse_nt ( & mut parser, span, & ident. name . as_str ( ) ) ) ) ) ;
483- ei . idx += 1 ;
484- ei . match_cur += 1 ;
485+ item . idx += 1 ;
486+ item . match_cur += 1 ;
485487 } else {
486488 unreachable ! ( )
487489 }
488- cur_eis . push ( ei ) ;
490+ cur_items . push ( item ) ;
489491 }
490492
491- assert ! ( !cur_eis . is_empty( ) ) ;
493+ assert ! ( !cur_items . is_empty( ) ) ;
492494 }
493495}
494496
0 commit comments