@@ -21,40 +21,119 @@ type parser struct {
2121 splitKind TokenKind
2222}
2323
24- // Parse converts a Pipeline Query Language tabular expression
24+ // Parse converts a Pipeline Query Language query
2525// into an Abstract Syntax Tree (AST).
26- func Parse (query string ) (* TabularExpr , error ) {
26+ func Parse (query string ) ([] Statement , error ) {
2727 p := & parser {
2828 source : query ,
2929 tokens : Scan (query ),
3030 }
31- expr , err := p .tabularExpr ()
32- if p .pos < len (p .tokens ) {
33- trailingToken := p .tokens [p .pos ]
34- if trailingToken .Kind == TokenError {
35- err = joinErrors (err , & parseError {
36- source : p .source ,
37- span : trailingToken .Span ,
38- err : errors .New (trailingToken .Value ),
39- })
31+ var result []Statement
32+ var resultError error
33+ for {
34+ stmtParser := p .splitSemi ()
35+
36+ stmt , err := firstParse (
37+ func () (Statement , error ) {
38+ stmt , err := stmtParser .letStatement ()
39+ if stmt == nil {
40+ // Prevent returning a non-nil interface.
41+ return nil , err
42+ }
43+ return stmt , err
44+ },
45+ func () (Statement , error ) {
46+ expr , err := stmtParser .tabularExpr ()
47+ if expr == nil {
48+ // Prevent returning a non-nil interface.
49+ return nil , err
50+ }
51+ return expr , err
52+ },
53+ )
54+
55+ if isNotFound (err ) {
56+ // We're okay with empty statements, we just ignore them.
57+ if stmtParser .pos < len (stmtParser .tokens ) {
58+ trailingToken := stmtParser .tokens [stmtParser .pos ]
59+ if trailingToken .Kind == TokenError {
60+ resultError = joinErrors (err , & parseError {
61+ source : p .source ,
62+ span : trailingToken .Span ,
63+ err : errors .New (trailingToken .Value ),
64+ })
65+ } else {
66+ resultError = joinErrors (err , & parseError {
67+ source : p .source ,
68+ span : trailingToken .Span ,
69+ err : errors .New ("unrecognized token" ),
70+ })
71+ }
72+ }
4073 } else {
41- err = joinErrors (err , & parseError {
42- source : p .source ,
43- span : trailingToken .Span ,
44- err : errors .New ("unrecognized token" ),
45- })
74+ if stmt != nil {
75+ result = append (result , stmt )
76+ }
77+ resultError = joinErrors (resultError , makeErrorOpaque (err ))
78+ resultError = joinErrors (resultError , stmtParser .endSplit ())
79+ }
80+
81+ // Next token, if present, guaranteed to be a semicolon.
82+ if _ , ok := p .next (); ! ok {
83+ break
4684 }
47- } else if isNotFound (err ) {
48- err = & parseError {
85+ }
86+
87+ if resultError != nil {
88+ return result , fmt .Errorf ("parse pipeline query language: %w" , resultError )
89+ }
90+ return result , nil
91+ }
92+
93+ func firstParse [T any ](productions ... func () (T , error )) (T , error ) {
94+ for _ , p := range productions [:len (productions )- 1 ] {
95+ x , err := p ()
96+ if ! isNotFound (err ) {
97+ return x , err
98+ }
99+ }
100+ return productions [len (productions )- 1 ]()
101+ }
102+
103+ func (p * parser ) letStatement () (* LetStatement , error ) {
104+ keyword , _ := p .next ()
105+ if keyword .Kind != TokenIdentifier || keyword .Value != "let" {
106+ p .prev ()
107+ return nil , & parseError {
108+ source : p .source ,
109+ span : keyword .Span ,
110+ err : notFoundError {fmt .Errorf ("expected 'let', got %s" , formatToken (p .source , keyword ))},
111+ }
112+ }
113+
114+ stmt := & LetStatement {
115+ Keyword : keyword .Span ,
116+ Assign : nullSpan (),
117+ }
118+ var err error
119+ stmt .Name , err = p .ident ()
120+ if err != nil {
121+ return stmt , makeErrorOpaque (err )
122+ }
123+ assign , _ := p .next ()
124+ if assign .Kind != TokenAssign {
125+ return stmt , & parseError {
49126 source : p .source ,
50- span : indexSpan ( len ( query )) ,
51- err : errors . New ( "empty query" ),
127+ span : assign . Span ,
128+ err : fmt . Errorf ( "expected '=', got %s" , formatToken ( p . source , assign ) ),
52129 }
53130 }
131+ stmt .Assign = assign .Span
132+ stmt .X , err = p .expr ()
54133 if err != nil {
55- return expr , fmt . Errorf ( "parse pipeline query language: %w" , err )
134+ return stmt , makeErrorOpaque ( err )
56135 }
57- return expr , nil
136+ return stmt , nil
58137}
59138
60139func (p * parser ) tabularExpr () (* TabularExpr , error ) {
@@ -294,27 +373,10 @@ func (p *parser) takeOperator(pipe, keyword Token) (*TakeOperator, error) {
294373 Pipe : pipe .Span ,
295374 Keyword : keyword .Span ,
296375 }
297-
298- tok , _ := p .next ()
299- if tok .Kind != TokenNumber {
300- return op , & parseError {
301- source : p .source ,
302- span : tok .Span ,
303- err : fmt .Errorf ("expected integer, got %s" , formatToken (p .source , tok )),
304- }
305- }
306- rowCount := & BasicLit {
307- Kind : tok .Kind ,
308- Value : tok .Value ,
309- ValueSpan : tok .Span ,
310- }
311- op .RowCount = rowCount
312- if ! rowCount .IsInteger () {
313- return op , & parseError {
314- source : p .source ,
315- span : tok .Span ,
316- err : fmt .Errorf ("expected integer, got %s" , formatToken (p .source , tok )),
317- }
376+ var err error
377+ op .RowCount , err = p .rowCount ()
378+ if err != nil {
379+ return op , makeErrorOpaque (err )
318380 }
319381 return op , nil
320382}
@@ -326,30 +388,13 @@ func (p *parser) topOperator(pipe, keyword Token) (*TopOperator, error) {
326388 By : nullSpan (),
327389 }
328390
329- tok , _ := p .next ()
330- if tok .Kind != TokenNumber {
331- p .prev ()
332- return op , & parseError {
333- source : p .source ,
334- span : tok .Span ,
335- err : fmt .Errorf ("expected integer, got %s" , formatToken (p .source , tok )),
336- }
337- }
338- rowCount := & BasicLit {
339- Kind : tok .Kind ,
340- Value : tok .Value ,
341- ValueSpan : tok .Span ,
342- }
343- op .RowCount = rowCount
344- if ! rowCount .IsInteger () {
345- return op , & parseError {
346- source : p .source ,
347- span : tok .Span ,
348- err : fmt .Errorf ("expected integer, got %s" , formatToken (p .source , tok )),
349- }
391+ var err error
392+ op .RowCount , err = p .rowCount ()
393+ if err != nil {
394+ return op , makeErrorOpaque (err )
350395 }
351396
352- tok , _ = p .next ()
397+ tok , _ : = p .next ()
353398 if tok .Kind != TokenBy {
354399 p .prev ()
355400 return op , & parseError {
@@ -360,11 +405,28 @@ func (p *parser) topOperator(pipe, keyword Token) (*TopOperator, error) {
360405 }
361406 op .By = tok .Span
362407
363- var err error
364408 op .Col , err = p .sortTerm ()
365409 return op , makeErrorOpaque (err )
366410}
367411
412+ func (p * parser ) rowCount () (Expr , error ) {
413+ x , err := p .expr ()
414+ if err != nil {
415+ return x , err
416+ }
417+ if lit , ok := x .(* BasicLit ); ok {
418+ // Do basic check for common case of literals.
419+ if ! lit .IsInteger () {
420+ return x , fmt .Errorf ("expected integer, got %s" , formatToken (p .source , Token {
421+ Kind : lit .Kind ,
422+ Span : lit .ValueSpan ,
423+ Value : lit .Value ,
424+ }))
425+ }
426+ }
427+ return x , nil
428+ }
429+
368430func (p * parser ) projectOperator (pipe , keyword Token ) (* ProjectOperator , error ) {
369431 op := & ProjectOperator {
370432 Pipe : pipe .Span ,
@@ -1042,7 +1104,9 @@ func (p *parser) qualifiedIdent() (*QualifiedIdent, error) {
10421104// split advances the parser to right before the next token of the given kind,
10431105// and returns a new parser that reads the tokens that were skipped over.
10441106// It ignores tokens that are in parenthetical groups after the initial parse position.
1045- // If no such token is found, skipTo advances to EOF.
1107+ // If no such token is found, split advances to EOF.
1108+ //
1109+ // For splitting by semicolon, see [*parser.splitSemi].
10461110func (p * parser ) split (search TokenKind ) * parser {
10471111 // stack is the list of expected closing parentheses/brackets.
10481112 // When a closing parenthesis/bracket is encountered,
@@ -1103,6 +1167,31 @@ loop:
11031167 }
11041168}
11051169
1170+ // splitSemi advances the parser to right before the next semicolon,
1171+ // and returns a new parser that reads the tokens that were skipped over.
1172+ // If no semicolon is found, splitSemi advances to EOF.
1173+ func (p * parser ) splitSemi () * parser {
1174+ start := p .pos
1175+ for {
1176+ tok , ok := p .next ()
1177+ if ! ok {
1178+ return & parser {
1179+ source : p .source ,
1180+ tokens : p .tokens [start :],
1181+ splitKind : TokenSemi ,
1182+ }
1183+ }
1184+ if tok .Kind == TokenSemi {
1185+ p .prev ()
1186+ return & parser {
1187+ source : p .source ,
1188+ tokens : p .tokens [start :p .pos ],
1189+ splitKind : TokenSemi ,
1190+ }
1191+ }
1192+ }
1193+ }
1194+
11061195func (p * parser ) endSplit () error {
11071196 if p .splitKind == 0 {
11081197 // This is a bug, but treating as an error instead of panicing.
0 commit comments