Skip to content

Commit 73d23b4

Browse files
authored
Merge pull request #52
Add `let` statement. Requires a breaking change in API to parser.Parse to support multiple statements, but pql.Compile does not change. The REPL also required some tweaking. Fixes #40
2 parents a015aeb + 60e4228 commit 73d23b4

File tree

9 files changed

+428
-178
lines changed

9 files changed

+428
-178
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ documentation is representative of the current pql api.
6969
- [`as`](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/as-operator)
7070
- [`count`](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-operator)
7171
- [`join`](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/join-operator)
72+
- [`let` statements](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/let-statement),
73+
but only scalar expressions are supported.
7274
- [`project`](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/project-operator)
7375
- [`extend`](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extend-operator)
7476
- [`sort`/`order`](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/sort-operator)

cmd/pql/main.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ func run(ctx context.Context, output io.Writer, input io.Reader, logError func(e
7070
}
7171

7272
var finalError error
73+
letStatements := new(strings.Builder)
7374
for scanner.Scan() {
7475
sb.Write(scanner.Bytes())
7576
sb.WriteByte('\n')
@@ -80,7 +81,20 @@ func run(ctx context.Context, output io.Writer, input io.Reader, logError func(e
8081
}
8182

8283
for _, stmt := range statements[:len(statements)-1] {
83-
sql, err := pql.Compile(stmt)
84+
// Valid let statements are prepended to an ongoing prelude.
85+
tokens := parser.Scan(stmt)
86+
if len(tokens) > 0 && tokens[0].Kind == parser.TokenIdentifier && tokens[0].Value == "let" {
87+
if _, err := pql.Compile(letStatements.String() + stmt + ";X"); err != nil {
88+
logError(err)
89+
finalError = errors.New("one or more statements could not be compiled")
90+
} else {
91+
letStatements.WriteString(stmt)
92+
letStatements.WriteString(";\n")
93+
}
94+
continue
95+
}
96+
97+
sql, err := pql.Compile(letStatements.String() + stmt)
8498
if err != nil {
8599
logError(err)
86100
finalError = errors.New("one or more statements could not be compiled")

parser/ast.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,20 @@ func (id *QualifiedIdent) Span() Span {
7272

7373
func (id *QualifiedIdent) expression() {}
7474

75+
// Statement is a top-level node of a query.
// In this file it is implemented by [*TabularExpr] and [*LetStatement].
// The unexported statement method keeps implementations
// restricted to this package.
type Statement interface {
76+
Node
77+
statement()
78+
}
79+
7580
// TabularExpr is a query expression that produces a table.
81+
// It implements [Statement].
7682
type TabularExpr struct {
7783
Source TabularDataSource
7884
Operators []TabularOperator
7985
}
8086

87+
func (x *TabularExpr) statement() {}
88+
8189
func (x *TabularExpr) Span() Span {
8290
if x == nil {
8391
return nullSpan()
@@ -547,6 +555,29 @@ func (idx *IndexExpr) Span() Span {
547555

548556
func (idx *IndexExpr) expression() {}
549557

558+
// A LetStatement node represents a let statement,
559+
// assigning an expression to a name.
560+
// It implements [Statement].
561+
type LetStatement struct {
562+
Keyword Span
563+
Name *Ident
564+
Assign Span
565+
X Expr
566+
}
567+
568+
func (stmt *LetStatement) statement() {}
569+
570+
func (stmt *LetStatement) Span() Span {
571+
if stmt == nil {
572+
return nullSpan()
573+
}
574+
xSpan := nullSpan()
575+
if stmt.X != nil {
576+
xSpan = stmt.X.Span()
577+
}
578+
return unionSpans(stmt.Keyword, stmt.Name.Span(), stmt.Assign, xSpan)
579+
}
580+
550581
// Walk traverses an AST in depth-first order.
551582
// If the visit function returns true for a node,
552583
// the visit function will be called for its children.
@@ -684,6 +715,11 @@ func Walk(n Node, visit func(n Node) bool) {
684715
stack = append(stack, n.Index)
685716
stack = append(stack, n.X)
686717
}
718+
case *LetStatement:
719+
if visit(n) {
720+
stack = append(stack, n.X)
721+
stack = append(stack, n.Name)
722+
}
687723
default:
688724
panic(fmt.Errorf("unknown Node type %T", n))
689725
}

parser/parser.go

Lines changed: 156 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -21,40 +21,119 @@ type parser struct {
2121
splitKind TokenKind
2222
}
2323

24-
// Parse converts a Pipeline Query Language tabular expression
24+
// Parse converts a Pipeline Query Language query
2525
// into an Abstract Syntax Tree (AST).
26-
func Parse(query string) (*TabularExpr, error) {
26+
func Parse(query string) ([]Statement, error) {
2727
p := &parser{
2828
source: query,
2929
tokens: Scan(query),
3030
}
31-
expr, err := p.tabularExpr()
32-
if p.pos < len(p.tokens) {
33-
trailingToken := p.tokens[p.pos]
34-
if trailingToken.Kind == TokenError {
35-
err = joinErrors(err, &parseError{
36-
source: p.source,
37-
span: trailingToken.Span,
38-
err: errors.New(trailingToken.Value),
39-
})
31+
var result []Statement
32+
var resultError error
33+
for {
34+
stmtParser := p.splitSemi()
35+
36+
stmt, err := firstParse(
37+
func() (Statement, error) {
38+
stmt, err := stmtParser.letStatement()
39+
if stmt == nil {
40+
// Prevent returning a non-nil interface.
41+
return nil, err
42+
}
43+
return stmt, err
44+
},
45+
func() (Statement, error) {
46+
expr, err := stmtParser.tabularExpr()
47+
if expr == nil {
48+
// Prevent returning a non-nil interface.
49+
return nil, err
50+
}
51+
return expr, err
52+
},
53+
)
54+
55+
if isNotFound(err) {
56+
// We're okay with empty statements, we just ignore them.
57+
if stmtParser.pos < len(stmtParser.tokens) {
58+
trailingToken := stmtParser.tokens[stmtParser.pos]
59+
if trailingToken.Kind == TokenError {
60+
resultError = joinErrors(err, &parseError{
61+
source: p.source,
62+
span: trailingToken.Span,
63+
err: errors.New(trailingToken.Value),
64+
})
65+
} else {
66+
resultError = joinErrors(err, &parseError{
67+
source: p.source,
68+
span: trailingToken.Span,
69+
err: errors.New("unrecognized token"),
70+
})
71+
}
72+
}
4073
} else {
41-
err = joinErrors(err, &parseError{
42-
source: p.source,
43-
span: trailingToken.Span,
44-
err: errors.New("unrecognized token"),
45-
})
74+
if stmt != nil {
75+
result = append(result, stmt)
76+
}
77+
resultError = joinErrors(resultError, makeErrorOpaque(err))
78+
resultError = joinErrors(resultError, stmtParser.endSplit())
79+
}
80+
81+
// Next token, if present, guaranteed to be a semicolon.
82+
if _, ok := p.next(); !ok {
83+
break
4684
}
47-
} else if isNotFound(err) {
48-
err = &parseError{
85+
}
86+
87+
if resultError != nil {
88+
return result, fmt.Errorf("parse pipeline query language: %w", resultError)
89+
}
90+
return result, nil
91+
}
92+
93+
func firstParse[T any](productions ...func() (T, error)) (T, error) {
94+
for _, p := range productions[:len(productions)-1] {
95+
x, err := p()
96+
if !isNotFound(err) {
97+
return x, err
98+
}
99+
}
100+
return productions[len(productions)-1]()
101+
}
102+
103+
func (p *parser) letStatement() (*LetStatement, error) {
104+
keyword, _ := p.next()
105+
if keyword.Kind != TokenIdentifier || keyword.Value != "let" {
106+
p.prev()
107+
return nil, &parseError{
108+
source: p.source,
109+
span: keyword.Span,
110+
err: notFoundError{fmt.Errorf("expected 'let', got %s", formatToken(p.source, keyword))},
111+
}
112+
}
113+
114+
stmt := &LetStatement{
115+
Keyword: keyword.Span,
116+
Assign: nullSpan(),
117+
}
118+
var err error
119+
stmt.Name, err = p.ident()
120+
if err != nil {
121+
return stmt, makeErrorOpaque(err)
122+
}
123+
assign, _ := p.next()
124+
if assign.Kind != TokenAssign {
125+
return stmt, &parseError{
49126
source: p.source,
50-
span: indexSpan(len(query)),
51-
err: errors.New("empty query"),
127+
span: assign.Span,
128+
err: fmt.Errorf("expected '=', got %s", formatToken(p.source, assign)),
52129
}
53130
}
131+
stmt.Assign = assign.Span
132+
stmt.X, err = p.expr()
54133
if err != nil {
55-
return expr, fmt.Errorf("parse pipeline query language: %w", err)
134+
return stmt, makeErrorOpaque(err)
56135
}
57-
return expr, nil
136+
return stmt, nil
58137
}
59138

60139
func (p *parser) tabularExpr() (*TabularExpr, error) {
@@ -294,27 +373,10 @@ func (p *parser) takeOperator(pipe, keyword Token) (*TakeOperator, error) {
294373
Pipe: pipe.Span,
295374
Keyword: keyword.Span,
296375
}
297-
298-
tok, _ := p.next()
299-
if tok.Kind != TokenNumber {
300-
return op, &parseError{
301-
source: p.source,
302-
span: tok.Span,
303-
err: fmt.Errorf("expected integer, got %s", formatToken(p.source, tok)),
304-
}
305-
}
306-
rowCount := &BasicLit{
307-
Kind: tok.Kind,
308-
Value: tok.Value,
309-
ValueSpan: tok.Span,
310-
}
311-
op.RowCount = rowCount
312-
if !rowCount.IsInteger() {
313-
return op, &parseError{
314-
source: p.source,
315-
span: tok.Span,
316-
err: fmt.Errorf("expected integer, got %s", formatToken(p.source, tok)),
317-
}
376+
var err error
377+
op.RowCount, err = p.rowCount()
378+
if err != nil {
379+
return op, makeErrorOpaque(err)
318380
}
319381
return op, nil
320382
}
@@ -326,30 +388,13 @@ func (p *parser) topOperator(pipe, keyword Token) (*TopOperator, error) {
326388
By: nullSpan(),
327389
}
328390

329-
tok, _ := p.next()
330-
if tok.Kind != TokenNumber {
331-
p.prev()
332-
return op, &parseError{
333-
source: p.source,
334-
span: tok.Span,
335-
err: fmt.Errorf("expected integer, got %s", formatToken(p.source, tok)),
336-
}
337-
}
338-
rowCount := &BasicLit{
339-
Kind: tok.Kind,
340-
Value: tok.Value,
341-
ValueSpan: tok.Span,
342-
}
343-
op.RowCount = rowCount
344-
if !rowCount.IsInteger() {
345-
return op, &parseError{
346-
source: p.source,
347-
span: tok.Span,
348-
err: fmt.Errorf("expected integer, got %s", formatToken(p.source, tok)),
349-
}
391+
var err error
392+
op.RowCount, err = p.rowCount()
393+
if err != nil {
394+
return op, makeErrorOpaque(err)
350395
}
351396

352-
tok, _ = p.next()
397+
tok, _ := p.next()
353398
if tok.Kind != TokenBy {
354399
p.prev()
355400
return op, &parseError{
@@ -360,11 +405,28 @@ func (p *parser) topOperator(pipe, keyword Token) (*TopOperator, error) {
360405
}
361406
op.By = tok.Span
362407

363-
var err error
364408
op.Col, err = p.sortTerm()
365409
return op, makeErrorOpaque(err)
366410
}
367411

412+
func (p *parser) rowCount() (Expr, error) {
413+
x, err := p.expr()
414+
if err != nil {
415+
return x, err
416+
}
417+
if lit, ok := x.(*BasicLit); ok {
418+
// Do basic check for common case of literals.
419+
if !lit.IsInteger() {
420+
return x, fmt.Errorf("expected integer, got %s", formatToken(p.source, Token{
421+
Kind: lit.Kind,
422+
Span: lit.ValueSpan,
423+
Value: lit.Value,
424+
}))
425+
}
426+
}
427+
return x, nil
428+
}
429+
368430
func (p *parser) projectOperator(pipe, keyword Token) (*ProjectOperator, error) {
369431
op := &ProjectOperator{
370432
Pipe: pipe.Span,
@@ -1042,7 +1104,9 @@ func (p *parser) qualifiedIdent() (*QualifiedIdent, error) {
10421104
// split advances the parser to right before the next token of the given kind,
10431105
// and returns a new parser that reads the tokens that were skipped over.
10441106
// It ignores tokens that are in parenthetical groups after the initial parse position.
1045-
// If no such token is found, skipTo advances to EOF.
1107+
// If no such token is found, split advances to EOF.
1108+
//
1109+
// For splitting by semicolon, see [*parser.splitSemi].
10461110
func (p *parser) split(search TokenKind) *parser {
10471111
// stack is the list of expected closing parentheses/brackets.
10481112
// When a closing parenthesis/bracket is encountered,
@@ -1103,6 +1167,31 @@ loop:
11031167
}
11041168
}
11051169

1170+
// splitSemi advances the parser to right before the next semicolon,
1171+
// and returns a new parser that reads the tokens that were skipped over.
1172+
// If no semicolon is found, splitSemi advances to EOF.
1173+
func (p *parser) splitSemi() *parser {
1174+
start := p.pos
1175+
for {
1176+
tok, ok := p.next()
1177+
if !ok {
1178+
return &parser{
1179+
source: p.source,
1180+
tokens: p.tokens[start:],
1181+
splitKind: TokenSemi,
1182+
}
1183+
}
1184+
if tok.Kind == TokenSemi {
1185+
p.prev()
1186+
return &parser{
1187+
source: p.source,
1188+
tokens: p.tokens[start:p.pos],
1189+
splitKind: TokenSemi,
1190+
}
1191+
}
1192+
}
1193+
}
1194+
11061195
func (p *parser) endSplit() error {
11071196
if p.splitKind == 0 {
11081197
// This is a bug, but treating as an error instead of panicking.

0 commit comments

Comments
 (0)