Commit 303b65d

feat: improve tracing
`--trace` now outputs a JSON structure with tracing information, eg.

```
{"lexer":"markdown","state":"root","rule":15,"pos":0,"elapsedMs":0.022875}
{"lexer":"markdown","state":"root","rule":15,"pos":1,"elapsedMs":0.002667}
{"lexer":"markdown","state":"root","rule":15,"pos":2,"elapsedMs":0.001833}
{"lexer":"markdown","state":"root","rule":15,"pos":3,"elapsedMs":0.002166}
{"lexer":"markdown","state":"root","rule":15,"pos":4,"elapsedMs":0.002125}
```

This should generally be much more amenable to analysis, eg. convenient filtering using jq to help track down hotspots:

```
chroma --trace docs.md 2>&1 > /dev/null | jq 'select(. | .elapsedMs > 0.1)' | less
```
1 parent 1f48e65 commit 303b65d
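For a Go-flavoured alternative to the jq filter, the same stream can be consumed in a few lines. This is a minimal sketch, not part of this commit: it assumes the v2 module path and decodes the `Trace` struct added in `lexer.go` below (the program name is hypothetical).

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"

	"github.com/alecthomas/chroma/v2"
)

func main() {
	// Read the newline-delimited JSON trace stream from stdin, eg.
	//   chroma --trace docs.md 2>&1 >/dev/null | tracefilter
	dec := json.NewDecoder(os.Stdin)
	for dec.More() {
		var t chroma.Trace
		if err := dec.Decode(&t); err != nil {
			break
		}
		// Mirror the jq filter: only report matches slower than 0.1ms.
		if t.Elapsed > 0.1 {
			fmt.Printf("state=%-10s rule=%-3d pos=%-6d elapsedMs=%.6f\n",
				t.State, t.Rule, t.Pos, t.Elapsed)
		}
	}
}
```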

4 files changed: +61 −5 lines

cmd/chroma/main.go

Lines changed: 2 additions & 2 deletions

```diff
@@ -346,8 +346,8 @@ func listAll() {
 }
 
 func lex(ctx *kong.Context, lexer chroma.Lexer, contents string) chroma.Iterator {
-	if rel, ok := lexer.(*chroma.RegexLexer); ok {
-		rel.Trace(cli.Trace)
+	if rel, ok := lexer.(chroma.TracingLexer); ok {
+		rel.SetTracing(cli.Trace)
 	}
 	lexer = chroma.Coalesce(lexer)
 	it, err := lexer.Tokenise(nil, contents)
```
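Outside the CLI, tracing can now be enabled through the new interface rather than by asserting on `*chroma.RegexLexer` directly. A minimal sketch, not part of this commit, assuming the v2 module path and the standard lexers package:

```go
package main

import (
	"io"
	"os"

	"github.com/alecthomas/chroma/v2"
	"github.com/alecthomas/chroma/v2/lexers"
)

func main() {
	lexer := lexers.Get("markdown")
	// Any lexer implementing TracingLexer can be switched into trace
	// mode; lexers that don't implement it are left unaffected.
	if tl, ok := lexer.(chroma.TracingLexer); ok {
		tl.SetTracing(true)
	}
	it, err := lexer.Tokenise(nil, "# Hello\n")
	if err != nil {
		panic(err)
	}
	// Drain the iterator; trace records are written to stderr as a side effect.
	for tok := it(); tok != chroma.EOF; tok = it() {
		io.WriteString(os.Stdout, tok.Value)
	}
}
```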

delegate.go

Lines changed: 9 additions & 0 deletions

```diff
@@ -24,6 +24,15 @@ func DelegatingLexer(root Lexer, language Lexer) Lexer {
 	}
 }
 
+func (d *delegatingLexer) SetTracing(enable bool) {
+	if l, ok := d.language.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+	if l, ok := d.root.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+}
+
 func (d *delegatingLexer) AnalyseText(text string) float32 {
 	return d.root.AnalyseText(text)
 }
```
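Note the design choice here: the delegating lexer propagates SetTracing via type assertions, so sub-lexers that implement TracingLexer are switched into trace mode while any that do not are silently skipped. This keeps tracing optional for Lexer implementations.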

lexer.go

Lines changed: 17 additions & 0 deletions

```diff
@@ -130,6 +130,23 @@ type Lexer interface {
 	AnalyseText(text string) float32
 }
 
+// Trace is the trace of a tokenisation process.
+type Trace struct {
+	Lexer   string  `json:"lexer"`
+	State   string  `json:"state"`
+	Rule    int     `json:"rule"`
+	Pattern string  `json:"pattern"`
+	Pos     int     `json:"pos"`
+	Length  int     `json:"length"`
+	Elapsed float64 `json:"elapsedMs"` // Elapsed time spent matching for this rule.
+}
+
+// TracingLexer is a Lexer that can trace its tokenisation process.
+type TracingLexer interface {
+	Lexer
+	SetTracing(enable bool)
+}
+
 // Lexers is a slice of lexers sortable by name.
 type Lexers []Lexer
```
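As the Iterator changes in regexp.go below show, Length is the byte length of the winning match, or -1 when no rule matched at the current position, and Elapsed measures only the time spent in matchRules for that single step.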

regexp.go

Lines changed: 33 additions & 3 deletions

```diff
@@ -1,6 +1,7 @@
 package chroma
 
 import (
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -135,11 +136,20 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
 }
 
 // Trace enables debug tracing.
+//
+// Deprecated: Use SetTracing instead.
 func (r *RegexLexer) Trace(trace bool) *RegexLexer {
 	r.trace = trace
 	return r
 }
 
+// SetTracing enables debug tracing.
+//
+// This complies with the [TracingLexer] interface.
+func (r *RegexLexer) SetTracing(trace bool) {
+	r.trace = trace
+}
+
 // A CompiledRule is a Rule with a pre-compiled regex.
 //
 // Note that regular expressions are lazily compiled on first use of the lexer.
@@ -185,6 +195,7 @@ func (l *LexerState) Get(key interface{}) interface{} {
 
 // Iterator returns the next Token from the lexer.
 func (l *LexerState) Iterator() Token { // nolint: gocognit
+	trace := json.NewEncoder(os.Stderr)
 	end := len(l.Text)
 	if l.newlineAdded {
 		end--
@@ -205,14 +216,33 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 	}
 
 	l.State = l.Stack[len(l.Stack)-1]
-	if l.Lexer.trace {
-		fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q\n", l.State, l.Pos, string(l.Text[l.Pos:]))
-	}
 	selectedRule, ok := l.Rules[l.State]
 	if !ok {
 		panic("unknown state " + l.State)
 	}
+	var start time.Time
+	if l.Lexer.trace {
+		start = time.Now()
+	}
 	ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
+	if l.Lexer.trace {
+		var length int
+		if groups != nil {
+			length = len(groups[0])
+		} else {
+			length = -1
+		}
+		_ = trace.Encode(Trace{ //nolint
+			Lexer:   l.Lexer.config.Name,
+			State:   l.State,
+			Rule:    ruleIndex,
+			Pattern: rule.Pattern,
+			Pos:     l.Pos,
+			Length:  length,
+			Elapsed: float64(time.Since(start)) / float64(time.Millisecond),
+		})
+		// fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q, elapsed=%s\n", l.State, l.Pos, string(l.Text[l.Pos:]), time.Since(start))
+	}
 	// No match.
 	if groups == nil {
 		// From Pygments :\
```
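Since each rule match is now a structured record, hotspots can be aggregated as well as filtered. Another sketch under the same assumptions as above, summing elapsed time per state and pattern from a captured trace stream:

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"sort"

	"github.com/alecthomas/chroma/v2"
)

func main() {
	// Accumulate total elapsed milliseconds per (state, pattern) pair.
	totals := map[string]float64{}
	dec := json.NewDecoder(os.Stdin)
	for dec.More() {
		var t chroma.Trace
		if err := dec.Decode(&t); err != nil {
			break
		}
		totals[fmt.Sprintf("%s %q", t.State, t.Pattern)] += t.Elapsed
	}
	// Report the slowest rules first.
	keys := make([]string, 0, len(totals))
	for k := range totals {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool { return totals[keys[i]] > totals[keys[j]] })
	for _, k := range keys {
		fmt.Printf("%10.3fms  %s\n", totals[k], k)
	}
}
```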
