Skip to content

Commit ec27e50

Browse files
committed
go[PERF]: Prevent creation of a slice header in decodeU16
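The previous change made me realize that every call to `decodeU16` was creating a new slice header, because each call site re-sliced the bytecode (e.g. `vm.bytecode.code[vm.pc+1:]`) just to pass it as the argument. Instead, we now pass the code slice itself together with an offset, so no intermediate slice is built. This commit also keeps a local reference to `vm.bytecode.code` inside `MatchRule` and adds a case for `opJump`. The change has shown very good improvements in the benchmarks.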
1 parent 90c0ec3 commit ec27e50

File tree

1 file changed

+28
-19
lines changed

1 file changed

+28
-19
lines changed

go/vm.go

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -193,15 +193,21 @@ func (vm *virtualMachine) MatchRule(input Input, ruleAddress int) (Value, int, e
193193
// dbg := func(m string) {}
194194
// dbg = func(m string) { fmt.Print(m) }
195195

196+
// we want to reset the VM state every match
196197
vm.reset()
197198

199+
// take a local reference to the code
200+
code := vm.bytecode.code
201+
202+
// if a rule was received, push a call frame for it and set
203+
// the program counter appropriately
198204
if ruleAddress > 0 {
199205
vm.stack.push(vm.mkCallFrame(opCallSizeInBytes))
200206
vm.pc = ruleAddress
201207
}
202208
code:
203209
for {
204-
op := vm.bytecode.code[vm.pc]
210+
op := code[vm.pc]
205211
// dbg(fmt.Sprintf("in[c=%02d, pc=%02d]: 0x%x=%s\n", vm.cursor, vm.pc, op, opNames[op]))
206212

207213
switch op {
@@ -226,7 +232,7 @@ code:
226232
vm.pc += opAnySizeInBytes
227233

228234
case opChar:
229-
e := rune(decodeU16(vm.bytecode.code[vm.pc+1:]))
235+
e := rune(decodeU16(code, vm.pc+1))
230236
c, s, err := input.ReadRune()
231237
if err != nil {
232238
if err == io.EOF {
@@ -249,8 +255,8 @@ code:
249255
}
250256
return nil, vm.cursor, err
251257
}
252-
a := rune(decodeU16(vm.bytecode.code[vm.pc+1:]))
253-
b := rune(decodeU16(vm.bytecode.code[vm.pc+3:]))
258+
a := rune(decodeU16(code, vm.pc+1))
259+
b := rune(decodeU16(code, vm.pc+3))
254260
if c < a || c > b {
255261
vm.updateFFP(expected{a: a, b: b})
256262
goto fail
@@ -266,7 +272,7 @@ code:
266272
}
267273
return nil, vm.cursor, err
268274
}
269-
i := decodeU16(vm.bytecode.code[vm.pc+1:])
275+
i := decodeU16(code, vm.pc+1)
270276
ru := rune(c)
271277
if !vm.bytecode.sets[i].has(ru) {
272278
vm.updateSetFFP(i)
@@ -276,7 +282,7 @@ code:
276282
vm.pc += opSetSizeInBytes
277283

278284
case opSpan:
279-
sid := decodeU16(vm.bytecode.code[vm.pc+1:])
285+
sid := decodeU16(code, vm.pc+1)
280286
set := vm.bytecode.sets[sid]
281287
for {
282288
c, err := input.PeekByte()
@@ -304,23 +310,23 @@ code:
304310
goto fail
305311

306312
case opChoice:
307-
lb := int(decodeU16(vm.bytecode.code[vm.pc+1:]))
313+
lb := int(decodeU16(code, vm.pc+1))
308314
vm.stack.push(vm.mkBacktrackFrame(lb))
309315
vm.pc += opChoiceSizeInBytes
310316

311317
case opChoicePred:
312-
lb := int(decodeU16(vm.bytecode.code[vm.pc+1:]))
318+
lb := int(decodeU16(code, vm.pc+1))
313319
vm.stack.push(vm.mkBacktrackPredFrame(lb))
314320
vm.pc += opChoiceSizeInBytes
315321
vm.predicate = true
316322

317323
case opCommit:
318324
f := vm.stack.pop()
319325
vm.stack.capture(f.values...)
320-
vm.pc = int(decodeU16(vm.bytecode.code[vm.pc+1:]))
326+
vm.pc = int(decodeU16(code, vm.pc+1))
321327

322328
case opPartialCommit:
323-
vm.pc = int(decodeU16(vm.bytecode.code[vm.pc+1:]))
329+
vm.pc = int(decodeU16(code, vm.pc+1))
324330
top := vm.stack.top()
325331
top.cursor = vm.cursor
326332
// Skip collectCaptures if the top frame is
@@ -335,23 +341,26 @@ code:
335341
f := vm.stack.pop()
336342
vm.stack.capture(f.values...)
337343
vm.backtrackToFrame(input, f)
338-
vm.pc = int(decodeU16(vm.bytecode.code[vm.pc+1:]))
344+
vm.pc = int(decodeU16(code, vm.pc+1))
339345

340346
case opCall:
341347
vm.stack.push(vm.mkCallFrame(vm.pc + opCallSizeInBytes))
342-
vm.pc = int(decodeU16(vm.bytecode.code[vm.pc+1:]))
348+
vm.pc = int(decodeU16(code, vm.pc+1))
343349

344350
case opReturn:
345351
f := vm.stack.pop()
346352
vm.stack.capture(f.values...)
347353
vm.pc = f.pc
348354

355+
case opJump:
356+
vm.pc = int(decodeU16(code, vm.pc+1))
357+
349358
case opThrow:
350359
if vm.predicate {
351360
vm.pc += opThrowSizeInBytes
352361
goto fail
353362
}
354-
lb := int(decodeU16(vm.bytecode.code[vm.pc+1:]))
363+
lb := int(decodeU16(code, vm.pc+1))
355364
id := vm.bytecode.strs[lb]
356365
if addr, ok := vm.bytecode.rxps[lb]; ok {
357366
vm.stack.push(vm.mkCallFrame(vm.pc + opThrowSizeInBytes))
@@ -361,7 +370,7 @@ code:
361370
return nil, vm.cursor, vm.mkErr(input, id, vm.cursor)
362371

363372
case opCapBegin:
364-
id := int(decodeU16(vm.bytecode.code[vm.pc+1:]))
373+
id := int(decodeU16(code, vm.pc+1))
365374
vm.stack.push(vm.mkCaptureFrame(id))
366375
vm.pc += opCapBeginSizeInBytes
367376

@@ -370,12 +379,12 @@ code:
370379
vm.pc += opCapEndSizeInBytes
371380

372381
case opCapTerm:
373-
vm.newTermNode(input, int(decodeU16(vm.bytecode.code[vm.pc+1:])))
382+
vm.newTermNode(input, int(decodeU16(code, vm.pc+1)))
374383
vm.pc += opCapTermSizeInBytes
375384

376385
case opCapNonTerm:
377-
id := int(decodeU16(vm.bytecode.code[vm.pc+1:]))
378-
offset := int(decodeU16(vm.bytecode.code[vm.pc+3:]))
386+
id := int(decodeU16(code, vm.pc+1))
387+
offset := int(decodeU16(code, vm.pc+3))
379388
vm.newNonTermNode(input, id, offset)
380389
vm.pc += opCapNonTermSizeInBytes
381390

@@ -646,6 +655,6 @@ func (vm *virtualMachine) mkErr(input Input, errLabel string, errCursor int) err
646655

647656
// decodeU16 decodes a little-endian uint16 from the two bytes of `code` starting at `offset`. See
648657
// https://github.com/golang/go/issues/14808
649-
func decodeU16(b []byte) uint16 {
650-
return uint16(b[0]) | uint16(b[1])<<8
658+
func decodeU16(code []byte, offset int) uint16 {
659+
return uint16(code[offset]) | uint16(code[offset+1])<<8
651660
}

0 commit comments

Comments
 (0)