Skip to content

Commit 4cc383c

Browse files
use a pool of buffers to alleviate memory allocs in csv; related to #553
When iterating over multiple files, the CSV detector allocated a new buffer for each file. This change adds a pool of buffers that can be reused between detections. The same pool is shared between the CSV and TSV detectors.
1 parent d7081cc commit 4cc383c

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

internal/magic/text_csv.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,28 @@
11
package magic
22

33
import (
4+
"bufio"
45
"bytes"
56
"encoding/csv"
67
"errors"
78
"io"
9+
"sync"
810
)
911

12+
// readerPool keeps *bufio.Reader values around for reuse so that each
// detection does not have to allocate a fresh read buffer. Readers are
// created without a source; newReader attaches one via Reset when a reader
// is taken from the pool.
var readerPool = sync.Pool{New: func() any { return bufio.NewReader(nil) }}
19+
20+
func newReader(r io.Reader) *bufio.Reader {
21+
br := readerPool.Get().(*bufio.Reader)
22+
br.Reset(r)
23+
return br
24+
}
25+
1026
// Csv matches a comma-separated values file.
1127
func Csv(raw []byte, limit uint32) bool {
1228
return sv(raw, ',', limit)
@@ -18,7 +34,11 @@ func Tsv(raw []byte, limit uint32) bool {
1834
}
1935

2036
func sv(in []byte, comma rune, limit uint32) bool {
21-
r := csv.NewReader(bytes.NewReader(dropLastLine(in, limit)))
37+
in = dropLastLine(in, limit)
38+
39+
br := newReader(bytes.NewReader(in))
40+
defer readerPool.Put(br)
41+
r := csv.NewReader(br)
2242
r.Comma = comma
2343
r.ReuseRecord = true
2444
r.LazyQuotes = true

0 commit comments

Comments
 (0)