@@ -21,8 +21,9 @@ type PromLoader interface {
21
21
}
22
22
23
23
type promLoader struct {
24
- db * tsdb.DBReadOnly
25
- blocks []tsdb.BlockReader
24
+ db * tsdb.DBReadOnly
25
+ blocks []tsdb.BlockReader
26
+ inMemory bool
26
27
}
27
28
28
29
// PromIterator allows us to iterate over Prometheus data
@@ -31,6 +32,27 @@ type PromIterator interface {
31
32
Get () TimeSeries
32
33
}
33
34
35
+ type inMemoryIterator struct {
36
+ data []TimeSeries
37
+ curIdx int
38
+ }
39
+
40
+ func (s * inMemoryIterator ) Next () bool {
41
+ if s .curIdx == len (s .data )- 1 {
42
+ return false
43
+ }
44
+ s .curIdx ++
45
+ return true
46
+ }
47
+
48
+ func (s * inMemoryIterator ) Get () TimeSeries {
49
+ return s .data [s .curIdx ]
50
+ }
51
+
52
+ func (s * inMemoryIterator ) append (ts TimeSeries ) {
53
+ s .data = append (s .data , ts )
54
+ }
55
+
34
56
type TimeSeries struct {
35
57
seriesHash uint64
36
58
Val prompb.TimeSeries
@@ -47,7 +69,7 @@ type promIterator struct {
47
69
blocks []tsdb.BlockReader
48
70
curBlockIdx int
49
71
labelsCache map [uint64 ]labels.Labels
50
- blockSamples []* BlockSample
72
+ blockSamples []BlockSample
51
73
curSampleIdx int
52
74
}
53
75
@@ -81,7 +103,7 @@ func (i *promIterator) Next() bool {
81
103
func (it * promIterator ) loadBlockSamples () error {
82
104
log .Info ("msg" , "loading blocks" , "total samples" , it .blocks [it .curBlockIdx ].Meta ().Stats .NumSamples ,
83
105
"series" , it .blocks [it .curBlockIdx ].Meta ().Stats .NumSeries )
84
- it .blockSamples = make ([]* BlockSample , it .blocks [it .curBlockIdx ].Meta ().Stats .NumSamples )
106
+ it .blockSamples = make ([]BlockSample , it .blocks [it .curBlockIdx ].Meta ().Stats .NumSamples )
85
107
it .labelsCache = make (map [uint64 ]labels.Labels , it .blocks [it .curBlockIdx ].Meta ().Stats .NumSeries )
86
108
querier , err := tsdb .NewBlockQuerier (it .blocks [it .curBlockIdx ], math .MinInt64 , math .MaxInt64 )
87
109
if err != nil {
@@ -103,8 +125,7 @@ func (it *promIterator) loadBlockSamples() error {
103
125
}
104
126
for seriesIt .Next () {
105
127
ts , val := seriesIt .At ()
106
- sample := & BlockSample {ts , val , lblsHash }
107
- it .blockSamples [sampleCounter ] = sample
128
+ it .blockSamples [sampleCounter ] = BlockSample {ts , val , lblsHash }
108
129
sampleCounter ++
109
130
}
110
131
}
@@ -130,11 +151,13 @@ func (i *promIterator) Get() TimeSeries {
130
151
Labels : protoLabels ,
131
152
Samples : []prompb.Sample {sample },
132
153
}
133
-
134
154
return TimeSeries {blockSample .lblsHash , ts }
135
155
}
136
156
137
- func NewPromLoader (dataDir string ) (PromLoader , error ) {
157
+ // PromLoader can preload the whole dataset in memory which can be useful to
158
+ // get accurate memory allocations when benchmarking. However it does mean that bench
159
+ // test needs more memory to run so make sure that test dataset can fit into memory
160
+ func NewPromLoader (dataDir string , inMemory bool ) (PromLoader , error ) {
138
161
db , err := tsdb .OpenDBReadOnly (dataDir , nil )
139
162
if err != nil {
140
163
return nil , fmt .Errorf ("error starting Prometheus TSDB in read-only: %v" , err )
@@ -143,11 +166,20 @@ func NewPromLoader(dataDir string) (PromLoader, error) {
143
166
if err != nil {
144
167
return nil , fmt .Errorf ("error loading data blocks: %v" , err )
145
168
}
146
- return & promLoader {db : db , blocks : blocks }, nil
169
+ return & promLoader {db : db , blocks : blocks , inMemory : inMemory }, nil
147
170
}
148
171
149
172
func (loader * promLoader ) Iterator () PromIterator {
150
- return & promIterator {blocks : loader .blocks , curSampleIdx : - 1 , curBlockIdx : - 1 }
173
+ it := & promIterator {blocks : loader .blocks , curSampleIdx : - 1 , curBlockIdx : - 1 }
174
+ if loader .inMemory {
175
+ store := & inMemoryIterator {data : make ([]TimeSeries , 0 ), curIdx : - 1 }
176
+ for it .Next () {
177
+ ts := it .Get ()
178
+ store .append (ts )
179
+ }
180
+ return store
181
+ }
182
+ return it
151
183
}
152
184
153
185
func (loader * promLoader ) Close () error {
@@ -200,29 +232,40 @@ func (si *sampleIngestor) shardSamples() {
200
232
si .shards [shardIdx ] <- sample .Val
201
233
if si .rate != nil {
202
234
if err := si .rate .Wait (context .Background ()); err != nil {
203
- log .Error (err )
235
+ log .Error ("msg" , err )
204
236
}
205
237
}
206
238
}
207
239
}()
208
240
}
209
241
210
242
func (si * sampleIngestor ) ingestSamples (ingest IngestFunc ) {
211
- var wg sync.WaitGroup
243
+ var shardWg sync.WaitGroup
244
+ var ingestWg sync.WaitGroup
245
+ reqCh := make (chan prompb.WriteRequest , 100 )
212
246
for i := 0 ; i < len (si .shards ); i ++ {
213
- wg .Add (1 )
247
+ ingestWg .Add (1 )
248
+ go func () {
249
+ defer func () {
250
+ ingestWg .Done ()
251
+ }()
252
+ for req := range reqCh {
253
+ if _ , _ , err := ingest (context .Background (), & req ); err != nil {
254
+ log .Error ("msg" , err )
255
+ }
256
+ }
257
+ }()
258
+ shardWg .Add (1 )
214
259
go func (shard int ) {
215
260
defer func () {
216
- wg .Done ()
261
+ shardWg .Done ()
217
262
}()
218
263
var req prompb.WriteRequest = prompb.WriteRequest {Timeseries : make ([]prompb.TimeSeries , si .batchSize )}
219
264
counter := 0
220
265
for ts := range si .shards [shard ] {
221
266
if counter == si .batchSize {
222
- if _ , _ , err := ingest (context .Background (), & req ); err != nil {
223
- log .Error (err )
224
- }
225
267
req = prompb.WriteRequest {Timeseries : make ([]prompb.TimeSeries , si .batchSize )}
268
+ reqCh <- req
226
269
counter = 0
227
270
} else {
228
271
req .Timeseries [counter ] = ts
@@ -231,11 +274,11 @@ func (si *sampleIngestor) ingestSamples(ingest IngestFunc) {
231
274
}
232
275
if len (req .Timeseries ) > 0 {
233
276
// flush leftovers
234
- if _ , _ , err := ingest (context .Background (), & req ); err != nil {
235
- log .Error (err )
236
- }
277
+ reqCh <- req
237
278
}
238
279
}(i )
239
280
}
240
- wg .Wait ()
281
+ shardWg .Wait ()
282
+ close (reqCh )
283
+ ingestWg .Wait ()
241
284
}
0 commit comments