Skip to content

Commit ecdecd1

Browse files
burmanmp authored and pavolloffay committed
Fix badger merge-join algorithm to correctly filter indexes (#1721)
* Fix merge-join algorithm to correctly filter indexes, closes #1719

  Signed-off-by: Michael Burman <yak@iki.fi>

* Address comments

  Signed-off-by: Michael Burman <yak@iki.fi>
1 parent 9740087 commit ecdecd1

File tree

3 files changed

+88
-37
lines changed

3 files changed

+88
-37
lines changed

plugin/storage/badger/spanstore/read_write_test.go

+6
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ func TestIndexSeeks(t *testing.T) {
161161
VStr: fmt.Sprintf("val%d", j),
162162
VType: model.StringType,
163163
},
164+
{
165+
Key: "error",
166+
VType: model.BoolType,
167+
VBool: true,
168+
},
164169
},
165170
}
166171
err := sw.WriteSpan(&s)
@@ -200,6 +205,7 @@ func TestIndexSeeks(t *testing.T) {
200205
params.OperationName = "operation-1"
201206
tags := make(map[string]string)
202207
tags["k11"] = "val0"
208+
tags["error"] = "true"
203209
params.Tags = tags
204210
params.DurationMin = time.Duration(1 * time.Millisecond)
205211
// params.DurationMax = time.Duration(1 * time.Hour)

plugin/storage/badger/spanstore/reader.go

+48-37
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,12 @@ func (r *TraceReader) getTraces(traceIDs []model.TraceID) ([]*model.Trace, error
105105

106106
err := r.store.View(func(txn *badger.Txn) error {
107107
opts := badger.DefaultIteratorOptions
108-
opts.PrefetchSize = 10 // TraceIDs are not sorted, pointless to prefetch large amount of values
109108
it := txn.NewIterator(opts)
110109
defer it.Close()
111110

112111
val := []byte{}
113112
for _, prefix := range prefixes {
114-
spans := make([]*model.Span, 0, 4) // reduce reallocation requirements by defining some initial length
113+
spans := make([]*model.Span, 0, 32) // reduce reallocation requirements by defining some initial length
115114

116115
for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
117116
// Add value to the span store (decode from JSON / defined encoding first)
@@ -346,53 +345,65 @@ func (r *TraceReader) durationQueries(query *spanstore.TraceQueryParameters, ids
346345
return ids
347346
}
348347

348+
func mergeJoinIds(left, right [][]byte) [][]byte {
349+
// len(left) or len(right) is the maximum, whichever is the smallest
350+
allocateSize := len(left)
351+
if len(right) < allocateSize {
352+
allocateSize = len(right)
353+
}
354+
355+
merged := make([][]byte, 0, allocateSize)
356+
357+
lMax := len(left) - 1
358+
rMax := len(right) - 1
359+
for r, l := 0, 0; r <= rMax && l <= lMax; {
360+
switch bytes.Compare(left[l], right[r]) {
361+
case 0:
362+
// Left matches right - merge
363+
merged = append(merged, left[l])
364+
// Advance both
365+
l++
366+
r++
367+
case 1:
368+
// left > right, increase right one
369+
r++
370+
case -1:
371+
// left < right, increase left one
372+
l++
373+
}
374+
}
375+
return merged
376+
}
377+
349378
// sortMergeIds does a sort-merge join operation to the list of TraceIDs to remove duplicates
350379
func sortMergeIds(query *spanstore.TraceQueryParameters, ids [][][]byte) []model.TraceID {
351380
// Key only scan is a lot faster in the badger - use sort-merge join algorithm instead of hash join since we have the keys in sorted order already
352-
intersected := ids[0]
353-
mergeIntersected := make([][]byte, 0, len(intersected)) // intersected is the maximum size
381+
382+
var merged [][]byte
354383

355384
if len(ids) > 1 {
356-
for i := 1; i < len(ids); i++ {
357-
mergeIntersected = make([][]byte, 0, len(intersected)) // intersected is the maximum size
358-
k := len(intersected) - 1
359-
for j := len(ids[i]) - 1; j >= 0 && k >= 0; {
360-
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
361-
switch bytes.Compare(intersected[k], ids[i][j]) {
362-
case 1:
363-
k-- // Move on to the next item in the intersected list
364-
// a > b
365-
case -1:
366-
j--
367-
// a < b
368-
// Move on to next iteration of j
369-
case 0:
370-
mergeIntersected = append(mergeIntersected, intersected[k])
371-
k-- // Move on to next item
372-
// Match
373-
}
374-
}
375-
intersected = mergeIntersected
385+
merged = mergeJoinIds(ids[0], ids[1])
386+
for i := 2; i < len(ids); i++ {
387+
merged = mergeJoinIds(merged, ids[i])
376388
}
377-
378389
} else {
379-
// mergeIntersected should be reversed intersected
380-
for i, j := 0, len(intersected)-1; j >= 0; i, j = i+1, j-1 {
381-
mergeIntersected = append(mergeIntersected, intersected[j])
382-
}
383-
intersected = mergeIntersected
390+
merged = ids[0]
391+
}
392+
393+
// Get top query.NumTraces results (order in DESC)
394+
if query.NumTraces < len(merged) {
395+
merged = merged[len(merged)-query.NumTraces:]
384396
}
385397

386-
// Get top query.NumTraces results (note, the slice is now in descending timestamp order)
387-
if query.NumTraces < len(intersected) {
388-
intersected = intersected[:query.NumTraces]
398+
// Results are in ASC (badger's default order), but Jaeger uses DESC, thus we need to reverse the array
399+
for left, right := 0, len(merged)-1; left < right; left, right = left+1, right-1 {
400+
merged[left], merged[right] = merged[right], merged[left]
389401
}
390402

391-
// Enrich the traceIds to model.Trace
392-
// result := make([]*model.Trace, 0, len(intersected))
393-
keys := make([]model.TraceID, 0, len(intersected))
403+
// Create the structs from [][]byte to TraceID
404+
keys := make([]model.TraceID, 0, len(merged))
394405

395-
for _, key := range intersected {
406+
for _, key := range merged {
396407
keys = append(keys, model.TraceID{
397408
High: binary.BigEndian.Uint64(key[:8]),
398409
Low: binary.BigEndian.Uint64(key[8:]),

plugin/storage/badger/spanstore/rw_internal_test.go

+34
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,37 @@ func createDummySpan() model.Span {
176176

177177
return testSpan
178178
}
179+
180+
func TestMergeJoin(t *testing.T) {
181+
assert := assert.New(t)
182+
183+
// Test equals
184+
185+
left := make([][]byte, 16)
186+
right := make([][]byte, 16)
187+
188+
for i := 0; i < 16; i++ {
189+
left[i] = make([]byte, 4)
190+
binary.BigEndian.PutUint32(left[i], uint32(i))
191+
192+
right[i] = make([]byte, 4)
193+
binary.BigEndian.PutUint32(right[i], uint32(i))
194+
}
195+
196+
merged := mergeJoinIds(left, right)
197+
assert.Equal(16, len(merged))
198+
199+
// Check order
200+
assert.Equal(uint32(15), binary.BigEndian.Uint32(merged[15]))
201+
202+
// Test simple non-equality different size
203+
204+
merged = mergeJoinIds(left[1:2], right[13:])
205+
assert.Empty(merged)
206+
207+
// Different size, some equalities
208+
209+
merged = mergeJoinIds(left[0:3], right[1:7])
210+
assert.Equal(2, len(merged))
211+
assert.Equal(uint32(2), binary.BigEndian.Uint32(merged[1]))
212+
}

0 commit comments

Comments
 (0)