Use new stream segment reader in clustering compaction ()



Signed-off-by: wayblink <anyang.wang@zilliz.com>
pull/34276/head
wayblink 2024-06-30 20:26:07 +08:00 committed by GitHub
parent 73ffc1b424
commit e5d691d854
1 changed file with 20 additions and 19 deletions
internal/datanode/compaction


@@ -19,6 +19,7 @@ package compaction
 import (
 	"context"
 	"fmt"
+	sio "io"
 	"math"
 	"path"
 	"sort"
@@ -481,33 +482,33 @@ func (t *clusteringCompactionTask) mappingSegment(
 			fieldBinlogPaths = append(fieldBinlogPaths, ps)
 		}
-		for _, path := range fieldBinlogPaths {
-			bytesArr, err := t.binlogIO.Download(ctx, path)
-			blobs := make([]*storage.Blob, len(bytesArr))
-			var segmentSize int64
-			for i := range bytesArr {
-				blobs[i] = &storage.Blob{Value: bytesArr[i]}
-				segmentSize = segmentSize + int64(len(bytesArr[i]))
-			}
+		for _, paths := range fieldBinlogPaths {
+			allValues, err := t.binlogIO.Download(ctx, paths)
 			if err != nil {
-				log.Warn("download insertlogs wrong", zap.Strings("path", path), zap.Error(err))
+				log.Warn("compact wrong, fail to download insertLogs", zap.Error(err))
 				return err
 			}
+			blobs := lo.Map(allValues, func(v []byte, i int) *storage.Blob {
+				return &storage.Blob{Key: paths[i], Value: v}
+			})
 
-			pkIter, err := storage.NewInsertBinlogIterator(blobs, t.primaryKeyField.GetFieldID(), t.primaryKeyField.GetDataType())
+			pkIter, err := storage.NewBinlogDeserializeReader(blobs, t.primaryKeyField.GetFieldID())
 			if err != nil {
-				log.Warn("new insert binlogs Itr wrong", zap.Strings("path", path), zap.Error(err))
+				log.Warn("new insert binlogs Itr wrong", zap.Strings("paths", paths), zap.Error(err))
 				return err
 			}
 
 			var offset int64 = -1
-			for pkIter.HasNext() {
-				vInter, _ := pkIter.Next()
-				v, ok := vInter.(*storage.Value)
-				if !ok {
-					log.Warn("transfer interface to Value wrong", zap.Strings("path", path))
-					return errors.New("unexpected error")
+			for {
+				err := pkIter.Next()
+				if err != nil {
+					if err == sio.EOF {
+						break
+					} else {
+						log.Warn("compact wrong, failed to iter through data", zap.Error(err))
+						return err
+					}
 				}
+				v := pkIter.Value()
 				offset++
 				// Filtering deleted entity
@@ -524,7 +525,7 @@ func (t *clusteringCompactionTask) mappingSegment(
 				row, ok := v.Value.(map[typeutil.UniqueID]interface{})
 				if !ok {
-					log.Warn("transfer interface to map wrong", zap.Strings("path", path))
+					log.Warn("transfer interface to map wrong", zap.Strings("paths", paths))
 					return errors.New("unexpected error")
 				}
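
The substance of the patch is a change of iteration contract. The old `storage.NewInsertBinlogIterator` exposed a `HasNext()`/`Next()` pair returning `interface{}` values that had to be type-asserted to `*storage.Value`, while the new `storage.NewBinlogDeserializeReader` follows the standard-library stream convention: `Next()` advances the reader and signals exhaustion with `io.EOF` (imported as `sio` to keep the `io` name free), and `Value()` yields the current row. A minimal, self-contained sketch of that consumption pattern, with a hypothetical `rowReader` standing in for the Milvus reader:

```go
package main

import (
	"fmt"
	"io"
)

// rowReader is a hypothetical stand-in for the stream segment reader:
// Next() advances to the next row and returns io.EOF once the data is
// exhausted; Value() returns the row that Next() positioned on.
type rowReader struct {
	rows []string
	pos  int
}

func (r *rowReader) Next() error {
	if r.pos >= len(r.rows) {
		return io.EOF // mirrors the sio.EOF check in the patch
	}
	r.pos++
	return nil
}

func (r *rowReader) Value() string {
	return r.rows[r.pos-1]
}

func main() {
	reader := &rowReader{rows: []string{"pk-1", "pk-2", "pk-3"}}
	var offset int64 = -1
	for {
		if err := reader.Next(); err != nil {
			if err == io.EOF {
				break // clean end of stream, not a failure
			}
			panic(err) // the patch logs and returns any non-EOF error
		}
		v := reader.Value()
		offset++
		fmt.Println(offset, v)
	}
}
```

Two smaller cleanups ride along: the hand-rolled blob-building loop becomes a `lo.Map` call (from github.com/samber/lo) that also records each blob's `Key`, and the `segmentSize` accumulator disappears, apparently because nothing downstream consumed it.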