fix: incorrect dim set when creating BM25 doc sparse array (#37717)

issue: https://github.com/milvus-io/milvus/issues/35853

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
pull/37744/head
Buqian Zheng 2024-11-16 16:36:30 +08:00 committed by GitHub
parent 3cdb485022
commit 00edec2ebd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 10 additions and 8 deletions

View File

@@ -458,9 +458,9 @@ func (m *BM25Stats) Deserialize(bs []byte) error {
 }
 func (m *BM25Stats) BuildIDF(tf []byte) (idf []byte) {
-dim := typeutil.SparseFloatRowElementCount(tf)
+numElements := typeutil.SparseFloatRowElementCount(tf)
 idf = make([]byte, len(tf))
-for idx := 0; idx < dim; idx++ {
+for idx := 0; idx < numElements; idx++ {
 key := typeutil.SparseFloatRowIndexAt(tf, idx)
 value := typeutil.SparseFloatRowValueAt(tf, idx)
 nq := m.rowsWithToken[key]

View File

@@ -107,7 +107,7 @@ func (v *BM25FunctionRunner) run(data []string, dst []map[uint32]float32) error
 func (v *BM25FunctionRunner) BatchRun(inputs ...any) ([]any, error) {
 if len(inputs) > 1 {
-return nil, fmt.Errorf("BM25 function receieve more than one input")
+return nil, fmt.Errorf("BM25 function received more than one input column")
 }
 text, ok := inputs[0].([]string)
@@ -158,16 +158,18 @@ func (v *BM25FunctionRunner) GetOutputFields() []*schemapb.FieldSchema {
 }
 func buildSparseFloatArray(mapdata []map[uint32]float32) *schemapb.SparseFloatArray {
-dim := 0
+dim := int64(0)
 bytes := lo.Map(mapdata, func(sparseMap map[uint32]float32, _ int) []byte {
-if len(sparseMap) > dim {
-dim = len(sparseMap)
+row := typeutil.CreateAndSortSparseFloatRow(sparseMap)
+rowDim := typeutil.SparseFloatRowDim(row)
+if rowDim > dim {
+dim = rowDim
 }
-return typeutil.CreateAndSortSparseFloatRow(sparseMap)
+return row
 })
 return &schemapb.SparseFloatArray{
 Contents: bytes,
-Dim: int64(dim),
+Dim: dim,
 }
 }