mirror of https://github.com/milvus-io/milvus.git
436 lines
13 KiB
Go
436 lines
13 KiB
Go
package indexcgowrapper
|
|
|
|
/*
|
|
#cgo pkg-config: milvus_core
|
|
|
|
#include <stdlib.h> // free
|
|
#include "indexbuilder/index_c.h"
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path/filepath"
|
|
"runtime"
|
|
"unsafe"
|
|
|
|
"go.uber.org/zap"
|
|
"google.golang.org/protobuf/encoding/prototext"
|
|
"google.golang.org/protobuf/proto"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/proto/indexcgopb"
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
)
|
|
|
|
// Blob is an alias of storage.Blob. It is the unit that Serialize produces and
// Load consumes; this file reads and writes its Key, Value and MemorySize fields.
type Blob = storage.Blob
|
|
|
|
// IndexFileInfo describes one entry of a serialized index: the name of the
// entry and its size as reported by the binary set.
type IndexFileInfo struct {
	// FileName is the key of the entry inside the binary set.
	FileName string
	// FileSize is the size reported for that entry.
	FileSize int64
}
|
|
|
|
type CodecIndex interface {
|
|
Build(*Dataset) error
|
|
Serialize() ([]*Blob, error)
|
|
GetIndexFileInfo() ([]*IndexFileInfo, error)
|
|
Load([]*Blob) error
|
|
Delete() error
|
|
CleanLocalData() error
|
|
UpLoad() (map[string]int64, error)
|
|
}
|
|
|
|
// Compile-time check that *CgoIndex implements CodecIndex.
var _ CodecIndex = (*CgoIndex)(nil)
|
|
|
|
type CgoIndex struct {
|
|
indexPtr C.CIndex
|
|
close bool
|
|
}
|
|
|
|
// used only in test
|
|
// TODO: use proto.Marshal instead of proto.MarshalTextString for better compatibility.
|
|
func NewCgoIndex(dtype schemapb.DataType, typeParams, indexParams map[string]string) (CodecIndex, error) {
|
|
protoTypeParams := &indexcgopb.TypeParams{
|
|
Params: make([]*commonpb.KeyValuePair, 0),
|
|
}
|
|
for key, value := range typeParams {
|
|
protoTypeParams.Params = append(protoTypeParams.Params, &commonpb.KeyValuePair{Key: key, Value: value})
|
|
}
|
|
// typeParamsStr := proto.MarshalTextString(protoTypeParams)
|
|
typeParamsStr, _ := prototext.Marshal(protoTypeParams)
|
|
|
|
protoIndexParams := &indexcgopb.IndexParams{
|
|
Params: make([]*commonpb.KeyValuePair, 0),
|
|
}
|
|
for key, value := range indexParams {
|
|
protoIndexParams.Params = append(protoIndexParams.Params, &commonpb.KeyValuePair{Key: key, Value: value})
|
|
}
|
|
// indexParamsStr := proto.MarshalTextString(protoIndexParams)
|
|
indexParamsStr, _ := prototext.Marshal(protoIndexParams)
|
|
|
|
typeParamsPointer := C.CString(string(typeParamsStr))
|
|
indexParamsPointer := C.CString(string(indexParamsStr))
|
|
defer C.free(unsafe.Pointer(typeParamsPointer))
|
|
defer C.free(unsafe.Pointer(indexParamsPointer))
|
|
|
|
var indexPtr C.CIndex
|
|
cintDType := uint32(dtype)
|
|
status := C.CreateIndexV0(cintDType, typeParamsPointer, indexParamsPointer, &indexPtr)
|
|
if err := HandleCStatus(&status, "failed to create index"); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
index := &CgoIndex{
|
|
indexPtr: indexPtr,
|
|
close: false,
|
|
}
|
|
|
|
runtime.SetFinalizer(index, func(index *CgoIndex) {
|
|
if index != nil && !index.close {
|
|
log.Error("there is leakage in index object, please check.")
|
|
}
|
|
})
|
|
|
|
return index, nil
|
|
}
|
|
|
|
func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) {
|
|
buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo)
|
|
if err != nil {
|
|
log.Ctx(ctx).Warn("marshal buildIndexInfo failed",
|
|
zap.String("clusterID", buildIndexInfo.GetClusterID()),
|
|
zap.Int64("buildID", buildIndexInfo.GetBuildID()),
|
|
zap.Error(err))
|
|
return nil, err
|
|
}
|
|
var indexPtr C.CIndex
|
|
status := C.CreateIndex(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob)))
|
|
if err := HandleCStatus(&status, "failed to create index"); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
index := &CgoIndex{
|
|
indexPtr: indexPtr,
|
|
close: false,
|
|
}
|
|
|
|
runtime.SetFinalizer(index, func(index *CgoIndex) {
|
|
if index != nil && !index.close {
|
|
log.Error("there is leakage in index object, please check.")
|
|
}
|
|
})
|
|
|
|
return index, nil
|
|
}
|
|
|
|
func CreateTextIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (map[string]int64, error) {
|
|
buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo)
|
|
if err != nil {
|
|
log.Ctx(ctx).Warn("marshal buildIndexInfo failed",
|
|
zap.String("clusterID", buildIndexInfo.GetClusterID()),
|
|
zap.Int64("buildID", buildIndexInfo.GetBuildID()),
|
|
zap.Error(err))
|
|
return nil, err
|
|
}
|
|
var cBinarySet C.CBinarySet
|
|
status := C.BuildTextIndex(&cBinarySet, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob)))
|
|
if err := HandleCStatus(&status, "failed to build text index"); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer func() {
|
|
if cBinarySet != nil {
|
|
C.DeleteBinarySet(cBinarySet)
|
|
}
|
|
}()
|
|
|
|
res := make(map[string]int64)
|
|
indexFilePaths, err := GetBinarySetKeys(cBinarySet)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, path := range indexFilePaths {
|
|
size, err := GetBinarySetSize(cBinarySet, path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
res[path] = size
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
// TODO: this seems to be used only for test. We should mark the method
|
|
// name with ForTest, or maybe move to test file.
|
|
func (index *CgoIndex) Build(dataset *Dataset) error {
|
|
switch dataset.DType {
|
|
case schemapb.DataType_None:
|
|
return fmt.Errorf("build index on supported data type: %s", dataset.DType.String())
|
|
case schemapb.DataType_FloatVector:
|
|
return index.buildFloatVecIndex(dataset)
|
|
case schemapb.DataType_Float16Vector:
|
|
return index.buildFloat16VecIndex(dataset)
|
|
case schemapb.DataType_BFloat16Vector:
|
|
return index.buildBFloat16VecIndex(dataset)
|
|
case schemapb.DataType_BinaryVector:
|
|
return index.buildBinaryVecIndex(dataset)
|
|
case schemapb.DataType_Bool:
|
|
return index.buildBoolIndex(dataset)
|
|
case schemapb.DataType_Int8:
|
|
return index.buildInt8Index(dataset)
|
|
case schemapb.DataType_Int16:
|
|
return index.buildInt16Index(dataset)
|
|
case schemapb.DataType_Int32:
|
|
return index.buildInt32Index(dataset)
|
|
case schemapb.DataType_Int64:
|
|
return index.buildInt64Index(dataset)
|
|
case schemapb.DataType_Float:
|
|
return index.buildFloatIndex(dataset)
|
|
case schemapb.DataType_Double:
|
|
return index.buildDoubleIndex(dataset)
|
|
case schemapb.DataType_String:
|
|
return index.buildStringIndex(dataset)
|
|
case schemapb.DataType_VarChar:
|
|
return index.buildStringIndex(dataset)
|
|
default:
|
|
return fmt.Errorf("build index on unsupported data type: %s", dataset.DType.String())
|
|
}
|
|
}
|
|
|
|
func (index *CgoIndex) buildFloatVecIndex(dataset *Dataset) error {
|
|
vectors := dataset.Data[keyRawArr].([]float32)
|
|
status := C.BuildFloatVecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (*C.float)(&vectors[0]))
|
|
return HandleCStatus(&status, "failed to build float vector index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildFloat16VecIndex(dataset *Dataset) error {
|
|
vectors := dataset.Data[keyRawArr].([]byte)
|
|
status := C.BuildFloat16VecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (*C.uint8_t)(&vectors[0]))
|
|
return HandleCStatus(&status, "failed to build float16 vector index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildBFloat16VecIndex(dataset *Dataset) error {
|
|
vectors := dataset.Data[keyRawArr].([]byte)
|
|
status := C.BuildBFloat16VecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (*C.uint8_t)(&vectors[0]))
|
|
return HandleCStatus(&status, "failed to build bfloat16 vector index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildSparseFloatVecIndex(dataset *Dataset) error {
|
|
vectors := dataset.Data[keyRawArr].([]byte)
|
|
status := C.BuildSparseFloatVecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (C.int64_t)(0), (*C.uint8_t)(&vectors[0]))
|
|
return HandleCStatus(&status, "failed to build sparse float vector index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildBinaryVecIndex(dataset *Dataset) error {
|
|
vectors := dataset.Data[keyRawArr].([]byte)
|
|
status := C.BuildBinaryVecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (*C.uint8_t)(&vectors[0]))
|
|
return HandleCStatus(&status, "failed to build binary vector index")
|
|
}
|
|
|
|
// TODO: investigate if we can pass an bool array to cgo.
|
|
func (index *CgoIndex) buildBoolIndex(dataset *Dataset) error {
|
|
arr := dataset.Data[keyRawArr].([]bool)
|
|
f := &schemapb.BoolArray{
|
|
Data: arr,
|
|
}
|
|
data, err := proto.Marshal(f)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
// TODO: refactor this duplicated code once generic programming is supported.
|
|
|
|
func (index *CgoIndex) buildInt8Index(dataset *Dataset) error {
|
|
data := dataset.Data[keyRawArr].([]int8)
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildInt16Index(dataset *Dataset) error {
|
|
data := dataset.Data[keyRawArr].([]int16)
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildInt32Index(dataset *Dataset) error {
|
|
data := dataset.Data[keyRawArr].([]int32)
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildInt64Index(dataset *Dataset) error {
|
|
data := dataset.Data[keyRawArr].([]int64)
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildFloatIndex(dataset *Dataset) error {
|
|
data := dataset.Data[keyRawArr].([]float32)
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildDoubleIndex(dataset *Dataset) error {
|
|
data := dataset.Data[keyRawArr].([]float64)
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) buildStringIndex(dataset *Dataset) error {
|
|
arr := dataset.Data[keyRawArr].([]string)
|
|
f := &schemapb.StringArray{
|
|
Data: arr,
|
|
}
|
|
data, err := proto.Marshal(f)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
status := C.BuildScalarIndex(index.indexPtr, (C.int64_t)(len(data)), unsafe.Pointer(&data[0]))
|
|
return HandleCStatus(&status, "failed to build scalar index")
|
|
}
|
|
|
|
func (index *CgoIndex) Serialize() ([]*Blob, error) {
|
|
var cBinarySet C.CBinarySet
|
|
|
|
status := C.SerializeIndexToBinarySet(index.indexPtr, &cBinarySet)
|
|
defer func() {
|
|
if cBinarySet != nil {
|
|
C.DeleteBinarySet(cBinarySet)
|
|
}
|
|
}()
|
|
if err := HandleCStatus(&status, "failed to serialize index to binary set"); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
keys, err := GetBinarySetKeys(cBinarySet)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ret := make([]*Blob, 0)
|
|
for _, key := range keys {
|
|
value, err := GetBinarySetValue(cBinarySet, key)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
size, err := GetBinarySetSize(cBinarySet, key)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
blob := &Blob{
|
|
Key: key,
|
|
Value: value,
|
|
MemorySize: size,
|
|
}
|
|
ret = append(ret, blob)
|
|
}
|
|
|
|
return ret, nil
|
|
}
|
|
|
|
func (index *CgoIndex) GetIndexFileInfo() ([]*IndexFileInfo, error) {
|
|
var cBinarySet C.CBinarySet
|
|
|
|
status := C.SerializeIndexToBinarySet(index.indexPtr, &cBinarySet)
|
|
defer func() {
|
|
if cBinarySet != nil {
|
|
C.DeleteBinarySet(cBinarySet)
|
|
}
|
|
}()
|
|
if err := HandleCStatus(&status, "failed to serialize index to binary set"); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
keys, err := GetBinarySetKeys(cBinarySet)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ret := make([]*IndexFileInfo, 0)
|
|
for _, key := range keys {
|
|
size, err := GetBinarySetSize(cBinarySet, key)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
info := &IndexFileInfo{
|
|
FileName: key,
|
|
FileSize: size,
|
|
}
|
|
ret = append(ret, info)
|
|
}
|
|
|
|
return ret, nil
|
|
}
|
|
|
|
func (index *CgoIndex) Load(blobs []*Blob) error {
|
|
var cBinarySet C.CBinarySet
|
|
status := C.NewBinarySet(&cBinarySet)
|
|
defer C.DeleteBinarySet(cBinarySet)
|
|
|
|
if err := HandleCStatus(&status, "failed to load index"); err != nil {
|
|
return err
|
|
}
|
|
for _, blob := range blobs {
|
|
key := blob.Key
|
|
byteIndex := blob.Value
|
|
indexPtr := unsafe.Pointer(&byteIndex[0])
|
|
indexLen := C.int64_t(len(byteIndex))
|
|
binarySetKey := filepath.Base(key)
|
|
indexKey := C.CString(binarySetKey)
|
|
status = C.AppendIndexBinary(cBinarySet, indexPtr, indexLen, indexKey)
|
|
C.free(unsafe.Pointer(indexKey))
|
|
if err := HandleCStatus(&status, "failed to load index"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
status = C.LoadIndexFromBinarySet(index.indexPtr, cBinarySet)
|
|
return HandleCStatus(&status, "failed to load index")
|
|
}
|
|
|
|
func (index *CgoIndex) Delete() error {
|
|
if index.close {
|
|
return nil
|
|
}
|
|
status := C.DeleteIndex(index.indexPtr)
|
|
index.close = true
|
|
return HandleCStatus(&status, "failed to delete index")
|
|
}
|
|
|
|
func (index *CgoIndex) CleanLocalData() error {
|
|
status := C.CleanLocalData(index.indexPtr)
|
|
return HandleCStatus(&status, "failed to clean cached data on disk")
|
|
}
|
|
|
|
func (index *CgoIndex) UpLoad() (map[string]int64, error) {
|
|
var cBinarySet C.CBinarySet
|
|
|
|
status := C.SerializeIndexAndUpLoad(index.indexPtr, &cBinarySet)
|
|
defer func() {
|
|
if cBinarySet != nil {
|
|
C.DeleteBinarySet(cBinarySet)
|
|
}
|
|
}()
|
|
if err := HandleCStatus(&status, "failed to serialize index and upload index"); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
res := make(map[string]int64)
|
|
indexFilePaths, err := GetBinarySetKeys(cBinarySet)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, path := range indexFilePaths {
|
|
size, err := GetBinarySetSize(cBinarySet, path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
res[path] = size
|
|
}
|
|
|
|
return res, nil
|
|
}
|