mirror of https://github.com/milvus-io/milvus.git
Update knowhere version, update diskann api and generate cache nodes in build process (#24898)
Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
pull/24880/head
parent
c73219a54d
commit
a519213316
|
@ -71,14 +71,11 @@ VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
|
|||
"index file paths is empty when load disk ann index data");
|
||||
file_manager_->CacheIndexToDisk(index_files.value());
|
||||
|
||||
// todo : replace by index::load function later
|
||||
knowhere::DataSetPtr qs = std::make_unique<knowhere::DataSet>();
|
||||
qs->SetRows(kPrepareRows);
|
||||
qs->SetDim(kPrepareDim);
|
||||
qs->SetIsOwner(true);
|
||||
auto query = new T[kPrepareRows * kPrepareDim];
|
||||
qs->SetTensor(query);
|
||||
index_.Search(*qs, load_config, nullptr);
|
||||
auto stat = index_.Deserialize(knowhere::BinarySet(), load_config);
|
||||
if (stat != knowhere::Status::success)
|
||||
PanicCodeInfo(
|
||||
ErrorCodeEnum::UnexpectedError,
|
||||
"failed to Deserialize index, " + MatchKnowhereError(stat));
|
||||
|
||||
SetDim(index_.Dim());
|
||||
}
|
||||
|
@ -124,8 +121,10 @@ VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
|
|||
local_chunk_manager.Write(local_data_path, offset, raw_data, data_size);
|
||||
|
||||
knowhere::DataSet* ds_ptr = nullptr;
|
||||
index_.Build(*ds_ptr, build_config);
|
||||
|
||||
auto stat = index_.Build(*ds_ptr, build_config);
|
||||
if (stat != knowhere::Status::success)
|
||||
PanicCodeInfo(ErrorCodeEnum::BuildIndexError,
|
||||
"failed to build index, " + MatchKnowhereError(stat));
|
||||
local_chunk_manager.RemoveDir(
|
||||
storage::GetSegmentRawDataPathPrefix(segment_id));
|
||||
// TODO ::
|
||||
|
|
|
@ -424,7 +424,7 @@ TEST_P(IndexTest, BuildAndQuery) {
|
|||
index_files.emplace_back(binary.first);
|
||||
}
|
||||
load_conf["index_files"] = index_files;
|
||||
vec_index->Load(binary_set, load_conf);
|
||||
ASSERT_NO_THROW(vec_index->Load(binary_set, load_conf));
|
||||
EXPECT_EQ(vec_index->Count(), NB);
|
||||
#endif
|
||||
} else {
|
||||
|
|
|
@ -82,6 +82,8 @@ func NewBigDataExtraParamsFromJSON(jsonStr string) (*BigDataIndexExtraParams, er
|
|||
|
||||
func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraParams, error) {
|
||||
ret := &BigDataIndexExtraParams{}
|
||||
ret.SearchCacheBudgetGBRatio = DefaultSearchCacheBudgetGBRatio
|
||||
setSearchCache := false
|
||||
var err error
|
||||
buildRatio, ok := value[BuildRatioKey]
|
||||
if !ok {
|
||||
|
@ -93,7 +95,6 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
|
|||
if err != nil {
|
||||
return ret, err
|
||||
}
|
||||
|
||||
PQCodeBudgetGBRatio, ok := valueMap1["pq_code_budget_gb"]
|
||||
if !ok {
|
||||
ret.PQCodeBudgetGBRatio = DefaultPQCodeBudgetGBRatio
|
||||
|
@ -106,6 +107,11 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
|
|||
} else {
|
||||
ret.BuildNumThreadsRatio = BuildNumThreadsRatio
|
||||
}
|
||||
SearchCacheBudgetGBRatio, ok := valueMap1["search_cache_budget_gb"]
|
||||
if ok {
|
||||
ret.SearchCacheBudgetGBRatio = SearchCacheBudgetGBRatio
|
||||
setSearchCache = true
|
||||
}
|
||||
}
|
||||
|
||||
prepareRatio, ok := value[PrepareRatioKey]
|
||||
|
@ -119,9 +125,7 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
|
|||
return ret, err
|
||||
}
|
||||
SearchCacheBudgetGBRatio, ok := valueMap2["search_cache_budget_gb"]
|
||||
if !ok {
|
||||
ret.SearchCacheBudgetGBRatio = DefaultSearchCacheBudgetGBRatio
|
||||
} else {
|
||||
if ok && !setSearchCache {
|
||||
ret.SearchCacheBudgetGBRatio = SearchCacheBudgetGBRatio
|
||||
}
|
||||
LoadNumThreadRatio, ok := valueMap2["num_threads"]
|
||||
|
@ -142,6 +146,7 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
|
|||
ret.BeamWidthRatio = beamWidthRatio
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
|
@ -152,6 +157,7 @@ func FillDiskIndexParams(params *paramtable.ComponentParam, indexParams map[stri
|
|||
searchListSize := params.CommonCfg.SearchListSize.GetValue()
|
||||
pqCodeBudgetGBRatio := params.CommonCfg.PQCodeBudgetGBRatio.GetValue()
|
||||
buildNumThreadsRatio := params.CommonCfg.BuildNumThreadsRatio.GetValue()
|
||||
searchCacheBudgetGBRatio := params.CommonCfg.SearchCacheBudgetGBRatio.GetValue()
|
||||
|
||||
if params.AutoIndexConfig.Enable.GetAsBool() {
|
||||
indexParams := params.AutoIndexConfig.IndexParams.GetAsJSONMap()
|
||||
|
@ -176,6 +182,7 @@ func FillDiskIndexParams(params *paramtable.ComponentParam, indexParams map[stri
|
|||
indexParams[SearchListSizeKey] = searchListSize
|
||||
indexParams[PQCodeBudgetRatioKey] = pqCodeBudgetGBRatio
|
||||
indexParams[NumBuildThreadRatioKey] = buildNumThreadsRatio
|
||||
indexParams[SearchCacheBudgetRatioKey] = searchCacheBudgetGBRatio
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -209,12 +216,20 @@ func SetDiskIndexBuildParams(indexParams map[string]string, numRows int64) error
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
searchCacheBudgetGBRatioStr, ok := indexParams[SearchCacheBudgetRatioKey]
|
||||
if !ok {
|
||||
return fmt.Errorf("index param searchCacheBudgetGBRatio not exist")
|
||||
}
|
||||
SearchCacheBudgetGBRatio, err := strconv.ParseFloat(searchCacheBudgetGBRatioStr, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
indexParams[PQCodeBudgetKey] = fmt.Sprintf("%f",
|
||||
float32(getRowDataSizeOfFloatVector(numRows, dim))*float32(pqCodeBudgetGBRatio)/(1<<30))
|
||||
indexParams[NumBuildThreadKey] = strconv.Itoa(int(float32(hardware.GetCPUNum()) * float32(buildNumThreadsRatio)))
|
||||
indexParams[BuildDramBudgetKey] = fmt.Sprintf("%f", float32(hardware.GetFreeMemoryCount())/(1<<30))
|
||||
|
||||
indexParams[SearchCacheBudgetKey] = fmt.Sprintf("%f",
|
||||
float32(getRowDataSizeOfFloatVector(numRows, dim))*float32(SearchCacheBudgetGBRatio)/(1<<30))
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -44,6 +44,10 @@ func TestDiskIndexParams(t *testing.T) {
|
|||
buildNumThreadsRatio, err := strconv.ParseFloat(indexParams[NumBuildThreadRatioKey], 64)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1.0, buildNumThreadsRatio)
|
||||
|
||||
searchCacheBudgetRatio, err := strconv.ParseFloat(indexParams[SearchCacheBudgetRatioKey], 64)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0.10, searchCacheBudgetRatio)
|
||||
})
|
||||
|
||||
t.Run("fill index params with auto index", func(t *testing.T) {
|
||||
|
@ -129,14 +133,24 @@ func TestDiskIndexParams(t *testing.T) {
|
|||
|
||||
indexParams[common.DimKey] = "128"
|
||||
err = SetDiskIndexBuildParams(indexParams, 100)
|
||||
assert.Error(t, err)
|
||||
|
||||
indexParams[SearchCacheBudgetRatioKey] = "0.125"
|
||||
err = SetDiskIndexBuildParams(indexParams, 100)
|
||||
assert.NoError(t, err)
|
||||
|
||||
indexParams[SearchCacheBudgetRatioKey] = "aabb"
|
||||
err = SetDiskIndexBuildParams(indexParams, 100)
|
||||
assert.Error(t, err)
|
||||
|
||||
_, ok := indexParams[PQCodeBudgetKey]
|
||||
assert.True(t, ok)
|
||||
_, ok = indexParams[BuildDramBudgetKey]
|
||||
assert.True(t, ok)
|
||||
_, ok = indexParams[NumBuildThreadKey]
|
||||
assert.True(t, ok)
|
||||
_, ok = indexParams[SearchCacheBudgetKey]
|
||||
assert.True(t, ok)
|
||||
})
|
||||
|
||||
t.Run("set disk index load params without auto index param", func(t *testing.T) {
|
||||
|
@ -291,6 +305,16 @@ func TestBigDataIndex_parse(t *testing.T) {
|
|||
assert.Equal(t, 8.0, extraParams.LoadNumThreadRatio)
|
||||
assert.Equal(t, 0.125, extraParams.PQCodeBudgetGBRatio)
|
||||
assert.Equal(t, 0.225, extraParams.SearchCacheBudgetGBRatio)
|
||||
|
||||
mapString = make(map[string]string)
|
||||
mapString[BuildRatioKey] = "{\"pq_code_budget_gb\": 0.125, \"num_threads\": 1, \"search_cache_budget_gb\": 0.20}"
|
||||
mapString[PrepareRatioKey] = "{\"num_threads\": 8}"
|
||||
extraParams, err = NewBigDataExtraParamsFromMap(mapString)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1.0, extraParams.BuildNumThreadsRatio)
|
||||
assert.Equal(t, 8.0, extraParams.LoadNumThreadRatio)
|
||||
assert.Equal(t, 0.125, extraParams.PQCodeBudgetGBRatio)
|
||||
assert.Equal(t, 0.20, extraParams.SearchCacheBudgetGBRatio)
|
||||
})
|
||||
|
||||
t.Run("parse with build_ratio partial or wrong", func(t *testing.T) {
|
||||
|
@ -319,6 +343,13 @@ func TestBigDataIndex_parse(t *testing.T) {
|
|||
mapString[PrepareRatioKey] = "{\"search_cache_budget_gb\": 0.225, \"num_threads\": 8}"
|
||||
_, err = NewBigDataExtraParamsFromMap(mapString)
|
||||
assert.Error(t, err)
|
||||
|
||||
mapString = make(map[string]string)
|
||||
mapString[BuildRatioKey] = "{\"pq_code_budget_gb\": 0.125, \"num_threads\": 1}"
|
||||
mapString[PrepareRatioKey] = "{\"num_threads\": 8}"
|
||||
extraParams, err = NewBigDataExtraParamsFromMap(mapString)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0.10, extraParams.SearchCacheBudgetGBRatio)
|
||||
})
|
||||
|
||||
t.Run("parse with prepare_ratio partial or wrong", func(t *testing.T) {
|
||||
|
|
Loading…
Reference in New Issue