Update Knowhere version, update DiskANN API, and generate cache nodes during the build process (#24898)

Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
pull/24880/head
cqy123456 2023-06-16 02:20:39 -04:00 committed by GitHub
parent c73219a54d
commit a519213316
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 17 deletions

View File

@ -71,14 +71,11 @@ VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
"index file paths is empty when load disk ann index data");
file_manager_->CacheIndexToDisk(index_files.value());
// todo : replace by index::load function later
knowhere::DataSetPtr qs = std::make_unique<knowhere::DataSet>();
qs->SetRows(kPrepareRows);
qs->SetDim(kPrepareDim);
qs->SetIsOwner(true);
auto query = new T[kPrepareRows * kPrepareDim];
qs->SetTensor(query);
index_.Search(*qs, load_config, nullptr);
auto stat = index_.Deserialize(knowhere::BinarySet(), load_config);
if (stat != knowhere::Status::success)
PanicCodeInfo(
ErrorCodeEnum::UnexpectedError,
"failed to Deserialize index, " + MatchKnowhereError(stat));
SetDim(index_.Dim());
}
@ -124,8 +121,10 @@ VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
local_chunk_manager.Write(local_data_path, offset, raw_data, data_size);
knowhere::DataSet* ds_ptr = nullptr;
index_.Build(*ds_ptr, build_config);
auto stat = index_.Build(*ds_ptr, build_config);
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::BuildIndexError,
"failed to build index, " + MatchKnowhereError(stat));
local_chunk_manager.RemoveDir(
storage::GetSegmentRawDataPathPrefix(segment_id));
// TODO ::

View File

@ -424,7 +424,7 @@ TEST_P(IndexTest, BuildAndQuery) {
index_files.emplace_back(binary.first);
}
load_conf["index_files"] = index_files;
vec_index->Load(binary_set, load_conf);
ASSERT_NO_THROW(vec_index->Load(binary_set, load_conf));
EXPECT_EQ(vec_index->Count(), NB);
#endif
} else {

View File

@ -82,6 +82,8 @@ func NewBigDataExtraParamsFromJSON(jsonStr string) (*BigDataIndexExtraParams, er
func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraParams, error) {
ret := &BigDataIndexExtraParams{}
ret.SearchCacheBudgetGBRatio = DefaultSearchCacheBudgetGBRatio
setSearchCache := false
var err error
buildRatio, ok := value[BuildRatioKey]
if !ok {
@ -93,7 +95,6 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
if err != nil {
return ret, err
}
PQCodeBudgetGBRatio, ok := valueMap1["pq_code_budget_gb"]
if !ok {
ret.PQCodeBudgetGBRatio = DefaultPQCodeBudgetGBRatio
@ -106,6 +107,11 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
} else {
ret.BuildNumThreadsRatio = BuildNumThreadsRatio
}
SearchCacheBudgetGBRatio, ok := valueMap1["search_cache_budget_gb"]
if ok {
ret.SearchCacheBudgetGBRatio = SearchCacheBudgetGBRatio
setSearchCache = true
}
}
prepareRatio, ok := value[PrepareRatioKey]
@ -119,9 +125,7 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
return ret, err
}
SearchCacheBudgetGBRatio, ok := valueMap2["search_cache_budget_gb"]
if !ok {
ret.SearchCacheBudgetGBRatio = DefaultSearchCacheBudgetGBRatio
} else {
if ok && !setSearchCache {
ret.SearchCacheBudgetGBRatio = SearchCacheBudgetGBRatio
}
LoadNumThreadRatio, ok := valueMap2["num_threads"]
@ -142,6 +146,7 @@ func NewBigDataExtraParamsFromMap(value map[string]string) (*BigDataIndexExtraPa
ret.BeamWidthRatio = beamWidthRatio
}
}
return ret, nil
}
@ -152,6 +157,7 @@ func FillDiskIndexParams(params *paramtable.ComponentParam, indexParams map[stri
searchListSize := params.CommonCfg.SearchListSize.GetValue()
pqCodeBudgetGBRatio := params.CommonCfg.PQCodeBudgetGBRatio.GetValue()
buildNumThreadsRatio := params.CommonCfg.BuildNumThreadsRatio.GetValue()
searchCacheBudgetGBRatio := params.CommonCfg.SearchCacheBudgetGBRatio.GetValue()
if params.AutoIndexConfig.Enable.GetAsBool() {
indexParams := params.AutoIndexConfig.IndexParams.GetAsJSONMap()
@ -176,6 +182,7 @@ func FillDiskIndexParams(params *paramtable.ComponentParam, indexParams map[stri
indexParams[SearchListSizeKey] = searchListSize
indexParams[PQCodeBudgetRatioKey] = pqCodeBudgetGBRatio
indexParams[NumBuildThreadRatioKey] = buildNumThreadsRatio
indexParams[SearchCacheBudgetRatioKey] = searchCacheBudgetGBRatio
return nil
}
@ -209,12 +216,20 @@ func SetDiskIndexBuildParams(indexParams map[string]string, numRows int64) error
if err != nil {
return err
}
searchCacheBudgetGBRatioStr, ok := indexParams[SearchCacheBudgetRatioKey]
if !ok {
return fmt.Errorf("index param searchCacheBudgetGBRatio not exist")
}
SearchCacheBudgetGBRatio, err := strconv.ParseFloat(searchCacheBudgetGBRatioStr, 64)
if err != nil {
return err
}
indexParams[PQCodeBudgetKey] = fmt.Sprintf("%f",
float32(getRowDataSizeOfFloatVector(numRows, dim))*float32(pqCodeBudgetGBRatio)/(1<<30))
indexParams[NumBuildThreadKey] = strconv.Itoa(int(float32(hardware.GetCPUNum()) * float32(buildNumThreadsRatio)))
indexParams[BuildDramBudgetKey] = fmt.Sprintf("%f", float32(hardware.GetFreeMemoryCount())/(1<<30))
indexParams[SearchCacheBudgetKey] = fmt.Sprintf("%f",
float32(getRowDataSizeOfFloatVector(numRows, dim))*float32(SearchCacheBudgetGBRatio)/(1<<30))
return nil
}

View File

@ -44,6 +44,10 @@ func TestDiskIndexParams(t *testing.T) {
buildNumThreadsRatio, err := strconv.ParseFloat(indexParams[NumBuildThreadRatioKey], 64)
assert.NoError(t, err)
assert.Equal(t, 1.0, buildNumThreadsRatio)
searchCacheBudgetRatio, err := strconv.ParseFloat(indexParams[SearchCacheBudgetRatioKey], 64)
assert.NoError(t, err)
assert.Equal(t, 0.10, searchCacheBudgetRatio)
})
t.Run("fill index params with auto index", func(t *testing.T) {
@ -129,14 +133,24 @@ func TestDiskIndexParams(t *testing.T) {
indexParams[common.DimKey] = "128"
err = SetDiskIndexBuildParams(indexParams, 100)
assert.Error(t, err)
indexParams[SearchCacheBudgetRatioKey] = "0.125"
err = SetDiskIndexBuildParams(indexParams, 100)
assert.NoError(t, err)
indexParams[SearchCacheBudgetRatioKey] = "aabb"
err = SetDiskIndexBuildParams(indexParams, 100)
assert.Error(t, err)
_, ok := indexParams[PQCodeBudgetKey]
assert.True(t, ok)
_, ok = indexParams[BuildDramBudgetKey]
assert.True(t, ok)
_, ok = indexParams[NumBuildThreadKey]
assert.True(t, ok)
_, ok = indexParams[SearchCacheBudgetKey]
assert.True(t, ok)
})
t.Run("set disk index load params without auto index param", func(t *testing.T) {
@ -291,6 +305,16 @@ func TestBigDataIndex_parse(t *testing.T) {
assert.Equal(t, 8.0, extraParams.LoadNumThreadRatio)
assert.Equal(t, 0.125, extraParams.PQCodeBudgetGBRatio)
assert.Equal(t, 0.225, extraParams.SearchCacheBudgetGBRatio)
mapString = make(map[string]string)
mapString[BuildRatioKey] = "{\"pq_code_budget_gb\": 0.125, \"num_threads\": 1, \"search_cache_budget_gb\": 0.20}"
mapString[PrepareRatioKey] = "{\"num_threads\": 8}"
extraParams, err = NewBigDataExtraParamsFromMap(mapString)
assert.NoError(t, err)
assert.Equal(t, 1.0, extraParams.BuildNumThreadsRatio)
assert.Equal(t, 8.0, extraParams.LoadNumThreadRatio)
assert.Equal(t, 0.125, extraParams.PQCodeBudgetGBRatio)
assert.Equal(t, 0.20, extraParams.SearchCacheBudgetGBRatio)
})
t.Run("parse with build_ratio partial or wrong", func(t *testing.T) {
@ -319,6 +343,13 @@ func TestBigDataIndex_parse(t *testing.T) {
mapString[PrepareRatioKey] = "{\"search_cache_budget_gb\": 0.225, \"num_threads\": 8}"
_, err = NewBigDataExtraParamsFromMap(mapString)
assert.Error(t, err)
mapString = make(map[string]string)
mapString[BuildRatioKey] = "{\"pq_code_budget_gb\": 0.125, \"num_threads\": 1}"
mapString[PrepareRatioKey] = "{\"num_threads\": 8}"
extraParams, err = NewBigDataExtraParamsFromMap(mapString)
assert.NoError(t, err)
assert.Equal(t, 0.10, extraParams.SearchCacheBudgetGBRatio)
})
t.Run("parse with prepare_ratio partial or wrong", func(t *testing.T) {