remove VectorMemNMIndex (#27000)

Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
pull/27032/head
cqy123456 2023-09-12 17:13:18 +08:00 committed by GitHub
parent a37b422acd
commit 0ff4ddc76c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 4 additions and 217 deletions

View File

@ -14,7 +14,6 @@ set(INDEX_FILES
Utils.cpp
VectorMemIndex.cpp
IndexFactory.cpp
VectorMemNMIndex.cpp
)
if ( BUILD_DISK_ANN STREQUAL "ON" )

View File

@ -16,7 +16,6 @@
#include "index/IndexFactory.h"
#include "index/VectorMemIndex.h"
#include "index/VectorMemNMIndex.h"
#include "index/Utils.h"
#include "index/Meta.h"
@ -93,10 +92,6 @@ IndexFactory::CreateVectorIndex(const CreateIndexInfo& create_index_info,
}
#endif
if (is_in_nm_list(index_type)) {
return std::make_unique<VectorMemNMIndex>(
index_type, metric_type, file_manager);
}
// create mem index
return std::make_unique<VectorMemIndex>(
index_type, metric_type, file_manager);

View File

@ -1,133 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/Slice.h"
#include "common/Utils.h"
#include "common/BitsetView.h"
#include "index/VectorMemNMIndex.h"
#include "log/Log.h"
#include "knowhere/factory.h"
#include "knowhere/comp/time_recorder.h"
#define RAW_DATA "RAW_DATA"
#include "common/Tracer.h"
namespace milvus::index {
// Serializes the underlying knowhere index and attaches the raw vectors to the
// result under the RAW_DATA key.
//
// `config` is accepted to match the overridden signature; it is not read here.
// Panics with ErrorCodeEnum::UnexpectedError if knowhere fails to serialize.
//
// NOTE(review): the RAW_DATA entry aliases raw_data_ rather than copying it,
// so the returned set presumably must not outlive this index - confirm against
// callers.
BinarySet
VectorMemNMIndex::Serialize(const Config& config) {
    knowhere::BinarySet binary_set;
    auto status = index_.Serialize(binary_set);
    if (status != knowhere::Status::success) {
        PanicCodeInfo(
            ErrorCodeEnum::UnexpectedError,
            "failed to serialize index, " + KnowhereStatusString(status));
    }

    // raw_data_ remains owned by this object, so the shared_ptr handed to the
    // binary set uses a no-op deleter to avoid freeing the buffer twice.
    auto no_op_deleter = [](uint8_t*) {};
    std::shared_ptr<uint8_t[]> raw_view(raw_data_.data(), no_op_deleter);
    binary_set.Append(RAW_DATA, raw_view, raw_data_.size());

    Disassemble(binary_set);
    return binary_set;
}
void
VectorMemNMIndex::BuildWithDataset(const DatasetPtr& dataset,
const Config& config) {
VectorMemIndex::BuildWithDataset(dataset, config);
knowhere::TimeRecorder rc("store_raw_data", 1);
store_raw_data(dataset);
rc.ElapseFromBegin("Done");
}
// Loads the index through the base class. When the binary set already carries
// a RAW_DATA entry, the raw_data_loaded_ flag is consumed here (only a log line
// is emitted), so a later Query() will not run LoadRawData().
void
VectorMemNMIndex::LoadWithoutAssemble(const BinarySet& binary_set,
                                      const Config& config) {
    VectorMemIndex::LoadWithoutAssemble(binary_set, config);

    if (!binary_set.Contains(RAW_DATA)) {
        return;
    }
    auto mark_loaded = [&]() {
        LOG_SEGCORE_INFO_ << "NM index load raw data done!";
    };
    std::call_once(raw_data_loaded_, mark_loaded);
}
// Intentionally a no-op: both arguments are ignored and nothing is added to
// the index.
void
VectorMemNMIndex::AddWithDataset(const DatasetPtr& /*dataset*/,
                                 const Config& /*config*/) {
    // Nothing to do.
}
// Loads the index through the base class. When the binary set already carries
// a RAW_DATA entry, the raw_data_loaded_ flag is consumed here (only a log line
// is emitted), so a later Query() will not run LoadRawData().
void
VectorMemNMIndex::Load(const BinarySet& binary_set, const Config& config) {
    VectorMemIndex::Load(binary_set, config);

    if (!binary_set.Contains(RAW_DATA)) {
        return;
    }
    auto mark_loaded = [&]() {
        LOG_SEGCORE_INFO_ << "NM index load raw data done!";
    };
    std::call_once(raw_data_loaded_, mark_loaded);
}
std::unique_ptr<SearchResult>
VectorMemNMIndex::Query(const DatasetPtr dataset,
const SearchInfo& search_info,
const BitsetView& bitset) {
auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer
// load -> query, raw data has been loaded
// build -> query, this case just for test, should load raw data before query
std::call_once(raw_data_loaded_, load_raw_data_closure);
return VectorMemIndex::Query(dataset, search_info, bitset);
}
// Copies the raw vectors of `dataset` into raw_data_.
//
// The byte count is dim / 8 * rows for index types in the binary list and
// dim * rows * sizeof(float) otherwise. raw_data_ is resized to exactly that
// many bytes.
void
VectorMemNMIndex::store_raw_data(const DatasetPtr& dataset) {
    auto index_type = GetIndexType();
    auto tensor = dataset->GetTensor();
    auto row_num = dataset->GetRows();
    auto dim = dataset->GetDim();

    int64_t data_size = 0;
    if (is_in_bin_list(index_type)) {
        data_size = dim / 8 * row_num;
    } else {
        data_size = dim * row_num * sizeof(float);
    }

    raw_data_.resize(data_size);
    // For an empty dataset both raw_data_.data() and the tensor pointer may be
    // null; passing null pointers to memcpy is undefined behaviour even when
    // the count is zero, so skip the call when there is nothing to copy.
    if (data_size > 0) {
        memcpy(raw_data_.data(), tensor, data_size);
    }
}
void
VectorMemNMIndex::LoadRawData() {
knowhere::BinarySet bs;
auto stat = index_.Serialize(bs);
if (stat != knowhere::Status::success)
PanicCodeInfo(
ErrorCodeEnum::UnexpectedError,
"failed to Serialize index, " + KnowhereStatusString(stat));
auto bptr = std::make_shared<knowhere::Binary>();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
bptr->data = std::shared_ptr<uint8_t[]>(
static_cast<uint8_t*>(raw_data_.data()), deleter);
bptr->size = raw_data_.size();
bs.Append(RAW_DATA, bptr);
stat = index_.Deserialize(bs);
if (stat != knowhere::Status::success)
PanicCodeInfo(
ErrorCodeEnum::UnexpectedError,
"failed to Deserialize index, " + KnowhereStatusString(stat));
milvus::tracer::AddEvent("VectorMemNMIndex_Loaded_RawData");
}
} // namespace milvus::index

View File

@ -1,74 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "index/Utils.h"
#include "index/VectorMemIndex.h"
namespace milvus::index {
// In-memory vector index that additionally keeps a byte buffer of raw vectors
// (raw_data_) alongside the VectorMemIndex it derives from.
//
// Construction asserts that `index_type` is accepted by is_in_nm_list().
class VectorMemNMIndex : public VectorMemIndex {
 public:
    // Forwards all arguments to VectorMemIndex, then validates the index type.
    explicit VectorMemNMIndex(
        const IndexType& index_type,
        const MetricType& metric_type,
        storage::FileManagerImplPtr file_manager = nullptr)
        : VectorMemIndex(index_type, metric_type, file_manager) {
        AssertInfo(is_in_nm_list(index_type), "not valid nm index type");
    }

    // Serializes the index together with the stored raw vectors.
    BinarySet
    Serialize(const Config& config) override;

    // Builds the index and stores the raw vectors of `dataset`.
    void
    BuildWithDataset(const DatasetPtr& dataset,
                     const Config& config = {}) override;

    // Override of the base-class incremental add.
    void
    AddWithDataset(const DatasetPtr& dataset, const Config& config) override;

    // Loads the index from `binary_set`.
    void
    Load(const BinarySet& binary_set, const Config& config = {}) override;

    // Searches the index for the vectors in `dataset`.
    std::unique_ptr<SearchResult>
    Query(const DatasetPtr dataset,
          const SearchInfo& search_info,
          const BitsetView& bitset) override;

    // Load variant that goes through the base class's LoadWithoutAssemble.
    void
    LoadWithoutAssemble(const BinarySet& binary_set,
                        const Config& config) override;

 private:
    // Copies the raw vectors of `dataset` into raw_data_.
    void
    store_raw_data(const DatasetPtr& dataset);

    // Attaches raw_data_ to the underlying index.
    void
    LoadRawData();

    // Raw vector bytes stored at build time.
    std::vector<uint8_t> raw_data_;
    // Guards the one-time raw-data load.
    std::once_flag raw_data_loaded_;
};
// Uniquely-owning pointer to a VectorMemNMIndex.
using VectorMemNMIndexPtr = std::unique_ptr<VectorMemNMIndex>;
} // namespace milvus::index

View File

@ -16,7 +16,7 @@
#include "common/SystemProperty.h"
#include "segcore/FieldIndexing.h"
#include "index/VectorMemNMIndex.h"
#include "index/VectorMemIndex.h"
#include "IndexConfigGenerator.h"
namespace milvus::segcore {
@ -50,7 +50,7 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg,
data_.grow_to_at_least(ack_end);
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
const auto& chunk = source->get_chunk(chunk_id);
auto indexing = std::make_unique<index::VectorMemNMIndex>(
auto indexing = std::make_unique<index::VectorMemIndex>(
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2);
auto dataset = knowhere::GenDataSet(
source->get_size_per_chunk(), dim, chunk.data());

View File

@ -22,7 +22,7 @@
#include "common/Schema.h"
#include "index/ScalarIndexSort.h"
#include "index/StringIndexSort.h"
#include "index/VectorMemNMIndex.h"
#include "index/VectorMemIndex.h"
#include "query/SearchOnIndex.h"
#include "segcore/SegmentGrowingImpl.h"
#include "segcore/SegmentSealedImpl.h"
@ -793,7 +793,7 @@ GenVecIndexing(int64_t N, int64_t dim, const float* vec) {
{knowhere::indexparam::NLIST, "1024"},
{knowhere::meta::DEVICE_ID, 0}};
auto database = knowhere::GenDataSet(N, dim, vec);
auto indexing = std::make_unique<index::VectorMemNMIndex>(
auto indexing = std::make_unique<index::VectorMemIndex>(
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2);
indexing->BuildWithDataset(database, conf);
return indexing;