support knowhere 2.0 (#21857)

Signed-off-by: Yusheng.Ma <Yusheng.Ma@zilliz.com>
pull/22118/head
presburger 2023-02-10 14:24:32 +08:00 committed by GitHub
parent 2c25a2d649
commit 9950cacd10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
57 changed files with 339 additions and 541 deletions

View File

@ -26,7 +26,6 @@ ifeq (${USE_ASAN}, true)
useasan = true
endif
export GIT_BRANCH=master
milvus: build-cpp print-build-info
@ -34,7 +33,7 @@ milvus: build-cpp print-build-info
@source $(PWD)/scripts/setenv.sh && \
mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \
GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \
${APPLE_SILICON_FLAG} -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null
${AARCH64_FLAG} -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null
get-build-deps:
@(env bash $(PWD)/scripts/install_deps.sh)
@ -105,7 +104,7 @@ meta-migration:
@source $(PWD)/scripts/setenv.sh && \
mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \
GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \
${APPLE_SILICON_FLAG} -o $(INSTALL_PATH)/meta-migration $(MIGRATION_PATH)/main.go 1>/dev/null
${AARCH64_FLAG} -o $(INSTALL_PATH)/meta-migration $(MIGRATION_PATH)/main.go 1>/dev/null
INTERATION_PATH = $(PWD)/tests/integration
integration-test:
@ -119,10 +118,13 @@ BUILD_TAGS = $(shell git describe --tags --always --dirty="-dev")
BUILD_TIME = $(shell date -u)
GIT_COMMIT = $(shell git rev-parse --short HEAD)
GO_VERSION = $(shell go version)
ifeq ($(OS),Darwin)
ifeq ($(ARCH),arm64)
APPLE_SILICON_FLAG = -tags dynamic
AARCH64_FLAG = -tags dynamic
endif
ifeq ($(ARCH),aarch64)
AARCH64_FLAG = -tags dynamic
endif
print-build-info:
@ -139,7 +141,7 @@ embd-milvus: build-cpp-embd print-build-info
@source $(PWD)/scripts/setenv.sh && \
mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \
GO111MODULE=on $(GO) build -ldflags="-r /tmp/milvus/lib/ -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \
${APPLE_SILICON_FLAG} -buildmode=c-shared -o $(INSTALL_PATH)/embd-milvus.so $(PWD)/pkg/embedded/embedded.go 1>/dev/null
${AARCH64_FLAG} -buildmode=c-shared -o $(INSTALL_PATH)/embd-milvus.so $(PWD)/pkg/embedded/embedded.go 1>/dev/null
update-milvus-api: download-milvus-proto
@echo "Update milvus/api version ..."

View File

@ -20,19 +20,19 @@
#include <boost_ext/dynamic_bitset_ext.hpp>
#include "exceptions/EasyAssert.h"
#include "common/Types.h"
#include "knowhere/utils/BitsetView.h"
#include "knowhere/bitsetview.h"
namespace milvus {
class BitsetView : public faiss::BitsetView {
class BitsetView : public knowhere::BitsetView {
public:
BitsetView() = default;
~BitsetView() = default;
BitsetView(const std::nullptr_t value) : faiss::BitsetView(value) { // NOLINT
BitsetView(const std::nullptr_t value) : knowhere::BitsetView(value) { // NOLINT
}
BitsetView(const uint8_t* data, size_t num_bits) : faiss::BitsetView(data, num_bits) { // NOLINT
BitsetView(const uint8_t* data, size_t num_bits) : knowhere::BitsetView(data, num_bits) { // NOLINT
}
BitsetView(const BitsetType& bitset) // NOLINT

View File

@ -21,7 +21,6 @@
#include "Types.h"
#include "common/CDataType.h"
#include "knowhere/index/Index.h"
// NOTE: field_id can be system field
// NOTE: Refer to common/SystemProperty.cpp for details

View File

@ -19,14 +19,14 @@
#include <memory>
#include "common/Types.h"
#include "knowhere/config.h"
namespace milvus {
// Per-request search settings handed to the index Query() implementations.
// NOTE(review): this listing looks like a rendered diff without +/- markers, so
// the two `search_params_` lines are presumably the old (Config) and the new
// (knowhere::Json) declaration of the same member -- confirm against the file.
struct SearchInfo {
// Number of results requested; the indexes copy it into knowhere::meta::TOPK.
int64_t topk_;
// Read by Query() after the search; its exact use is outside this view.
int64_t round_decimal_;
FieldId field_id_;
// Compared against the index's own metric type in VectorDiskAnnIndex::Query.
MetricType metric_type_;
// Index-specific search parameters; copied into the search config by Query().
Config search_params_;
knowhere::Json search_params_;
};
using SearchInfoPtr = std::shared_ptr<SearchInfo>;

View File

@ -30,10 +30,10 @@
#include <NamedType/named_type.hpp>
#include <variant>
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include <knowhere/index/IndexType.h>
#include "knowhere/common/BinarySet.h"
#include "knowhere/common/Dataset.h"
#include "nlohmann/json.hpp"
#include "knowhere/comp/index_param.h"
#include "knowhere/binaryset.h"
#include "knowhere/dataset.h"
#include "pb/schema.pb.h"
#include "pb/segcore.pb.h"
#include "pb/plan.pb.h"
@ -118,7 +118,8 @@ using TargetBitmapPtr = std::unique_ptr<TargetBitmap>;
using BinaryPtr = knowhere::BinaryPtr;
using BinarySet = knowhere::BinarySet;
using DatasetPtr = knowhere::DatasetPtr;
using Dataset = knowhere::DataSet;
using DatasetPtr = knowhere::DataSetPtr;
using MetricType = knowhere::MetricType;
// TODO :: type define milvus index type(vector index type and scalar index type)
using IndexType = knowhere::IndexType;

View File

@ -17,39 +17,37 @@
#include "common/Consts.h"
#include "config/ConfigChunkManager.h"
#include "exceptions/EasyAssert.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/dataset.h"
namespace milvus {
// Builds a dataset by forwarding nb, dim and xb to knowhere's dataset factory.
// NOTE(review): both the old call (knowhere::GenDataset) and its replacement
// (knowhere::GenDataSet) are shown; presumably only the second one survives.
inline DatasetPtr
GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
return knowhere::GenDataset(nb, dim, xb);
return knowhere::GenDataSet(nb, dim, xb);
}
// Returns the distance array held by `dataset`.
// NOTE(review): the free-function call is the pre-change line and the member
// accessor (GetDistance) its replacement; presumably only the latter remains.
inline const float*
GetDatasetDistance(const DatasetPtr& dataset) {
return knowhere::GetDatasetDistance(dataset);
return dataset->GetDistance();
}
// Returns the id array held by `dataset`.
// NOTE(review): the free-function call is the pre-change line and the member
// accessor (GetIds) its replacement; presumably only the latter remains.
inline const int64_t*
GetDatasetIDs(const DatasetPtr& dataset) {
return knowhere::GetDatasetIDs(dataset);
return dataset->GetIds();
}
// Returns the row count recorded in `dataset`.
// NOTE(review): the free-function call is the pre-change line and the member
// accessor (GetRows) its replacement; presumably only the latter remains.
inline int64_t
GetDatasetRows(const DatasetPtr& dataset) {
return knowhere::GetDatasetRows(dataset);
return dataset->GetRows();
}
// Returns the untyped tensor pointer held by `dataset`; callers cast it to the
// element type they expect.
// NOTE(review): the free-function call is the pre-change line and the member
// accessor (GetTensor) its replacement; presumably only the latter remains.
inline const void*
GetDatasetTensor(const DatasetPtr& dataset) {
return knowhere::GetDatasetTensor(dataset);
return dataset->GetTensor();
}
// Returns the dimension recorded in `dataset`.
// NOTE(review): the free-function call is the pre-change line and the member
// accessor (GetDim) its replacement; presumably only the latter remains.
inline int64_t
GetDatasetDim(const DatasetPtr& dataset) {
return knowhere::GetDatasetDim(dataset);
return dataset->GetDim();
}
inline bool

View File

@ -14,7 +14,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "knowhere/common/BinarySet.h"
#include "knowhere/binaryset.h"
#include "common/binary_set_c.h"
CStatus

View File

@ -20,8 +20,8 @@
#include "exceptions/EasyAssert.h"
#include "easyloggingpp/easylogging++.h"
#include "log/Log.h"
#include "knowhere/archive/KnowhereConfig.h"
#include "knowhere/common/ThreadPool.h"
#include "knowhere/comp/thread_pool.h"
#include "knowhere/comp/knowhere_config.h"
namespace milvus::config {

View File

@ -40,7 +40,7 @@ EasyAssertInfo(bool value,
class SegcoreError : public std::runtime_error {
public:
SegcoreError(ErrorCodeEnum error_code, const std::string& error_msg)
: error_code_(error_code), std::runtime_error(error_msg) {
: std::runtime_error(error_msg), error_code_(error_code) {
}
ErrorCodeEnum

View File

@ -18,7 +18,7 @@
#include <memory>
#include <boost/dynamic_bitset.hpp>
#include "knowhere/dataset.h"
#include "common/Types.h"
namespace milvus::index {

View File

@ -16,8 +16,7 @@
#pragma once
#include "knowhere/index/IndexType.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/comp/index_param.h"
namespace milvus::index {
constexpr const char* OPERATOR_TYPE = "operator_type";
@ -49,6 +48,7 @@ constexpr const char* INDEX_ID = "index_id";
constexpr const char* INDEX_VERSION = "index_version";
// DiskAnn build params
constexpr const char* DISK_ANN_PREFIX_PATH = "index_prefix";
constexpr const char* DISK_ANN_RAW_DATA_PATH = "data_path";
constexpr const char* DISK_ANN_MAX_DEGREE = "max_degree";
constexpr const char* DISK_ANN_SEARCH_LIST_SIZE = "search_list_size";
@ -56,6 +56,7 @@ constexpr const char* DISK_ANN_PQ_CODE_BUDGET = "pq_code_budget_gb";
constexpr const char* DISK_ANN_BUILD_DRAM_BUDGET = "build_dram_budget_gb";
constexpr const char* DISK_ANN_BUILD_THREAD_NUM = "num_build_thread";
constexpr const char* DISK_ANN_PQ_DIMS = "disk_pq_dims";
constexpr const char* DISK_ANN_THREADS_NUM = "num_threads";
// DiskAnn prepare params
constexpr const char* DISK_ANN_LOAD_THREAD_NUM = "num_load_thread";
@ -66,10 +67,4 @@ constexpr const char* DISK_ANN_PREPARE_USE_BFS_CACHE = "use_bfs_cache";
// DiskAnn query params
constexpr const char* DISK_ANN_QUERY_LIST = "search_list";
constexpr const char* DISK_ANN_QUERY_BEAMWIDTH = "beamwidth";
// DiskAnn config name
constexpr const char* Disk_ANN_Build_Config = "diskANN_build_config";
constexpr const char* Disk_ANN_Prepare_Config = "diskANN_prepare_config";
constexpr const char* Disk_ANN_Query_Config = "diskANN_query_config";
} // namespace milvus::index

View File

@ -19,7 +19,7 @@
#include <vector>
#include "index/Meta.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/dataset.h"
namespace milvus::index {
template <typename T>
@ -44,14 +44,14 @@ ScalarIndex<T>::Query(const DatasetPtr& dataset) {
}
case OpType::In: {
auto n = knowhere::GetDatasetRows(dataset);
auto values = knowhere::GetDatasetTensor(dataset);
auto n = dataset->GetRows();
auto values = dataset->GetTensor();
return In(n, reinterpret_cast<const T*>(values));
}
case OpType::NotIn: {
auto n = knowhere::GetDatasetRows(dataset);
auto values = knowhere::GetDatasetTensor(dataset);
auto n = dataset->GetRows();
auto values = dataset->GetTensor();
return NotIn(n, reinterpret_cast<const T*>(values));
}

View File

@ -20,7 +20,7 @@
#include <pb/schema.pb.h>
#include <vector>
#include <string>
#include "knowhere/common/Log.h"
#include "knowhere/log.h"
#include "Meta.h"
#include "common/Utils.h"
#include "common/Slice.h"

View File

@ -21,7 +21,6 @@
#include <utility>
#include <vector>
#include <string>
#include "knowhere/common/Exception.h"
#include "index/IndexStructure.h"
#include "index/ScalarIndex.h"

View File

@ -20,7 +20,6 @@
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <knowhere/index/VecIndex.h>
#include "index/StringIndexMarisa.h"
#include "index/Utils.h"

View File

@ -21,7 +21,6 @@
#include "common/Utils.h"
#include "index/ScalarIndexSort.h"
#include "index/StringIndex.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
namespace milvus::index {
// TODO: should inherit from StringIndex?

View File

@ -23,7 +23,7 @@
#include "index/Meta.h"
#include <google/protobuf/text_format.h>
#include "exceptions/EasyAssert.h"
#include "knowhere/comp/index_param.h"
namespace milvus::index {
size_t

View File

@ -29,6 +29,8 @@ namespace milvus::index {
// Upper bounds checked against the "search_list" query parameter in Query():
// it must not exceed max(topk * 10, kSearchListMaxValue1) nor kSearchListMaxValue2.
#define kSearchListMaxValue1 200 // effective cap while topk <= 20 (topk * 10 <= 200)
#define kSearchListMaxValue2 65535 // hard cap, relevant once topk > 20
// Shape (dim x rows) of the placeholder query dataset that Load() searches with.
#define kPrepareDim 100
#define kPrepareRows 1
template <typename T>
VectorDiskAnnIndex<T>::VectorDiskAnnIndex(const IndexType& index_type,
@ -42,33 +44,50 @@ VectorDiskAnnIndex<T>::VectorDiskAnnIndex(const IndexType& index_type,
AssertInfo(!local_chunk_manager.Exist(local_index_path_prefix),
"local index path " + local_index_path_prefix + " has been exist");
local_chunk_manager.CreateDir(local_index_path_prefix);
index_ = std::make_unique<knowhere::IndexDiskANN<T>>(local_index_path_prefix, metric_type, file_manager);
auto diskann_index_pack = knowhere::Pack(std::shared_ptr<knowhere::FileManager>(file_manager));
index_ = knowhere::IndexFactory::Instance().Create(GetIndexType(), diskann_index_pack);
}
// Loads a DiskANN index described by `config`; `binary_set` is not used.
//
// The list stored under "index_files" in `config` is required (asserted) and is
// handed to file_manager_->CacheIndexToDisk() before the index is touched.
//
// NOTE(review): this listing interleaves the pre-change and post-change
// statements without +/- markers. The parse_prepare_config / Prepare(cfg) /
// index_->Dim() lines look like the removed path, and the update_load_json /
// placeholder Search / index_.Dim() lines like its replacement -- confirm
// against the real file before relying on statement order here.
template <typename T>
void
VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */, const Config& config) {
auto prepare_config = parse_prepare_config(config);
knowhere::Config cfg;
knowhere::DiskANNPrepareConfig::Set(cfg, prepare_config);
knowhere::Json load_config = update_load_json(config);
auto index_files = GetValueFromConfig<std::vector<std::string>>(config, "index_files");
AssertInfo(index_files.has_value(), "index file paths is empty when load disk ann index data");
file_manager_->CacheIndexToDisk(index_files.value());
auto ok = index_->Prepare(cfg);
AssertInfo(ok, "load disk index failed");
SetDim(index_->Dim());
// todo : replace by index::load function later
// Until then a placeholder search (kPrepareRows x kPrepareDim, null bitset) is
// issued with the load config.
knowhere::DataSetPtr qs = std::make_unique<knowhere::DataSet>();
qs->SetRows(kPrepareRows);
qs->SetDim(kPrepareDim);
// Presumably makes the dataset responsible for releasing `query` -- TODO confirm.
qs->SetIsOwner(true);
// The buffer contents are not initialized here.
auto query = new T[kPrepareRows * kPrepareDim];
qs->SetTensor(query);
// NOTE(review): the result of this Search is not checked, unlike in Query().
index_.Search(*qs, load_config, nullptr);
SetDim(index_.Dim());
}
template <typename T>
void
VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset, const Config& config) {
auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
auto build_config = parse_build_config(config);
knowhere::Json build_config;
build_config.update(config);
// set data path
auto segment_id = file_manager_->GetFileDataMeta().segment_id;
auto field_id = file_manager_->GetFileDataMeta().field_id;
auto local_data_path = storage::GenFieldRawDataPathPrefix(segment_id, field_id) + "raw_data";
build_config.data_path = local_data_path;
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
build_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
auto num_threads = GetValueFromConfig<std::string>(build_config, DISK_ANN_BUILD_THREAD_NUM);
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
build_config[DISK_ANN_THREADS_NUM] = std::atoi(num_threads.value().c_str());
if (!local_chunk_manager.Exist(local_data_path)) {
local_chunk_manager.CreateFile(local_data_path);
}
@ -86,10 +105,8 @@ VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset, const Config&
auto raw_data = const_cast<void*>(milvus::GetDatasetTensor(dataset));
local_chunk_manager.Write(local_data_path, offset, raw_data, data_size);
knowhere::Config cfg;
knowhere::DiskANNBuildConfig::Set(cfg, build_config);
index_->BuildAll(nullptr, cfg);
knowhere::DataSet* ds_ptr = nullptr;
index_.Build(*ds_ptr, build_config);
local_chunk_manager.RemoveDir(storage::GetSegmentRawDataPathPrefix(segment_id));
// TODO ::
@ -101,31 +118,41 @@ std::unique_ptr<SearchResult>
VectorDiskAnnIndex<T>::Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) {
AssertInfo(GetMetricType() == search_info.metric_type_,
"Metric type of field index isn't the same with search info");
auto num_queries = milvus::GetDatasetRows(dataset);
auto num_queries = dataset->GetRows();
auto topk = search_info.topk_;
knowhere::DiskANNQueryConfig query_config;
query_config.k = topk;
knowhere::Json search_config = search_info.search_params_;
// set search list
search_config[knowhere::meta::TOPK] = topk;
search_config[knowhere::meta::METRIC_TYPE] = GetMetricType();
// set search list size
auto search_list_size = GetValueFromConfig<uint32_t>(search_info.search_params_, DISK_ANN_QUERY_LIST);
AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_QUERY_LIST) + "is empty");
query_config.search_list_size = search_list_size.value();
AssertInfo(query_config.search_list_size > topk, "search_list should be greater than topk");
AssertInfo(query_config.search_list_size <= std::max(uint32_t(topk * 10), uint32_t(kSearchListMaxValue1)) &&
query_config.search_list_size <= uint32_t(kSearchListMaxValue2),
AssertInfo(search_list_size.value() > topk, "search_list should be greater than topk");
AssertInfo(search_list_size.value() <= std::max(uint32_t(topk * 10), uint32_t(kSearchListMaxValue1)) &&
search_list_size.value() <= uint32_t(kSearchListMaxValue2),
"search_list should be less than max(topk*10, 200) and less than 65535");
search_config[DISK_ANN_SEARCH_LIST_SIZE] = search_list_size.value();
// set beamwidth
query_config.beamwidth = search_beamwidth_;
search_config[DISK_ANN_QUERY_BEAMWIDTH] = int(search_beamwidth_);
knowhere::Config cfg;
knowhere::DiskANNQueryConfig::Set(cfg, query_config);
// set index prefix, will be removed later
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
search_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
auto final_result = index_->Query(dataset, cfg, bitset);
auto ids = milvus::GetDatasetIDs(final_result);
float* distances = (float*)milvus::GetDatasetDistance(final_result);
// set json reset field, will be removed later
search_config[DISK_ANN_PQ_CODE_BUDGET] = 0.0;
auto final = index_.Search(*dataset, search_config, bitset);
if (!final.has_value()) {
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to search");
}
auto ids = final.value()->GetIds();
float* distances = const_cast<float*>(final.value()->GetDistance());
final.value()->SetIsOwner(true);
auto round_decimal = search_info.round_decimal_;
auto total_num = num_queries * topk;
@ -157,100 +184,31 @@ VectorDiskAnnIndex<T>::CleanLocalData() {
}
template <typename T>
knowhere::DiskANNBuildConfig
VectorDiskAnnIndex<T>::parse_build_config(const Config& config) {
Config build_config = config;
parse_config(build_config);
inline knowhere::Json
VectorDiskAnnIndex<T>::update_load_json(const Config& config) {
knowhere::Json load_config;
load_config.update(config);
// set disk ann build config
knowhere::DiskANNBuildConfig build_disk_ann_config;
// set data path
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
load_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
// set max degree
auto max_degree = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_MAX_DEGREE);
AssertInfo(max_degree.has_value(), "param " + std::string(DISK_ANN_MAX_DEGREE) + "is empty");
build_disk_ann_config.max_degree = max_degree.value();
// set base info
load_config[DISK_ANN_PREPARE_WARM_UP] = false;
load_config[DISK_ANN_PREPARE_USE_BFS_CACHE] = false;
// set build list
auto search_list_size = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_SEARCH_LIST_SIZE);
AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_SEARCH_LIST_SIZE) + "is empty");
build_disk_ann_config.search_list_size = search_list_size.value();
// set search dram budget
auto search_dram_budget_gb = GetValueFromConfig<float>(build_config, DISK_ANN_PQ_CODE_BUDGET);
AssertInfo(search_dram_budget_gb.has_value(), "param " + std::string(DISK_ANN_PQ_CODE_BUDGET) + "is empty");
build_disk_ann_config.pq_code_budget_gb = search_dram_budget_gb.value();
// set build dram budget
auto build_dram_budget_gb = GetValueFromConfig<float>(build_config, DISK_ANN_BUILD_DRAM_BUDGET);
AssertInfo(build_dram_budget_gb.has_value(), "param " + std::string(DISK_ANN_BUILD_DRAM_BUDGET) + "is empty");
build_disk_ann_config.build_dram_budget_gb = build_dram_budget_gb.value();
// set num build thread
auto num_threads = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_BUILD_THREAD_NUM);
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
build_disk_ann_config.num_threads = num_threads.value();
// set pq bytes
auto pq_disk_bytes = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_PQ_DIMS);
AssertInfo(pq_disk_bytes.has_value(), "param " + std::string(DISK_ANN_PQ_DIMS) + "is empty");
build_disk_ann_config.disk_pq_dims = pq_disk_bytes.value();
return build_disk_ann_config;
}
template <typename T>
knowhere::DiskANNPrepareConfig
VectorDiskAnnIndex<T>::parse_prepare_config(const Config& config) {
Config prepare_config = config;
parse_config(prepare_config);
knowhere::DiskANNPrepareConfig prepare_disk_ann_config;
prepare_disk_ann_config.warm_up = false;
prepare_disk_ann_config.use_bfs_cache = false;
// set prepare thread num
auto num_threads = GetValueFromConfig<uint32_t>(prepare_config, DISK_ANN_LOAD_THREAD_NUM);
// set threads number
auto num_threads = GetValueFromConfig<std::string>(load_config, DISK_ANN_LOAD_THREAD_NUM);
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_LOAD_THREAD_NUM) + "is empty");
prepare_disk_ann_config.num_threads = num_threads.value();
// get search_cache_budget_gb
auto search_cache_budget_gb = GetValueFromConfig<float>(prepare_config, DISK_ANN_SEARCH_CACHE_BUDGET);
AssertInfo(search_cache_budget_gb.has_value(), "param " + std::string(DISK_ANN_SEARCH_CACHE_BUDGET) + "is empty");
prepare_disk_ann_config.search_cache_budget_gb = search_cache_budget_gb.value();
load_config[DISK_ANN_THREADS_NUM] = std::atoi(num_threads.value().c_str());
// update search_beamwidth
auto beamwidth = GetValueFromConfig<uint32_t>(prepare_config, DISK_ANN_QUERY_BEAMWIDTH);
auto beamwidth = GetValueFromConfig<std::string>(load_config, DISK_ANN_QUERY_BEAMWIDTH);
if (beamwidth.has_value()) {
search_beamwidth_ = beamwidth.value();
search_beamwidth_ = std::atoi(beamwidth.value().c_str());
}
return prepare_disk_ann_config;
}
template <typename T>
void
VectorDiskAnnIndex<T>::parse_config(Config& config) {
auto stoi_closure = [](const std::string& s) -> uint32_t { return std::stoi(s); };
auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
/***************************** meta *******************************/
CheckParameter<int>(config, knowhere::meta::DIM, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::meta::TOPK, stoi_closure, std::nullopt);
/************************** DiskAnn build Params ************************/
CheckParameter<int>(config, DISK_ANN_MAX_DEGREE, stoi_closure, std::nullopt);
CheckParameter<int>(config, DISK_ANN_SEARCH_LIST_SIZE, stoi_closure, std::nullopt);
CheckParameter<float>(config, DISK_ANN_PQ_CODE_BUDGET, stof_closure, std::nullopt);
CheckParameter<float>(config, DISK_ANN_BUILD_DRAM_BUDGET, stof_closure, std::nullopt);
CheckParameter<int>(config, DISK_ANN_BUILD_THREAD_NUM, stoi_closure, std::optional{8});
CheckParameter<int>(config, DISK_ANN_PQ_DIMS, stoi_closure, std::optional{0});
/************************** DiskAnn prepare Params ************************/
CheckParameter<int>(config, DISK_ANN_LOAD_THREAD_NUM, stoi_closure, std::optional{8});
CheckParameter<float>(config, DISK_ANN_SEARCH_CACHE_BUDGET, stof_closure, std::nullopt);
/************************** DiskAnn query Params ************************/
CheckParameter<int>(config, DISK_ANN_QUERY_BEAMWIDTH, stoi_closure, std::nullopt);
return load_config;
}
template class VectorDiskAnnIndex<float>;

View File

@ -20,8 +20,6 @@
#include "index/VectorIndex.h"
#include "storage/DiskFileManagerImpl.h"
#include "knowhere/index/vector_index/IndexDiskANN.h"
#include "knowhere/index/vector_index/IndexDiskANNConfig.h"
namespace milvus::index {
@ -47,7 +45,7 @@ class VectorDiskAnnIndex : public VectorIndex {
int64_t
Count() override {
return index_->Count();
return index_.Count();
}
void
@ -63,17 +61,11 @@ class VectorDiskAnnIndex : public VectorIndex {
CleanLocalData() override;
private:
knowhere::DiskANNBuildConfig
parse_build_config(const Config& config);
knowhere::DiskANNPrepareConfig
parse_prepare_config(const Config& config);
void
parse_config(Config& config);
knowhere::Json
update_load_json(const Config& config);
private:
std::unique_ptr<knowhere::IndexDiskANN<T>> index_;
knowhere::Index<knowhere::IndexNode> index_;
std::shared_ptr<storage::DiskFileManagerImpl> file_manager_;
uint32_t search_beamwidth_ = 8;
};

View File

@ -21,7 +21,7 @@
#include <string>
#include <boost/dynamic_bitset.hpp>
#include "knowhere/index/VecIndex.h"
#include "knowhere/factory.h"
#include "index/Index.h"
#include "common/Types.h"
#include "common/BitsetView.h"
@ -81,5 +81,4 @@ class VectorIndex : public IndexBase {
int64_t dim_;
};
using VectorIndexPtr = std::unique_ptr<VectorIndex>;
} // namespace milvus::index

View File

@ -20,11 +20,9 @@
#include "exceptions/EasyAssert.h"
#include "config/ConfigKnowhere.h"
#include "knowhere/index/VecIndexFactory.h"
#include "knowhere/common/Timer.h"
#include "knowhere/factory.h"
#include "knowhere/comp/Timer.h"
#include "common/BitsetView.h"
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "common/Slice.h"
namespace milvus::index {
@ -33,16 +31,15 @@ VectorMemIndex::VectorMemIndex(const IndexType& index_type, const MetricType& me
: VectorIndex(index_type, index_mode, metric_type) {
AssertInfo(!is_unsupported(index_type, metric_type), index_type + " doesn't support metric: " + metric_type);
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(GetIndexType(), index_mode);
AssertInfo(index_ != nullptr, "[VecIndexCreator]Index is null after create index");
index_ = knowhere::IndexFactory::Instance().Create(GetIndexType());
}
BinarySet
VectorMemIndex::Serialize(const Config& config) {
knowhere::Config serialize_config = config;
parse_config(serialize_config);
auto ret = index_->Serialize(serialize_config);
knowhere::BinarySet ret;
auto stat = index_.Serialize(ret);
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to serialize index");
milvus::Disassemble(ret);
return ret;
@ -51,30 +48,25 @@ VectorMemIndex::Serialize(const Config& config) {
// Restores the in-memory index from `binary_set` and records its dimension.
// `config` is not read in the lines shown here.
//
// NOTE(review): pre-change (index_->Load / index_->Dim) and post-change
// (index_.Deserialize / index_.Dim) statements are both listed; presumably only
// the latter pair remains in the real file.
void
VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) {
// const is cast away, so Assemble presumably rewrites the caller's set in place.
milvus::Assemble(const_cast<BinarySet&>(binary_set));
index_->Load(binary_set);
SetDim(index_->Dim());
auto stat = index_.Deserialize(binary_set);
// Any status other than success aborts through PanicCodeInfo.
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to Deserialize index");
SetDim(index_.Dim());
}
// Builds the in-memory index from `dataset`, using `config` as build parameters,
// and records the resulting dimension via SetDim().
//
// NOTE(review): this listing interleaves removed and added statements without
// +/- markers. The knowhere::Config declaration, parse_config, SetMetaRows, the
// IVFPQ nbits default, the AdapterMgr check, BuildAll and index_->Dim() look
// like the old path; the knowhere::Json declaration, dataset->GetDim(),
// index_.Build with its status check and index_.Dim() look like the new path.
// Confirm against the real file.
void
VectorMemIndex::BuildWithDataset(const DatasetPtr& dataset, const Config& config) {
knowhere::Config index_config;
knowhere::Json index_config;
index_config.update(config);
parse_config(index_config);
SetDim(knowhere::GetDatasetDim(dataset));
knowhere::SetMetaRows(index_config, knowhere::GetDatasetRows(dataset));
// Sets nbits to 8 for IVF_PQ when the caller's config has no NBITS entry.
if (GetIndexType() == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
if (!config.contains(knowhere::indexparam::NBITS)) {
knowhere::SetIndexParamNbits(index_config, 8);
}
}
auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(GetIndexType());
AssertInfo(conf_adapter->CheckTrain(index_config, GetIndexMode()), "something wrong in index parameters!");
SetDim(dataset->GetDim());
// Records elapsed build time; reported by ElapseFromBegin below.
knowhere::TimeRecorder rc("BuildWithoutIds", 1);
index_->BuildAll(dataset, index_config);
auto stat = index_.Build(*dataset, index_config);
// Any status other than success aborts with BuildIndexError.
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::BuildIndexError, "failed to build index");
rc.ElapseFromBegin("Done");
SetDim(index_->Dim());
SetDim(index_.Dim());
}
std::unique_ptr<SearchResult>
@ -82,26 +74,21 @@ VectorMemIndex::Query(const DatasetPtr dataset, const SearchInfo& search_info, c
// AssertInfo(GetMetricType() == search_info.metric_type_,
// "Metric type of field index isn't the same with search info");
auto num_queries = knowhere::GetDatasetRows(dataset);
Config search_conf = search_info.search_params_;
auto num_queries = dataset->GetRows();
knowhere::Json search_conf = search_info.search_params_;
auto topk = search_info.topk_;
// TODO :: check dim of search data
auto final = [&] {
knowhere::SetMetaTopk(search_conf, topk);
knowhere::SetMetaMetricType(search_conf, GetMetricType());
search_conf[knowhere::meta::TOPK] = topk;
search_conf[knowhere::meta::METRIC_TYPE] = GetMetricType();
auto index_type = GetIndexType();
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
try {
adapter->CheckSearch(search_conf, index_type, GetIndexMode());
} catch (std::exception& e) {
AssertInfo(false, e.what());
}
return index_->Query(dataset, search_conf, bitset);
return index_.Search(*dataset, search_conf, bitset);
}();
auto ids = knowhere::GetDatasetIDs(final);
float* distances = (float*)knowhere::GetDatasetDistance(final);
if (!final.has_value())
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to search");
auto ids = final.value()->GetIds();
float* distances = const_cast<float*>(final.value()->GetDistance());
final.value()->SetIsOwner(true);
auto round_decimal = search_info.round_decimal_;
auto total_num = num_queries * topk;
@ -123,31 +110,4 @@ VectorMemIndex::Query(const DatasetPtr dataset, const SearchInfo& search_info, c
return result;
}
// Runs CheckParameter<int> over a fixed list of knowhere parameter keys in
// `config`, passing a std::stoi-based converter for each one.
// std::nullopt is given as the last argument for every key; presumably that
// means "no default value" -- confirm against CheckParameter's definition.
//
// NOTE(review): in this listing the header declaration of parse_config and its
// call sites appear among the removed lines, so this function is presumably
// deleted by the change -- confirm against the real file.
void
VectorMemIndex::parse_config(Config& config) {
// Converter handed to CheckParameter for every key below.
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
/***************************** meta *******************************/
CheckParameter<int>(config, knowhere::meta::DIM, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::meta::TOPK, stoi_closure, std::nullopt);
/***************************** IVF Params *******************************/
CheckParameter<int>(config, knowhere::indexparam::NPROBE, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::indexparam::NLIST, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::indexparam::M, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::indexparam::NBITS, stoi_closure, std::nullopt);
/************************** PQ Params *****************************/
CheckParameter<int>(config, knowhere::indexparam::PQ_M, stoi_closure, std::nullopt);
/************************** HNSW Params *****************************/
CheckParameter<int>(config, knowhere::indexparam::EFCONSTRUCTION, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::indexparam::HNSW_M, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::indexparam::EF, stoi_closure, std::nullopt);
/************************** Annoy Params *****************************/
CheckParameter<int>(config, knowhere::indexparam::N_TREES, stoi_closure, std::nullopt);
CheckParameter<int>(config, knowhere::indexparam::SEARCH_K, stoi_closure, std::nullopt);
}
} // namespace milvus::index

View File

@ -21,7 +21,7 @@
#include <string>
#include <vector>
#include <boost/dynamic_bitset.hpp>
#include "knowhere/factory.h"
#include "index/VectorIndex.h"
namespace milvus::index {
@ -41,19 +41,15 @@ class VectorMemIndex : public VectorIndex {
int64_t
Count() override {
return index_->Count();
return index_.Count();
}
std::unique_ptr<SearchResult>
Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) override;
protected:
void
parse_config(Config& config);
protected:
Config config_;
knowhere::VecIndexPtr index_ = nullptr;
knowhere::Index<knowhere::IndexNode> index_;
};
using VectorMemIndexPtr = std::unique_ptr<VectorMemIndex>;

View File

@ -19,19 +19,19 @@
#include "index/VectorMemNMIndex.h"
#include "log/Log.h"
#include "knowhere/index/VecIndexFactory.h"
#include "knowhere/common/Timer.h"
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/factory.h"
#include "knowhere/comp/Timer.h"
#define RAW_DATA "RAW_DATA"
namespace milvus::index {
BinarySet
VectorMemNMIndex::Serialize(const Config& config) {
knowhere::Config serialize_config = config;
parse_config(serialize_config);
knowhere::BinarySet ret;
auto stat = index_.Serialize(ret);
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to serialize index");
auto ret = index_->Serialize(serialize_config);
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto raw_data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
ret.Append(RAW_DATA, raw_data, raw_data_.size());
@ -67,11 +67,11 @@ VectorMemNMIndex::Query(const DatasetPtr dataset, const SearchInfo& search_info,
}
void
VectorMemNMIndex::store_raw_data(const knowhere::DatasetPtr& dataset) {
VectorMemNMIndex::store_raw_data(const DatasetPtr& dataset) {
auto index_type = GetIndexType();
auto tensor = knowhere::GetDatasetTensor(dataset);
auto row_num = knowhere::GetDatasetRows(dataset);
auto dim = knowhere::GetDatasetDim(dataset);
auto tensor = dataset->GetTensor();
auto row_num = dataset->GetRows();
auto dim = dataset->GetDim();
int64_t data_size;
if (is_in_bin_list(index_type)) {
data_size = dim / 8 * row_num;
@ -84,13 +84,19 @@ VectorMemNMIndex::store_raw_data(const knowhere::DatasetPtr& dataset) {
// Re-attaches the separately kept raw vectors to the index: serializes the
// index into a BinarySet, appends raw_data_ under the RAW_DATA key, and loads
// the combined set back into the index.
//
// NOTE(review): pre-change (index_->Serialize(Config{}) / index_->Load) and
// post-change (index_.Serialize / index_.Deserialize with status checks)
// statements are both listed; presumably only the latter remain.
void
VectorMemNMIndex::LoadRawData() {
auto bs = index_->Serialize(Config{});
knowhere::BinarySet bs;
auto stat = index_.Serialize(bs);
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to Serialize index");
auto bptr = std::make_shared<knowhere::Binary>();
auto deleter = [&](uint8_t*) {}; // no-op deleter: the shared_ptr must not free the buffer behind raw_data_
bptr->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
bptr->size = raw_data_.size();
bs.Append(RAW_DATA, bptr);
index_->Load(bs);
stat = index_.Deserialize(bs);
if (stat != knowhere::Status::success)
PanicCodeInfo(ErrorCodeEnum::UnexpectedError, "failed to Deserialize index");
}
} // namespace milvus::index

View File

@ -47,7 +47,7 @@ class VectorMemNMIndex : public VectorMemIndex {
private:
void
store_raw_data(const knowhere::DatasetPtr& dataset);
store_raw_data(const DatasetPtr& dataset);
void
LoadRawData();

View File

@ -47,8 +47,8 @@ ScalarIndexCreator::ScalarIndexCreator(DataType dtype, const char* type_params,
void
ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
auto size = knowhere::GetDatasetRows(dataset);
auto data = knowhere::GetDatasetTensor(dataset);
auto size = dataset->GetRows();
auto data = dataset->GetTensor();
index_->BuildWithRawData(size, data);
}

View File

@ -16,7 +16,6 @@
#endif
#include "exceptions/EasyAssert.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "indexbuilder/VecIndexCreator.h"
#include "indexbuilder/index_c.h"
#include "indexbuilder/IndexFactory.h"
@ -87,7 +86,7 @@ BuildFloatVecIndex(CIndex index, int64_t float_value_num, const float* vectors)
auto cIndex = dynamic_cast<milvus::indexbuilder::VecIndexCreator*>(real_index);
auto dim = cIndex->dim();
auto row_nums = float_value_num / dim;
auto ds = knowhere::GenDataset(row_nums, dim, vectors);
auto ds = knowhere::GenDataSet(row_nums, dim, vectors);
cIndex->Build(ds);
status.error_code = Success;
status.error_msg = "";
@ -107,7 +106,7 @@ BuildBinaryVecIndex(CIndex index, int64_t data_size, const uint8_t* vectors) {
auto cIndex = dynamic_cast<milvus::indexbuilder::VecIndexCreator*>(real_index);
auto dim = cIndex->dim();
auto row_nums = (data_size * 8) / dim;
auto ds = knowhere::GenDataset(row_nums, dim, vectors);
auto ds = knowhere::GenDataSet(row_nums, dim, vectors);
cIndex->Build(ds);
status.error_code = Success;
status.error_msg = "";
@ -131,7 +130,7 @@ BuildScalarIndex(CIndex c_index, int64_t size, const void* field_data) {
auto real_index = reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(c_index);
const int64_t dim = 8; // not important here
auto dataset = knowhere::GenDataset(size, dim, field_data);
auto dataset = knowhere::GenDataSet(size, dim, field_data);
real_index->Build(dataset);
status.error_code = Success;

View File

@ -19,7 +19,6 @@
#include "common/QueryInfo.h"
#include "query/Expr.h"
#include "knowhere/common/Config.h"
namespace milvus::query {

View File

@ -14,9 +14,8 @@
#include "SearchBruteForce.h"
#include "SubSearchResult.h"
#include "knowhere/archive/BruteForce.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/comp/brute_force.h"
#include "knowhere/comp/index_param.h"
namespace milvus::query {
void
@ -42,20 +41,23 @@ BruteForceSearch(const dataset::SearchDataset& dataset,
auto dim = dataset.dim;
auto topk = dataset.topk;
auto base_dataset = knowhere::GenDataset(chunk_rows, dim, chunk_data_raw);
auto query_dataset = knowhere::GenDataset(nq, dim, dataset.query_data);
auto config = knowhere::Config{
auto base_dataset = knowhere::GenDataSet(chunk_rows, dim, chunk_data_raw);
auto query_dataset = knowhere::GenDataSet(nq, dim, dataset.query_data);
auto config = knowhere::Json{
{knowhere::meta::METRIC_TYPE, dataset.metric_type},
{knowhere::meta::DIM, dim},
{knowhere::meta::TOPK, topk},
};
auto result = knowhere::BruteForce::Search(base_dataset, query_dataset, config, bitset);
sub_result.mutable_seg_offsets().resize(nq * topk);
sub_result.mutable_distances().resize(nq * topk);
std::copy_n(knowhere::GetDatasetIDs(result), nq * topk, sub_result.get_seg_offsets());
std::copy_n(knowhere::GetDatasetDistance(result), nq * topk, sub_result.get_distances());
auto stat =
knowhere::BruteForce::SearchWithBuf(base_dataset, query_dataset, sub_result.mutable_seg_offsets().data(),
sub_result.mutable_distances().data(), config, bitset);
if (stat != knowhere::Status::success) {
throw std::invalid_argument("invalid metric type");
}
} catch (std::exception& e) {
PanicInfo(e.what());
}

View File

@ -10,7 +10,6 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "SearchOnIndex.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
namespace milvus::query {
SubSearchResult
@ -23,7 +22,7 @@ SearchOnIndex(const dataset::SearchDataset& search_dataset,
auto dim = search_dataset.dim;
auto metric_type = search_dataset.metric_type;
auto round_decimal = search_dataset.round_decimal;
auto dataset = knowhere::GenDataset(num_queries, dim, search_dataset.query_data);
auto dataset = knowhere::GenDataSet(num_queries, dim, search_dataset.query_data);
// NOTE: VecIndex Query API forget to add const qualifier
// NOTE: use const_cast as a workaround

View File

@ -12,7 +12,6 @@
#pragma once
#include "common/BitsetView.h"
#include "knowhere/index/VecIndex.h"
#include "query/SubSearchResult.h"
#include "query/helper.h"
#include "common/QueryInfo.h"

View File

@ -40,11 +40,11 @@ SearchOnSealedIndex(const Schema& schema,
"Metric type of field index isn't the same with search info");
auto final = [&] {
auto ds = knowhere::GenDataset(num_queries, dim, query_data);
auto ds = knowhere::GenDataSet(num_queries, dim, query_data);
auto conf = search_info.search_params_;
knowhere::SetMetaTopk(conf, search_info.topk_);
knowhere::SetMetaMetricType(conf, field_indexing->metric_type_);
conf[knowhere::meta::TOPK] = search_info.topk_;
conf[knowhere::meta::METRIC_TYPE] = field_indexing->metric_type_;
auto vec_index = dynamic_cast<index::VectorIndex*>(field_indexing->indexing_.get());
auto index_type = vec_index->GetIndexType();
return vec_index->Query(ds, search_info, bitset);

View File

@ -11,8 +11,6 @@
#include "ValidationUtil.h"
#include "config/ServerConfig.h"
#include "knowhere/index/vector_index/ConfAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "utils/Log.h"
#include "utils/StringHelpFunctions.h"

View File

@ -273,7 +273,7 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> Bi
}
case OpType::PrefixMatch: {
auto index_func = [val](Index* index) {
auto dataset = std::make_unique<knowhere::Dataset>();
auto dataset = std::make_unique<Dataset>();
dataset->Set(milvus::index::OPERATOR_TYPE, OpType::PrefixMatch);
dataset->Set(milvus::index::PREFIX_VALUE, val);
return index->Query(std::move(dataset));

View File

@ -9,8 +9,6 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/ConfAdapter.h"
#include "query/generated/VerifyPlanNodeVisitor.h"
namespace milvus::query {
@ -65,43 +63,15 @@ InferBinaryIndexType(const Json& search_params) {
}
void
VerifyPlanNodeVisitor::visit(FloatVectorANNS& node) {
auto& search_params = node.search_info_.search_params_;
auto inferred_type = InferIndexType(search_params);
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(inferred_type);
auto index_mode = knowhere::IndexMode::MODE_CPU;
// mock the api, topk will be passed from placeholder
auto params_copy = search_params;
knowhere::SetMetaTopk(params_copy, 10);
// NOTE: the second parameter is not checked in knowhere, may be redundant
auto passed = adapter->CheckSearch(params_copy, inferred_type, index_mode);
if (!passed) {
PanicCodeInfo(ErrorCodeEnum::IllegalArgument, "invalid search params");
}
VerifyPlanNodeVisitor::visit(FloatVectorANNS&) {
}
void
VerifyPlanNodeVisitor::visit(BinaryVectorANNS& node) {
auto& search_params = node.search_info_.search_params_;
auto inferred_type = InferBinaryIndexType(search_params);
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(inferred_type);
auto index_mode = knowhere::IndexMode::MODE_CPU;
// mock the api, topk will be passed from placeholder
auto params_copy = search_params;
knowhere::SetMetaTopk(params_copy, 10);
// NOTE: the second parameter is not checked in knowhere, may be redundant
auto passed = adapter->CheckSearch(params_copy, inferred_type, index_mode);
if (!passed) {
PanicCodeInfo(ErrorCodeEnum::IllegalArgument, "invalid search params");
}
VerifyPlanNodeVisitor::visit(BinaryVectorANNS&) {
}
void
VerifyPlanNodeVisitor::visit(RetrievePlanNode& node) {
VerifyPlanNodeVisitor::visit(RetrievePlanNode&) {
}
} // namespace milvus::query

View File

@ -35,13 +35,13 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec
const auto& chunk = source->get_chunk(chunk_id);
auto indexing = std::make_unique<index::VectorMemNMIndex>(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2, IndexMode::MODE_CPU);
auto dataset = knowhere::GenDataset(source->get_size_per_chunk(), dim, chunk.data());
auto dataset = knowhere::GenDataSet(source->get_size_per_chunk(), dim, chunk.data());
indexing->BuildWithDataset(dataset, conf);
data_[chunk_id] = std::move(indexing);
}
}
knowhere::Config
knowhere::Json
VectorFieldIndexing::get_build_params() const {
// TODO
auto type_opt = field_meta_.get_metric_type();
@ -57,7 +57,7 @@ VectorFieldIndexing::get_build_params() const {
return base_params;
}
knowhere::Config
knowhere::Json
VectorFieldIndexing::get_search_params(int top_K) const {
// TODO
auto type_opt = field_meta_.get_metric_type();
@ -67,8 +67,8 @@ VectorFieldIndexing::get_search_params(int top_K) const {
auto base_params = config.search_params;
AssertInfo(base_params.count("nprobe"), "Can't get nprobe from base params");
knowhere::SetMetaTopk(base_params, top_K);
knowhere::SetMetaMetricType(base_params, metric_type);
base_params[knowhere::meta::TOPK] = top_K;
base_params[knowhere::meta::METRIC_TYPE] = metric_type;
return base_params;
}

View File

@ -95,10 +95,10 @@ class VectorFieldIndexing : public FieldIndexing {
return data_.at(chunk_id).get();
}
knowhere::Config
knowhere::Json
get_build_params() const;
knowhere::Config
knowhere::Json
get_search_params(int top_k) const;
private:

View File

@ -20,7 +20,7 @@
#include <optional>
#include <memory>
#include "knowhere/common/FileManager.h"
#include "knowhere/file_manager.h"
namespace milvus::storage {

View File

@ -39,7 +39,7 @@ struct Payload {
class PayloadOutputStream : public arrow::io::OutputStream {
public:
PayloadOutputStream();
~PayloadOutputStream();
~PayloadOutputStream() noexcept;
arrow::Status
Close() override;
@ -64,7 +64,7 @@ class PayloadOutputStream : public arrow::io::OutputStream {
class PayloadInputStream : public arrow::io::RandomAccessFile {
public:
PayloadInputStream(const uint8_t* data, int64_t size);
~PayloadInputStream();
~PayloadInputStream() noexcept;
arrow::Status
Close() override;

View File

@ -22,7 +22,7 @@
#include "storage/PayloadStream.h"
#include "storage/FileManager.h"
#include "knowhere/index/IndexType.h"
#include "knowhere/comp/index_param.h"
namespace milvus::storage {

View File

@ -11,34 +11,22 @@
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
set( KNOWHERE_SOURCE_VER v1.3.8 )
set( KNOWHERE_SOURCE_MD5 "ab77c01f88f0eead84c979fe2356ce7b")
if ( DEFINED ENV{MILVUS_KNOWHERE_URL} )
set( KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}" )
else ()
set( KNOWHERE_SOURCE_URL
"https://github.com/milvus-io/knowhere/archive/refs/tags/${KNOWHERE_SOURCE_VER}.tar.gz" )
endif ()
set( KNOWHERE_VERSION v2.0.0 )
message(STATUS "Building knowhere-${KNOWHERE_SOURCE_VER} from source")
message(STATUS ${CMAKE_BUILD_TYPE})
if ( BUILD_DISK_ANN STREQUAL "ON" )
set(KNOWHERE_WITH_DISKANN ON CACHE BOOL "" FORCE )
set(WITH_DISKANN ON CACHE BOOL "" FORCE )
else ()
set(KNOWHERE_WITH_DISKANN OFF CACHE BOOL "" FORCE )
set(WITH_DISKANN OFF CACHE BOOL "" FORCE )
endif ()
# BUILD_UNIT_TEST OFF will only affect knowhere
set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} )
set( BUILD_UNIT_TEST OFF )
FetchContent_Declare(
knowhere
# GIT_REPOSITORY "https://github.com/milvus-io/knowhere.git"
# GIT_TAG main
URL ${KNOWHERE_SOURCE_URL}
URL_MD5 ${KNOWHERE_SOURCE_MD5}
GIT_REPOSITORY "https://github.com/milvus-io/knowhere.git"
GIT_TAG ${KNOWHERE_VERSION}
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-src
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-build
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )

View File

@ -23,7 +23,6 @@ set(MILVUS_TEST_FILES
test_bool_index.cpp
test_common.cpp
test_concurrent_vector.cpp
test_conf_adapter_mgr.cpp
test_c_api.cpp
test_expr.cpp
test_growing.cpp

View File

@ -13,8 +13,7 @@
#include <tuple>
#include <map>
#include <google/protobuf/text_format.h>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
#include <knowhere/comp/index_param.h>
#include "pb/index_cgo_msg.pb.h"
#include "indexbuilder/VecIndexCreator.h"
@ -60,7 +59,7 @@ IndexBuilder_build(benchmark::State& state) {
auto is_binary = state.range(2);
auto dataset = GenDataset(NB, metric_type, is_binary);
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
for (auto _ : state) {
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
@ -90,7 +89,7 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
auto is_binary = state.range(2);
auto dataset = GenDataset(NB, metric_type, is_binary);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
for (auto _ : state) {
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(

View File

@ -13,7 +13,6 @@
#include <random>
#include "common/Utils.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "query/SearchBruteForce.h"
#include "test_utils/Distance.h"

View File

@ -10,7 +10,6 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include <pb/schema.pb.h>
#include <index/BoolIndex.h>
#include "test_utils/indexbuilder_test_utils.h"

View File

@ -18,11 +18,9 @@
#include <string>
#include <unordered_set>
#include "knowhere/comp/index_param.h"
#include "common/LoadInfo.h"
#include "knowhere/index/VecIndexFactory.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "pb/plan.pb.h"
#include "query/ExprImpl.h"
#include "segcore/Collection.h"
@ -202,7 +200,7 @@ generate_index(
CreateIndexInfo create_index_info{field_type, index_type, metric_type};
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto database = knowhere::GenDataset(N, dim, raw_data);
auto database = knowhere::GenDataSet(N, dim, raw_data);
auto build_config = generate_build_conf(index_type, metric_type);
indexing->BuildWithDataset(database, build_config);
@ -1400,22 +1398,23 @@ TEST(CApiTest, LoadIndexInfo) {
auto N = 1024 * 10;
auto [raw_data, timestamps, uids] = generate_data(N);
auto indexing = std::make_shared<knowhere::IVFPQ>();
auto conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4},
{knowhere::indexparam::M, 4},
{knowhere::indexparam::NBITS, 8},
{knowhere::meta::DEVICE_ID, 0}};
auto indexing = knowhere::IndexFactory::Instance().Create(knowhere::IndexEnum::INDEX_FAISS_IVFPQ);
auto conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4},
{knowhere::indexparam::M, 4},
{knowhere::indexparam::NBITS, 8},
{knowhere::meta::DEVICE_ID, 0}};
auto database = knowhere::GenDataset(N, DIM, raw_data.data());
indexing->Train(database, conf);
indexing->AddWithoutIds(database, conf);
EXPECT_EQ(indexing->Count(), N);
EXPECT_EQ(indexing->Dim(), DIM);
auto binary_set = indexing->Serialize(conf);
auto database = knowhere::GenDataSet(N, DIM, raw_data.data());
indexing.Train(*database, conf);
indexing.Add(*database, conf);
EXPECT_EQ(indexing.Count(), N);
EXPECT_EQ(indexing.Dim(), DIM);
knowhere::BinarySet binary_set;
indexing.Serialize(binary_set);
CBinarySet c_binary_set = (CBinarySet)&binary_set;
void* c_load_index_info = nullptr;
@ -1447,25 +1446,26 @@ TEST(CApiTest, LoadIndex_Search) {
auto N = 1024 * 1024;
auto num_query = 100;
auto [raw_data, timestamps, uids] = generate_data(N);
auto indexing = std::make_shared<knowhere::IVFPQ>();
auto conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4},
{knowhere::indexparam::M, 4},
{knowhere::indexparam::NBITS, 8},
{knowhere::meta::DEVICE_ID, 0}};
auto indexing = knowhere::IndexFactory::Instance().Create("IVFPQ");
auto conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4},
{knowhere::indexparam::M, 4},
{knowhere::indexparam::NBITS, 8},
{knowhere::meta::DEVICE_ID, 0}};
auto database = knowhere::GenDataset(N, DIM, raw_data.data());
indexing->Train(database, conf);
indexing->AddWithoutIds(database, conf);
auto database = knowhere::GenDataSet(N, DIM, raw_data.data());
indexing.Train(*database, conf);
indexing.Add(*database, conf);
EXPECT_EQ(indexing->Count(), N);
EXPECT_EQ(indexing->Dim(), DIM);
EXPECT_EQ(indexing.Count(), N);
EXPECT_EQ(indexing.Dim(), DIM);
// serializ index to binarySet
auto binary_set = indexing->Serialize(conf);
knowhere::BinarySet binary_set;
indexing.Serialize(binary_set);
// fill loadIndexInfo
milvus::segcore::LoadIndexInfo load_index_info;
@ -1477,12 +1477,12 @@ TEST(CApiTest, LoadIndex_Search) {
load_index_info.index->Load(binary_set);
// search
auto query_dataset = knowhere::GenDataset(num_query, DIM, raw_data.data() + DIM * 4200);
auto query_dataset = knowhere::GenDataSet(num_query, DIM, raw_data.data() + DIM * 4200);
auto result = indexing->Query(query_dataset, conf, nullptr);
auto result = indexing.Search(*query_dataset, conf, nullptr);
auto ids = knowhere::GetDatasetIDs(result);
auto dis = knowhere::GetDatasetDistance(result);
auto ids = (result.value()->GetIds());
auto dis = (result.value()->GetDistance());
// for (int i = 0; i < std::min(num_query * K, 100); ++i) {
// std::cout << ids[i] << "->" << dis[i] << std::endl;
//}
@ -1554,7 +1554,7 @@ TEST(CApiTest, Indexing_Without_Predicate) {
IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -1675,7 +1675,7 @@ TEST(CApiTest, Indexing_Expr_Without_Predicate) {
IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -1813,7 +1813,7 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) {
IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -1965,7 +1965,7 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -2101,7 +2101,7 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) {
IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -2246,7 +2246,7 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -2384,7 +2384,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -2534,7 +2534,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -2671,7 +2671,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -2832,7 +2832,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
@ -3027,7 +3027,7 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
SearchInfo search_info;
search_info.topk_ = TOPK;
@ -3338,7 +3338,7 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
ASSERT_EQ(status.error_code, Success);
// gen query dataset
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;

View File

@ -1,34 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <vector>
#include <thread>
#include <gtest/gtest.h>
#include <knowhere/index/vector_index/ConfAdapterMgr.h>
TEST(AdapterMgr, MultiThread) {
auto run_case = [&]() {
auto& ins = knowhere::AdapterMgr::GetInstance();
auto adapter = ins.GetAdapter(knowhere::IndexEnum::INDEX_HNSW);
ASSERT_TRUE(adapter != nullptr);
ASSERT_ANY_THROW(ins.GetAdapter("not supported now!"));
};
size_t num = 4;
std::vector<std::thread> threads;
for (auto i = 0; i < num; i++) {
threads.emplace_back(std::move(std::thread(run_case)));
}
for (auto i = 0; i < num; i++) {
threads[i].join();
}
}

View File

@ -12,9 +12,6 @@
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <tuple>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
#include <knowhere/index/vector_index/ConfAdapterMgr.h>
#include "pb/index_cgo_msg.pb.h"
#include "indexbuilder/index_c.h"
@ -131,7 +128,7 @@ TEST(BinaryVecIndex, All) {
TEST(CBoolIndexTest, All) {
schemapb::BoolArray half;
knowhere::DatasetPtr half_ds;
knowhere::DataSetPtr half_ds;
for (size_t i = 0; i < NB; i++) {
*(half.mutable_data()->Add()) = (i % 2) == 0;
@ -156,7 +153,7 @@ TEST(CBoolIndexTest, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status = BuildScalarIndex(index, knowhere::GetDatasetRows(half_ds), knowhere::GetDatasetTensor(half_ds));
status = BuildScalarIndex(index, half_ds->GetRows(), half_ds->GetTensor());
ASSERT_EQ(Success, status.error_code);
}
{
@ -183,7 +180,7 @@ TEST(CBoolIndexTest, All) {
{ DeleteBinarySet(binary_set); }
}
delete[](char*) knowhere::GetDatasetTensor(half_ds);
delete[](char*)(half_ds->GetTensor());
}
// TODO: more scalar type.
@ -262,7 +259,7 @@ TEST(CStringIndexTest, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status = BuildScalarIndex(index, knowhere::GetDatasetRows(str_ds), knowhere::GetDatasetTensor(str_ds));
status = BuildScalarIndex(index, (str_ds->GetRows()), (str_ds->GetTensor()));
ASSERT_EQ(Success, status.error_code);
}
{
@ -289,6 +286,6 @@ TEST(CStringIndexTest, All) {
{ DeleteBinarySet(binary_set); }
}
delete[](char*) knowhere::GetDatasetTensor(str_ds);
delete[](char*)(str_ds->GetTensor());
}
#endif

View File

@ -18,7 +18,6 @@
#include "indexbuilder/VecIndexCreator.h"
#include "common/QueryResult.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "knowhere/archive/KnowhereConfig.h"
using namespace milvus;
using namespace milvus::segcore;
@ -30,7 +29,6 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
protected:
void
SetUp() override {
knowhere::KnowhereConfig::SetStatisticsLevel(3);
storage_config_ = get_default_storage_config();
auto param = GetParam();
@ -66,12 +64,12 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
auto dataset = GenDataset(NB, metric_type, is_binary);
if (!is_binary) {
xb_data = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
xq_dataset = knowhere::GenDataset(NQ, DIM, xb_data.data() + DIM * query_offset);
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
} else {
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataset(NB, DIM, xb_bin_data.data());
xq_dataset = knowhere::GenDataset(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
}
}
@ -87,10 +85,10 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
milvus::Config search_conf;
bool is_binary;
CDataType vec_field_data_type;
knowhere::DatasetPtr xb_dataset;
knowhere::DataSetPtr xb_dataset;
std::vector<float> xb_data;
std::vector<uint8_t> xb_bin_data;
knowhere::DatasetPtr xq_dataset;
knowhere::DataSetPtr xq_dataset;
int64_t query_offset = 100;
int64_t NB = 10000;
StorageConfig storage_config_;
@ -114,15 +112,15 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
vec_field_data_type, type_params_str.c_str(), index_params_str.c_str(), storage_config_);
auto dataset = GenDataset(NB, metric_type, is_binary);
knowhere::DatasetPtr xb_dataset;
knowhere::DataSetPtr xb_dataset;
std::vector<uint8_t> bin_vecs;
std::vector<float> f_vecs;
if (is_binary) {
bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataset(NB, DIM, bin_vecs.data());
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
} else {
f_vecs = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataset(NB, DIM, f_vecs.data());
xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data());
}
ASSERT_NO_THROW(index->Build(xb_dataset));

View File

@ -16,11 +16,9 @@
#include <string>
#include <vector>
#include "faiss/utils/distances.h"
#include "query/SearchBruteForce.h"
#include "segcore/Reduce.h"
#include "index/IndexFactory.h"
#include "knowhere/archive/KnowhereConfig.h"
#include "common/QueryResult.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "test_utils/DataGen.h"
@ -98,6 +96,7 @@ merge_into(int64_t queries,
return Status::OK();
}
/*
TEST(Indexing, SmartBruteForce) {
int64_t N = 1000;
auto [raw_data, timestamps, uids] = generate_data<DIM>(N);
@ -139,7 +138,7 @@ TEST(Indexing, SmartBruteForce) {
std::cout << std::endl;
}
}
*/
TEST(Indexing, BinaryBruteForce) {
int64_t N = 100000;
int64_t num_queries = 10;
@ -224,7 +223,7 @@ TEST(Indexing, Naive) {
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto build_conf = knowhere::Config{
auto build_conf = knowhere::Json{
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "100"},
@ -232,12 +231,12 @@ TEST(Indexing, Naive) {
{knowhere::indexparam::NBITS, "8"},
};
auto search_conf = knowhere::Config{
auto search_conf = knowhere::Json{
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::indexparam::NPROBE, 4},
};
std::vector<knowhere::DatasetPtr> datasets;
std::vector<knowhere::DataSetPtr> datasets;
std::vector<std::vector<float>> ftrashs;
auto raw = raw_data.data();
for (int beg = 0; beg < N; beg += TestChunkSize) {
@ -247,7 +246,7 @@ TEST(Indexing, Naive) {
}
std::vector<float> ft(raw + DIM * beg, raw + DIM * end);
auto ds = knowhere::GenDataset(end - beg, DIM, ft.data());
auto ds = knowhere::GenDataSet(end - beg, DIM, ft.data());
datasets.push_back(ds);
ftrashs.push_back(std::move(ft));
}
@ -263,7 +262,7 @@ TEST(Indexing, Naive) {
}
BitsetView view = bitmap;
auto query_ds = knowhere::GenDataset(1, DIM, raw_data.data());
auto query_ds = knowhere::GenDataSet(1, DIM, raw_data.data());
milvus::SearchInfo searchInfo;
searchInfo.topk_ = TOPK;
@ -286,7 +285,6 @@ class IndexTest : public ::testing::TestWithParam<Param> {
protected:
void
SetUp() override {
knowhere::KnowhereConfig::SetStatisticsLevel(3);
storage_config_ = get_default_storage_config();
auto param = GetParam();
@ -318,12 +316,12 @@ class IndexTest : public ::testing::TestWithParam<Param> {
auto dataset = GenDataset(NB, metric_type, is_binary);
if (!is_binary) {
xb_data = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
xq_dataset = knowhere::GenDataset(NQ, DIM, xb_data.data() + DIM * query_offset);
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
} else {
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataset(NB, DIM, xb_bin_data.data());
xq_dataset = knowhere::GenDataset(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
}
}
@ -338,10 +336,10 @@ class IndexTest : public ::testing::TestWithParam<Param> {
milvus::Config load_conf;
milvus::Config search_conf;
milvus::DataType vec_field_data_type;
knowhere::DatasetPtr xb_dataset;
knowhere::DataSetPtr xb_dataset;
std::vector<float> xb_data;
std::vector<uint8_t> xb_bin_data;
knowhere::DatasetPtr xq_dataset;
knowhere::DataSetPtr xq_dataset;
int64_t query_offset = 100;
int64_t NB = 10000;
StorageConfig storage_config_;
@ -424,4 +422,4 @@ TEST_P(IndexTest, BuildAndQuery) {
if (!is_binary) {
EXPECT_EQ(result->seg_offsets_[0], query_offset);
}
}
}

View File

@ -14,7 +14,7 @@
#include <random>
#include <vector>
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/comp/index_param.h"
#include "query/SubSearchResult.h"
using namespace milvus;
@ -123,4 +123,4 @@ TEST(Reduce, SubSearchResult) {
TestSubSearchResultMerge<queue_type_ip>(knowhere::metric::IP, 4, 1, 10);
TestSubSearchResultMerge<queue_type_ip>(knowhere::metric::IP, 4, 16, 1);
TestSubSearchResultMerge<queue_type_ip>(knowhere::metric::IP, 4, 16, 10);
}
}

View File

@ -10,8 +10,6 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include <knowhere/index/vector_index/ConfAdapterMgr.h>
#include "index/IndexFactory.h"
#include "common/CDataType.h"

View File

@ -10,10 +10,7 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
#include <knowhere/index/vector_index/ConfAdapterMgr.h>
#include <knowhere/archive/KnowhereConfig.h>
#include <knowhere/comp/index_param.h>
#include "indexbuilder/index_c.h"
#include "test_utils/DataGen.h"
@ -38,7 +35,7 @@ template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T> | std:
// Generic overload: wraps the elements of `arr` in a knowhere dataset and
// hands it to `creator->Build`.
//
// The dataset is created from `arr.size()` rows and the pointer returned by
// `arr.data()`; nothing in this function copies the elements.
//
// NOTE(review): the two consecutive `auto dataset = ...` lines below appear to
// be the pre-change (`GenDataset`) and post-change (`GenDataSet`) sides of this
// diff hunk shown together; presumably only one of them exists in the real
// source file.
inline void
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<T>& arr) {
const int64_t dim = 8; // not important here
auto dataset = knowhere::GenDataset(arr.size(), dim, arr.data());
auto dataset = knowhere::GenDataSet(arr.size(), dim, arr.data());
// Build the index from the dataset created above.
creator->Build(dataset);
}
@ -53,7 +50,7 @@ build_index(const ScalarIndexCreatorPtr& creator, const std::vector<bool>& arr)
creator->Build(ds);
delete[](char*) knowhere::GetDatasetTensor(ds);
delete[](char*) (ds->GetTensor());
}
template <>
@ -65,7 +62,7 @@ build_index(const ScalarIndexCreatorPtr& creator, const std::vector<std::string>
creator->Build(ds);
delete[](char*) knowhere::GetDatasetTensor(ds);
delete[](char*) (ds->GetTensor());
}
} // namespace

View File

@ -12,8 +12,6 @@
#include <gtest/gtest.h>
#include <boost/format.hpp>
#include <knowhere/index/IndexType.h>
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "segcore/SegmentSealedImpl.h"
#include "test_utils/DataGen.h"
#include "index/IndexFactory.h"
@ -85,19 +83,19 @@ TEST(Sealed, without_predicate) {
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto build_conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
auto build_conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"}};
auto search_conf = knowhere::Config{{knowhere::indexparam::NPROBE, 10}};
auto search_conf = knowhere::Json{{knowhere::indexparam::NPROBE, 10}};
auto database = knowhere::GenDataset(N, dim, vec_col.data() + 1000 * dim);
auto database = knowhere::GenDataSet(N, dim, vec_col.data() + 1000 * dim);
indexing->BuildWithDataset(database, build_conf);
auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(indexing.get());
EXPECT_EQ(vec_index->Count(), N);
EXPECT_EQ(vec_index->GetDim(), dim);
auto query_dataset = knowhere::GenDataset(num_queries, dim, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
milvus::SearchInfo searchInfo;
searchInfo.topk_ = topK;
@ -191,21 +189,21 @@ TEST(Sealed, with_predicate) {
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto build_conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
auto build_conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"}};
auto database = knowhere::GenDataset(N, dim, vec_col.data());
auto database = knowhere::GenDataSet(N, dim, vec_col.data());
indexing->BuildWithDataset(database, build_conf);
auto vec_index = dynamic_cast<index::VectorIndex*>(indexing.get());
EXPECT_EQ(vec_index->Count(), N);
EXPECT_EQ(vec_index->GetDim(), dim);
auto query_dataset = knowhere::GenDataset(num_queries, dim, query_ptr);
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
auto search_conf =
knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, {knowhere::indexparam::NPROBE, 10}};
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, {knowhere::indexparam::NPROBE, 10}};
milvus::SearchInfo searchInfo;
searchInfo.topk_ = topK;
searchInfo.metric_type_ = knowhere::metric::L2;
@ -289,11 +287,11 @@ TEST(Sealed, with_predicate_filter_all) {
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
auto ivf_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto ivf_build_conf = knowhere::Config{{knowhere::meta::DIM, std::to_string(dim)},
auto ivf_build_conf = knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"},
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
auto database = knowhere::GenDataset(N, dim, vec_col.data());
auto database = knowhere::GenDataSet(N, dim, vec_col.data());
ivf_indexing->BuildWithDataset(database, ivf_build_conf);
auto ivf_vec_index = dynamic_cast<index::VectorIndex*>(ivf_indexing.get());
@ -313,7 +311,7 @@ TEST(Sealed, with_predicate_filter_all) {
auto sr = ivf_sealed_segment->Search(plan.get(), ph_group.get(), time);
EXPECT_EQ(sr->get_total_result_count(), 0);
auto hnsw_conf = knowhere::Config{{knowhere::meta::DIM, std::to_string(dim)},
auto hnsw_conf = knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::HNSW_M, "16"},
{knowhere::indexparam::EFCONSTRUCTION, "200"},
{knowhere::indexparam::EF, "200"},

View File

@ -10,9 +10,6 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
#include <knowhere/archive/KnowhereConfig.h>
#include "index/Index.h"
#include "index/ScalarIndex.h"
@ -137,21 +134,21 @@ TEST_F(StringIndexMarisaTest, Query) {
index->Build(nb, strs.data());
{
auto ds = knowhere::GenDataset(strs.size(), 8, strs.data());
auto ds = knowhere::GenDataSet(strs.size(), 8, strs.data());
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::In);
auto bitset = index->Query(ds);
ASSERT_TRUE(bitset->any());
}
{
auto ds = knowhere::GenDataset(strs.size(), 8, strs.data());
auto ds = knowhere::GenDataSet(strs.size(), 8, strs.data());
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::NotIn);
auto bitset = index->Query(ds);
ASSERT_TRUE(bitset->none());
}
{
auto ds = std::make_shared<knowhere::Dataset>();
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::GreaterEqual);
ds->Set<std::string>(milvus::index::RANGE_VALUE, "0");
auto bitset = index->Query(ds);
@ -160,7 +157,7 @@ TEST_F(StringIndexMarisaTest, Query) {
}
{
auto ds = std::make_shared<knowhere::Dataset>();
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::Range);
ds->Set<std::string>(milvus::index::LOWER_BOUND_VALUE, "0");
ds->Set<std::string>(milvus::index::UPPER_BOUND_VALUE, "range");
@ -172,7 +169,7 @@ TEST_F(StringIndexMarisaTest, Query) {
{
for (size_t i = 0; i < strs.size(); i++) {
auto ds = std::make_shared<knowhere::Dataset>();
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::PrefixMatch);
ds->Set<std::string>(milvus::index::PREFIX_VALUE, std::move(strs[i]));
auto bitset = index->Query(ds);

View File

@ -22,12 +22,11 @@
#include "index/ScalarIndexSort.h"
#include "index/StringIndexSort.h"
#include "index/VectorMemNMIndex.h"
#include "knowhere/index/VecIndex.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "query/SearchOnIndex.h"
#include "segcore/SegmentGrowingImpl.h"
#include "segcore/SegmentSealedImpl.h"
#include "segcore/Utils.h"
#include "knowhere/comp/index_param.h"
#include "PbHelper.h"
@ -466,14 +465,14 @@ SealedCreator(SchemaPtr schema, const GeneratedData& dataset) {
return segment;
}
inline index::VectorIndexPtr
inline std::unique_ptr<milvus::index::VectorIndex>
GenVecIndexing(int64_t N, int64_t dim, const float* vec) {
// {knowhere::IndexParams::nprobe, 10},
auto conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "1024"},
{knowhere::meta::DEVICE_ID, 0}};
auto database = knowhere::GenDataset(N, dim, vec);
auto conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "1024"},
{knowhere::meta::DEVICE_ID, 0}};
auto database = knowhere::GenDataSet(N, dim, vec);
auto indexing = std::make_unique<index::VectorMemNMIndex>(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2, IndexMode::MODE_CPU);
indexing->BuildWithDataset(database, conf);

View File

@ -26,9 +26,6 @@
#include "indexbuilder/ScalarIndexCreator.h"
#include "indexbuilder/VecIndexCreator.h"
#include "indexbuilder/index_c.h"
#include "knowhere/index/VecIndexFactory.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "pb/index_cgo_msg.pb.h"
#include "storage/Types.h"
@ -195,54 +192,54 @@ get_default_cstorage_config() {
// Returns the build-time parameter set used by the tests for `index_type`.
//
// Every visible branch sets METRIC_TYPE to `metric_type` and DIM to the
// stringified `DIM` constant, then adds the index-specific parameters as
// strings. An index type that matches none of the branches gets a
// default-constructed config.
//
// NOTE(review): each `return knowhere::Config...` / `return knowhere::Json...`
// pair below appears to be the pre-change and post-change side of the diff
// shown together; presumably only the `Json` form remains in the real file.
auto
generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "16"}, {knowhere::indexparam::M, "4"},
{knowhere::indexparam::NBITS, "8"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "16"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFSQ8) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "16"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "16"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
};
} else if (index_type == knowhere::IndexEnum::INDEX_HNSW) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::HNSW_M, "16"},
{knowhere::indexparam::EFCONSTRUCTION, "200"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_ANNOY) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::N_TREES, "4"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_DISKANN) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)},
// NOTE(review): the hunk header on the next line marks a gap in this listing;
// any DISKANN parameters between MAX_DEGREE and BUILD_DRAM_BUDGET are not
// visible here.
@ -251,19 +248,19 @@ generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricTyp
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(32)},
};
}
// Fallback for index types with no branch above: default-constructed config.
return knowhere::Config();
return knowhere::Json();
}
// Returns the load-time parameter set used by the tests for `index_type`.
//
// Only INDEX_DISKANN gets a populated config: METRIC_TYPE is `metric_type`,
// DIM is the stringified `DIM` constant, and DISK_ANN_SEARCH_CACHE_BUDGET is
// `std::to_string(0.0002)`. Every other index type gets a default-constructed
// config. `nb` is not read anywhere in the visible body.
//
// NOTE(review): each `return knowhere::Config...` / `return knowhere::Json...`
// pair below appears to be the pre-change and post-change side of the diff
// shown together; presumably only the `Json` form remains in the real file.
auto
generate_load_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type, int64_t nb) {
if (index_type == knowhere::IndexEnum::INDEX_DISKANN) {
return knowhere::Config{
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET, std::to_string(0.0002)},
};
}
// All other index types: default-constructed config.
return knowhere::Config();
return knowhere::Json();
}
std::vector<milvus::IndexType>
@ -401,13 +398,13 @@ CountDistance(
void
CheckDistances(const QueryResultPtr& result,
const knowhere::DatasetPtr& base_dataset,
const knowhere::DatasetPtr& query_dataset,
const knowhere::DataSetPtr& base_dataset,
const knowhere::DataSetPtr& query_dataset,
const knowhere::MetricType& metric,
const float threshold = 1.0e-5) {
auto base_vecs = (float*)knowhere::GetDatasetTensor(base_dataset);
auto query_vecs = (float*)knowhere::GetDatasetTensor(query_dataset);
auto dim = knowhere::GetDatasetDim(base_dataset);
auto base_vecs = (float*)(base_dataset->GetTensor());
auto query_vecs = (float*)(query_dataset->GetTensor());
auto dim = base_dataset->GetDim();
auto nq = result->total_nq_;
auto k = result->unity_topK_;
for (auto i = 0; i < nq; i++) {
@ -535,7 +532,7 @@ auto
GenDsFromPB(const google::protobuf::Message& msg) {
// Serializes `msg` into a freshly allocated byte buffer and wraps that buffer
// in a knowhere dataset whose row count is the serialized byte size.
//
// The buffer comes from `new char[]` and is not released in this function;
// the raw pointer is handed to the dataset.
// NOTE(review): presumably the caller is expected to `delete[]` the tensor
// pointer once the dataset is no longer needed — confirm against call sites.
auto data = new char[msg.ByteSizeLong()];
msg.SerializeToArray(data, msg.ByteSizeLong());
// NOTE(review): the literal 8 is passed in the dim position; it looks like a
// placeholder rather than a real dimension — confirm.
// The two `return` lines appear to be the pre-change (`GenDataset`) and
// post-change (`GenDataSet`) sides of this diff hunk shown together.
return knowhere::GenDataset(msg.ByteSizeLong(), 8, data);
return knowhere::GenDataSet(msg.ByteSizeLong(), 8, data);
}
template <typename T>

View File

@ -492,7 +492,7 @@ def gen_invalid_search_param(index_type, metric_type="L2"):
hnsw_search_param = {"metric_type": metric_type, "params": {"ef": ef}}
search_params.append(hnsw_search_param)
elif index_type == "ANNOY":
for search_k in ["-1"]:
for search_k in ["-2"]:
annoy_search_param = {"metric_type": metric_type, "params": {"search_k": search_k}}
search_params.append(annoy_search_param)
elif index_type == "DISKANN":