mirror of https://github.com/milvus-io/milvus.git
Support diskann index for vector field (#19093)
Signed-off-by: xige-16 <xi.ge@zilliz.com> Signed-off-by: xige-16 <xi.ge@zilliz.com>pull/19349/head
parent
9d508dfa49
commit
428840178c
.github/workflows
build/docker/milvus
ubuntu18.04
ubuntu20.04
configs
internal/core
src
index
thirdparty
knowhere
unittest
|
@ -59,7 +59,8 @@ jobs:
|
|||
restore-keys: ubuntu20.04-go-mod-
|
||||
- name: Code Check
|
||||
env:
|
||||
CHECK_BUILDER: "1"
|
||||
# CHECK_BUILDER: "1"
|
||||
OS_NAME: "ubuntu20.04"
|
||||
run: |
|
||||
./build/builder.sh /bin/bash -c "make check-proto-product && make verifiers"
|
||||
centos:
|
||||
|
|
|
@ -15,7 +15,7 @@ FROM milvusdb/openblas:ubuntu18.04-20210428 AS openblas
|
|||
FROM ubuntu:bionic-20200921
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends libtbb-dev libzstd-dev gfortran netcat iputils-ping ca-certificates && \
|
||||
apt-get install -y --no-install-recommends libtbb-dev libzstd-dev gfortran netcat iputils-ping ca-certificates uuid-dev libaio-dev libboost-program-options-dev && \
|
||||
apt-get remove --purge -y && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
|
|
@ -14,8 +14,11 @@ FROM milvusdb/openblas:ubuntu20.04-20220914-179ea77 AS openblas
|
|||
#FROM alpine
|
||||
FROM ubuntu:focal-20220426
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV TZ=UTC
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends libtbb-dev gfortran netcat iputils-ping ca-certificates liblapack3 && \
|
||||
apt-get install -y --no-install-recommends curl libtbb-dev gfortran netcat iputils-ping ca-certificates liblapack3 libzstd-dev uuid-dev libaio-dev libboost-program-options-dev libboost-filesystem-dev && \
|
||||
apt-get remove --purge -y && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
|
|
@ -193,6 +193,8 @@ queryNode:
|
|||
cacheSize: 32 # GB, default 32 GB, `cacheSize` is the memory used for caching data for faster query. The `cacheSize` must be less than system memory size.
|
||||
port: 21123
|
||||
loadMemoryUsageFactor: 3 # The multiply factor of calculating the memory usage while loading segments
|
||||
enableDisk: true # Enable the querynode to load disk indexes and search on them
|
||||
maxDiskUsagePercentage: 95
|
||||
|
||||
stats:
|
||||
publishInterval: 1000 # Interval for querynode to report node information (milliseconds)
|
||||
|
@ -238,6 +240,8 @@ indexCoord:
|
|||
|
||||
indexNode:
|
||||
port: 21121
|
||||
enableDisk: true # Enable the index node to build disk vector indexes
|
||||
maxDiskUsagePercentage: 95
|
||||
|
||||
scheduler:
|
||||
buildParallel: 1
|
||||
|
|
|
@ -23,6 +23,7 @@ services:
|
|||
# - "19530:19530"
|
||||
environment:
|
||||
<<: *ccache
|
||||
OS_NAME: ${OS_NAME}
|
||||
PULSAR_ADDRESS: ${PULSAR_ADDRESS}
|
||||
ETCD_ENDPOINTS: ${ETCD_ENDPOINTS}
|
||||
MINIO_ADDRESS: ${MINIO_ADDRESS}
|
||||
|
|
|
@ -234,6 +234,10 @@ if ( BUILD_UNIT_TEST STREQUAL "ON" AND BUILD_COVERAGE STREQUAL "ON" )
|
|||
)
|
||||
endif ()
|
||||
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
ADD_DEFINITIONS(-DBUILD_DISK_ANN=${BUILD_DISK_ANN})
|
||||
endif ()
|
||||
|
||||
# Warning: add_subdirectory(src) must be after append_flags("-ftest-coverage"),
|
||||
# otherwise cpp code coverage tool will miss src folder
|
||||
add_subdirectory( thirdparty )
|
||||
|
|
|
@ -0,0 +1,189 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Number of parallel compile jobs.
# Override from the environment, e.g.:  $ jobs=12 ./build.sh ...
# The probes below only run when `jobs` is not set at all.
if [[ -z ${jobs+set} ]]; then
    if command -v nproc >/dev/null 2>&1; then
        # Linux
        jobs=$(nproc)
    elif command -v sysctl >/dev/null 2>&1; then
        # macOS
        jobs=$(sysctl -n hw.logicalcpu)
    else
        # No probe available: use a fixed value.
        jobs=4
    fi
fi
|
||||
|
||||
# Locate the directory that holds this script, following symlinks until
# SOURCE names a regular path.
SOURCE="${BASH_SOURCE[0]}"
while [[ -L "${SOURCE}" ]]; do
    DIR="$(cd -P "$(dirname "${SOURCE}")" && pwd)"
    SOURCE="$(readlink "${SOURCE}")"
    # A relative link target is relative to the directory of the link itself.
    if [[ "${SOURCE}" != /* ]]; then
        SOURCE="${DIR}/${SOURCE}"
    fi
done
SCRIPTS_DIR="$(cd -P "$(dirname "${SOURCE}")" && pwd)"
|
||||
|
||||
# ---- Default settings; most can be overridden by the options parsed below ----

# Locations
BUILD_OUTPUT_DIR="./cmake_build"
INSTALL_PREFIX="${SCRIPTS_DIR}/output"
CUSTOM_THIRDPARTY_PATH=""

# Build flavour
BUILD_TYPE="Release"
BUILD_UNITTEST="OFF"
BUILD_COVERAGE="OFF"
PROFILING="OFF"
WITH_PROMETHEUS="ON"
BUILD_DISK_ANN="OFF"

# GPU / CUDA
GPU_VERSION="OFF" # defaults to CPU version
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
CUDA_ARCH="DEFAULT"

# Script actions
MAKE_CLEAN="OFF"
RUN_CPPLINT="OFF"
|
||||
|
||||
# Parse command-line options.
# Options that take a value: -p -t -s -f -o. Everything else is a plain flag.
# "n" is part of the option string so that the n) branch is reachable.
# NOTE(review): "m" is accepted by getopts but has no branch of its own, so
# "-m" falls through to the catch-all "?" pattern and aborts - confirm whether
# "-m" is still meant to be supported.
while getopts "p:t:s:f:o:ulrcghzmen" arg; do
    case $arg in
    f)
        CUSTOM_THIRDPARTY_PATH=$OPTARG
        ;;
    p)
        INSTALL_PREFIX=$OPTARG
        ;;
    o)
        BUILD_OUTPUT_DIR=$OPTARG
        ;;
    t)
        BUILD_TYPE=$OPTARG # BUILD_TYPE
        ;;
    u)
        echo "Build and run unittest cases"
        BUILD_UNITTEST="ON"
        ;;
    l)
        RUN_CPPLINT="ON"
        ;;
    r)
        # Only request a clean when the build directory already exists.
        # This checks the value BUILD_OUTPUT_DIR has at this point, so a
        # "-o" given after "-r" is not taken into account.
        if [[ -d ${BUILD_OUTPUT_DIR} ]]; then
            MAKE_CLEAN="ON"
        fi
        ;;
    c)
        BUILD_COVERAGE="ON"
        ;;
    z)
        PROFILING="ON"
        ;;
    g)
        GPU_VERSION="ON"
        ;;
    e)
        WITH_PROMETHEUS="OFF"
        ;;
    s)
        CUDA_ARCH=$OPTARG
        ;;
    n)
        BUILD_DISK_ANN="OFF"
        ;;
    h) # help
        echo "

parameter:
-f: custom paths of thirdparty downloaded files(default: NULL)
-p: install prefix(default: ${SCRIPTS_DIR}/output)
-o: build output directory(default: ./cmake_build)
-t: build type(default: Release)
-u: building unit test options(default: OFF)
-l: run cpplint, clang-format and clang-tidy(default: OFF)
-r: remove previous build directory(default: OFF)
-c: code coverage(default: OFF)
-z: profiling(default: OFF)
-g: build GPU version(default: OFF)
-e: build without prometheus(default: OFF)
-n: build without DiskANN(BUILD_DISK_ANN=OFF, which is also the default)
-s: build with CUDA arch(default:DEFAULT), for example '-gencode=compute_61,code=sm_61;-gencode=compute_75,code=sm_75'
-h: help

usage:
./build.sh -p \${INSTALL_PREFIX} -o \${BUILD_OUTPUT_DIR} -t \${BUILD_TYPE} -s \${CUDA_ARCH} -f \${CUSTOM_THIRDPARTY_PATH} [-u] [-l] [-r] [-c] [-z] [-g] [-e] [-n] [-h]
                "
        exit 0
        ;;
    ?)
        # Catch-all: getopts reports unknown options as "?", and this glob
        # also matches any accepted letter that has no branch above.
        echo "ERROR! unknown argument"
        exit 1
        ;;
    esac
done
|
||||
|
||||
# Create the build directory if needed (-p also creates missing parents)
# and enter it. Abort when it cannot be entered; otherwise the make/cmake
# commands below would run in whatever directory the caller was in.
if [[ ! -d "${BUILD_OUTPUT_DIR}" ]]; then
    mkdir -p "${BUILD_OUTPUT_DIR}" || exit 1
fi

cd "${BUILD_OUTPUT_DIR}" || exit 1

# remove make cache since build.sh -l use default variables
# force update the variables each time
# (output and failures are deliberately ignored: on a fresh directory there
# is no Makefile yet)
make rebuild_cache >/dev/null 2>&1

# "-r" only cleans the existing build tree and then stops.
if [[ "${MAKE_CLEAN}" == "ON" ]]; then
    echo "Running make clean in ${BUILD_OUTPUT_DIR} ..."
    make clean
    exit 0
fi
|
||||
|
||||
# Pick the toolchain by platform: on macOS use the Homebrew LLVM install
# (plus libomp paths); on anything else just report the system name.
unameOut="$(uname -s)"
if [[ "${unameOut}" == Darwin* ]]; then
    llvm_prefix="$(brew --prefix llvm)"
    export CC="${llvm_prefix}/bin/clang"
    export CXX="${llvm_prefix}/bin/clang++"
    export CLANG_TOOLS_PATH="${llvm_prefix}/bin"
    export CXXFLAGS="-I${llvm_prefix}/include -I/usr/local/include -I/usr/local/opt/libomp/include"
    export LDFLAGS="-L${llvm_prefix}/lib -L/usr/local/opt/libomp/lib"
else
    echo "==System:${unameOut}"
fi
|
||||
|
||||
# Configure the build tree.
# The command is kept in an array so that every -D option stays a single
# argument even when a path contains whitespace.
# KNOWHERE_GPU_VERSION: SUPPORT_GPU is not assigned anywhere in this script,
# so fall back to GPU_VERSION when the environment does not provide it.
CMAKE_CMD=(
    cmake
    "-DBUILD_UNIT_TEST=${BUILD_UNITTEST}"
    "-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}"
    "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
    "-DOpenBLAS_SOURCE=AUTO"
    "-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER}"
    "-DBUILD_COVERAGE=${BUILD_COVERAGE}"
    "-DENABLE_CPU_PROFILING=${PROFILING}"
    "-DMILVUS_GPU_VERSION=${GPU_VERSION}"
    "-DMILVUS_WITH_PROMETHEUS=${WITH_PROMETHEUS}"
    "-DMILVUS_CUDA_ARCH=${CUDA_ARCH}"
    "-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH}"
    "-DKNOWHERE_GPU_VERSION=${SUPPORT_GPU:-${GPU_VERSION}}"
    "-DBUILD_DISK_ANN=${BUILD_DISK_ANN}"
    "${SCRIPTS_DIR}"
)
echo "${CMAKE_CMD[*]}"
# Stop here when configuration fails instead of running make against a
# missing or stale configuration.
"${CMAKE_CMD[@]}" || {
    echo "ERROR! cmake configuration failed"
    exit 1
}
|
||||
|
||||
|
||||
# Either compile and install (default) or, with "-l", run the three static
# checks in order and stop at the first one that fails.
if [[ ${RUN_CPPLINT} != "ON" ]]; then
    # compile and build
    make -j ${jobs} install || exit 1
else
    # cpplint check
    if ! make lint; then
        echo "ERROR! cpplint check failed"
        exit 1
    fi
    echo "cpplint check passed!"

    # clang-format check
    if ! make check-clang-format; then
        echo "ERROR! clang-format check failed"
        exit 1
    fi
    echo "clang-format check passed!"

    # clang-tidy check
    if ! make check-clang-tidy; then
        echo "ERROR! clang-tidy check failed"
        exit 1
    fi
    echo "clang-tidy check passed!"
fi
|
|
@ -15,10 +15,6 @@
|
|||
#include <string>
|
||||
|
||||
namespace milvus {
|
||||
inline bool
|
||||
IsVectorType(CDataType dtype) {
|
||||
return dtype == CDataType::FloatVector || dtype == CDataType::BinaryVector;
|
||||
}
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<T> || std::is_same_v<T, std::string>>>
|
||||
inline CDataType
|
||||
|
|
|
@ -14,18 +14,16 @@ milvus_add_pkg_config("milvus_common")
|
|||
set(COMMON_SRC
|
||||
Schema.cpp
|
||||
SystemProperty.cpp
|
||||
vector_index_c.cpp
|
||||
binary_set_c.cpp
|
||||
init_c.cpp
|
||||
)
|
||||
|
||||
add_library(milvus_common SHARED ${COMMON_SRC})
|
||||
|
||||
if ( MSYS )
|
||||
target_link_libraries(milvus_common
|
||||
milvus_utils
|
||||
milvus_config
|
||||
milvus_log
|
||||
knowhere
|
||||
milvus_proto
|
||||
yaml-cpp
|
||||
boost_bitset_ext
|
||||
arrow
|
||||
|
@ -33,11 +31,8 @@ if ( MSYS )
|
|||
)
|
||||
else()
|
||||
target_link_libraries(milvus_common
|
||||
milvus_utils
|
||||
milvus_config
|
||||
milvus_log
|
||||
knowhere
|
||||
milvus_proto
|
||||
yaml-cpp
|
||||
boost_bitset_ext
|
||||
arrow
|
||||
|
|
|
@ -26,6 +26,10 @@ const milvus::PkType INVALID_PK; // of std::monostate if not set.
|
|||
const int64_t START_USER_FIELDID = 100;
|
||||
const char MAX_LENGTH[] = "max_length";
|
||||
|
||||
// const fieldID (rowID and timestamp)
|
||||
const milvus::FieldId RowFieldID = milvus::FieldId(0);
|
||||
const milvus::FieldId TimestampFieldID = milvus::FieldId(1);
|
||||
|
||||
// fill followed extra info to binlog file
|
||||
const char ORIGIN_SIZE_KEY[] = "original_size";
|
||||
const char INDEX_BUILD_ID_KEY[] = "indexBuildID";
|
||||
|
|
|
@ -23,13 +23,6 @@
|
|||
#include "common/CDataType.h"
|
||||
#include "knowhere/index/Index.h"
|
||||
|
||||
struct LoadIndexInfo {
|
||||
int64_t field_id;
|
||||
CDataType field_type;
|
||||
std::map<std::string, std::string> index_params;
|
||||
knowhere::IndexPtr index;
|
||||
};
|
||||
|
||||
// NOTE: field_id can be system field
|
||||
// NOTE: Refer to common/SystemProperty.cpp for details
|
||||
// TODO: use arrow to pass field data instead of proto
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/Types.h"
|
||||
|
||||
namespace milvus {
|
||||
struct SearchInfo {
|
||||
int64_t topk_;
|
||||
int64_t round_decimal_;
|
||||
FieldId field_id_;
|
||||
MetricType metric_type_;
|
||||
Config search_params_;
|
||||
};
|
||||
|
||||
using SearchInfoPtr = std::shared_ptr<SearchInfo>;
|
||||
|
||||
} // namespace milvus
|
|
@ -31,6 +31,9 @@
|
|||
#include <variant>
|
||||
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include <knowhere/index/IndexType.h>
|
||||
#include "knowhere/common/BinarySet.h"
|
||||
#include "knowhere/common/Dataset.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "pb/segcore.pb.h"
|
||||
#include "pb/plan.pb.h"
|
||||
|
@ -109,6 +112,16 @@ using BitsetTypeOpt = std::optional<BitsetType>;
|
|||
template <typename Type>
|
||||
using FixedVector = boost::container::vector<Type>;
|
||||
|
||||
const FieldId RowFieldID = FieldId(0);
|
||||
const FieldId TimestampFieldID = FieldId(1);
|
||||
using Config = nlohmann::json;
|
||||
using TargetBitmap = boost::dynamic_bitset<>;
|
||||
using TargetBitmapPtr = std::unique_ptr<TargetBitmap>;
|
||||
|
||||
using BinarySet = knowhere::BinarySet;
|
||||
using DatasetPtr = knowhere::DatasetPtr;
|
||||
using MetricType = knowhere::MetricType;
|
||||
// TODO :: type define milvus index type(vector index type and scalar index type)
|
||||
using IndexType = knowhere::IndexType;
|
||||
// TODO :: type define milvus index mode, add transfer func from milvus index mode to knowhere index mode
|
||||
using IndexMode = knowhere::IndexMode;
|
||||
|
||||
} // namespace milvus
|
||||
|
|
|
@ -13,8 +13,43 @@
|
|||
|
||||
#include <string>
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "config/ConfigChunkManager.h"
|
||||
#include "common/Consts.h"
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
inline DatasetPtr
|
||||
GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
|
||||
return knowhere::GenDataset(nb, dim, xb);
|
||||
}
|
||||
|
||||
inline const float*
|
||||
GetDatasetDistance(const DatasetPtr& dataset) {
|
||||
return knowhere::GetDatasetDistance(dataset);
|
||||
}
|
||||
|
||||
inline const int64_t*
|
||||
GetDatasetIDs(const DatasetPtr& dataset) {
|
||||
return knowhere::GetDatasetIDs(dataset);
|
||||
}
|
||||
|
||||
inline int64_t
|
||||
GetDatasetRows(const DatasetPtr& dataset) {
|
||||
return knowhere::GetDatasetRows(dataset);
|
||||
}
|
||||
|
||||
inline const void*
|
||||
GetDatasetTensor(const DatasetPtr& dataset) {
|
||||
return knowhere::GetDatasetTensor(dataset);
|
||||
}
|
||||
|
||||
inline int64_t
|
||||
GetDatasetDim(const DatasetPtr& dataset) {
|
||||
return knowhere::GetDatasetDim(dataset);
|
||||
}
|
||||
|
||||
inline bool
|
||||
PrefixMatch(const std::string& str, const std::string& prefix) {
|
||||
auto ret = strncmp(str.c_str(), prefix.c_str(), prefix.length());
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
// limitations under the License.
|
||||
|
||||
#include "knowhere/common/BinarySet.h"
|
||||
#include "common/vector_index_c.h"
|
||||
#include "common/binary_set_c.h"
|
||||
|
||||
CStatus
|
||||
NewBinarySet(CBinarySet* c_binary_set) {
|
|
@ -0,0 +1,66 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "common/init_c.h"
|
||||
|
||||
#include <string>
|
||||
#include "config/ConfigChunkManager.h"
|
||||
|
||||
void
|
||||
MinioAddressInit(const char* address) {
|
||||
std::string minio_address(address);
|
||||
milvus::ChunkMangerConfig::SetAddress(address);
|
||||
}
|
||||
|
||||
void
|
||||
MinioAccessKeyInit(const char* key) {
|
||||
std::string minio_access_key(key);
|
||||
milvus::ChunkMangerConfig::SetAccessKey(minio_access_key);
|
||||
}
|
||||
|
||||
void
|
||||
MinioAccessValueInit(const char* value) {
|
||||
std::string minio_access_value(value);
|
||||
milvus::ChunkMangerConfig::SetAccessValue(value);
|
||||
}
|
||||
|
||||
void
|
||||
MinioSSLInit(bool use_ssl) {
|
||||
milvus::ChunkMangerConfig::SetUseSSL(use_ssl);
|
||||
}
|
||||
|
||||
void
|
||||
MinioUseIamInit(bool use_iam) {
|
||||
milvus::ChunkMangerConfig::SetUseIAM(use_iam);
|
||||
}
|
||||
|
||||
void
|
||||
MinioBucketNameInit(const char* name) {
|
||||
std::string bucket_name(name);
|
||||
milvus::ChunkMangerConfig::SetBucketName(name);
|
||||
}
|
||||
|
||||
void
|
||||
MinioRootPathInit(const char* name) {
|
||||
std::string root_path(name);
|
||||
milvus::ChunkMangerConfig::SetRemoteRootPath(name);
|
||||
}
|
||||
|
||||
void
|
||||
LocalRootPathInit(const char* root_path) {
|
||||
std::string local_path_root(root_path);
|
||||
milvus::ChunkMangerConfig::SetLocalRootPath(local_path_root);
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
// The system header is included before the linkage block is opened so that
// it is not wrapped in extern "C".
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

// C-callable setters for the storage configuration.
// Every `const char*` argument must point to a NUL-terminated string.

// Sets the remote storage address.
void
MinioAddressInit(const char*);

// Sets the access key.
void
MinioAccessKeyInit(const char*);

// Sets the access value (secret).
void
MinioAccessValueInit(const char*);

// Enables or disables SSL.
void
MinioSSLInit(bool use_ssl);

// Enables or disables IAM.
void
MinioUseIamInit(bool use_iam);

// Sets the bucket name.
void
MinioBucketNameInit(const char*);

// Sets the remote root path.
void
MinioRootPathInit(const char*);

// Sets the local root path.
void
LocalRootPathInit(const char*);

#ifdef __cplusplus
}
#endif
|
|
@ -18,51 +18,53 @@
|
|||
|
||||
namespace milvus::ChunkMangerConfig {
|
||||
|
||||
std::string MINIO_ADDRESS = "localhost:9000"; // NOLINT
|
||||
std::string MINIO_ACCESS_KEY = "minioadmin"; // NOLINT
|
||||
std::string MINIO_ACCESS_VALUE = "minioadmin"; // NOLINT
|
||||
std::string MINIO_BUCKET_NAME = "a-bucket"; // NOLINT
|
||||
std::string LOCAL_BUCKET_NAME = "/tmp/milvus"; // NOLINT
|
||||
std::string REMOTE_ADDRESS = "localhost:9000"; // NOLINT
|
||||
std::string REMOTE_ACCESS_KEY = "minioadmin"; // NOLINT
|
||||
std::string REMOTE_ACCESS_VALUE = "minioadmin"; // NOLINT
|
||||
std::string REMOTE_BUCKET_NAME = "a-bucket"; // NOLINT
|
||||
std::string REMOTE_ROOT_PATH = "files"; // NOLINT
|
||||
std::string LOCAL_ROOT_PATH = "/tmp/milvus"; // NOLINT
|
||||
bool MINIO_USE_SSL = false;
|
||||
bool MINIO_USE_IAM = false;
|
||||
|
||||
void
|
||||
SetAddress(const std::string& address) {
|
||||
MINIO_ADDRESS = address.c_str();
|
||||
REMOTE_ADDRESS = address;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetAddress() {
|
||||
return MINIO_ADDRESS;
|
||||
return REMOTE_ADDRESS;
|
||||
}
|
||||
|
||||
void
|
||||
SetAccessKey(const std::string& access_key) {
|
||||
MINIO_ACCESS_KEY = access_key.c_str();
|
||||
REMOTE_ACCESS_KEY = access_key;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetAccessKey() {
|
||||
return MINIO_ACCESS_KEY;
|
||||
return REMOTE_ACCESS_KEY;
|
||||
}
|
||||
|
||||
void
|
||||
SetAccessValue(const std::string& access_value) {
|
||||
MINIO_ACCESS_VALUE = access_value.c_str();
|
||||
REMOTE_ACCESS_VALUE = access_value;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetAccessValue() {
|
||||
return MINIO_ACCESS_VALUE;
|
||||
return REMOTE_ACCESS_VALUE;
|
||||
}
|
||||
|
||||
void
|
||||
SetBucketName(const std::string& bucket_name) {
|
||||
MINIO_BUCKET_NAME = bucket_name.c_str();
|
||||
REMOTE_BUCKET_NAME = bucket_name;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetBucketName() {
|
||||
return MINIO_BUCKET_NAME;
|
||||
return REMOTE_BUCKET_NAME;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -76,13 +78,33 @@ GetUseSSL() {
|
|||
}
|
||||
|
||||
void
|
||||
SetLocalBucketName(const std::string& path_prefix) {
|
||||
LOCAL_BUCKET_NAME = path_prefix.c_str();
|
||||
SetUseIAM(bool use_iam) {
|
||||
MINIO_USE_IAM = use_iam;
|
||||
}
|
||||
|
||||
bool
|
||||
GetUseIAM() {
|
||||
return MINIO_USE_IAM;
|
||||
}
|
||||
|
||||
void
|
||||
SetRemoteRootPath(const std::string& root_path) {
|
||||
REMOTE_ROOT_PATH = root_path;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetLocalBucketName() {
|
||||
return LOCAL_BUCKET_NAME;
|
||||
GetRemoteRootPath() {
|
||||
return REMOTE_ROOT_PATH;
|
||||
}
|
||||
|
||||
void
|
||||
SetLocalRootPath(const std::string& path_prefix) {
|
||||
LOCAL_ROOT_PATH = path_prefix;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetLocalRootPath() {
|
||||
return LOCAL_ROOT_PATH;
|
||||
}
|
||||
|
||||
} // namespace milvus::ChunkMangerConfig
|
||||
|
|
|
@ -44,6 +44,12 @@ SetUseSSL(bool use_ssl);
|
|||
bool
|
||||
GetUseSSL();
|
||||
|
||||
void
|
||||
SetUseIAM(bool use_iam);
|
||||
|
||||
bool
|
||||
GetUseIAM();
|
||||
|
||||
void
|
||||
SetBucketName(const std::string& bucket_name);
|
||||
|
||||
|
@ -51,9 +57,15 @@ std::string
|
|||
GetBucketName();
|
||||
|
||||
void
|
||||
SetLocalBucketName(const std::string& path_prefix);
|
||||
SetRemoteRootPath(const std::string& path_prefix);
|
||||
|
||||
std::string
|
||||
GetLocalBucketName();
|
||||
GetRemoteRootPath();
|
||||
|
||||
void
|
||||
SetLocalRootPath(const std::string& path_prefix);
|
||||
|
||||
std::string
|
||||
GetLocalRootPath();
|
||||
|
||||
} // namespace milvus::ChunkMangerConfig
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -15,24 +20,14 @@
|
|||
#include <memory>
|
||||
#include "index/ScalarIndexSort.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
// TODO: optimize here.
|
||||
class BoolIndex : public ScalarIndexSort<bool> {
|
||||
public:
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset) override {
|
||||
auto size = knowhere::GetDatasetRows(dataset);
|
||||
auto data = knowhere::GetDatasetTensor(dataset);
|
||||
proto::schema::BoolArray arr;
|
||||
arr.ParseFromArray(data, size);
|
||||
Build(arr.data().size(), arr.data().data());
|
||||
}
|
||||
};
|
||||
using BoolIndexPtr = std::unique_ptr<BoolIndex>;
|
||||
//// TODO: optimize here.
|
||||
class BoolIndex : public ScalarIndexSort<bool> {};
|
||||
using BoolIndexPtr = std::shared_ptr<BoolIndex>;
|
||||
|
||||
inline BoolIndexPtr
|
||||
CreateBoolIndex() {
|
||||
return std::make_unique<BoolIndex>();
|
||||
}
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -9,11 +9,23 @@
|
|||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
aux_source_directory( ${MILVUS_ENGINE_SRC}/index INDEX_FILES )
|
||||
set(INDEX_FILES
|
||||
StringIndexMarisa.cpp
|
||||
Utils.cpp
|
||||
VectorMemIndex.cpp
|
||||
IndexFactory.cpp
|
||||
)
|
||||
|
||||
add_library( milvus_index SHARED ${INDEX_FILES} )
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
set(INDEX_FILES
|
||||
${INDEX_FILES}
|
||||
VectorDiskIndex.cpp
|
||||
)
|
||||
endif ()
|
||||
|
||||
milvus_add_pkg_config("milvus_index")
|
||||
add_library(milvus_index SHARED ${INDEX_FILES})
|
||||
|
||||
# TODO: support compile marisa on windows.
|
||||
set(PLATFORM_LIBS )
|
||||
if ( LINUX OR APPLE )
|
||||
set(PLATFORM_LIBS marisa)
|
||||
|
@ -23,9 +35,7 @@ if (MSYS)
|
|||
endif ()
|
||||
|
||||
target_link_libraries(milvus_index
|
||||
milvus_proto
|
||||
milvus_exceptions
|
||||
knowhere
|
||||
milvus_storage
|
||||
${PLATFORM_LIBS}
|
||||
)
|
||||
|
||||
|
|
|
@ -1,41 +1,51 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <knowhere/index/Index.h>
|
||||
#include <knowhere/common/Dataset.h>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
namespace milvus::scalar {
|
||||
using Index = knowhere::Index;
|
||||
using IndexPtr = std::unique_ptr<Index>;
|
||||
using BinarySet = knowhere::BinarySet;
|
||||
using Config = knowhere::Config;
|
||||
using DatasetPtr = knowhere::DatasetPtr;
|
||||
using TargetBitmap = boost::dynamic_bitset<>;
|
||||
using TargetBitmapPtr = std::unique_ptr<TargetBitmap>;
|
||||
#include "common/Types.h"
|
||||
|
||||
class IndexBase : public Index {
|
||||
namespace milvus::index {
|
||||
|
||||
class IndexBase {
|
||||
public:
|
||||
virtual ~IndexBase() = default;
|
||||
|
||||
virtual BinarySet
|
||||
Serialize(const Config& config) = 0;
|
||||
|
||||
virtual void
|
||||
BuildWithDataset(const DatasetPtr& dataset) = 0;
|
||||
Load(const BinarySet& binary_set, const Config& config = {}) = 0;
|
||||
|
||||
virtual const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset) = 0;
|
||||
virtual void
|
||||
BuildWithRawData(size_t n, const void* values, const Config& config = {}) = 0;
|
||||
|
||||
virtual size_t
|
||||
virtual void
|
||||
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) = 0;
|
||||
|
||||
virtual int64_t
|
||||
Count() = 0;
|
||||
};
|
||||
using IndexBasePtr = std::unique_ptr<IndexBase>;
|
||||
|
||||
} // namespace milvus::scalar
|
||||
protected:
|
||||
IndexType index_type_ = "";
|
||||
IndexMode index_mode_ = IndexMode::MODE_CPU;
|
||||
};
|
||||
|
||||
using IndexBasePtr = std::unique_ptr<IndexBase>;
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,37 +1,41 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string>
|
||||
#include "index/ScalarIndexSort.h"
|
||||
#include "index/StringIndexMarisa.h"
|
||||
#include "index/IndexType.h"
|
||||
#include "index/BoolIndex.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
template <typename T>
|
||||
inline ScalarIndexPtr<T>
|
||||
IndexFactory::CreateIndex(const std::string& index_type) {
|
||||
IndexFactory::CreateScalarIndex(const IndexType& index_type) {
|
||||
return CreateScalarIndexSort<T>();
|
||||
}
|
||||
|
||||
template <>
|
||||
inline ScalarIndexPtr<bool>
|
||||
IndexFactory::CreateIndex(const std::string& index_type) {
|
||||
return CreateBoolIndex();
|
||||
}
|
||||
// template <>
|
||||
// inline ScalarIndexPtr<bool>
|
||||
// IndexFactory::CreateScalarIndex(const IndexType& index_type) {
|
||||
// return CreateBoolIndex();
|
||||
//}
|
||||
|
||||
template <>
|
||||
inline ScalarIndexPtr<std::string>
|
||||
IndexFactory::CreateIndex(const std::string& index_type) {
|
||||
IndexFactory::CreateScalarIndex(const IndexType& index_type) {
|
||||
#if defined(__linux__) || defined(__APPLE__)
|
||||
return CreateStringIndexMarisa();
|
||||
#else
|
||||
|
@ -39,4 +43,4 @@ IndexFactory::CreateIndex(const std::string& index_type) {
|
|||
#endif
|
||||
}
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,48 +1,94 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "index/IndexFactory.h"
|
||||
#include "index/ScalarIndexSort.h"
|
||||
#include "index/StringIndexMarisa.h"
|
||||
#include "index/VectorMemIndex.h"
|
||||
#include "index/Utils.h"
|
||||
#include "index/Meta.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
#ifdef BUILD_DISK_ANN
|
||||
#include "index/VectorDiskIndex.h"
|
||||
#endif
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
IndexBasePtr
|
||||
IndexFactory::CreateIndex(CDataType dtype, const std::string& index_type) {
|
||||
switch (dtype) {
|
||||
case Bool:
|
||||
return CreateIndex<bool>(index_type);
|
||||
case Int8:
|
||||
return CreateIndex<int8_t>(index_type);
|
||||
case Int16:
|
||||
return CreateIndex<int16_t>(index_type);
|
||||
case Int32:
|
||||
return CreateIndex<int32_t>(index_type);
|
||||
case Int64:
|
||||
return CreateIndex<int64_t>(index_type);
|
||||
case Float:
|
||||
return CreateIndex<float>(index_type);
|
||||
case Double:
|
||||
return CreateIndex<double>(index_type);
|
||||
IndexFactory::CreateIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager) {
|
||||
if (datatype_is_vector(create_index_info.field_type)) {
|
||||
return CreateVectorIndex(create_index_info, file_manager);
|
||||
}
|
||||
|
||||
case String:
|
||||
case VarChar:
|
||||
return CreateIndex<std::string>(index_type);
|
||||
return CreateScalarIndex(create_index_info);
|
||||
}
|
||||
|
||||
case None:
|
||||
case BinaryVector:
|
||||
case FloatVector:
|
||||
IndexBasePtr
|
||||
IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info) {
|
||||
auto data_type = create_index_info.field_type;
|
||||
auto index_type = create_index_info.index_type;
|
||||
|
||||
switch (data_type) {
|
||||
// create scalar index
|
||||
case DataType::BOOL:
|
||||
return CreateScalarIndex<bool>(index_type);
|
||||
case DataType::INT8:
|
||||
return CreateScalarIndex<int8_t>(index_type);
|
||||
case DataType::INT16:
|
||||
return CreateScalarIndex<int16_t>(index_type);
|
||||
case DataType::INT32:
|
||||
return CreateScalarIndex<int32_t>(index_type);
|
||||
case DataType::INT64:
|
||||
return CreateScalarIndex<int64_t>(index_type);
|
||||
case DataType::FLOAT:
|
||||
return CreateScalarIndex<float>(index_type);
|
||||
case DataType::DOUBLE:
|
||||
return CreateScalarIndex<double>(index_type);
|
||||
|
||||
// create string index
|
||||
case DataType::STRING:
|
||||
case DataType::VARCHAR:
|
||||
return CreateScalarIndex<std::string>(index_type);
|
||||
default:
|
||||
throw std::invalid_argument(std::string("invalid data type: ") + std::to_string(dtype));
|
||||
throw std::invalid_argument(std::string("invalid data type to build index: ") +
|
||||
std::to_string(int(data_type)));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace milvus::scalar
|
||||
// Creates the index object for a vector field.
//
// When the build defines BUILD_DISK_ANN and the requested index type is in the
// disk list, a disk-based index is created; only float vectors are accepted
// there and any other field type raises std::invalid_argument. Every other
// request gets an in-memory VectorMemIndex. `file_manager` is only handed to
// the disk index.
IndexBasePtr
IndexFactory::CreateVectorIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager) {
    auto field_type = create_index_info.field_type;
    auto type = create_index_info.index_type;
    auto metric = create_index_info.metric_type;
    auto mode = create_index_info.index_mode;

#ifdef BUILD_DISK_ANN
    // create disk index
    if (is_in_disk_list(type)) {
        if (field_type != DataType::VECTOR_FLOAT) {
            throw std::invalid_argument(std::string("invalid data type to build disk index: ") +
                                        std::to_string(int(field_type)));
        }
        return std::make_unique<VectorDiskAnnIndex<float>>(type, metric, mode, file_manager);
    }
#endif

    // create mem index
    return std::make_unique<VectorMemIndex>(type, metric, mode);
}
|
||||
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,24 +1,40 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include "common/type_c.h"
|
||||
#include "config/ConfigChunkManager.h"
|
||||
#include "index/Index.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
#include "index/StringIndex.h"
|
||||
#include "index/VectorIndex.h"
|
||||
#include "index/IndexInfo.h"
|
||||
#include "storage/Types.h"
|
||||
#include "storage/FileManager.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
#ifdef BUILD_DISK_ANN
|
||||
#include "storage/LocalChunkManager.h"
|
||||
#include "storage/MinioChunkManager.h"
|
||||
#endif
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
class IndexFactory {
|
||||
public:
|
||||
|
@ -32,17 +48,27 @@ class IndexFactory {
|
|||
GetInstance() {
|
||||
// initialization of a function-local static is thread-safe since C++11
|
||||
static IndexFactory instance;
|
||||
|
||||
return instance;
|
||||
}
|
||||
|
||||
IndexBasePtr
|
||||
CreateIndex(CDataType dtype, const std::string& index_type);
|
||||
CreateIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager);
|
||||
|
||||
IndexBasePtr
|
||||
CreateVectorIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager);
|
||||
|
||||
IndexBasePtr
|
||||
CreateScalarIndex(const CreateIndexInfo& create_index_info);
|
||||
|
||||
// IndexBasePtr
|
||||
// CreateIndex(DataType dtype, const IndexType& index_type, const IndexMode& index_mode = IndexMode::MODE_CPU);
|
||||
private:
|
||||
template <typename T>
|
||||
ScalarIndexPtr<T>
|
||||
CreateIndex(const std::string& index_type);
|
||||
CreateScalarIndex(const IndexType& index_type);
|
||||
};
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
||||
#include "index/IndexFactory-inl.h"
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/Types.h"
|
||||
#include "common/type_c.h"
|
||||
#include "index/Index.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
struct LoadIndexInfo {
|
||||
int64_t collection_id;
|
||||
int64_t partition_id;
|
||||
int64_t segment_id;
|
||||
int64_t field_id;
|
||||
DataType field_type;
|
||||
int64_t index_id;
|
||||
int64_t index_build_id;
|
||||
int64_t index_version;
|
||||
std::map<std::string, std::string> index_params;
|
||||
std::vector<std::string> index_files;
|
||||
index::IndexBasePtr index;
|
||||
};
|
||||
|
||||
struct CreateIndexInfo {
|
||||
DataType field_type;
|
||||
IndexType index_type;
|
||||
MetricType metric_type;
|
||||
IndexMode index_mode = IndexMode::MODE_CPU;
|
||||
};
|
||||
|
||||
} // namespace milvus::index
|
|
@ -1,15 +1,20 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
template <typename T>
|
||||
struct IndexStructure {
|
||||
IndexStructure() : a_(0), idx_(0) {
|
||||
|
@ -41,4 +46,4 @@ struct IndexStructure {
|
|||
T a_;
|
||||
size_t idx_;
|
||||
};
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace milvus::scalar {
|
||||
constexpr const char* INDEX_TYPE_MARISA = "marisa";
|
||||
}
|
|
@ -1,17 +1,25 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace milvus::scalar {
|
||||
#include "knowhere/index/IndexType.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
|
||||
namespace milvus::index {
|
||||
constexpr const char* OPERATOR_TYPE = "operator_type";
|
||||
constexpr const char* RANGE_VALUE = "range_value";
|
||||
constexpr const char* LOWER_BOUND_VALUE = "lower_bound_value";
|
||||
|
@ -19,9 +27,49 @@ constexpr const char* LOWER_BOUND_INCLUSIVE = "lower_bound_inclusive";
|
|||
constexpr const char* UPPER_BOUND_VALUE = "upper_bound_value";
|
||||
constexpr const char* UPPER_BOUND_INCLUSIVE = "upper_bound_inclusive";
|
||||
constexpr const char* PREFIX_VALUE = "prefix_value";
|
||||
constexpr const char* MARISA_TRIE = "marisa_trie";
|
||||
// below configurations will be persistent, do not edit them.
|
||||
constexpr const char* MARISA_TRIE_INDEX = "marisa_trie_index";
|
||||
constexpr const char* MARISA_STR_IDS = "marisa_trie_str_ids";
|
||||
constexpr const char* FLAT_STR_INDEX = "flat_str_index";
|
||||
} // namespace milvus::scalar
|
||||
|
||||
constexpr const char* INDEX_TYPE = "index_type";
|
||||
constexpr const char* INDEX_MODE = "index_mode";
|
||||
constexpr const char* METRIC_TYPE = "metric_type";
|
||||
|
||||
// scalar index type
|
||||
constexpr const char* ASCENDING_SORT = "STL_SORT";
|
||||
constexpr const char* MARISA_TRIE = "Trie";
|
||||
|
||||
// index meta
|
||||
constexpr const char* COLLECTION_ID = "collection_id";
|
||||
constexpr const char* PARTITION_ID = "partition_id";
|
||||
constexpr const char* SEGMENT_ID = "segment_id";
|
||||
constexpr const char* FIELD_ID = "field_id";
|
||||
constexpr const char* INDEX_BUILD_ID = "index_build_id";
|
||||
constexpr const char* INDEX_ID = "index_id";
|
||||
constexpr const char* INDEX_VERSION = "index_version";
|
||||
|
||||
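// DiskAnn build params
// NOTE(review): the "ps_disk_bytes" value of DISK_ANN_PQ_BYTES below looks like a typo for "pq_..." —
// confirm against the code that reads this key before changing it.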
|
||||
constexpr const char* DISK_ANN_RAW_DATA_PATH = "data_path";
|
||||
constexpr const char* DISK_ANN_MAX_DEGREE = "max_degree";
|
||||
constexpr const char* DISK_ANN_BUILD_LIST = "build_list";
|
||||
constexpr const char* DISK_ANN_SEARCH_DRAM_BUDGET = "search_dram_budget";
|
||||
constexpr const char* DISK_ANN_BUILD_DRAM_BUDGET = "build_dram_budget";
|
||||
constexpr const char* DISK_ANN_BUILD_THREAD_NUM = "num_build_thread";
|
||||
constexpr const char* DISK_ANN_PQ_BYTES = "ps_disk_bytes";
|
||||
|
||||
// DiskAnn prepare params
|
||||
constexpr const char* DISK_ANN_PREPARE_THREAD_NUM = "num_prepare_thread";
|
||||
constexpr const char* NUM_ROW_OF_RAW_DATA = "count";
|
||||
constexpr const char* DISK_ANN_PREPARE_WARM_UP = "warm_up";
|
||||
constexpr const char* DISK_ANN_PREPARE_USE_BFS_CACHE = "use_bfs_cache";
|
||||
|
||||
// DiskAnn query params
|
||||
constexpr const char* DISK_ANN_QUERY_LIST = "search_list";
|
||||
constexpr const char* DISK_ANN_QUERY_BEAMWIDTH = "beamwidth";
|
||||
|
||||
// DiskAnn config name
|
||||
constexpr const char* Disk_ANN_Build_Config = "diskANN_build_config";
|
||||
constexpr const char* Disk_ANN_Prepare_Config = "diskANN_prepare_config";
|
||||
constexpr const char* Disk_ANN_Query_Config = "diskANN_query_config";
|
||||
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,21 +1,27 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "index/Meta.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
template <typename T>
|
||||
const TargetBitmapPtr
|
||||
ScalarIndex<T>::Query(const DatasetPtr& dataset) {
|
||||
|
@ -55,4 +61,67 @@ ScalarIndex<T>::Query(const DatasetPtr& dataset) {
|
|||
throw std::invalid_argument(std::string("unsupported operator type: " + std::to_string(op)));
|
||||
}
|
||||
}
|
||||
} // namespace milvus::scalar
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<std::string>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
// TODO :: use arrow
|
||||
proto::schema::StringArray arr;
|
||||
arr.ParseFromArray(values, n);
|
||||
|
||||
// TODO :: optimize here. avoid memory copy.
|
||||
std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
|
||||
Build(arr.data_size(), vecs.data());
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<bool>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
proto::schema::BoolArray arr;
|
||||
arr.ParseFromArray(values, n);
|
||||
Build(arr.data_size(), arr.data().data());
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<int8_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
auto data = reinterpret_cast<int8_t*>(const_cast<void*>(values));
|
||||
Build(n, data);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<int16_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
auto data = reinterpret_cast<int16_t*>(const_cast<void*>(values));
|
||||
Build(n, data);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<int32_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
auto data = reinterpret_cast<int32_t*>(const_cast<void*>(values));
|
||||
Build(n, data);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<int64_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
auto data = reinterpret_cast<int64_t*>(const_cast<void*>(values));
|
||||
Build(n, data);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<float>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
auto data = reinterpret_cast<float*>(const_cast<void*>(values));
|
||||
Build(n, data);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
ScalarIndex<double>::BuildWithRawData(size_t n, const void* values, const Config& config) {
|
||||
auto data = reinterpret_cast<double*>(const_cast<void*>(values));
|
||||
Build(n, data);
|
||||
}
|
||||
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -17,11 +22,21 @@
|
|||
#include <boost/dynamic_bitset.hpp>
|
||||
#include "index/Index.h"
|
||||
#include "common/Types.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
template <typename T>
|
||||
class ScalarIndex : public IndexBase {
|
||||
public:
|
||||
void
|
||||
BuildWithRawData(size_t n, const void* values, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override {
|
||||
PanicInfo("scalar index don't support build index with dataset");
|
||||
};
|
||||
|
||||
public:
|
||||
virtual void
|
||||
Build(size_t n, const T* values) = 0;
|
||||
|
@ -41,13 +56,16 @@ class ScalarIndex : public IndexBase {
|
|||
virtual T
|
||||
Reverse_Lookup(size_t offset) const = 0;
|
||||
|
||||
const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset) override;
|
||||
virtual const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset);
|
||||
|
||||
virtual int64_t
|
||||
Size() = 0;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using ScalarIndexPtr = std::unique_ptr<ScalarIndex<T>>;
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
||||
#include "index/ScalarIndex-inl.h"
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
@ -19,7 +24,7 @@
|
|||
#include "Meta.h"
|
||||
#include "common/Utils.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
template <typename T>
|
||||
inline ScalarIndexSort<T>::ScalarIndexSort() : is_built_(false), data_() {
|
||||
|
@ -27,38 +32,24 @@ inline ScalarIndexSort<T>::ScalarIndexSort() : is_built_(false), data_() {
|
|||
|
||||
template <typename T>
|
||||
inline ScalarIndexSort<T>::ScalarIndexSort(const size_t n, const T* values) : is_built_(false) {
|
||||
ScalarIndexSort<T>::Build(n, values);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void
|
||||
ScalarIndexSort<T>::BuildWithDataset(const DatasetPtr& dataset) {
|
||||
auto size = knowhere::GetDatasetRows(dataset);
|
||||
auto data = knowhere::GetDatasetTensor(dataset);
|
||||
Build(size, reinterpret_cast<const T*>(data));
|
||||
ScalarIndexSort<T>::BuildWithDataset(n, values);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void
|
||||
ScalarIndexSort<T>::Build(const size_t n, const T* values) {
|
||||
if (is_built_)
|
||||
return;
|
||||
if (n == 0) {
|
||||
// empty input is rejected with an exception
|
||||
throw std::invalid_argument("ScalarIndexSort cannot build null values!");
|
||||
}
|
||||
data_.reserve(n);
|
||||
idx_to_offsets_.resize(n);
|
||||
T* p = const_cast<T*>(values);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
data_.emplace_back(IndexStructure(*p++, i));
|
||||
}
|
||||
build();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void
|
||||
ScalarIndexSort<T>::build() {
|
||||
if (is_built_)
|
||||
return;
|
||||
if (data_.size() == 0) {
|
||||
// todo: throw an exception
|
||||
throw std::invalid_argument("ScalarIndexSort cannot build null values!");
|
||||
}
|
||||
std::sort(data_.begin(), data_.end());
|
||||
for (size_t i = 0; i < data_.size(); ++i) {
|
||||
idx_to_offsets_[data_[i].idx_] = i;
|
||||
|
@ -87,7 +78,7 @@ ScalarIndexSort<T>::Serialize(const Config& config) {
|
|||
|
||||
template <typename T>
|
||||
inline void
|
||||
ScalarIndexSort<T>::Load(const BinarySet& index_binary) {
|
||||
ScalarIndexSort<T>::Load(const BinarySet& index_binary, const Config& config) {
|
||||
size_t index_size;
|
||||
auto index_length = index_binary.GetByName("index_length");
|
||||
memcpy(&index_size, index_length->data.get(), (size_t)index_length->size);
|
||||
|
@ -206,5 +197,4 @@ ScalarIndexSort<T>::Reverse_Lookup(size_t idx) const {
|
|||
auto offset = idx_to_offsets_[idx];
|
||||
return data_[offset].a_;
|
||||
}
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -20,7 +25,7 @@
|
|||
#include "index/IndexStructure.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
template <typename T>
|
||||
class ScalarIndexSort : public ScalarIndex<T> {
|
||||
|
@ -32,12 +37,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
Serialize(const Config& config) override;
|
||||
|
||||
void
|
||||
Load(const BinarySet& index_binary) override;
|
||||
Load(const BinarySet& index_binary, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset) override;
|
||||
|
||||
size_t
|
||||
int64_t
|
||||
Count() override {
|
||||
return data_.size();
|
||||
}
|
||||
|
@ -45,9 +47,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
void
|
||||
Build(size_t n, const T* values) override;
|
||||
|
||||
void
|
||||
build();
|
||||
|
||||
const TargetBitmapPtr
|
||||
In(size_t n, const T* values) override;
|
||||
|
||||
|
@ -63,17 +62,17 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
T
|
||||
Reverse_Lookup(size_t offset) const override;
|
||||
|
||||
int64_t
|
||||
Size() override {
|
||||
return (int64_t)data_.size();
|
||||
}
|
||||
|
||||
public:
|
||||
// Read-only view of the stored index entries; no copy is made.
const std::vector<IndexStructure<T>>&
GetData() {
    return this->data_;
}
|
||||
|
||||
int64_t
|
||||
Size() override {
|
||||
return (int64_t)data_.size();
|
||||
}
|
||||
|
||||
bool
|
||||
IsBuilt() const {
|
||||
return is_built_;
|
||||
|
@ -81,6 +80,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
|
||||
private:
|
||||
bool is_built_;
|
||||
Config config_;
|
||||
std::vector<size_t> idx_to_offsets_; // used to retrieve.
|
||||
std::vector<IndexStructure<T>> data_;
|
||||
};
|
||||
|
@ -88,14 +88,14 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
template <typename T>
|
||||
using ScalarIndexSortPtr = std::unique_ptr<ScalarIndexSort<T>>;
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
||||
#include "index/ScalarIndexSort-inl.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
template <typename T>
|
||||
inline ScalarIndexSortPtr<T>
|
||||
CreateScalarIndexSort() {
|
||||
return std::make_unique<ScalarIndexSort<T>>();
|
||||
}
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -18,30 +23,10 @@
|
|||
#include "index/Meta.h"
|
||||
#include <pb/schema.pb.h>
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
class StringIndex : public ScalarIndex<std::string> {
|
||||
public:
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset) override {
|
||||
auto size = knowhere::GetDatasetRows(dataset);
|
||||
auto data = knowhere::GetDatasetTensor(dataset);
|
||||
proto::schema::StringArray arr;
|
||||
arr.ParseFromArray(data, size);
|
||||
|
||||
{
|
||||
// TODO: optimize here. avoid memory copy.
|
||||
std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
|
||||
Build(arr.data().size(), vecs.data());
|
||||
}
|
||||
|
||||
{
|
||||
// TODO: test this way.
|
||||
// auto strs = (const std::string*)arr.data().data();
|
||||
// Build(arr.data().size(), strs);
|
||||
}
|
||||
}
|
||||
|
||||
const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset) override {
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
|
@ -56,4 +41,4 @@ class StringIndex : public ScalarIndex<std::string> {
|
|||
PrefixMatch(std::string prefix) = 0;
|
||||
};
|
||||
using StringIndexPtr = std::unique_ptr<StringIndex>;
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
|
@ -22,7 +27,7 @@
|
|||
#include "index/Index.h"
|
||||
#include "common/Utils.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
#if defined(__linux__) || defined(__APPLE__)
|
||||
|
||||
|
@ -86,7 +91,7 @@ StringIndexMarisa::Serialize(const Config& config) {
|
|||
}
|
||||
|
||||
void
|
||||
StringIndexMarisa::Load(const BinarySet& set) {
|
||||
StringIndexMarisa::Load(const BinarySet& set, const Config& config) {
|
||||
knowhere::Assemble(const_cast<BinarySet&>(set));
|
||||
|
||||
auto uuid = boost::uuids::random_generator()();
|
||||
|
@ -289,4 +294,4 @@ StringIndexMarisa::Reverse_Lookup(size_t offset) const {
|
|||
|
||||
#endif
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -20,7 +25,7 @@
|
|||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
|
||||
class StringIndexMarisa : public StringIndex {
|
||||
public:
|
||||
|
@ -33,9 +38,9 @@ class StringIndexMarisa : public StringIndex {
|
|||
Serialize(const Config& config) override;
|
||||
|
||||
void
|
||||
Load(const BinarySet& set) override;
|
||||
Load(const BinarySet& set, const Config& config = {}) override;
|
||||
|
||||
size_t
|
||||
int64_t
|
||||
Count() override {
|
||||
return str_ids_.size();
|
||||
}
|
||||
|
@ -76,6 +81,7 @@ class StringIndexMarisa : public StringIndex {
|
|||
prefix_match(const std::string& prefix);
|
||||
|
||||
private:
|
||||
Config config_;
|
||||
marisa::Trie trie_;
|
||||
std::vector<size_t> str_ids_; // used to retrieve.
|
||||
std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
|
||||
|
@ -89,6 +95,6 @@ CreateStringIndexMarisa() {
|
|||
return std::make_unique<StringIndexMarisa>();
|
||||
}
|
||||
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
@ -18,30 +23,10 @@
|
|||
#include "index/StringIndex.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
namespace milvus::index {
|
||||
// TODO: should inherit from StringIndex?
|
||||
class StringIndexSort : public ScalarIndexSort<std::string> {
|
||||
public:
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset) override {
|
||||
auto size = knowhere::GetDatasetRows(dataset);
|
||||
auto data = knowhere::GetDatasetTensor(dataset);
|
||||
proto::schema::StringArray arr;
|
||||
arr.ParseFromArray(data, size);
|
||||
|
||||
{
|
||||
// TODO: optimize here. avoid memory copy.
|
||||
std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
|
||||
Build(arr.data().size(), vecs.data());
|
||||
}
|
||||
|
||||
{
|
||||
// TODO: test this way.
|
||||
// auto strs = (const std::string*)arr.data().data();
|
||||
// Build(arr.data().size(), strs);
|
||||
}
|
||||
}
|
||||
|
||||
const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset) override {
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
|
@ -70,4 +55,4 @@ inline StringIndexSortPtr
|
|||
CreateStringIndexSort() {
|
||||
return std::make_unique<StringIndexSort>();
|
||||
}
|
||||
} // namespace milvus::scalar
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -0,0 +1,205 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <algorithm>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include "index/Utils.h"
|
||||
#include "index/Meta.h"
|
||||
#include "pb/index_cgo_msg.pb.h"
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include "exceptions/EasyAssert.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
// Returns the size in bytes of the file referred to by descriptor `fd`.
//
// If fstat() fails (for example because `fd` is not an open descriptor) the
// function returns 0 instead of reading an uninitialized `struct stat`.
size_t
get_file_size(int fd) {
    struct stat s {};
    if (fstat(fd, &s) != 0) {
        return 0;
    }
    return static_cast<size_t>(s.st_size);
}
|
||||
|
||||
std::vector<IndexType>
|
||||
NM_List() {
|
||||
static std::vector<IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<IndexType>
|
||||
BIN_List() {
|
||||
static std::vector<IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
||||
knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<IndexType>
|
||||
DISK_LIST() {
|
||||
static std::vector<IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_DISKANN,
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<std::tuple<IndexType, MetricType>>
|
||||
unsupported_index_combinations() {
|
||||
static std::vector<std::tuple<IndexType, MetricType>> ret{
|
||||
std::make_tuple(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::L2),
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_bin_list(const IndexType& index_type) {
|
||||
return is_in_list<IndexType>(index_type, BIN_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_nm_list(const IndexType& index_type) {
|
||||
return is_in_list<IndexType>(index_type, NM_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_disk_list(const IndexType& index_type) {
|
||||
return is_in_list<IndexType>(index_type, DISK_LIST);
|
||||
}
|
||||
|
||||
bool
|
||||
is_unsupported(const IndexType& index_type, const MetricType& metric_type) {
|
||||
return is_in_list<std::tuple<IndexType, MetricType>>(std::make_tuple(index_type, metric_type),
|
||||
unsupported_index_combinations);
|
||||
}
|
||||
|
||||
bool
|
||||
CheckKeyInConfig(const Config& cfg, const std::string& key) {
|
||||
return cfg.contains(key);
|
||||
}
|
||||
|
||||
void
|
||||
ParseFromString(google::protobuf::Message& params, const std::string& str) {
|
||||
auto ok = google::protobuf::TextFormat::ParseFromString(str, ¶ms);
|
||||
AssertInfo(ok, "failed to parse params from string");
|
||||
}
|
||||
|
||||
int64_t
|
||||
GetDimFromConfig(const Config& config) {
|
||||
auto dimension = GetValueFromConfig<std::string>(config, "dim");
|
||||
AssertInfo(dimension.has_value(), "dimension not exist in config");
|
||||
return (std::stoi(dimension.value()));
|
||||
}
|
||||
|
||||
std::string
|
||||
GetMetricTypeFromConfig(const Config& config) {
|
||||
auto metric_type = GetValueFromConfig<std::string>(config, "metric_type");
|
||||
AssertInfo(metric_type.has_value(), "metric_type not exist in config");
|
||||
return metric_type.value();
|
||||
}
|
||||
|
||||
std::string
|
||||
GetIndexTypeFromConfig(const Config& config) {
|
||||
auto index_type = GetValueFromConfig<std::string>(config, "index_type");
|
||||
AssertInfo(index_type.has_value(), "index_type not exist in config");
|
||||
return index_type.value();
|
||||
}
|
||||
|
||||
// Resolves the index mode from the INDEX_MODE entry of `config`.
// When the entry is absent the CPU mode is returned; otherwise the stored
// string is translated by GetIndexMode().
IndexMode
GetIndexModeFromConfig(const Config& config) {
    const auto mode = GetValueFromConfig<std::string>(config, INDEX_MODE);
    if (!mode.has_value()) {
        return knowhere::IndexMode::MODE_CPU;
    }
    return GetIndexMode(mode.value());
}
|
||||
|
||||
// Translates the textual index mode into the IndexMode enum.
//
//   "CPU" -> IndexMode::MODE_CPU
//   "GPU" -> IndexMode::MODE_GPU
//
// Any other value triggers PanicInfo("unsupported index mode").
IndexMode
GetIndexMode(const std::string index_mode) {
    // std::string::compare returns 0 on equality, so the match must be tested
    // with == (the previous `!= 0` tests mapped "CPU" to MODE_GPU and every
    // other string to MODE_CPU, leaving the panic unreachable).
    if (index_mode == "CPU") {
        return IndexMode::MODE_CPU;
    }

    if (index_mode == "GPU") {
        return IndexMode::MODE_GPU;
    }

    PanicInfo("unsupported index mode");
}
|
||||
|
||||
// TODO :: too ugly
|
||||
// Builds a storage::FieldDataMeta from the string-valued COLLECTION_ID,
// PARTITION_ID, SEGMENT_ID and FIELD_ID entries of `config`.
// Each entry is required; a missing one fails its AssertInfo check.
storage::FieldDataMeta
GetFieldDataMetaFromConfig(const Config& config) {
    storage::FieldDataMeta field_data_meta;

    // collection
    const auto collection_id = GetValueFromConfig<std::string>(config, COLLECTION_ID);
    AssertInfo(collection_id.has_value(), "collection id not exist in index config");
    field_data_meta.collection_id = std::stol(*collection_id);

    // partition
    const auto partition_id = GetValueFromConfig<std::string>(config, PARTITION_ID);
    AssertInfo(partition_id.has_value(), "partition id not exist in index config");
    field_data_meta.partition_id = std::stol(*partition_id);

    // segment
    const auto segment_id = GetValueFromConfig<std::string>(config, SEGMENT_ID);
    AssertInfo(segment_id.has_value(), "segment id not exist in index config");
    field_data_meta.segment_id = std::stol(*segment_id);

    // field
    const auto field_id = GetValueFromConfig<std::string>(config, FIELD_ID);
    AssertInfo(field_id.has_value(), "field id not exist in index config");
    field_data_meta.field_id = std::stol(*field_id);

    return field_data_meta;
}
|
||||
|
||||
// Builds a storage::IndexMeta from the string-valued SEGMENT_ID, FIELD_ID,
// INDEX_VERSION and INDEX_BUILD_ID entries of `config`.
// Each entry is required; a missing one fails its AssertInfo check.
storage::IndexMeta
GetIndexMetaFromConfig(const Config& config) {
    storage::IndexMeta index_meta;

    // segment
    const auto segment_id = GetValueFromConfig<std::string>(config, SEGMENT_ID);
    AssertInfo(segment_id.has_value(), "segment id not exist in index config");
    index_meta.segment_id = std::stol(*segment_id);

    // field
    const auto field_id = GetValueFromConfig<std::string>(config, FIELD_ID);
    AssertInfo(field_id.has_value(), "field id not exist in index config");
    index_meta.field_id = std::stol(*field_id);

    // index version
    const auto index_version = GetValueFromConfig<std::string>(config, INDEX_VERSION);
    AssertInfo(index_version.has_value(), "index_version id not exist in index config");
    index_meta.index_version = std::stol(*index_version);

    // build id
    const auto build_id = GetValueFromConfig<std::string>(config, INDEX_BUILD_ID);
    AssertInfo(build_id.has_value(), "build id not exist in index config");
    index_meta.build_id = std::stol(*build_id);

    return index_meta;
}
|
||||
|
||||
// Copies every key/value pair of `index_params` into a fresh Config, storing
// each value as a string entry under its key.
Config
ParseConfigFromIndexParams(const std::map<std::string, std::string>& index_params) {
    Config config;
    for (const auto& [param_name, param_value] : index_params) {
        config[param_name] = param_value;
    }
    return config;
}
|
||||
|
||||
} // namespace milvus::index
|
|
@ -1,13 +1,20 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <stdio.h>
|
||||
|
@ -15,14 +22,105 @@
|
|||
#include <iostream>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <tuple>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace milvus::scalar {
|
||||
#include "common/Types.h"
|
||||
#include "index/IndexInfo.h"
|
||||
#include "storage/Types.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
size_t
|
||||
get_file_size(int fd) {
|
||||
struct stat s;
|
||||
fstat(fd, &s);
|
||||
return s.st_size;
|
||||
get_file_size(int fd);
|
||||
|
||||
std::vector<IndexType>
|
||||
NM_List();
|
||||
|
||||
std::vector<IndexType>
|
||||
BIN_List();
|
||||
|
||||
std::vector<std::tuple<IndexType, MetricType>>
|
||||
unsupported_index_combinations();
|
||||
|
||||
// Returns true when `t` compares equal to at least one element of the vector
// produced by calling `list_func`.
template <typename T>
inline bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
    const std::vector<T> candidates = list_func();
    for (const auto& candidate : candidates) {
        if (candidate == t) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
} // namespace milvus::scalar
|
||||
bool
|
||||
is_in_bin_list(const IndexType& index_type);
|
||||
|
||||
bool
|
||||
is_in_nm_list(const IndexType& index_type);
|
||||
|
||||
bool
|
||||
is_in_disk_list(const IndexType& index_type);
|
||||
|
||||
bool
|
||||
is_unsupported(const IndexType& index_type, const MetricType& metric_type);
|
||||
|
||||
bool
|
||||
CheckKeyInConfig(const Config& cfg, const std::string& key);
|
||||
|
||||
void
|
||||
ParseFromString(google::protobuf::Message& params, const std::string& str);
|
||||
|
||||
template <typename T>
|
||||
void inline CheckParameter(Config& conf,
|
||||
const std::string& key,
|
||||
std::function<T(std::string)> fn,
|
||||
std::optional<T> default_v) {
|
||||
if (!conf.contains(key)) {
|
||||
if (default_v.has_value()) {
|
||||
conf[key] = default_v.value();
|
||||
}
|
||||
} else {
|
||||
auto value = conf[key];
|
||||
conf[key] = fn(value);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline std::optional<T>
|
||||
GetValueFromConfig(const Config& cfg, const std::string& key) {
|
||||
if (cfg.contains(key)) {
|
||||
return cfg.at(key).get<T>();
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void
|
||||
SetValueToConfig(Config& cfg, const std::string& key, const T value) {
|
||||
cfg[key] = value;
|
||||
}
|
||||
|
||||
int64_t
|
||||
GetDimFromConfig(const Config& config);
|
||||
|
||||
std::string
|
||||
GetMetricTypeFromConfig(const Config& config);
|
||||
|
||||
std::string
|
||||
GetIndexTypeFromConfig(const Config& config);
|
||||
|
||||
IndexMode
|
||||
GetIndexModeFromConfig(const Config& config);
|
||||
|
||||
IndexMode
|
||||
GetIndexMode(const std::string index_mode);
|
||||
|
||||
storage::FieldDataMeta
|
||||
GetFieldDataMetaFromConfig(const Config& config);
|
||||
|
||||
storage::IndexMeta
|
||||
GetIndexMetaFromConfig(const Config& config);
|
||||
|
||||
Config
|
||||
ParseConfigFromIndexParams(const std::map<std::string, std::string>& index_params);
|
||||
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -0,0 +1,263 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "index/VectorDiskIndex.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/Utils.h"
|
||||
|
||||
#include "storage/LocalChunkManager.h"
|
||||
#include "config/ConfigKnowhere.h"
|
||||
#include "storage/Util.h"
|
||||
#include "common/Utils.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
#ifdef BUILD_DISK_ANN
|
||||
|
||||
template <typename T>
|
||||
VectorDiskAnnIndex<T>::VectorDiskAnnIndex(const IndexType& index_type,
|
||||
const MetricType& metric_type,
|
||||
const IndexMode& index_mode,
|
||||
storage::FileManagerImplPtr file_manager)
|
||||
: VectorIndex(index_type, index_mode, metric_type) {
|
||||
file_manager_ = std::dynamic_pointer_cast<storage::DiskFileManagerImpl>(file_manager);
|
||||
auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
|
||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||
AssertInfo(!local_chunk_manager.Exist(local_index_path_prefix),
|
||||
"local index path " + local_index_path_prefix + " has been exist");
|
||||
local_chunk_manager.CreateDir(local_index_path_prefix);
|
||||
index_ = std::make_unique<knowhere::IndexDiskANN<T>>(local_index_path_prefix, metric_type, file_manager);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */, const Config& config) {
|
||||
auto prepare_config = parse_prepare_config(config);
|
||||
knowhere::Config cfg;
|
||||
knowhere::DiskANNPrepareConfig::Set(cfg, prepare_config);
|
||||
|
||||
auto index_files = GetValueFromConfig<std::vector<std::string>>(config, "index_files");
|
||||
AssertInfo(index_files.has_value(), "index file paths is empty when load disk ann index data");
|
||||
file_manager_->CacheIndexToDisk(index_files.value());
|
||||
index_->Prepare(cfg);
|
||||
SetDim(index_->Dim());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset, const Config& config) {
|
||||
auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
|
||||
auto build_config = parse_build_config(config);
|
||||
auto segment_id = file_manager_->GetFileDataMeta().segment_id;
|
||||
auto field_id = file_manager_->GetFileDataMeta().field_id;
|
||||
auto local_data_path = storage::GenFieldRawDataPathPrefix(segment_id, field_id) + "raw_data";
|
||||
build_config.data_path = local_data_path;
|
||||
if (!local_chunk_manager.Exist(local_data_path)) {
|
||||
local_chunk_manager.CreateFile(local_data_path);
|
||||
}
|
||||
|
||||
int64_t offset = 0;
|
||||
auto num = uint32_t(milvus::GetDatasetRows(dataset));
|
||||
local_chunk_manager.Write(local_data_path, offset, &num, sizeof(num));
|
||||
offset += sizeof(num);
|
||||
|
||||
auto dim = uint32_t(milvus::GetDatasetDim(dataset));
|
||||
local_chunk_manager.Write(local_data_path, offset, &dim, sizeof(dim));
|
||||
offset += sizeof(dim);
|
||||
|
||||
auto data_size = num * dim * sizeof(float);
|
||||
auto raw_data = const_cast<void*>(milvus::GetDatasetTensor(dataset));
|
||||
local_chunk_manager.Write(local_data_path, offset, raw_data, data_size);
|
||||
|
||||
knowhere::Config cfg;
|
||||
knowhere::DiskANNBuildConfig::Set(cfg, build_config);
|
||||
|
||||
index_->BuildAll(nullptr, cfg);
|
||||
|
||||
local_chunk_manager.RemoveDir(storage::GetSegmentRawDataPathPrefix(segment_id));
|
||||
// TODO ::
|
||||
// SetDim(index_->Dim());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<SearchResult>
|
||||
VectorDiskAnnIndex<T>::Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) {
|
||||
AssertInfo(GetMetricType() == search_info.metric_type_,
|
||||
"Metric type of field index isn't the same with search info");
|
||||
auto num_queries = milvus::GetDatasetRows(dataset);
|
||||
auto topk = search_info.topk_;
|
||||
|
||||
knowhere::DiskANNQueryConfig query_config;
|
||||
query_config.k = topk;
|
||||
|
||||
// set search list
|
||||
auto search_list_size = GetValueFromConfig<uint32_t>(search_info.search_params_, DISK_ANN_QUERY_LIST);
|
||||
AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_QUERY_LIST) + "is empty");
|
||||
query_config.search_list_size = search_list_size.value();
|
||||
|
||||
AssertInfo(query_config.search_list_size > topk, "search_list should be greater than topk");
|
||||
AssertInfo(query_config.search_list_size < std::min(uint32_t(topk * 10), uint32_t(65535)),
|
||||
"search_list should less than min(topk*10, 65535)");
|
||||
|
||||
// set beamwidth
|
||||
query_config.beamwidth = 16;
|
||||
auto beam_width = GetValueFromConfig<uint32_t>(search_info.search_params_, DISK_ANN_QUERY_BEAMWIDTH);
|
||||
if (beam_width.has_value()) {
|
||||
query_config.beamwidth = beam_width.value();
|
||||
}
|
||||
|
||||
knowhere::Config cfg;
|
||||
knowhere::DiskANNQueryConfig::Set(cfg, query_config);
|
||||
|
||||
auto final_result = index_->Query(dataset, cfg, bitset);
|
||||
auto ids = milvus::GetDatasetIDs(final_result);
|
||||
float* distances = (float*)milvus::GetDatasetDistance(final_result);
|
||||
|
||||
auto round_decimal = search_info.round_decimal_;
|
||||
auto total_num = num_queries * topk;
|
||||
|
||||
if (round_decimal != -1) {
|
||||
const float multiplier = pow(10.0, round_decimal);
|
||||
for (int i = 0; i < total_num; i++) {
|
||||
distances[i] = round(distances[i] * multiplier) / multiplier;
|
||||
}
|
||||
}
|
||||
auto result = std::make_unique<SearchResult>();
|
||||
result->seg_offsets_.resize(total_num);
|
||||
result->distances_.resize(total_num);
|
||||
result->total_nq_ = num_queries;
|
||||
result->unity_topK_ = topk;
|
||||
|
||||
std::copy_n(ids, total_num, result->seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, result->distances_.data());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::CleanLocalData() {
|
||||
auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
|
||||
local_chunk_manager.RemoveDir(file_manager_->GetLocalIndexObjectPrefix());
|
||||
local_chunk_manager.RemoveDir(file_manager_->GetLocalRawDataObjectPrefix());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
knowhere::DiskANNBuildConfig
|
||||
VectorDiskAnnIndex<T>::parse_build_config(const Config& config) {
|
||||
Config build_config = config;
|
||||
parse_config(build_config);
|
||||
|
||||
// set disk ann build config
|
||||
knowhere::DiskANNBuildConfig build_disk_ann_config;
|
||||
|
||||
// set max degree
|
||||
auto max_degree = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_MAX_DEGREE);
|
||||
AssertInfo(max_degree.has_value(), "param " + std::string(DISK_ANN_MAX_DEGREE) + "is empty");
|
||||
build_disk_ann_config.max_degree = max_degree.value();
|
||||
|
||||
// set build list
|
||||
auto search_list_size = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_BUILD_LIST);
|
||||
AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_BUILD_LIST) + "is empty");
|
||||
build_disk_ann_config.search_list_size = search_list_size.value();
|
||||
|
||||
// set search dram budget
|
||||
auto search_dram_budget_gb = GetValueFromConfig<float>(build_config, DISK_ANN_SEARCH_DRAM_BUDGET);
|
||||
AssertInfo(search_dram_budget_gb.has_value(), "param " + std::string(DISK_ANN_SEARCH_DRAM_BUDGET) + "is empty");
|
||||
build_disk_ann_config.pq_code_budget_gb = search_dram_budget_gb.value();
|
||||
|
||||
// set build dram budget
|
||||
auto build_dram_budget_gb = GetValueFromConfig<float>(build_config, DISK_ANN_BUILD_DRAM_BUDGET);
|
||||
AssertInfo(build_dram_budget_gb.has_value(), "param " + std::string(DISK_ANN_BUILD_DRAM_BUDGET) + "is empty");
|
||||
build_disk_ann_config.build_dram_budget_gb = build_dram_budget_gb.value();
|
||||
|
||||
// set num build thread
|
||||
auto num_threads = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_BUILD_THREAD_NUM);
|
||||
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
|
||||
build_disk_ann_config.num_threads = num_threads.value();
|
||||
|
||||
// set pq bytes
|
||||
auto pq_disk_bytes = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_PQ_BYTES);
|
||||
AssertInfo(pq_disk_bytes.has_value(), "param " + std::string(DISK_ANN_PQ_BYTES) + "is empty");
|
||||
build_disk_ann_config.disk_pq_dims = pq_disk_bytes.value();
|
||||
|
||||
return build_disk_ann_config;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
knowhere::DiskANNPrepareConfig
|
||||
VectorDiskAnnIndex<T>::parse_prepare_config(const Config& config) {
|
||||
Config prepare_config = config;
|
||||
auto dim = GetDimFromConfig(prepare_config);
|
||||
parse_config(prepare_config);
|
||||
|
||||
knowhere::DiskANNPrepareConfig prepare_disk_ann_config;
|
||||
prepare_disk_ann_config.warm_up = false;
|
||||
prepare_disk_ann_config.use_bfs_cache = false;
|
||||
|
||||
// set prepare thread num
|
||||
auto num_threads = GetValueFromConfig<uint32_t>(prepare_config, DISK_ANN_PREPARE_THREAD_NUM);
|
||||
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_PREPARE_THREAD_NUM) + "is empty");
|
||||
prepare_disk_ann_config.num_threads = num_threads.value();
|
||||
|
||||
// get max degree
|
||||
auto max_degree = GetValueFromConfig<uint32_t>(prepare_config, DISK_ANN_MAX_DEGREE);
|
||||
AssertInfo(max_degree.has_value(), "param " + std::string(DISK_ANN_MAX_DEGREE) + "is empty");
|
||||
|
||||
// set prepare cached node
|
||||
auto num_rows = GetValueFromConfig<int>(prepare_config, NUM_ROW_OF_RAW_DATA);
|
||||
AssertInfo(num_rows.has_value(), "param " + std::string(NUM_ROW_OF_RAW_DATA) + "is empty");
|
||||
|
||||
prepare_disk_ann_config.search_cache_budget_gb =
|
||||
(dim + max_degree.value() + 1) * sizeof(float) * 1.2 * num_rows.value() * 0.1 / 1024 / 1024 / 1024;
|
||||
|
||||
return prepare_disk_ann_config;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::parse_config(Config& config) {
|
||||
auto stoi_closure = [](const std::string& s) -> uint32_t { return std::stoi(s); };
|
||||
auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
|
||||
|
||||
/***************************** meta *******************************/
|
||||
CheckParameter<int>(config, knowhere::meta::SLICE_SIZE, stoi_closure,
|
||||
std::optional{config::KnowhereGetIndexSliceSize()});
|
||||
CheckParameter<int>(config, knowhere::meta::DIM, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::meta::TOPK, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** DiskAnn build Params ************************/
|
||||
CheckParameter<int>(config, DISK_ANN_MAX_DEGREE, stoi_closure, std::optional{48});
|
||||
CheckParameter<int>(config, DISK_ANN_BUILD_LIST, stoi_closure, std::optional{128});
|
||||
CheckParameter<float>(config, DISK_ANN_SEARCH_DRAM_BUDGET, stof_closure, std::optional{0.03});
|
||||
CheckParameter<float>(config, DISK_ANN_BUILD_DRAM_BUDGET, stof_closure, std::optional{32});
|
||||
CheckParameter<int>(config, DISK_ANN_BUILD_THREAD_NUM, stoi_closure, std::optional{8});
|
||||
CheckParameter<int>(config, DISK_ANN_PQ_BYTES, stoi_closure, std::optional{0});
|
||||
|
||||
/************************** DiskAnn prepare Params ************************/
|
||||
CheckParameter<int>(config, DISK_ANN_PREPARE_THREAD_NUM, stoi_closure, std::optional{8});
|
||||
CheckParameter<int>(config, NUM_ROW_OF_RAW_DATA, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** DiskAnn query Params ************************/
|
||||
// CheckParameter<int>(config, DISK_ANN_QUERY_LIST, stoi_closure, std::nullopt);
|
||||
// CheckParameter<int>(config, DISK_ANN_QUERY_BEAMWIDTH, stoi_closure, std::optional{16});
|
||||
}
|
||||
|
||||
template class VectorDiskAnnIndex<float>;
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace milvus::index
|
|
@ -0,0 +1,84 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "index/VectorIndex.h"
|
||||
#include "storage/DiskFileManagerImpl.h"
|
||||
#include "knowhere/index/vector_index/IndexDiskANN.h"
|
||||
#include "knowhere/index/vector_index/IndexDiskANNConfig.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
#ifdef BUILD_DISK_ANN
|
||||
|
||||
template <typename T>
|
||||
class VectorDiskAnnIndex : public VectorIndex {
|
||||
public:
|
||||
explicit VectorDiskAnnIndex(const IndexType& index_type,
|
||||
const MetricType& metric_type,
|
||||
const IndexMode& index_mode,
|
||||
storage::FileManagerImplPtr file_manager);
|
||||
BinarySet
|
||||
Serialize(const Config& config) override {
|
||||
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
|
||||
BinarySet binary_set;
|
||||
for (auto& file : remote_paths_to_size) {
|
||||
binary_set.Append(file.first, nullptr, file.second);
|
||||
}
|
||||
|
||||
return binary_set;
|
||||
}
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return index_->Count();
|
||||
}
|
||||
|
||||
void
|
||||
Load(const BinarySet& binary_set /* not used */, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override;
|
||||
|
||||
std::unique_ptr<SearchResult>
|
||||
Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) override;
|
||||
|
||||
void
|
||||
CleanLocalData() override;
|
||||
|
||||
private:
|
||||
knowhere::DiskANNBuildConfig
|
||||
parse_build_config(const Config& config);
|
||||
|
||||
knowhere::DiskANNPrepareConfig
|
||||
parse_prepare_config(const Config& config);
|
||||
|
||||
void
|
||||
parse_config(Config& config);
|
||||
|
||||
private:
|
||||
std::unique_ptr<knowhere::IndexDiskANN<T>> index_;
|
||||
std::shared_ptr<storage::DiskFileManagerImpl> file_manager_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using VectorDiskAnnIndexPtr = std::unique_ptr<VectorDiskAnnIndex<T>>;
|
||||
#endif
|
||||
|
||||
} // namespace milvus::index
|
|
@ -0,0 +1,85 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include "knowhere/index/VecIndex.h"
|
||||
#include "index/Index.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/BitsetView.h"
|
||||
#include "common/QueryResult.h"
|
||||
#include "common/QueryInfo.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
class VectorIndex : public IndexBase {
|
||||
public:
|
||||
explicit VectorIndex(const IndexType& index_type, const IndexMode& index_mode, const MetricType& metric_type)
|
||||
: index_type_(index_type), index_mode_(index_mode), metric_type_(metric_type) {
|
||||
}
|
||||
|
||||
public:
|
||||
void
|
||||
BuildWithRawData(size_t n, const void* values, const Config& config = {}) override {
|
||||
PanicInfo("vector index don't support build index with raw data");
|
||||
};
|
||||
|
||||
virtual std::unique_ptr<SearchResult>
|
||||
Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) = 0;
|
||||
|
||||
IndexType
|
||||
GetIndexType() const {
|
||||
return index_type_;
|
||||
}
|
||||
|
||||
MetricType
|
||||
GetMetricType() const {
|
||||
return metric_type_;
|
||||
}
|
||||
|
||||
IndexMode
|
||||
GetIndexMode() const {
|
||||
return index_mode_;
|
||||
}
|
||||
|
||||
int64_t
|
||||
GetDim() const {
|
||||
return dim_;
|
||||
}
|
||||
|
||||
void
|
||||
SetDim(int64_t dim) {
|
||||
dim_ = dim;
|
||||
}
|
||||
|
||||
virtual void
|
||||
CleanLocalData() {
|
||||
}
|
||||
|
||||
private:
|
||||
IndexType index_type_;
|
||||
IndexMode index_mode_;
|
||||
MetricType metric_type_;
|
||||
int64_t dim_;
|
||||
};
|
||||
|
||||
using VectorIndexPtr = std::unique_ptr<VectorIndex>;
|
||||
} // namespace milvus::index
|
|
@ -0,0 +1,218 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "index/VectorMemIndex.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/Utils.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "config/ConfigKnowhere.h"
|
||||
|
||||
#include "knowhere/index/VecIndexFactory.h"
|
||||
#include "knowhere/common/Timer.h"
|
||||
#include "common/BitsetView.h"
|
||||
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "pb/index_cgo_msg.pb.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
// Creates the underlying knowhere index through VecIndexFactory. Asserts first that
// the index/metric pair is not reported as unsupported, then that the factory
// actually returned an index.
VectorMemIndex::VectorMemIndex(const IndexType& index_type,
                               const MetricType& metric_type,
                               const IndexMode& index_mode)
    : VectorIndex(index_type, index_mode, metric_type) {
    AssertInfo(!is_unsupported(index_type, metric_type), index_type + " doesn't support metric: " + metric_type);

    // index_type is the same value the base class stored and exposes via GetIndexType().
    index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, index_mode);
    AssertInfo(index_ != nullptr, "[VecIndexCreator]Index is null after create index");
}
|
||||
|
||||
// Serializes the underlying index. For index types in the "nm" list, the raw
// vectors held in raw_data_ are appended under RAW_DATA and the set is then
// passed to knowhere::Disassemble.
BinarySet
VectorMemIndex::Serialize(const Config& config) {
    knowhere::Config serialize_config = config;
    parse_config(serialize_config);

    auto binary_set = index_->Serialize(serialize_config);
    if (!is_in_nm_list(GetIndexType())) {
        return binary_set;
    }

    // The shared_ptr only aliases raw_data_'s buffer (no copy is made); the no-op
    // deleter keeps it from freeing memory that the vector still owns.
    auto no_op_deleter = [](uint8_t*) {};
    std::shared_ptr<uint8_t[]> raw_view(static_cast<uint8_t*>(raw_data_.data()), no_op_deleter);
    binary_set.Append(RAW_DATA, raw_view, raw_data_.size());

    // Disassemble will only divide the raw vectors, other keys were already divided
    knowhere::Disassemble(binary_set, serialize_config);
    return binary_set;
}
|
||||
|
||||
void
|
||||
VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) {
|
||||
auto& map_ = binary_set.binary_map_;
|
||||
for (auto it = map_.begin(); it != map_.end(); ++it) {
|
||||
if (it->first == RAW_DATA) {
|
||||
raw_data_.clear();
|
||||
auto data_size = it->second->size;
|
||||
raw_data_.resize(data_size);
|
||||
memcpy(raw_data_.data(), it->second->data.get(), data_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
index_->Load(binary_set);
|
||||
SetDim(index_->Dim());
|
||||
}
|
||||
|
||||
void
|
||||
VectorMemIndex::BuildWithDataset(const DatasetPtr& dataset, const Config& config) {
|
||||
knowhere::Config index_config;
|
||||
index_config.update(config);
|
||||
parse_config(index_config);
|
||||
|
||||
SetDim(knowhere::GetDatasetDim(dataset));
|
||||
knowhere::SetMetaRows(index_config, knowhere::GetDatasetRows(dataset));
|
||||
if (GetIndexType() == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
if (!config.contains(knowhere::indexparam::NBITS)) {
|
||||
knowhere::SetIndexParamNbits(index_config, 8);
|
||||
}
|
||||
}
|
||||
auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(GetIndexType());
|
||||
AssertInfo(conf_adapter->CheckTrain(index_config, GetIndexMode()), "something wrong in index parameters!");
|
||||
|
||||
knowhere::TimeRecorder rc("BuildWithoutIds", 1);
|
||||
index_->BuildAll(dataset, index_config);
|
||||
rc.RecordSection("TrainAndAdd");
|
||||
|
||||
if (is_in_nm_list(GetIndexType())) {
|
||||
store_raw_data(dataset);
|
||||
rc.RecordSection("store_raw_data");
|
||||
}
|
||||
rc.ElapseFromBegin("Done");
|
||||
SetDim(index_->Dim());
|
||||
}
|
||||
|
||||
std::unique_ptr<SearchResult>
|
||||
VectorMemIndex::Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) {
|
||||
// AssertInfo(GetMetricType() == search_info.metric_type_,
|
||||
// "Metric type of field index isn't the same with search info");
|
||||
|
||||
auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer
|
||||
auto index_type = GetIndexType();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
std::call_once(raw_data_loaded_, load_raw_data_closure);
|
||||
}
|
||||
|
||||
auto num_queries = knowhere::GetDatasetRows(dataset);
|
||||
Config search_conf = search_info.search_params_;
|
||||
auto topk = search_info.topk_;
|
||||
// TODO :: check dim of search data
|
||||
auto final = [&] {
|
||||
knowhere::SetMetaTopk(search_conf, topk);
|
||||
knowhere::SetMetaMetricType(search_conf, GetMetricType());
|
||||
auto index_type = GetIndexType();
|
||||
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
|
||||
try {
|
||||
adapter->CheckSearch(search_conf, index_type, GetIndexMode());
|
||||
} catch (std::exception& e) {
|
||||
AssertInfo(false, e.what());
|
||||
}
|
||||
return index_->Query(dataset, search_conf, bitset);
|
||||
}();
|
||||
|
||||
auto ids = knowhere::GetDatasetIDs(final);
|
||||
float* distances = (float*)knowhere::GetDatasetDistance(final);
|
||||
|
||||
auto round_decimal = search_info.round_decimal_;
|
||||
auto total_num = num_queries * topk;
|
||||
|
||||
if (round_decimal != -1) {
|
||||
const float multiplier = pow(10.0, round_decimal);
|
||||
for (int i = 0; i < total_num; i++) {
|
||||
distances[i] = round(distances[i] * multiplier) / multiplier;
|
||||
}
|
||||
}
|
||||
auto result = std::make_unique<SearchResult>();
|
||||
result->seg_offsets_.resize(total_num);
|
||||
result->distances_.resize(total_num);
|
||||
result->total_nq_ = num_queries;
|
||||
result->unity_topK_ = topk;
|
||||
|
||||
std::copy_n(ids, total_num, result->seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, result->distances_.data());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
VectorMemIndex::store_raw_data(const knowhere::DatasetPtr& dataset) {
|
||||
auto index_type = GetIndexType();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
auto tensor = knowhere::GetDatasetTensor(dataset);
|
||||
auto row_num = knowhere::GetDatasetRows(dataset);
|
||||
auto dim = knowhere::GetDatasetDim(dataset);
|
||||
int64_t data_size;
|
||||
if (is_in_bin_list(index_type)) {
|
||||
data_size = dim / 8 * row_num;
|
||||
} else {
|
||||
data_size = dim * row_num * sizeof(float);
|
||||
}
|
||||
raw_data_.resize(data_size);
|
||||
memcpy(raw_data_.data(), tensor, data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
VectorMemIndex::LoadRawData() {
|
||||
auto index_type = GetIndexType();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
auto bs = index_->Serialize(Config{knowhere::meta::SLICE_SIZE, config::KnowhereGetIndexSliceSize()});
|
||||
auto bptr = std::make_shared<knowhere::Binary>();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
bptr->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
|
||||
bptr->size = raw_data_.size();
|
||||
bs.Append(RAW_DATA, bptr);
|
||||
index_->Load(bs);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
VectorMemIndex::parse_config(Config& config) {
|
||||
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
|
||||
|
||||
/***************************** meta *******************************/
|
||||
CheckParameter<int>(config, knowhere::meta::SLICE_SIZE, stoi_closure,
|
||||
std::optional{config::KnowhereGetIndexSliceSize()});
|
||||
CheckParameter<int>(config, knowhere::meta::DIM, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::meta::TOPK, stoi_closure, std::nullopt);
|
||||
|
||||
/***************************** IVF Params *******************************/
|
||||
CheckParameter<int>(config, knowhere::indexparam::NPROBE, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::indexparam::NLIST, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::indexparam::M, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::indexparam::NBITS, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** PQ Params *****************************/
|
||||
CheckParameter<int>(config, knowhere::indexparam::PQ_M, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** HNSW Params *****************************/
|
||||
CheckParameter<int>(config, knowhere::indexparam::EFCONSTRUCTION, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::indexparam::HNSW_M, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::indexparam::EF, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** Annoy Params *****************************/
|
||||
CheckParameter<int>(config, knowhere::indexparam::N_TREES, stoi_closure, std::nullopt);
|
||||
CheckParameter<int>(config, knowhere::indexparam::SEARCH_K, stoi_closure, std::nullopt);
|
||||
}
|
||||
|
||||
} // namespace milvus::index
|
|
@ -0,0 +1,68 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include "index/VectorIndex.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
class VectorMemIndex : public VectorIndex {
|
||||
public:
|
||||
explicit VectorMemIndex(const IndexType& index_type, const MetricType& metric_type, const IndexMode& index_mode);
|
||||
|
||||
BinarySet
|
||||
Serialize(const Config& config) override;
|
||||
|
||||
void
|
||||
Load(const BinarySet& binary_set, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override;
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return index_->Count();
|
||||
}
|
||||
|
||||
std::unique_ptr<SearchResult>
|
||||
Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) override;
|
||||
|
||||
private:
|
||||
void
|
||||
store_raw_data(const knowhere::DatasetPtr& dataset);
|
||||
|
||||
void
|
||||
parse_config(Config& config);
|
||||
|
||||
void
|
||||
LoadRawData();
|
||||
|
||||
private:
|
||||
Config config_;
|
||||
knowhere::VecIndexPtr index_ = nullptr;
|
||||
std::vector<uint8_t> raw_data_;
|
||||
std::once_flag raw_data_loaded_;
|
||||
};
|
||||
|
||||
using VectorMemIndexPtr = std::unique_ptr<VectorMemIndex>;
|
||||
} // namespace milvus::index
|
|
@ -0,0 +1,9 @@
|
|||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
Name: Milvus index
|
||||
Description: index modules for Milvus
|
||||
Version: @MILVUS_VERSION@
|
||||
|
||||
Libs: -L${libdir} -lmilvus_index
|
||||
Cflags: -I${includedir}
|
|
@ -14,7 +14,6 @@ set(INDEXBUILDER_FILES
|
|||
VecIndexCreator.cpp
|
||||
index_c.cpp
|
||||
init_c.cpp
|
||||
utils.cpp
|
||||
ScalarIndexCreator.cpp
|
||||
)
|
||||
|
||||
|
@ -30,11 +29,8 @@ endif ()
|
|||
# link order matters
|
||||
target_link_libraries(milvus_indexbuilder
|
||||
milvus_index
|
||||
milvus_common
|
||||
knowhere
|
||||
${TBB}
|
||||
${PLATFORM_LIBS}
|
||||
pthread
|
||||
)
|
||||
|
||||
install(TARGETS milvus_indexbuilder DESTINATION "${CMAKE_INSTALL_LIBDIR}")
|
||||
|
|
|
@ -11,10 +11,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "knowhere/common/Dataset.h"
|
||||
#include "knowhere/common/BinarySet.h"
|
||||
#include <memory>
|
||||
#include <knowhere/index/Index.h>
|
||||
#include "common/Types.h"
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
class IndexCreatorBase {
|
||||
|
@ -22,17 +20,14 @@ class IndexCreatorBase {
|
|||
virtual ~IndexCreatorBase() = default;
|
||||
|
||||
virtual void
|
||||
Build(const knowhere::DatasetPtr& dataset) = 0;
|
||||
Build(const milvus::DatasetPtr& dataset) = 0;
|
||||
|
||||
virtual knowhere::BinarySet
|
||||
virtual milvus::BinarySet
|
||||
Serialize() = 0;
|
||||
|
||||
// used for test.
|
||||
virtual void
|
||||
Load(const knowhere::BinarySet&) = 0;
|
||||
|
||||
// virtual knowhere::IndexPtr
|
||||
// GetIndex() = 0;
|
||||
Load(const milvus::BinarySet&) = 0;
|
||||
};
|
||||
|
||||
using IndexCreatorBasePtr = std::unique_ptr<IndexCreatorBase>;
|
||||
|
|
|
@ -40,28 +40,24 @@ class IndexFactory {
|
|||
|
||||
IndexCreatorBasePtr
|
||||
CreateIndex(CDataType dtype, const char* type_params, const char* index_params) {
|
||||
auto real_dtype = proto::schema::DataType(dtype);
|
||||
auto invalid_dtype_msg = std::string("invalid data type: ") + std::to_string(real_dtype);
|
||||
auto real_dtype = DataType(dtype);
|
||||
auto invalid_dtype_msg = std::string("invalid data type: ") + std::to_string(int(real_dtype));
|
||||
|
||||
switch (real_dtype) {
|
||||
case proto::schema::Bool:
|
||||
case proto::schema::Int8:
|
||||
case proto::schema::Int16:
|
||||
case proto::schema::Int32:
|
||||
case proto::schema::Int64:
|
||||
case proto::schema::Float:
|
||||
case proto::schema::Double:
|
||||
case proto::schema::VarChar:
|
||||
case proto::schema::String:
|
||||
return CreateScalarIndex(dtype, type_params, index_params);
|
||||
case DataType::BOOL:
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32:
|
||||
case DataType::INT64:
|
||||
case DataType::FLOAT:
|
||||
case DataType::DOUBLE:
|
||||
case DataType::VARCHAR:
|
||||
case DataType::STRING:
|
||||
return CreateScalarIndex(real_dtype, type_params, index_params);
|
||||
|
||||
case proto::schema::BinaryVector:
|
||||
case proto::schema::FloatVector:
|
||||
return std::make_unique<VecIndexCreator>(type_params, index_params);
|
||||
|
||||
case proto::schema::None:
|
||||
case proto::schema::DataType_INT_MIN_SENTINEL_DO_NOT_USE_:
|
||||
case proto::schema::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
|
||||
case DataType::VECTOR_FLOAT:
|
||||
case DataType::VECTOR_BINARY:
|
||||
return std::make_unique<VecIndexCreator>(real_dtype, type_params, index_params);
|
||||
default:
|
||||
throw std::invalid_argument(invalid_dtype_msg);
|
||||
}
|
||||
|
|
|
@ -9,35 +9,53 @@
|
|||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "indexbuilder/helper.h"
|
||||
#include "indexbuilder/ScalarIndexCreator.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "index/IndexInfo.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/Utils.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
|
||||
ScalarIndexCreator::ScalarIndexCreator(CDataType dtype, const char* type_params, const char* index_params) {
|
||||
dtype_ = dtype;
|
||||
ScalarIndexCreator::ScalarIndexCreator(DataType dtype, const char* type_params, const char* index_params)
|
||||
: dtype_(dtype) {
|
||||
// TODO: move parse-related logic to a common interface.
|
||||
Helper::ParseFromString(type_params_, std::string(type_params));
|
||||
Helper::ParseFromString(index_params_, std::string(index_params));
|
||||
// TODO: create index according to the params.
|
||||
index_ = scalar::IndexFactory::GetInstance().CreateIndex(dtype_, index_type());
|
||||
milvus::index::ParseFromString(type_params_, std::string(type_params));
|
||||
milvus::index::ParseFromString(index_params_, std::string(index_params));
|
||||
|
||||
for (auto i = 0; i < type_params_.params_size(); ++i) {
|
||||
const auto& param = type_params_.params(i);
|
||||
config_[param.key()] = param.value();
|
||||
}
|
||||
|
||||
for (auto i = 0; i < index_params_.params_size(); ++i) {
|
||||
const auto& param = index_params_.params(i);
|
||||
config_[param.key()] = param.value();
|
||||
}
|
||||
|
||||
milvus::index::CreateIndexInfo index_info;
|
||||
index_info.field_type = dtype_;
|
||||
index_info.index_type = index_type();
|
||||
index_info.index_mode = IndexMode::MODE_CPU;
|
||||
index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, nullptr);
|
||||
}
|
||||
|
||||
void
|
||||
ScalarIndexCreator::Build(const knowhere::DatasetPtr& dataset) {
|
||||
index_->BuildWithDataset(dataset);
|
||||
ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
|
||||
auto size = knowhere::GetDatasetRows(dataset);
|
||||
auto data = knowhere::GetDatasetTensor(dataset);
|
||||
index_->BuildWithRawData(size, data);
|
||||
}
|
||||
|
||||
knowhere::BinarySet
|
||||
milvus::BinarySet
|
||||
ScalarIndexCreator::Serialize() {
|
||||
return index_->Serialize(config_);
|
||||
}
|
||||
|
||||
void
|
||||
ScalarIndexCreator::Load(const knowhere::BinarySet& binary_set) {
|
||||
ScalarIndexCreator::Load(const milvus::BinarySet& binary_set) {
|
||||
index_->Load(binary_set);
|
||||
}
|
||||
|
||||
|
|
|
@ -23,33 +23,33 @@ namespace milvus::indexbuilder {
|
|||
|
||||
class ScalarIndexCreator : public IndexCreatorBase {
|
||||
public:
|
||||
ScalarIndexCreator(CDataType dtype, const char* type_params, const char* index_params);
|
||||
ScalarIndexCreator(DataType data_type, const char* type_params, const char* index_params);
|
||||
|
||||
void
|
||||
Build(const knowhere::DatasetPtr& dataset) override;
|
||||
Build(const milvus::DatasetPtr& dataset) override;
|
||||
|
||||
knowhere::BinarySet
|
||||
milvus::BinarySet
|
||||
Serialize() override;
|
||||
|
||||
void
|
||||
Load(const knowhere::BinarySet&) override;
|
||||
Load(const milvus::BinarySet&) override;
|
||||
|
||||
private:
|
||||
std::string
|
||||
index_type();
|
||||
|
||||
private:
|
||||
scalar::IndexBasePtr index_ = nullptr;
|
||||
index::IndexBasePtr index_ = nullptr;
|
||||
proto::indexcgo::TypeParams type_params_;
|
||||
proto::indexcgo::IndexParams index_params_;
|
||||
knowhere::Config config_;
|
||||
CDataType dtype_;
|
||||
Config config_;
|
||||
DataType dtype_;
|
||||
};
|
||||
|
||||
using ScalarIndexCreatorPtr = std::unique_ptr<ScalarIndexCreator>;
|
||||
|
||||
inline ScalarIndexCreatorPtr
|
||||
CreateScalarIndex(CDataType dtype, const char* type_params, const char* index_params) {
|
||||
CreateScalarIndex(DataType dtype, const char* type_params, const char* index_params) {
|
||||
return std::make_unique<ScalarIndexCreator>(dtype, type_params, index_params);
|
||||
}
|
||||
|
||||
|
|
|
@ -9,323 +9,85 @@
|
|||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "indexbuilder/VecIndexCreator.h"
|
||||
#include "indexbuilder/utils.h"
|
||||
#include "knowhere/common/Timer.h"
|
||||
#include "knowhere/index/VecIndex.h"
|
||||
#include "knowhere/index/VecIndexFactory.h"
|
||||
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "pb/index_cgo_msg.pb.h"
|
||||
#include "index/Utils.h"
|
||||
#include "index/IndexFactory.h"
|
||||
|
||||
#ifdef BUILD_DISK_ANN
|
||||
#include "storage/DiskFileManagerImpl.h"
|
||||
#endif
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
|
||||
VecIndexCreator::VecIndexCreator(const char* serialized_type_params, const char* serialized_index_params) {
|
||||
type_params_ = std::string(serialized_type_params);
|
||||
index_params_ = std::string(serialized_index_params);
|
||||
VecIndexCreator::VecIndexCreator(DataType data_type,
|
||||
const char* serialized_type_params,
|
||||
const char* serialized_index_params)
|
||||
: data_type_(data_type) {
|
||||
milvus::index::ParseFromString(type_params_, std::string(serialized_type_params));
|
||||
milvus::index::ParseFromString(index_params_, std::string(serialized_index_params));
|
||||
|
||||
parse();
|
||||
for (auto i = 0; i < type_params_.params_size(); ++i) {
|
||||
const auto& param = type_params_.params(i);
|
||||
config_[param.key()] = param.value();
|
||||
}
|
||||
|
||||
auto index_mode = get_index_mode();
|
||||
auto index_type = get_index_type();
|
||||
auto metric_type = get_metric_type();
|
||||
AssertInfo(!is_unsupported(index_type, metric_type), index_type + " doesn't support metric: " + metric_type);
|
||||
for (auto i = 0; i < index_params_.params_size(); ++i) {
|
||||
const auto& param = index_params_.params(i);
|
||||
config_[param.key()] = param.value();
|
||||
}
|
||||
|
||||
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(get_index_type(), index_mode);
|
||||
index::CreateIndexInfo index_info;
|
||||
index_info.field_type = data_type_;
|
||||
index_info.index_mode = index::GetIndexModeFromConfig(config_);
|
||||
index_info.index_type = index::GetIndexTypeFromConfig(config_);
|
||||
index_info.metric_type = index::GetMetricTypeFromConfig(config_);
|
||||
|
||||
std::shared_ptr<storage::FileManagerImpl> file_manager = nullptr;
|
||||
#ifdef BUILD_DISK_ANN
|
||||
if (index::is_in_disk_list(index_info.index_type)) {
|
||||
// For now, only support diskann index
|
||||
file_manager = std::make_shared<storage::DiskFileManagerImpl>(index::GetFieldDataMetaFromConfig(config_),
|
||||
index::GetIndexMetaFromConfig(config_));
|
||||
}
|
||||
#endif
|
||||
|
||||
index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, file_manager);
|
||||
AssertInfo(index_ != nullptr, "[VecIndexCreator]Index is null after create index");
|
||||
}
|
||||
|
||||
template <typename ParamsT>
|
||||
// ugly here, ParamsT will just be MapParams later
|
||||
void
|
||||
VecIndexCreator::parse_impl(const std::string& serialized_params_str, knowhere::Config& conf) {
|
||||
bool deserialized_success;
|
||||
|
||||
ParamsT params;
|
||||
deserialized_success = google::protobuf::TextFormat::ParseFromString(serialized_params_str, ¶ms);
|
||||
AssertInfo(deserialized_success, "[VecIndexCreator]Deserialize params failed");
|
||||
|
||||
for (auto i = 0; i < params.params_size(); ++i) {
|
||||
const auto& param = params.params(i);
|
||||
const auto& key = param.key();
|
||||
const auto& value = param.value();
|
||||
conf[key] = value;
|
||||
}
|
||||
|
||||
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
|
||||
auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
|
||||
|
||||
/***************************** meta *******************************/
|
||||
check_parameter<int>(conf, knowhere::meta::SLICE_SIZE, stoi_closure, std::optional{4});
|
||||
check_parameter<int>(conf, knowhere::meta::DIM, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::meta::TOPK, stoi_closure, std::nullopt);
|
||||
|
||||
/***************************** IVF Params *******************************/
|
||||
check_parameter<int>(conf, knowhere::indexparam::NPROBE, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::indexparam::NLIST, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::indexparam::M, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::indexparam::NBITS, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** PQ Params *****************************/
|
||||
check_parameter<int>(conf, knowhere::indexparam::PQ_M, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** HNSW Params *****************************/
|
||||
check_parameter<int>(conf, knowhere::indexparam::EFCONSTRUCTION, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::indexparam::HNSW_M, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::indexparam::EF, stoi_closure, std::nullopt);
|
||||
|
||||
/************************** Annoy Params *****************************/
|
||||
check_parameter<int>(conf, knowhere::indexparam::N_TREES, stoi_closure, std::nullopt);
|
||||
check_parameter<int>(conf, knowhere::indexparam::SEARCH_K, stoi_closure, std::nullopt);
|
||||
}
|
||||
|
||||
void
|
||||
VecIndexCreator::parse() {
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
|
||||
parse_impl<indexcgo::TypeParams>(type_params_, type_config_);
|
||||
parse_impl<indexcgo::IndexParams>(index_params_, index_config_);
|
||||
|
||||
config_.update(type_config_); // just like dict().update in Python, amazing
|
||||
config_.update(index_config_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VecIndexCreator::check_parameter(knowhere::Config& conf,
|
||||
const std::string& key,
|
||||
std::function<T(std::string)> fn,
|
||||
std::optional<T> default_v) {
|
||||
if (!conf.contains(key)) {
|
||||
if (default_v.has_value()) {
|
||||
conf[key] = default_v.value();
|
||||
}
|
||||
} else {
|
||||
auto value = conf[key];
|
||||
conf[key] = fn(value);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<T>
|
||||
VecIndexCreator::get_config_by_name(const std::string& name) {
|
||||
if (config_.contains(name)) {
|
||||
return knowhere::GetValueFromConfig<T>(config_, name);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
int64_t
|
||||
VecIndexCreator::dim() {
|
||||
auto dimension = get_config_by_name<int64_t>(knowhere::meta::DIM);
|
||||
AssertInfo(dimension.has_value(), "[VecIndexCreator]Dimension doesn't have value");
|
||||
return (dimension.value());
|
||||
return index::GetDimFromConfig(config_);
|
||||
}
|
||||
|
||||
void
|
||||
VecIndexCreator::BuildWithoutIds(const knowhere::DatasetPtr& dataset) {
|
||||
auto index_type = get_index_type();
|
||||
auto index_mode = get_index_mode();
|
||||
knowhere::SetMetaRows(config_, knowhere::GetDatasetRows(dataset));
|
||||
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
if (!config_.contains(knowhere::indexparam::NBITS)) {
|
||||
knowhere::SetIndexParamNbits(config_, 8);
|
||||
}
|
||||
}
|
||||
auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
|
||||
// TODO: Use easylogging instead, if you really need to keep this log.
|
||||
// std::cout << "Konwhere BuildWithoutIds config_ is " << config_ << std::endl;
|
||||
AssertInfo(conf_adapter->CheckTrain(config_, index_mode), "something wrong in index parameters!");
|
||||
|
||||
if (is_in_need_id_list(index_type)) {
|
||||
PanicInfo(std::string(index_type) + " doesn't support build without ids yet!");
|
||||
}
|
||||
knowhere::TimeRecorder rc("BuildWithoutIds", 1);
|
||||
// if (is_in_need_build_all_list(index_type)) {
|
||||
// index_->BuildAll(dataset, config_);
|
||||
// } else {
|
||||
// index_->Train(dataset, config_);
|
||||
// index_->AddWithoutIds(dataset, config_);
|
||||
// }
|
||||
index_->BuildAll(dataset, config_);
|
||||
rc.RecordSection("TrainAndAdd");
|
||||
|
||||
if (is_in_nm_list(index_type)) {
|
||||
StoreRawData(dataset);
|
||||
rc.RecordSection("StoreRawData");
|
||||
}
|
||||
rc.ElapseFromBegin("Done");
|
||||
VecIndexCreator::Build(const milvus::DatasetPtr& dataset) {
|
||||
index_->BuildWithDataset(dataset, config_);
|
||||
}
|
||||
|
||||
void
|
||||
VecIndexCreator::BuildWithIds(const knowhere::DatasetPtr& dataset) {
|
||||
auto index_type = get_index_type();
|
||||
auto index_mode = get_index_mode();
|
||||
knowhere::SetMetaRows(config_, knowhere::GetDatasetRows(dataset));
|
||||
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
if (!config_.contains(knowhere::indexparam::NBITS)) {
|
||||
knowhere::SetIndexParamNbits(config_, 8);
|
||||
}
|
||||
}
|
||||
auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
|
||||
AssertInfo(conf_adapter->CheckTrain(config_, index_mode), "something wrong in index parameters!");
|
||||
// index_->Train(dataset, config_);
|
||||
// index_->Add(dataset, config_);
|
||||
index_->BuildAll(dataset, config_);
|
||||
|
||||
if (is_in_nm_list(get_index_type())) {
|
||||
StoreRawData(dataset);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
VecIndexCreator::StoreRawData(const knowhere::DatasetPtr& dataset) {
|
||||
auto index_type = get_index_type();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
auto tensor = knowhere::GetDatasetTensor(dataset);
|
||||
auto row_num = knowhere::GetDatasetRows(dataset);
|
||||
auto dim = knowhere::GetDatasetDim(dataset);
|
||||
int64_t data_size;
|
||||
if (is_in_bin_list(index_type)) {
|
||||
data_size = dim / 8 * row_num;
|
||||
} else {
|
||||
data_size = dim * row_num * sizeof(float);
|
||||
}
|
||||
raw_data_.resize(data_size);
|
||||
memcpy(raw_data_.data(), tensor, data_size);
|
||||
}
|
||||
}
|
||||
|
||||
knowhere::BinarySet
|
||||
milvus::BinarySet
|
||||
VecIndexCreator::Serialize() {
|
||||
auto ret = index_->Serialize(config_);
|
||||
auto index_type = get_index_type();
|
||||
|
||||
if (is_in_nm_list(index_type)) {
|
||||
std::shared_ptr<uint8_t[]> raw_data(new uint8_t[raw_data_.size()], std::default_delete<uint8_t[]>());
|
||||
memcpy(raw_data.get(), raw_data_.data(), raw_data_.size());
|
||||
ret.Append(RAW_DATA, raw_data, raw_data_.size());
|
||||
// https://github.com/milvus-io/milvus/issues/6421
|
||||
// Disassemble will only divide the raw vectors, other keys were already divided
|
||||
knowhere::Disassemble(ret, config_);
|
||||
}
|
||||
return ret;
|
||||
return index_->Serialize(config_);
|
||||
}
|
||||
|
||||
void
|
||||
VecIndexCreator::Load(const knowhere::BinarySet& binary_set) {
|
||||
auto& map_ = binary_set.binary_map_;
|
||||
for (auto it = map_.begin(); it != map_.end(); ++it) {
|
||||
if (it->first == RAW_DATA) {
|
||||
raw_data_.clear();
|
||||
auto data_size = it->second->size;
|
||||
raw_data_.resize(data_size);
|
||||
memcpy(raw_data_.data(), it->second->data.get(), data_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
index_->Load(binary_set);
|
||||
VecIndexCreator::Load(const milvus::BinarySet& binary_set) {
|
||||
index_->Load(binary_set, config_);
|
||||
}
|
||||
|
||||
std::string
|
||||
VecIndexCreator::get_index_type() {
|
||||
// return index_->index_type();
|
||||
// knowhere bug here
|
||||
// the index_type of all ivf-based index will change to ivf flat after loaded
|
||||
auto type = get_config_by_name<std::string>("index_type");
|
||||
return type.has_value() ? type.value() : knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
|
||||
}
|
||||
|
||||
std::string
|
||||
VecIndexCreator::get_metric_type() {
|
||||
auto type = get_config_by_name<std::string>(knowhere::meta::METRIC_TYPE);
|
||||
if (type.has_value()) {
|
||||
return type.value();
|
||||
} else {
|
||||
auto index_type = get_index_type();
|
||||
if (is_in_bin_list(index_type)) {
|
||||
return knowhere::metric::JACCARD;
|
||||
} else {
|
||||
return knowhere::metric::L2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Maps the "index_mode" config entry ("CPU" / "GPU") to a knowhere::IndexMode.
// Returns MODE_CPU when the entry is absent.
knowhere::IndexMode
VecIndexCreator::get_index_mode() {
    // const + find(): the previous operator[] lookup inserted a new entry into
    // this function-static map for every unrecognized mode string, mutating
    // shared state on what should be a read-only lookup.
    static const std::map<std::string, knowhere::IndexMode> mode_map = {
        {"CPU", knowhere::IndexMode::MODE_CPU},
        {"GPU", knowhere::IndexMode::MODE_GPU},
    };

    auto mode = get_config_by_name<std::string>("index_mode");
    if (!mode.has_value()) {
        return knowhere::IndexMode::MODE_CPU;
    }

    const auto found = mode_map.find(mode.value());
    // An unrecognized string keeps yielding the value-initialized mode, which
    // is what operator[] produced before, just without the insertion.
    return found != mode_map.end() ? found->second : knowhere::IndexMode{};
}
|
||||
|
||||
int64_t
|
||||
VecIndexCreator::get_index_file_slice_size() {
|
||||
if (knowhere::CheckKeyInConfig(config_, knowhere::meta::SLICE_SIZE)) {
|
||||
return knowhere::GetMetaSliceSize(config_);
|
||||
}
|
||||
return knowhere::index_file_slice_size; // by default
|
||||
}
|
||||
|
||||
std::unique_ptr<VecIndexCreator::QueryResult>
|
||||
VecIndexCreator::Query(const knowhere::DatasetPtr& dataset) {
|
||||
return std::move(QueryImpl(dataset, config_));
|
||||
}
|
||||
|
||||
std::unique_ptr<VecIndexCreator::QueryResult>
|
||||
VecIndexCreator::QueryWithParam(const knowhere::DatasetPtr& dataset, const char* serialized_search_params) {
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
knowhere::Config search_conf;
|
||||
parse_impl<indexcgo::MapParams>(std::string(serialized_search_params), search_conf);
|
||||
|
||||
return std::move(QueryImpl(dataset, search_conf));
|
||||
}
|
||||
|
||||
std::unique_ptr<VecIndexCreator::QueryResult>
|
||||
VecIndexCreator::QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Config& conf) {
|
||||
auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer
|
||||
auto index_type = get_index_type();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
std::call_once(raw_data_loaded_, load_raw_data_closure);
|
||||
}
|
||||
|
||||
auto res = index_->Query(dataset, conf, nullptr);
|
||||
auto ids = knowhere::GetDatasetIDs(res);
|
||||
auto distances = knowhere::GetDatasetDistance(res);
|
||||
auto nq = knowhere::GetDatasetRows(dataset);
|
||||
auto k = knowhere::GetMetaTopk(config_);
|
||||
|
||||
auto query_res = std::make_unique<VecIndexCreator::QueryResult>();
|
||||
query_res->nq = nq;
|
||||
query_res->topk = k;
|
||||
query_res->ids.resize(nq * k);
|
||||
query_res->distances.resize(nq * k);
|
||||
memcpy(query_res->ids.data(), ids, sizeof(int64_t) * nq * k);
|
||||
memcpy(query_res->distances.data(), distances, sizeof(float) * nq * k);
|
||||
|
||||
return std::move(query_res);
|
||||
std::unique_ptr<SearchResult>
|
||||
VecIndexCreator::Query(const milvus::DatasetPtr& dataset, const SearchInfo& search_info, const BitsetView& bitset) {
|
||||
auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());
|
||||
return vector_index->Query(dataset, search_info, bitset);
|
||||
}
|
||||
|
||||
void
|
||||
VecIndexCreator::LoadRawData() {
|
||||
auto index_type = get_index_type();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
auto bs = index_->Serialize(config_);
|
||||
auto bptr = std::make_shared<knowhere::Binary>();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
bptr->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
|
||||
bptr->size = raw_data_.size();
|
||||
bs.Append(RAW_DATA, bptr);
|
||||
index_->Load(bs);
|
||||
}
|
||||
VecIndexCreator::CleanLocalData() {
|
||||
auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());
|
||||
vector_index->CleanLocalData();
|
||||
}
|
||||
|
||||
} // namespace milvus::indexbuilder
|
||||
|
|
|
@ -17,101 +17,44 @@
|
|||
#include <vector>
|
||||
|
||||
#include "indexbuilder/IndexCreatorBase.h"
|
||||
#include "knowhere/common/BinarySet.h"
|
||||
#include "knowhere/index/VecIndex.h"
|
||||
#include "index/VectorIndex.h"
|
||||
#include "index/IndexInfo.h"
|
||||
#include "pb/index_cgo_msg.pb.h"
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
|
||||
// TODO: better to distinguish binary vec & float vec.
|
||||
class VecIndexCreator : public IndexCreatorBase {
|
||||
public:
|
||||
explicit VecIndexCreator(const char* serialized_type_params, const char* serialized_index_params);
|
||||
explicit VecIndexCreator(DataType data_type,
|
||||
const char* serialized_type_params,
|
||||
const char* serialized_index_params);
|
||||
|
||||
void
|
||||
Build(const knowhere::DatasetPtr& dataset) override {
|
||||
BuildWithoutIds(dataset);
|
||||
}
|
||||
Build(const milvus::DatasetPtr& dataset) override;
|
||||
|
||||
knowhere::BinarySet
|
||||
milvus::BinarySet
|
||||
Serialize() override;
|
||||
|
||||
void
|
||||
Load(const knowhere::BinarySet& binary_set) override;
|
||||
Load(const milvus::BinarySet& binary_set) override;
|
||||
|
||||
int64_t
|
||||
dim();
|
||||
|
||||
public:
|
||||
// used for tests
|
||||
struct QueryResult {
|
||||
std::vector<knowhere::IDType> ids;
|
||||
std::vector<float> distances;
|
||||
int64_t nq;
|
||||
int64_t topk;
|
||||
};
|
||||
|
||||
std::unique_ptr<QueryResult>
|
||||
Query(const knowhere::DatasetPtr& dataset);
|
||||
|
||||
std::unique_ptr<QueryResult>
|
||||
QueryWithParam(const knowhere::DatasetPtr& dataset, const char* serialized_search_params);
|
||||
|
||||
private:
|
||||
void
|
||||
parse();
|
||||
|
||||
std::string
|
||||
get_index_type();
|
||||
|
||||
std::string
|
||||
get_metric_type();
|
||||
|
||||
knowhere::IndexMode
|
||||
get_index_mode();
|
||||
|
||||
int64_t
|
||||
get_index_file_slice_size();
|
||||
|
||||
template <typename T>
|
||||
std::optional<T>
|
||||
get_config_by_name(const std::string& name);
|
||||
|
||||
void
|
||||
StoreRawData(const knowhere::DatasetPtr& dataset);
|
||||
|
||||
void
|
||||
LoadRawData();
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
check_parameter(knowhere::Config& conf,
|
||||
const std::string& key,
|
||||
std::function<T(std::string)> fn,
|
||||
std::optional<T> default_v = std::nullopt);
|
||||
|
||||
template <typename ParamsT>
|
||||
void
|
||||
parse_impl(const std::string& serialized_params_str, knowhere::Config& conf);
|
||||
|
||||
std::unique_ptr<QueryResult>
|
||||
QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Config& conf);
|
||||
std::unique_ptr<SearchResult>
|
||||
Query(const milvus::DatasetPtr& dataset, const SearchInfo& search_info, const BitsetView& bitset);
|
||||
|
||||
public:
|
||||
void
|
||||
BuildWithIds(const knowhere::DatasetPtr& dataset);
|
||||
|
||||
void
|
||||
BuildWithoutIds(const knowhere::DatasetPtr& dataset);
|
||||
CleanLocalData();
|
||||
|
||||
private:
|
||||
knowhere::VecIndexPtr index_ = nullptr;
|
||||
std::string type_params_;
|
||||
std::string index_params_;
|
||||
knowhere::Config type_config_;
|
||||
knowhere::Config index_config_;
|
||||
knowhere::Config config_;
|
||||
std::vector<uint8_t> raw_data_;
|
||||
std::once_flag raw_data_loaded_;
|
||||
milvus::index::IndexBasePtr index_ = nullptr;
|
||||
proto::indexcgo::TypeParams type_params_;
|
||||
proto::indexcgo::IndexParams index_params_;
|
||||
Config config_;
|
||||
DataType data_type_;
|
||||
};
|
||||
|
||||
} // namespace milvus::indexbuilder
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pb/index_cgo_msg.pb.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
|
||||
using MapParams = std::map<std::string, std::string>;
|
||||
|
||||
struct Helper {
|
||||
static void
|
||||
ParseFromString(google::protobuf::Message& params, const std::string& str) {
|
||||
auto ok = google::protobuf::TextFormat::ParseFromString(str, ¶ms);
|
||||
AssertInfo(ok, "failed to parse params from string");
|
||||
}
|
||||
|
||||
static void
|
||||
ParseParams(google::protobuf::Message& params, const void* data, const size_t size) {
|
||||
auto ok = params.ParseFromArray(data, size);
|
||||
AssertInfo(ok, "failed to parse params from array");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace milvus::indexbuilder
|
|
@ -159,146 +159,13 @@ LoadIndexFromBinarySet(CIndex index, CBinarySet c_binary_set) {
|
|||
}
|
||||
|
||||
CStatus
|
||||
QueryOnFloatVecIndex(CIndex index, int64_t float_value_num, const float* vectors, CIndexQueryResult* res) {
|
||||
CleanLocalData(CIndex index) {
|
||||
auto status = CStatus();
|
||||
try {
|
||||
auto cIndex = (milvus::indexbuilder::VecIndexCreator*)index;
|
||||
auto dim = cIndex->dim();
|
||||
auto row_nums = float_value_num / dim;
|
||||
auto query_ds = knowhere::GenDataset(row_nums, dim, vectors);
|
||||
auto query_res = cIndex->Query(query_ds);
|
||||
*res = query_res.release();
|
||||
|
||||
status.error_code = Success;
|
||||
status.error_msg = "";
|
||||
} catch (std::exception& e) {
|
||||
status.error_code = UnexpectedError;
|
||||
status.error_msg = strdup(e.what());
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
// C entry point: queries a float-vector index with caller-supplied search
// parameters. `float_value_num` is the total number of floats in `vectors`;
// the row count is derived by dividing it by the index dimension. On success
// `*res` receives a heap-allocated QueryResult released to the caller.
// Any std::exception is reported through the returned CStatus.
CStatus
QueryOnFloatVecIndexWithParam(CIndex index,
                              int64_t float_value_num,
                              const float* vectors,
                              const char* serialized_search_params,
                              CIndexQueryResult* res) {
    CStatus status{};
    try {
        auto creator = reinterpret_cast<milvus::indexbuilder::VecIndexCreator*>(index);
        auto dimension = creator->dim();
        auto row_count = float_value_num / dimension;
        auto queries = knowhere::GenDataset(row_count, dimension, vectors);
        auto result = creator->QueryWithParam(queries, serialized_search_params);
        *res = result.release();

        status.error_code = Success;
        status.error_msg = "";
    } catch (std::exception& e) {
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
    }
    return status;
}
|
||||
|
||||
// C entry point: queries a binary-vector index using the index's own config.
// `data_size` is the byte length of `vectors`; the row count is
// (data_size * 8) / dim. On success `*res` receives a heap-allocated
// QueryResult released to the caller. Any std::exception is reported through
// the returned CStatus.
CStatus
QueryOnBinaryVecIndex(CIndex index, int64_t data_size, const uint8_t* vectors, CIndexQueryResult* res) {
    CStatus status{};
    try {
        auto creator = reinterpret_cast<milvus::indexbuilder::VecIndexCreator*>(index);
        auto dimension = creator->dim();
        auto row_count = (data_size * 8) / dimension;
        auto queries = knowhere::GenDataset(row_count, dimension, vectors);
        auto result = creator->Query(queries);
        *res = result.release();

        status.error_code = Success;
        status.error_msg = "";
    } catch (std::exception& e) {
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
    }
    return status;
}
|
||||
|
||||
// C entry point: queries a binary-vector index with caller-supplied search
// parameters. `data_size` is the byte length of `vectors`; the row count is
// (data_size * 8) / dim. On success `*res` receives a heap-allocated
// QueryResult released to the caller. Any std::exception is reported through
// the returned CStatus.
CStatus
QueryOnBinaryVecIndexWithParam(CIndex index,
                               int64_t data_size,
                               const uint8_t* vectors,
                               const char* serialized_search_params,
                               CIndexQueryResult* res) {
    CStatus status{};
    try {
        auto creator = reinterpret_cast<milvus::indexbuilder::VecIndexCreator*>(index);
        auto dimension = creator->dim();
        auto row_count = (data_size * 8) / dimension;
        auto queries = knowhere::GenDataset(row_count, dimension, vectors);
        auto result = creator->QueryWithParam(queries, serialized_search_params);
        *res = result.release();

        status.error_code = Success;
        status.error_msg = "";
    } catch (std::exception& e) {
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
    }
    return status;
}
|
||||
|
||||
// C entry point: allocates an empty QueryResult and hands ownership to the
// caller through `*res`. Any std::exception is reported through the returned
// CStatus.
CStatus
CreateQueryResult(CIndexQueryResult* res) {
    using QueryResult = milvus::indexbuilder::VecIndexCreator::QueryResult;

    CStatus status{};
    try {
        auto holder = std::make_unique<QueryResult>();
        *res = holder.release();

        status.error_code = Success;
        status.error_msg = "";
    } catch (std::exception& e) {
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
    }
    return status;
}
|
||||
|
||||
int64_t
|
||||
NqOfQueryResult(CIndexQueryResult res) {
|
||||
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
|
||||
return c_res->nq;
|
||||
}
|
||||
|
||||
int64_t
|
||||
TopkOfQueryResult(CIndexQueryResult res) {
|
||||
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
|
||||
return c_res->topk;
|
||||
}
|
||||
|
||||
void
|
||||
GetIdsOfQueryResult(CIndexQueryResult res, int64_t* ids) {
|
||||
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
|
||||
auto nq = c_res->nq;
|
||||
auto k = c_res->topk;
|
||||
// TODO: how could we avoid memory copy whenever this called
|
||||
memcpy(ids, c_res->ids.data(), sizeof(int64_t) * nq * k);
|
||||
}
|
||||
|
||||
void
|
||||
GetDistancesOfQueryResult(CIndexQueryResult res, float* distances) {
|
||||
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
|
||||
auto nq = c_res->nq;
|
||||
auto k = c_res->topk;
|
||||
// TODO: how could we avoid memory copy whenever this called
|
||||
memcpy(distances, c_res->distances.data(), sizeof(float) * nq * k);
|
||||
}
|
||||
|
||||
CStatus
|
||||
DeleteIndexQueryResult(CIndexQueryResult res) {
|
||||
auto status = CStatus();
|
||||
try {
|
||||
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
|
||||
delete c_res;
|
||||
|
||||
AssertInfo(index, "failed to build float vector index, passed index was null");
|
||||
auto real_index = reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
|
||||
auto cIndex = dynamic_cast<milvus::indexbuilder::VecIndexCreator*>(real_index);
|
||||
cIndex->CleanLocalData();
|
||||
status.error_code = Success;
|
||||
status.error_msg = "";
|
||||
} catch (std::exception& e) {
|
||||
|
|
|
@ -17,7 +17,7 @@ extern "C" {
|
|||
|
||||
#include <stdint.h>
|
||||
#include "common/type_c.h"
|
||||
#include "common/vector_index_c.h"
|
||||
#include "common/binary_set_c.h"
|
||||
#include "indexbuilder/type_c.h"
|
||||
|
||||
CStatus
|
||||
|
@ -50,42 +50,7 @@ CStatus
|
|||
LoadIndexFromBinarySet(CIndex index, CBinarySet c_binary_set);
|
||||
|
||||
CStatus
|
||||
QueryOnFloatVecIndex(CIndex index, int64_t float_value_num, const float* vectors, CIndexQueryResult* res);
|
||||
|
||||
CStatus
|
||||
QueryOnFloatVecIndexWithParam(CIndex index,
|
||||
int64_t float_value_num,
|
||||
const float* vectors,
|
||||
const char* serialized_search_params,
|
||||
CIndexQueryResult* res);
|
||||
|
||||
CStatus
|
||||
QueryOnBinaryVecIndex(CIndex index, int64_t data_size, const uint8_t* vectors, CIndexQueryResult* res);
|
||||
|
||||
CStatus
|
||||
QueryOnBinaryVecIndexWithParam(CIndex index,
|
||||
int64_t data_size,
|
||||
const uint8_t* vectors,
|
||||
const char* serialized_search_params,
|
||||
CIndexQueryResult* res);
|
||||
|
||||
CStatus
|
||||
CreateQueryResult(CIndexQueryResult* res);
|
||||
|
||||
int64_t
|
||||
NqOfQueryResult(CIndexQueryResult res);
|
||||
|
||||
int64_t
|
||||
TopkOfQueryResult(CIndexQueryResult res);
|
||||
|
||||
void
|
||||
GetIdsOfQueryResult(CIndexQueryResult res, int64_t* ids);
|
||||
|
||||
void
|
||||
GetDistancesOfQueryResult(CIndexQueryResult res, float* distances);
|
||||
|
||||
CStatus
|
||||
DeleteIndexQueryResult(CIndexQueryResult res);
|
||||
CleanLocalData(CIndex index);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "indexbuilder/utils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
|
||||
std::vector<knowhere::IndexType>
|
||||
NM_List() {
|
||||
static std::vector<knowhere::IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<knowhere::IndexType>
|
||||
BIN_List() {
|
||||
static std::vector<knowhere::IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
||||
knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a copy of the index types that require ids at build time; the list
// is currently empty.
std::vector<knowhere::IndexType>
Need_ID_List() {
    static std::vector<knowhere::IndexType> id_required_index_types{
        // knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
    };
    return id_required_index_types;
}
|
||||
|
||||
// Returns a copy of the (index type, metric type) pairs that are rejected;
// currently only BIN_IVF_FLAT with L2.
std::vector<std::tuple<knowhere::IndexType, knowhere::MetricType>>
unsupported_index_combinations() {
    using Combination = std::tuple<knowhere::IndexType, knowhere::MetricType>;
    static std::vector<Combination> rejected{
        std::make_tuple(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::L2),
    };
    return rejected;
}
|
||||
|
||||
// Returns true when `t` compares equal to any element of the vector produced
// by calling `list_func`.
template <typename T>
bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
    auto candidates = list_func();
    for (auto it = candidates.begin(); it != candidates.end(); ++it) {
        if (*it == t) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
bool
|
||||
is_in_bin_list(const knowhere::IndexType& index_type) {
|
||||
return is_in_list<knowhere::IndexType>(index_type, BIN_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_nm_list(const knowhere::IndexType& index_type) {
|
||||
return is_in_list<knowhere::IndexType>(index_type, NM_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_need_id_list(const knowhere::IndexType& index_type) {
|
||||
return is_in_list<knowhere::IndexType>(index_type, Need_ID_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_unsupported(const knowhere::IndexType& index_type, const knowhere::MetricType& metric_type) {
|
||||
return is_in_list<std::tuple<knowhere::IndexType, knowhere::MetricType>>(std::make_tuple(index_type, metric_type),
|
||||
unsupported_index_combinations);
|
||||
}
|
||||
|
||||
} // namespace milvus::indexbuilder
|
|
@ -1,54 +0,0 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include <knowhere/common/Typedef.h>
|
||||
#include "knowhere/index/IndexType.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
|
||||
namespace milvus::indexbuilder {
|
||||
|
||||
std::vector<knowhere::IndexType>
|
||||
NM_List();
|
||||
|
||||
std::vector<knowhere::IndexType>
|
||||
BIN_List();
|
||||
|
||||
std::vector<knowhere::IndexType>
|
||||
Need_ID_List();
|
||||
|
||||
std::vector<std::tuple<knowhere::IndexType, knowhere::MetricType>>
|
||||
unsupported_index_combinations();
|
||||
|
||||
template <typename T>
|
||||
bool
|
||||
is_in_list(const T& t, std::function<std::vector<T>()> list_func);
|
||||
|
||||
bool
|
||||
is_in_bin_list(const knowhere::IndexType& index_type);
|
||||
|
||||
bool
|
||||
is_in_nm_list(const knowhere::IndexType& index_type);
|
||||
|
||||
bool
|
||||
is_in_need_id_list(const knowhere::IndexType& index_type);
|
||||
|
||||
bool
|
||||
is_unsupported(const knowhere::IndexType& index_type, const knowhere::MetricType& metric_type);
|
||||
|
||||
} // namespace milvus::indexbuilder
|
|
@ -31,4 +31,4 @@ set(MILVUS_QUERY_SRCS
|
|||
PlanProto.cpp
|
||||
)
|
||||
add_library(milvus_query ${MILVUS_QUERY_SRCS})
|
||||
target_link_libraries(milvus_query milvus_index milvus_common)
|
||||
target_link_libraries(milvus_query milvus_index)
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "Expr.h"
|
||||
#include "common/QueryInfo.h"
|
||||
#include "query/Expr.h"
|
||||
#include "knowhere/common/Config.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
@ -34,14 +35,6 @@ struct PlanNode {
|
|||
|
||||
using PlanNodePtr = std::unique_ptr<PlanNode>;
|
||||
|
||||
struct SearchInfo {
|
||||
int64_t topk_;
|
||||
int64_t round_decimal_;
|
||||
FieldId field_id_;
|
||||
knowhere::MetricType metric_type_;
|
||||
knowhere::Config search_params_;
|
||||
};
|
||||
|
||||
struct VectorPlanNode : PlanNode {
|
||||
std::optional<ExprPtr> predicate_;
|
||||
SearchInfo search_info_;
|
||||
|
|
|
@ -22,22 +22,22 @@
|
|||
namespace milvus::query {
|
||||
|
||||
template <typename T>
|
||||
inline scalar::ScalarIndexPtr<T>
|
||||
inline index::ScalarIndexPtr<T>
|
||||
generate_scalar_index(Span<T> data) {
|
||||
auto indexing = std::make_unique<scalar::ScalarIndexSort<T>>();
|
||||
auto indexing = std::make_unique<index::ScalarIndexSort<T>>();
|
||||
indexing->Build(data.row_count(), data.data());
|
||||
return indexing;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline scalar::ScalarIndexPtr<std::string>
|
||||
inline index::ScalarIndexPtr<std::string>
|
||||
generate_scalar_index(Span<std::string> data) {
|
||||
auto indexing = scalar::CreateStringIndexSort();
|
||||
auto indexing = index::CreateStringIndexSort();
|
||||
indexing->Build(data.row_count(), data.data());
|
||||
return indexing;
|
||||
}
|
||||
|
||||
inline std::unique_ptr<knowhere::Index>
|
||||
inline index::IndexBasePtr
|
||||
generate_scalar_index(SpanBase data, DataType data_type) {
|
||||
Assert(!datatype_is_vector(data_type));
|
||||
switch (data_type) {
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "common/BitsetView.h"
|
||||
#include "common/QueryInfo.h"
|
||||
#include "SearchOnGrowing.h"
|
||||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnIndex.h"
|
||||
|
@ -22,7 +23,7 @@ namespace milvus::query {
|
|||
// - Query::ExecWithoutPredicate
|
||||
int32_t
|
||||
FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::SearchInfo& info,
|
||||
const SearchInfo& info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
int64_t ins_barrier,
|
||||
|
@ -44,7 +45,9 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
if (indexing_record.is_in(vecfield_id)) {
|
||||
auto max_indexed_id = indexing_record.get_finished_ack();
|
||||
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_id);
|
||||
auto search_conf = field_indexing.get_search_params(info.topk_);
|
||||
auto search_params = field_indexing.get_search_params(info.topk_);
|
||||
SearchInfo search_conf(info);
|
||||
search_conf.search_params_ = search_params;
|
||||
AssertInfo(vec_ptr->get_size_per_chunk() == field_indexing.get_size_per_chunk(),
|
||||
"[FloatSearch]Chunk size of vector not equal to chunk size of field index");
|
||||
|
||||
|
@ -56,7 +59,8 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
|
||||
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
|
||||
auto sub_view = bitset.subview(chunk_id * size_per_chunk, size_per_chunk);
|
||||
auto sub_qr = SearchOnIndex(search_dataset, *indexing, search_conf, sub_view);
|
||||
auto vec_index = (index::VectorIndex*)(indexing);
|
||||
auto sub_qr = SearchOnIndex(search_dataset, *vec_index, search_conf, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_seg_offsets()) {
|
||||
|
@ -74,7 +78,7 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
|
||||
void
|
||||
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::SearchInfo& info,
|
||||
const SearchInfo& info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
|
|
|
@ -18,7 +18,7 @@ namespace milvus::query {
|
|||
|
||||
void
|
||||
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::SearchInfo& info,
|
||||
const SearchInfo& info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
namespace milvus::query {
|
||||
SubSearchResult
|
||||
SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
||||
const knowhere::VecIndex& indexing,
|
||||
const knowhere::Config& search_conf,
|
||||
const index::VectorIndex& indexing,
|
||||
const SearchInfo& search_conf,
|
||||
const BitsetView& bitset) {
|
||||
auto num_queries = search_dataset.num_queries;
|
||||
auto topK = search_dataset.topk;
|
||||
|
@ -27,15 +27,12 @@ SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
|||
|
||||
// NOTE: VecIndex Query API forget to add const qualifier
|
||||
// NOTE: use const_cast as a workaround
|
||||
auto& indexing_nonconst = const_cast<knowhere::VecIndex&>(indexing);
|
||||
auto& indexing_nonconst = const_cast<index::VectorIndex&>(indexing);
|
||||
auto ans = indexing_nonconst.Query(dataset, search_conf, bitset);
|
||||
|
||||
auto dis = knowhere::GetDatasetDistance(ans);
|
||||
auto uids = knowhere::GetDatasetIDs(ans);
|
||||
|
||||
SubSearchResult sub_qr(num_queries, topK, metric_type, round_decimal);
|
||||
std::copy_n(dis, num_queries * topK, sub_qr.get_distances());
|
||||
std::copy_n(uids, num_queries * topK, sub_qr.get_seg_offsets());
|
||||
std::copy_n(ans->distances_.data(), num_queries * topK, sub_qr.get_distances());
|
||||
std::copy_n(ans->seg_offsets_.data(), num_queries * topK, sub_qr.get_seg_offsets());
|
||||
sub_qr.round_values();
|
||||
return sub_qr;
|
||||
}
|
||||
|
|
|
@ -15,13 +15,15 @@
|
|||
#include "knowhere/index/VecIndex.h"
|
||||
#include "query/SubSearchResult.h"
|
||||
#include "query/helper.h"
|
||||
#include "common/QueryInfo.h"
|
||||
#include "index/VectorIndex.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
SubSearchResult
|
||||
SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
||||
const knowhere::VecIndex& indexing,
|
||||
const knowhere::Config& search_conf,
|
||||
const index::VectorIndex& indexing,
|
||||
const SearchInfo& search_conf,
|
||||
const BitsetView& bitset);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -11,11 +11,7 @@
|
|||
|
||||
#include <cmath>
|
||||
|
||||
#include "knowhere/index/VecIndex.h"
|
||||
#include "knowhere/index/vector_index/ConfAdapter.h"
|
||||
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "common/QueryInfo.h"
|
||||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "query/helper.h"
|
||||
|
@ -49,18 +45,13 @@ SearchOnSealedIndex(const Schema& schema,
|
|||
auto conf = search_info.search_params_;
|
||||
knowhere::SetMetaTopk(conf, search_info.topk_);
|
||||
knowhere::SetMetaMetricType(conf, field_indexing->metric_type_);
|
||||
auto index_type = field_indexing->indexing_->index_type();
|
||||
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
|
||||
try {
|
||||
adapter->CheckSearch(conf, index_type, field_indexing->indexing_->index_mode());
|
||||
} catch (std::exception& e) {
|
||||
AssertInfo(false, e.what());
|
||||
}
|
||||
return field_indexing->indexing_->Query(ds, conf, bitset);
|
||||
auto vec_index = dynamic_cast<index::VectorIndex*>(field_indexing->indexing_.get());
|
||||
auto index_type = vec_index->GetIndexType();
|
||||
return vec_index->Query(ds, search_info, bitset);
|
||||
}();
|
||||
|
||||
auto ids = knowhere::GetDatasetIDs(final);
|
||||
float* distances = (float*)knowhere::GetDatasetDistance(final);
|
||||
auto ids = final->seg_offsets_.data();
|
||||
float* distances = final->distances_.data();
|
||||
|
||||
auto total_num = num_queries * topk;
|
||||
if (round_decimal != -1) {
|
||||
|
|
|
@ -20,7 +20,7 @@ namespace milvus::query {
|
|||
|
||||
class SubSearchResult {
|
||||
public:
|
||||
SubSearchResult(int64_t num_queries, int64_t topk, const knowhere::MetricType& metric_type, int64_t round_decimal)
|
||||
SubSearchResult(int64_t num_queries, int64_t topk, const MetricType& metric_type, int64_t round_decimal)
|
||||
: num_queries_(num_queries),
|
||||
topk_(topk),
|
||||
round_decimal_(round_decimal),
|
||||
|
@ -40,12 +40,12 @@ class SubSearchResult {
|
|||
|
||||
public:
|
||||
static float
|
||||
init_value(const knowhere::MetricType& metric_type) {
|
||||
init_value(const MetricType& metric_type) {
|
||||
return (is_descending(metric_type) ? -1 : 1) * std::numeric_limits<float>::max();
|
||||
}
|
||||
|
||||
static bool
|
||||
is_descending(const knowhere::MetricType& metric_type) {
|
||||
is_descending(const MetricType& metric_type) {
|
||||
// TODO(dog): more types
|
||||
if (metric_type == knowhere::metric::IP) {
|
||||
return true;
|
||||
|
|
|
@ -154,7 +154,7 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id, IndexFunc index_func, El
|
|||
auto num_chunk = upper_div(row_count_, size_per_chunk);
|
||||
std::deque<BitsetType> results;
|
||||
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Index = index::ScalarIndex<T>;
|
||||
for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
|
||||
const Index& indexing = segment_.chunk_scalar_index<T>(field_id, chunk_id);
|
||||
// NOTE: knowhere is not const-ready
|
||||
|
@ -211,7 +211,7 @@ ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, IndexFunc index_func
|
|||
|
||||
// if sealed segment has loaded scalar index for this field, then index_barrier = 1 and data_barrier = 0
|
||||
// in this case, sealed segment execute expr plan using scalar index
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Index = index::ScalarIndex<T>;
|
||||
for (auto chunk_id = data_barrier; chunk_id < indexing_barrier; ++chunk_id) {
|
||||
auto& indexing = segment_.chunk_scalar_index<T>(field_id, chunk_id);
|
||||
auto this_size = const_cast<Index*>(&indexing)->Count();
|
||||
|
@ -233,7 +233,7 @@ template <typename T>
|
|||
auto
|
||||
ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> BitsetType {
|
||||
auto& expr = static_cast<UnaryRangeExprImpl<T>&>(expr_raw);
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Index = index::ScalarIndex<T>;
|
||||
auto op = expr.op_type_;
|
||||
auto val = expr.value_;
|
||||
switch (op) {
|
||||
|
@ -270,8 +270,8 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> Bi
|
|||
case OpType::PrefixMatch: {
|
||||
auto index_func = [val](Index* index) {
|
||||
auto dataset = std::make_unique<knowhere::Dataset>();
|
||||
dataset->Set(scalar::OPERATOR_TYPE, OpType::PrefixMatch);
|
||||
dataset->Set(scalar::PREFIX_VALUE, val);
|
||||
dataset->Set(milvus::index::OPERATOR_TYPE, OpType::PrefixMatch);
|
||||
dataset->Set(milvus::index::PREFIX_VALUE, val);
|
||||
return index->Query(std::move(dataset));
|
||||
};
|
||||
auto elem_func = [val, op](T x) { return Match(x, val, op); };
|
||||
|
@ -291,7 +291,7 @@ template <typename T>
|
|||
auto
|
||||
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
|
||||
auto& expr = static_cast<BinaryArithOpEvalRangeExprImpl<T>&>(expr_raw);
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Index = index::ScalarIndex<T>;
|
||||
auto arith_op = expr.arith_op_;
|
||||
auto right_operand = expr.right_operand_;
|
||||
auto op = expr.op_type_;
|
||||
|
@ -409,7 +409,7 @@ template <typename T>
|
|||
auto
|
||||
ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) -> BitsetType {
|
||||
auto& expr = static_cast<BinaryRangeExprImpl<T>&>(expr_raw);
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Index = index::ScalarIndex<T>;
|
||||
bool lower_inclusive = expr.lower_inclusive_;
|
||||
bool upper_inclusive = expr.upper_inclusive_;
|
||||
T val1 = expr.lower_value_;
|
||||
|
@ -824,7 +824,7 @@ auto
|
|||
ExecExprVisitor::ExecTermVisitorImpl<std::string>(TermExpr& expr_raw) -> BitsetType {
|
||||
using T = std::string;
|
||||
auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Index = index::ScalarIndex<T>;
|
||||
const auto& terms = expr.terms_;
|
||||
auto n = terms.size();
|
||||
std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
|
||||
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
|
||||
#include "knowhere/index/vector_index/ConfAdapter.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "query/generated/VerifyPlanNodeVisitor.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
@ -29,13 +28,14 @@ class VerifyPlanNodeVisitor : PlanNodeVisitor {
|
|||
};
|
||||
} // namespace impl
|
||||
|
||||
static knowhere::IndexType
|
||||
static IndexType
|
||||
InferIndexType(const Json& search_params) {
|
||||
// ivf -> nprobe
|
||||
// hnsw -> ef
|
||||
// annoy -> search_k
|
||||
static const std::map<std::string, knowhere::IndexType> key_list = [] {
|
||||
std::map<std::string, knowhere::IndexType> list;
|
||||
// ngtpanng / ngtonng -> max_search_edges / epsilon
|
||||
static const std::map<std::string, IndexType> key_list = [] {
|
||||
std::map<std::string, IndexType> list;
|
||||
namespace ip = knowhere::indexparam;
|
||||
namespace ie = knowhere::IndexEnum;
|
||||
list.emplace(ip::NPROBE, ie::INDEX_FAISS_IVFFLAT);
|
||||
|
@ -53,7 +53,7 @@ InferIndexType(const Json& search_params) {
|
|||
PanicCodeInfo(ErrorCodeEnum::IllegalArgument, "failed to infer index type");
|
||||
}
|
||||
|
||||
static knowhere::IndexType
|
||||
static IndexType
|
||||
InferBinaryIndexType(const Json& search_params) {
|
||||
namespace ip = knowhere::indexparam;
|
||||
namespace ie = knowhere::IndexEnum;
|
||||
|
|
|
@ -46,9 +46,7 @@ endif ()
|
|||
|
||||
target_link_libraries(milvus_segcore
|
||||
milvus_query
|
||||
milvus_common
|
||||
${PLATFORM_LIBS}
|
||||
pthread
|
||||
${TBB}
|
||||
${OpenMP_CXX_FLAGS}
|
||||
# gperftools
|
||||
|
|
|
@ -15,9 +15,8 @@
|
|||
#include "index/StringIndexSort.h"
|
||||
|
||||
#include "common/SystemProperty.h"
|
||||
#include "knowhere/index/vector_index/IndexIVF.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "segcore/FieldIndexing.h"
|
||||
#include "index/VectorMemIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -34,11 +33,10 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec
|
|||
data_.grow_to_at_least(ack_end);
|
||||
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
|
||||
const auto& chunk = source->get_chunk(chunk_id);
|
||||
// build index for chunk
|
||||
auto indexing = std::make_unique<knowhere::IVF>();
|
||||
auto indexing = std::make_unique<index::VectorMemIndex>(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
knowhere::metric::L2, IndexMode::MODE_CPU);
|
||||
auto dataset = knowhere::GenDataset(source->get_size_per_chunk(), dim, chunk.data());
|
||||
indexing->Train(dataset, conf);
|
||||
indexing->AddWithoutIds(dataset, conf);
|
||||
indexing->BuildWithDataset(dataset, conf);
|
||||
data_[chunk_id] = std::move(indexing);
|
||||
}
|
||||
}
|
||||
|
@ -53,8 +51,8 @@ VectorFieldIndexing::get_build_params() const {
|
|||
auto base_params = config.build_params;
|
||||
|
||||
AssertInfo(base_params.count("nlist"), "Can't get nlist from index params");
|
||||
knowhere::SetMetaDim(base_params, field_meta_.get_dim());
|
||||
knowhere::SetMetaMetricType(base_params, metric_type);
|
||||
base_params[knowhere::meta::DIM] = std::to_string(field_meta_.get_dim());
|
||||
base_params[knowhere::meta::METRIC_TYPE] = metric_type;
|
||||
|
||||
return base_params;
|
||||
}
|
||||
|
@ -111,11 +109,11 @@ ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const
|
|||
// build index for chunk
|
||||
// TODO
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
auto indexing = scalar::CreateStringIndexSort();
|
||||
auto indexing = index::CreateStringIndexSort();
|
||||
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
|
||||
data_[chunk_id] = std::move(indexing);
|
||||
} else {
|
||||
auto indexing = scalar::CreateScalarIndexSort<T>();
|
||||
auto indexing = index::CreateScalarIndexSort<T>();
|
||||
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
|
||||
data_[chunk_id] = std::move(indexing);
|
||||
}
|
||||
|
|
|
@ -22,8 +22,8 @@
|
|||
#include "AckResponder.h"
|
||||
#include "InsertRecord.h"
|
||||
#include "common/Schema.h"
|
||||
#include "knowhere/index/VecIndex.h"
|
||||
#include "segcore/SegcoreConfig.h"
|
||||
#include "index/VectorIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -53,7 +53,7 @@ class FieldIndexing {
|
|||
return segcore_config_.get_chunk_rows();
|
||||
}
|
||||
|
||||
virtual knowhere::Index*
|
||||
virtual index::IndexBase*
|
||||
get_chunk_indexing(int64_t chunk_id) const = 0;
|
||||
|
||||
protected:
|
||||
|
@ -71,14 +71,14 @@ class ScalarFieldIndexing : public FieldIndexing {
|
|||
BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;
|
||||
|
||||
// concurrent
|
||||
scalar::ScalarIndex<T>*
|
||||
index::ScalarIndex<T>*
|
||||
get_chunk_indexing(int64_t chunk_id) const override {
|
||||
Assert(!field_meta_.is_vector());
|
||||
return data_.at(chunk_id).get();
|
||||
}
|
||||
|
||||
private:
|
||||
tbb::concurrent_vector<scalar::ScalarIndexPtr<T>> data_;
|
||||
tbb::concurrent_vector<index::ScalarIndexPtr<T>> data_;
|
||||
};
|
||||
|
||||
class VectorFieldIndexing : public FieldIndexing {
|
||||
|
@ -89,7 +89,7 @@ class VectorFieldIndexing : public FieldIndexing {
|
|||
BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;
|
||||
|
||||
// concurrent
|
||||
knowhere::VecIndex*
|
||||
index::IndexBase*
|
||||
get_chunk_indexing(int64_t chunk_id) const override {
|
||||
Assert(field_meta_.is_vector());
|
||||
return data_.at(chunk_id).get();
|
||||
|
@ -102,7 +102,7 @@ class VectorFieldIndexing : public FieldIndexing {
|
|||
get_search_params(int top_k) const;
|
||||
|
||||
private:
|
||||
tbb::concurrent_vector<std::unique_ptr<knowhere::VecIndex>> data_;
|
||||
tbb::concurrent_vector<std::unique_ptr<index::VectorIndex>> data_;
|
||||
};
|
||||
|
||||
std::unique_ptr<FieldIndexing>
|
||||
|
|
|
@ -20,22 +20,22 @@
|
|||
|
||||
#include "common/Types.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "knowhere/index/VecIndex.h"
|
||||
#include "index/VectorIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
struct SealedIndexingEntry {
|
||||
knowhere::MetricType metric_type_;
|
||||
knowhere::VecIndexPtr indexing_;
|
||||
MetricType metric_type_;
|
||||
index::IndexBasePtr indexing_;
|
||||
};
|
||||
|
||||
using SealedIndexingEntryPtr = std::unique_ptr<SealedIndexingEntry>;
|
||||
|
||||
struct SealedIndexingRecord {
|
||||
void
|
||||
append_field_indexing(FieldId field_id, const knowhere::MetricType& metric_type, knowhere::VecIndexPtr indexing) {
|
||||
append_field_indexing(FieldId field_id, const MetricType& metric_type, index::IndexBasePtr indexing) {
|
||||
auto ptr = std::make_unique<SealedIndexingEntry>();
|
||||
ptr->indexing_ = indexing;
|
||||
ptr->indexing_ = std::move(indexing);
|
||||
ptr->metric_type_ = metric_type;
|
||||
std::unique_lock lck(mutex_);
|
||||
field_indexings_[field_id] = std::move(ptr);
|
||||
|
|
|
@ -31,7 +31,7 @@ class SegcoreConfig {
|
|||
SegcoreConfig() {
|
||||
// hard code configurations for small index
|
||||
SmallIndexConf sub_conf;
|
||||
sub_conf.build_params["nlist"] = nlist_;
|
||||
sub_conf.build_params["nlist"] = std::to_string(nlist_);
|
||||
sub_conf.search_params["nprobe"] = nprobe_;
|
||||
sub_conf.index_type = "IVF";
|
||||
table_[knowhere::metric::L2] = sub_conf;
|
||||
|
@ -50,7 +50,7 @@ class SegcoreConfig {
|
|||
parse_from(const std::string& string_path);
|
||||
|
||||
const SmallIndexConf&
|
||||
at(const knowhere::MetricType& metric_type) const {
|
||||
at(const MetricType& metric_type) const {
|
||||
Assert(table_.count(metric_type));
|
||||
return table_.at(metric_type);
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ class SegcoreConfig {
|
|||
}
|
||||
|
||||
void
|
||||
set_small_index_config(const knowhere::MetricType& metric_type, const SmallIndexConf& small_index_conf) {
|
||||
set_small_index_config(const MetricType& metric_type, const SmallIndexConf& small_index_conf) {
|
||||
table_[metric_type] = small_index_conf;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,11 +16,8 @@
|
|||
#include <boost/iterator/counting_iterator.hpp>
|
||||
|
||||
#include "common/Consts.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "query/PlanNode.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "query/generated/ExecPlanNodeVisitor.h"
|
||||
#include "segcore/Reduce.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/Utils.h"
|
||||
|
||||
|
@ -176,7 +173,7 @@ SegmentGrowingImpl::num_chunk() const {
|
|||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::vector_search(query::SearchInfo& search_info,
|
||||
SegmentGrowingImpl::vector_search(SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t query_count,
|
||||
Timestamp timestamp,
|
||||
|
|
|
@ -109,7 +109,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
}
|
||||
|
||||
// deprecated
|
||||
const knowhere::Index*
|
||||
const index::IndexBase*
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const final {
|
||||
return indexing_record_.get_field_indexing(field_id).get_chunk_indexing(chunk_id);
|
||||
}
|
||||
|
@ -174,7 +174,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const override;
|
||||
|
||||
void
|
||||
vector_search(query::SearchInfo& search_info,
|
||||
vector_search(SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t query_count,
|
||||
Timestamp timestamp,
|
||||
|
|
|
@ -27,10 +27,12 @@
|
|||
#include "common/LoadInfo.h"
|
||||
#include "common/BitsetView.h"
|
||||
#include "common/QueryResult.h"
|
||||
#include "common/QueryInfo.h"
|
||||
#include "query/Plan.h"
|
||||
#include "query/PlanNode.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "pb/segcore.pb.h"
|
||||
#include "index/IndexInfo.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -91,10 +93,10 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
const scalar::ScalarIndex<T>&
|
||||
const index::ScalarIndex<T>&
|
||||
chunk_scalar_index(FieldId field_id, int64_t chunk_id) const {
|
||||
static_assert(IsScalar<T>);
|
||||
using IndexType = scalar::ScalarIndex<T>;
|
||||
using IndexType = index::ScalarIndex<T>;
|
||||
auto base_ptr = chunk_index_impl(field_id, chunk_id);
|
||||
auto ptr = dynamic_cast<const IndexType*>(base_ptr);
|
||||
AssertInfo(ptr, "entry mismatch");
|
||||
|
@ -129,7 +131,7 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
|
||||
public:
|
||||
virtual void
|
||||
vector_search(query::SearchInfo& search_info,
|
||||
vector_search(SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t query_count,
|
||||
Timestamp timestamp,
|
||||
|
@ -176,7 +178,7 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||
|
||||
// internal API: return chunk_index in span, support scalar index only
|
||||
virtual const knowhere::Index*
|
||||
virtual const index::IndexBase*
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||
|
||||
// TODO remove system fields
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace milvus::segcore {
|
|||
class SegmentSealed : public SegmentInternalInterface {
|
||||
public:
|
||||
virtual void
|
||||
LoadIndex(const LoadIndexInfo& info) = 0;
|
||||
LoadIndex(const index::LoadIndexInfo& info) = 0;
|
||||
virtual void
|
||||
LoadSegmentMeta(const milvus::proto::segcore::LoadSegmentMeta& meta) = 0;
|
||||
virtual void
|
||||
|
|
|
@ -39,31 +39,7 @@ SegmentSealedImpl::PreDelete(int64_t size) {
|
|||
}
|
||||
|
||||
void
|
||||
print(const std::map<std::string, std::string>& m) {
|
||||
for (const auto& [k, v] : m) {
|
||||
std::cout << k << ": " << v << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
print(const LoadIndexInfo& info) {
|
||||
std::cout << "------------------LoadIndexInfo----------------------" << std::endl;
|
||||
std::cout << "field_id: " << info.field_id << std::endl;
|
||||
std::cout << "field_type: " << info.field_type << std::endl;
|
||||
std::cout << "index_params:" << std::endl;
|
||||
print(info.index_params);
|
||||
std::cout << "------------------LoadIndexInfo----------------------" << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
print(const LoadFieldDataInfo& info) {
|
||||
std::cout << "------------------LoadFieldDataInfo----------------------" << std::endl;
|
||||
std::cout << "field_id: " << info.field_id << std::endl;
|
||||
std::cout << "------------------LoadFieldDataInfo----------------------" << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
|
||||
SegmentSealedImpl::LoadIndex(const index::LoadIndexInfo& info) {
|
||||
// print(info);
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
|
@ -77,15 +53,14 @@ SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
|
|||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
||||
SegmentSealedImpl::LoadVecIndex(const index::LoadIndexInfo& info) {
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
auto index = std::dynamic_pointer_cast<knowhere::VecIndex>(info.index);
|
||||
AssertInfo(info.index_params.count("metric_type"), "Can't get metric_type in index_params");
|
||||
auto metric_type = info.index_params.at("metric_type");
|
||||
auto row_count = index->Count();
|
||||
auto row_count = info.index->Count();
|
||||
AssertInfo(row_count > 0, "Index count is 0");
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
|
@ -101,7 +76,8 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
|||
std::to_string(row_count_opt_.value()) + ")");
|
||||
}
|
||||
AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready");
|
||||
vector_indexings_.append_field_indexing(field_id, metric_type, index);
|
||||
vector_indexings_.append_field_indexing(field_id, metric_type,
|
||||
std::move(const_cast<index::LoadIndexInfo&>(info).index));
|
||||
|
||||
set_bit(index_ready_bitset_, field_id, true);
|
||||
update_row_count(row_count);
|
||||
|
@ -109,13 +85,12 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
|||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
||||
SegmentSealedImpl::LoadScalarIndex(const index::LoadIndexInfo& info) {
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
auto index = std::dynamic_pointer_cast<scalar::IndexBase>(info.index);
|
||||
auto row_count = index->Count();
|
||||
auto row_count = info.index->Count();
|
||||
AssertInfo(row_count > 0, "Index count is 0");
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
|
@ -131,21 +106,21 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||
std::to_string(row_count_opt_.value()) + ")");
|
||||
}
|
||||
|
||||
scalar_indexings_[field_id] = index;
|
||||
scalar_indexings_[field_id] = std::move(const_cast<index::LoadIndexInfo&>(info).index);
|
||||
// reverse pk from scalar index and set pks to offset
|
||||
if (schema_->get_primary_field_id() == field_id) {
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
AssertInfo(insert_record_.empty_pks(), "already exists");
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::INT64: {
|
||||
auto int64_index = std::dynamic_pointer_cast<scalar::ScalarIndex<int64_t>>(info.index);
|
||||
auto int64_index = dynamic_cast<index::ScalarIndex<int64_t>*>(scalar_indexings_[field_id].get());
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
insert_record_.insert_pk(int64_index->Reverse_Lookup(i), i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto string_index = std::dynamic_pointer_cast<scalar::ScalarIndex<std::string>>(info.index);
|
||||
auto string_index = dynamic_cast<index::ScalarIndex<std::string>*>(scalar_indexings_[field_id].get());
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
insert_record_.insert_pk(string_index->Reverse_Lookup(i), i);
|
||||
}
|
||||
|
@ -300,7 +275,7 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
|||
return field_data->get_span_base(0);
|
||||
}
|
||||
|
||||
const knowhere::Index*
|
||||
const index::IndexBase*
|
||||
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(),
|
||||
"Cannot find scalar_indexing with field_id: " + std::to_string(field_id.get()));
|
||||
|
@ -349,7 +324,7 @@ SegmentSealedImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Tim
|
|||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::vector_search(query::SearchInfo& search_info,
|
||||
SegmentSealedImpl::vector_search(SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t query_count,
|
||||
Timestamp timestamp,
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "SealedIndexingRecord.h"
|
||||
#include "SegmentSealed.h"
|
||||
#include "TimestampIndex.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -34,7 +35,7 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
public:
|
||||
explicit SegmentSealedImpl(SchemaPtr schema, int64_t segment_id);
|
||||
void
|
||||
LoadIndex(const LoadIndexInfo& info) override;
|
||||
LoadIndex(const index::LoadIndexInfo& info) override;
|
||||
void
|
||||
LoadFieldData(const LoadFieldDataInfo& info) override;
|
||||
void
|
||||
|
@ -97,7 +98,7 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
SpanBase
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
const knowhere::Index*
|
||||
const index::IndexBase*
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
// Calculate: output[i] = Vec[seg_offset[i]],
|
||||
|
@ -141,7 +142,7 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const override;
|
||||
|
||||
void
|
||||
vector_search(query::SearchInfo& search_info,
|
||||
vector_search(SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t query_count,
|
||||
Timestamp timestamp,
|
||||
|
@ -171,10 +172,10 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
search_ids(const BitsetType& view, Timestamp timestamp) const override;
|
||||
|
||||
void
|
||||
LoadVecIndex(const LoadIndexInfo& info);
|
||||
LoadVecIndex(const index::LoadIndexInfo& info);
|
||||
|
||||
void
|
||||
LoadScalarIndex(const LoadIndexInfo& info);
|
||||
LoadScalarIndex(const index::LoadIndexInfo& info);
|
||||
|
||||
private:
|
||||
// segment loading state
|
||||
|
@ -187,7 +188,7 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
std::optional<int64_t> row_count_opt_;
|
||||
|
||||
// scalar field index
|
||||
std::unordered_map<FieldId, knowhere::IndexPtr> scalar_indexings_;
|
||||
std::unordered_map<FieldId, index::IndexBasePtr> scalar_indexings_;
|
||||
// vector field index
|
||||
SealedIndexingRecord vector_indexings_;
|
||||
|
||||
|
|
|
@ -11,11 +11,11 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
|
||||
#include "common/Types.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
static inline bool
|
||||
PositivelyRelated(const knowhere::MetricType& metric_type) {
|
||||
PositivelyRelated(const MetricType& metric_type) {
|
||||
return metric_type == knowhere::metric::IP;
|
||||
}
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -259,7 +259,7 @@ MergeDataArray(std::vector<std::pair<milvus::SearchResult*, int64_t>>& result_of
|
|||
|
||||
// TODO: split scalar IndexBase with knowhere::Index
|
||||
std::unique_ptr<DataArray>
|
||||
ReverseDataFromIndex(const knowhere::Index* index,
|
||||
ReverseDataFromIndex(const index::IndexBase* index,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
const FieldMeta& field_meta) {
|
||||
|
@ -271,7 +271,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
auto scalar_array = data_array->mutable_scalars();
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
using IndexType = scalar::ScalarIndex<bool>;
|
||||
using IndexType = index::ScalarIndex<bool>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<bool> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -282,7 +282,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
using IndexType = scalar::ScalarIndex<int8_t>;
|
||||
using IndexType = index::ScalarIndex<int8_t>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<int8_t> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -293,7 +293,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
using IndexType = scalar::ScalarIndex<int16_t>;
|
||||
using IndexType = index::ScalarIndex<int16_t>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<int16_t> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -304,7 +304,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
using IndexType = scalar::ScalarIndex<int32_t>;
|
||||
using IndexType = index::ScalarIndex<int32_t>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<int32_t> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -315,7 +315,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
using IndexType = scalar::ScalarIndex<int64_t>;
|
||||
using IndexType = index::ScalarIndex<int64_t>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<int64_t> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -326,7 +326,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
using IndexType = scalar::ScalarIndex<float>;
|
||||
using IndexType = index::ScalarIndex<float>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<float> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -337,7 +337,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
using IndexType = scalar::ScalarIndex<double>;
|
||||
using IndexType = index::ScalarIndex<double>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<double> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
@ -348,7 +348,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
|
|||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
using IndexType = scalar::ScalarIndex<std::string>;
|
||||
using IndexType = index::ScalarIndex<std::string>;
|
||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||
std::vector<std::string> raw_data(count);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/QueryResult.h"
|
||||
#include "knowhere/index/Index.h"
|
||||
#include "segcore/DeletedRecord.h"
|
||||
#include "segcore/InsertRecord.h"
|
||||
#include "index/Index.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -56,7 +56,7 @@ get_deleted_bitmap(int64_t del_barrier,
|
|||
Timestamp query_timestamp);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
ReverseDataFromIndex(const knowhere::Index* index,
|
||||
ReverseDataFromIndex(const index::IndexBase* index,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
const FieldMeta& field_meta);
|
||||
|
|
|
@ -10,17 +10,19 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "common/CDataType.h"
|
||||
#include "common/LoadInfo.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "common/FieldMeta.h"
|
||||
#include "common/Utils.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/Utils.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "knowhere/common/BinarySet.h"
|
||||
#include "knowhere/index/VecIndexFactory.h"
|
||||
#include "storage/Util.h"
|
||||
#include "segcore/load_index_c.h"
|
||||
#include "pb/index_cgo_msg.pb.h"
|
||||
|
||||
CStatus
|
||||
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info) {
|
||||
try {
|
||||
auto load_index_info = std::make_unique<LoadIndexInfo>();
|
||||
auto load_index_info = std::make_unique<milvus::index::LoadIndexInfo>();
|
||||
*c_load_index_info = load_index_info.release();
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
|
@ -36,14 +38,14 @@ NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info) {
|
|||
|
||||
void
|
||||
DeleteLoadIndexInfo(CLoadIndexInfo c_load_index_info) {
|
||||
auto info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
delete info;
|
||||
}
|
||||
|
||||
CStatus
|
||||
AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* c_index_key, const char* c_index_value) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
std::string index_key(c_index_key);
|
||||
std::string index_value(c_index_value);
|
||||
load_index_info->index_params[index_key] = index_value;
|
||||
|
@ -61,11 +63,19 @@ AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* c_index_key, cons
|
|||
}
|
||||
|
||||
CStatus
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataType field_type) {
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info,
|
||||
int64_t collection_id,
|
||||
int64_t partition_id,
|
||||
int64_t segment_id,
|
||||
int64_t field_id,
|
||||
enum CDataType field_type) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
load_index_info->collection_id = collection_id;
|
||||
load_index_info->partition_id = partition_id;
|
||||
load_index_info->segment_id = segment_id;
|
||||
load_index_info->field_id = field_id;
|
||||
load_index_info->field_type = field_type;
|
||||
load_index_info->field_type = milvus::DataType(field_type);
|
||||
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
|
@ -82,23 +92,39 @@ AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataTy
|
|||
CStatus
|
||||
appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
auto binary_set = (knowhere::BinarySet*)c_binary_set;
|
||||
auto& index_params = load_index_info->index_params;
|
||||
bool find_index_type = index_params.count("index_type") > 0 ? true : false;
|
||||
bool find_index_mode = index_params.count("index_mode") > 0 ? true : false;
|
||||
AssertInfo(find_index_type == true, "Can't find index type in index_params");
|
||||
knowhere::IndexMode mode;
|
||||
if (find_index_mode) {
|
||||
std::string index_mode = index_params["index_mode"];
|
||||
mode = (index_mode == "CPU" || index_mode == "cpu") ? knowhere::IndexMode::MODE_CPU
|
||||
: knowhere::IndexMode::MODE_GPU;
|
||||
} else {
|
||||
mode = knowhere::IndexMode::MODE_CPU;
|
||||
|
||||
milvus::index::CreateIndexInfo index_info;
|
||||
index_info.field_type = load_index_info->field_type;
|
||||
|
||||
// get index type
|
||||
AssertInfo(index_params.find("index_type") != index_params.end(), "index type is empty");
|
||||
index_info.index_type = index_params.at("index_type");
|
||||
|
||||
// get metric type
|
||||
AssertInfo(index_params.find("metric_type") != index_params.end(), "metric type is empty");
|
||||
index_info.metric_type = index_params.at("metric_type");
|
||||
|
||||
// set default index mode
|
||||
index_info.index_mode = milvus::IndexMode::MODE_CPU;
|
||||
if (index_params.count("index_mode")) {
|
||||
index_info.index_mode = milvus::index::GetIndexMode(index_params["index_mode"]);
|
||||
}
|
||||
load_index_info->index =
|
||||
knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_params["index_type"], mode);
|
||||
load_index_info->index->Load(*binary_set);
|
||||
|
||||
// init file manager
|
||||
milvus::storage::FieldDataMeta field_meta{load_index_info->collection_id, load_index_info->partition_id,
|
||||
load_index_info->segment_id, load_index_info->field_id};
|
||||
milvus::storage::IndexMeta index_meta{load_index_info->segment_id, load_index_info->field_id,
|
||||
load_index_info->index_build_id, load_index_info->index_version};
|
||||
auto file_manager = milvus::storage::CreateFileManager(index_info.index_type, field_meta, index_meta);
|
||||
|
||||
auto config = milvus::index::ParseConfigFromIndexParams(load_index_info->index_params);
|
||||
config["index_files"] = load_index_info->index_files;
|
||||
|
||||
load_index_info->index = milvus::index::IndexFactory::GetInstance().CreateIndex(index_info, file_manager);
|
||||
load_index_info->index->Load(*binary_set, config);
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
status.error_msg = "";
|
||||
|
@ -114,14 +140,23 @@ appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
|||
CStatus
|
||||
appendScalarIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
auto field_type = load_index_info->field_type;
|
||||
auto binary_set = (knowhere::BinarySet*)c_binary_set;
|
||||
auto& index_params = load_index_info->index_params;
|
||||
bool find_index_type = index_params.count("index_type") > 0 ? true : false;
|
||||
AssertInfo(find_index_type == true, "Can't find index type in index_params");
|
||||
load_index_info->index =
|
||||
milvus::scalar::IndexFactory::GetInstance().CreateIndex(field_type, index_params["index_type"]);
|
||||
|
||||
milvus::index::CreateIndexInfo index_info;
|
||||
index_info.field_type = milvus::DataType(field_type);
|
||||
index_info.index_type = index_params["index_type"];
|
||||
// set default index mode
|
||||
index_info.index_mode = milvus::IndexMode::MODE_CPU;
|
||||
if (index_params.count("index_mode")) {
|
||||
index_info.index_mode = milvus::index::GetIndexMode(index_params["index_mode"]);
|
||||
}
|
||||
|
||||
load_index_info->index = milvus::index::IndexFactory::GetInstance().CreateIndex(index_info, nullptr);
|
||||
load_index_info->index->Load(*binary_set);
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
|
@ -137,10 +172,78 @@ appendScalarIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
|||
|
||||
CStatus
|
||||
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
auto field_type = load_index_info->field_type;
|
||||
if (milvus::IsVectorType(field_type)) {
|
||||
if (milvus::datatype_is_vector(field_type)) {
|
||||
return appendVecIndex(c_load_index_info, c_binary_set);
|
||||
}
|
||||
return appendScalarIndex(c_load_index_info, c_binary_set);
|
||||
}
|
||||
|
||||
// Append one index file path to the load-index descriptor.
//
// c_load_index_info: opaque handle, reinterpreted as milvus::index::LoadIndexInfo*.
// c_file_path:       C string copied into the descriptor's `index_files` list.
//
// Returns a CStatus with error_code Success and an empty message, or
// UnexpectedError with a strdup()'ed copy of the exception text when a
// std::exception is caught.
CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) {
    auto result = CStatus();
    try {
        auto* info = (milvus::index::LoadIndexInfo*)c_load_index_info;
        info->index_files.emplace_back(std::string(c_file_path));

        result.error_code = Success;
        result.error_msg = "";
    } catch (std::exception& e) {
        result.error_code = UnexpectedError;
        result.error_msg = strdup(e.what());
    }
    return result;
}
|
||||
|
||||
// Fill in the index identity and index parameters of a load-index descriptor.
//
// c_load_index_info: opaque handle, reinterpreted as milvus::index::LoadIndexInfo*.
// index_id / build_id / version: stored into index_id, index_build_id and
//                                index_version respectively.
// c_index_params:    handed to milvus::index::ParseFromString to populate a
//                    milvus::proto::indexcgo::IndexParams message; every
//                    key/value pair of that message is then written into the
//                    descriptor's `index_params` map (an existing key is
//                    overwritten).
//
// Returns a CStatus with error_code Success and an empty message, or
// UnexpectedError with a strdup()'ed copy of the exception text when a
// std::exception is caught.
CStatus
AppendIndexInfo(
    CLoadIndexInfo c_load_index_info, int64_t index_id, int64_t build_id, int64_t version, const char* c_index_params) {
    auto result = CStatus();
    try {
        auto* info = (milvus::index::LoadIndexInfo*)c_load_index_info;
        info->index_id = index_id;
        info->index_build_id = build_id;
        info->index_version = version;

        milvus::proto::indexcgo::IndexParams parsed_params;
        milvus::index::ParseFromString(parsed_params, c_index_params);

        for (const auto& entry : parsed_params.params()) {
            info->index_params[entry.key()] = entry.value();
        }

        result.error_code = Success;
        result.error_msg = "";
    } catch (std::exception& e) {
        result.error_code = UnexpectedError;
        result.error_msg = strdup(e.what());
    }
    return result;
}
|
||||
|
||||
// Remove the local on-disk files of a loaded index.
//
// When BUILD_DISK_ANN is defined, the directory returned by
// milvus::storage::GenLocalIndexPathPrefix(index_build_id, index_version) is
// removed through LocalChunkManager::RemoveDir. Without BUILD_DISK_ANN there is
// nothing to remove, so the handle is not touched at all and Success is
// returned.
//
// c_load_index_info: opaque handle, reinterpreted as milvus::index::LoadIndexInfo*.
//
// Returns a CStatus with error_code Success and an empty message, or
// UnexpectedError with a strdup()'ed copy of the exception text when a
// std::exception is caught.
CStatus
CleanLoadedIndex(CLoadIndexInfo c_load_index_info) {
    try {
#ifdef BUILD_DISK_ANN
        // Only resolve the handle and build the path when something is actually removed.
        auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
        auto index_file_path_prefix =
            milvus::storage::GenLocalIndexPathPrefix(load_index_info->index_build_id, load_index_info->index_version);
        milvus::storage::LocalChunkManager::GetInstance().RemoveDir(index_file_path_prefix);
#else
        (void)c_load_index_info;  // no local index files exist in this build configuration
#endif
        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}
|
||||
|
|
|
@ -17,7 +17,7 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "common/vector_index_c.h"
|
||||
#include "common/binary_set_c.h"
|
||||
#include "common/type_c.h"
|
||||
#include "segcore/collection_c.h"
|
||||
|
||||
|
@ -33,11 +33,26 @@ CStatus
|
|||
AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* index_key, const char* index_value);
|
||||
|
||||
CStatus
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataType field_type);
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info,
|
||||
int64_t collection_id,
|
||||
int64_t partition_id,
|
||||
int64_t segment_id,
|
||||
int64_t field_id,
|
||||
enum CDataType field_type);
|
||||
|
||||
CStatus
|
||||
AppendIndexInfo(
|
||||
CLoadIndexInfo c_load_index_info, int64_t index_id, int64_t build_id, int64_t version, const char* index_params);
|
||||
|
||||
CStatus
|
||||
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set);
|
||||
|
||||
CStatus
|
||||
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path);
|
||||
|
||||
CStatus
|
||||
CleanLoadedIndex(CLoadIndexInfo c_load_index_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/SimilarityCorelation.h"
|
||||
#include "segcore/segment_c.h"
|
||||
#include "index/IndexInfo.h"
|
||||
#include "google/protobuf/text_format.h"
|
||||
|
||||
////////////////////////////// common interfaces //////////////////////////////
|
||||
|
@ -238,7 +239,7 @@ UpdateSealedSegmentIndex(CSegmentInterface c_segment, CLoadIndexInfo c_load_inde
|
|||
auto segment_interface = reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
|
||||
auto segment = dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
|
||||
AssertInfo(segment != nullptr, "segment conversion failed");
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
|
||||
segment->LoadIndex(*load_index_info);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
|
|
|
@ -34,10 +34,23 @@ set(STORAGE_FILES
|
|||
IndexData.cpp
|
||||
InsertData.cpp
|
||||
Event.cpp
|
||||
)
|
||||
storage_c.cpp)
|
||||
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
set(STORAGE_FILES
|
||||
${STORAGE_FILES}
|
||||
LocalChunkManager.cpp
|
||||
MinioChunkManager.cpp
|
||||
DiskFileManagerImpl.cpp)
|
||||
endif ()
|
||||
|
||||
add_library(milvus_storage SHARED ${STORAGE_FILES})
|
||||
#target_link_libraries( milvus_storage PUBLIC milvus_common boost_system boost_filesystem aws-cpp-sdk-s3 pthread)
|
||||
target_link_libraries( milvus_storage PUBLIC milvus_common pthread)
|
||||
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
target_link_libraries( milvus_storage PUBLIC milvus_common boost_system boost_filesystem aws-cpp-sdk-s3 pthread)
|
||||
else()
|
||||
target_link_libraries( milvus_storage PUBLIC milvus_common pthread)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_INSTALL_PREFIX)
|
||||
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
|
|
@ -19,14 +19,14 @@
|
|||
#include <mutex>
|
||||
|
||||
#include "common/Consts.h"
|
||||
#include "storage/DiskANNFileManagerImpl.h"
|
||||
#include "log/Log.h"
|
||||
#include "config/ConfigKnowhere.h"
|
||||
#include "storage/DiskFileManagerImpl.h"
|
||||
#include "storage/LocalChunkManager.h"
|
||||
#include "storage/MinioChunkManager.h"
|
||||
#include "storage/Exception.h"
|
||||
#include "log/Log.h"
|
||||
#include "storage/FieldData.h"
|
||||
#include "storage/IndexData.h"
|
||||
#include "config/ConfigKnowhere.h"
|
||||
#include "storage/Util.h"
|
||||
|
||||
#define FILEMANAGER_TRY try {
|
||||
|
@ -58,22 +58,22 @@ using WriteLock = std::lock_guard<std::shared_mutex>;
|
|||
|
||||
namespace milvus::storage {
|
||||
|
||||
DiskANNFileManagerImpl::DiskANNFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta)
|
||||
DiskFileManagerImpl::DiskFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta)
|
||||
: field_meta_(field_mata), index_meta_(index_meta) {
|
||||
}
|
||||
|
||||
DiskANNFileManagerImpl::~DiskANNFileManagerImpl() {
|
||||
DiskFileManagerImpl::~DiskFileManagerImpl() {
|
||||
auto& local_chunk_manager = LocalChunkManager::GetInstance();
|
||||
local_chunk_manager.RemoveDir(GetLocalIndexPathPrefixWithBuildID(index_meta_.build_id));
|
||||
}
|
||||
|
||||
bool
|
||||
DiskANNFileManagerImpl::LoadFile(const std::string& file) noexcept {
|
||||
DiskFileManagerImpl::LoadFile(const std::string& file) noexcept {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
DiskANNFileManagerImpl::AddFile(const std::string& file) noexcept {
|
||||
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
|
||||
auto& local_chunk_manager = LocalChunkManager::GetInstance();
|
||||
auto& remote_chunk_manager = MinioChunkManager::GetInstance();
|
||||
FILEMANAGER_TRY
|
||||
|
@ -119,7 +119,7 @@ DiskANNFileManagerImpl::AddFile(const std::string& file) noexcept {
|
|||
} // namespace knowhere
|
||||
|
||||
void
|
||||
DiskANNFileManagerImpl::CacheIndexToDisk(std::vector<std::string> remote_files) {
|
||||
DiskFileManagerImpl::CacheIndexToDisk(std::vector<std::string> remote_files) {
|
||||
auto& local_chunk_manager = LocalChunkManager::GetInstance();
|
||||
auto& remote_chunk_manager = MinioChunkManager::GetInstance();
|
||||
|
||||
|
@ -157,30 +157,30 @@ DiskANNFileManagerImpl::CacheIndexToDisk(std::vector<std::string> remote_files)
|
|||
}
|
||||
|
||||
std::string
|
||||
DiskANNFileManagerImpl::GetFileName(const std::string& localfile) {
|
||||
DiskFileManagerImpl::GetFileName(const std::string& localfile) {
|
||||
boost::filesystem::path localPath(localfile);
|
||||
return localPath.filename().string();
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskANNFileManagerImpl::GetRemoteIndexObjectPrefix() {
|
||||
return "files/" + std::string(INDEX_ROOT_PATH) + "/" + std::to_string(index_meta_.build_id) + "/" +
|
||||
std::to_string(index_meta_.index_version) + "/" + std::to_string(field_meta_.partition_id) + "/" +
|
||||
std::to_string(field_meta_.segment_id);
|
||||
DiskFileManagerImpl::GetRemoteIndexObjectPrefix() {
|
||||
return ChunkMangerConfig::GetRemoteRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
std::to_string(index_meta_.build_id) + "/" + std::to_string(index_meta_.index_version) + "/" +
|
||||
std::to_string(field_meta_.partition_id) + "/" + std::to_string(field_meta_.segment_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskANNFileManagerImpl::GetLocalIndexObjectPrefix() {
|
||||
DiskFileManagerImpl::GetLocalIndexObjectPrefix() {
|
||||
return GenLocalIndexPathPrefix(index_meta_.build_id, index_meta_.index_version);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskANNFileManagerImpl::GetLocalRawDataObjectPrefix() {
|
||||
return GenRawDataPathPrefix(field_meta_.segment_id, field_meta_.field_id);
|
||||
DiskFileManagerImpl::GetLocalRawDataObjectPrefix() {
|
||||
return GenFieldRawDataPathPrefix(field_meta_.segment_id, field_meta_.field_id);
|
||||
}
|
||||
|
||||
bool
|
||||
DiskANNFileManagerImpl::RemoveFile(const std::string& file) noexcept {
|
||||
DiskFileManagerImpl::RemoveFile(const std::string& file) noexcept {
|
||||
// remove local file
|
||||
bool localExist = false;
|
||||
auto& local_chunk_manager = LocalChunkManager::GetInstance();
|
||||
|
@ -213,7 +213,7 @@ DiskANNFileManagerImpl::RemoveFile(const std::string& file) noexcept {
|
|||
}
|
||||
|
||||
std::optional<bool>
|
||||
DiskANNFileManagerImpl::IsExisted(const std::string& file) noexcept {
|
||||
DiskFileManagerImpl::IsExisted(const std::string& file) noexcept {
|
||||
bool isExist = false;
|
||||
auto& local_chunk_manager = LocalChunkManager::GetInstance();
|
||||
auto& remote_chunk_manager = MinioChunkManager::GetInstance();
|
|
@ -27,11 +27,11 @@
|
|||
|
||||
namespace milvus::storage {
|
||||
|
||||
class DiskANNFileManagerImpl : public FileManagerImpl {
|
||||
class DiskFileManagerImpl : public FileManagerImpl {
|
||||
public:
|
||||
explicit DiskANNFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta);
|
||||
explicit DiskFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta);
|
||||
|
||||
virtual ~DiskANNFileManagerImpl();
|
||||
virtual ~DiskFileManagerImpl();
|
||||
|
||||
virtual bool
|
||||
LoadFile(const std::string& filename) noexcept;
|
||||
|
@ -48,7 +48,7 @@ class DiskANNFileManagerImpl : public FileManagerImpl {
|
|||
public:
|
||||
virtual std::string
|
||||
GetName() const {
|
||||
return "DiskANNFileManagerImpl";
|
||||
return "DiskFileManagerImpl";
|
||||
}
|
||||
|
||||
std::string
|
||||
|
@ -61,7 +61,7 @@ class DiskANNFileManagerImpl : public FileManagerImpl {
|
|||
GetLocalRawDataObjectPrefix();
|
||||
|
||||
std::map<std::string, int64_t>
|
||||
GetRemotePaths() const {
|
||||
GetRemotePathsToFileSize() const {
|
||||
return remote_paths_to_size_;
|
||||
}
|
||||
|
||||
|
@ -101,6 +101,6 @@ class DiskANNFileManagerImpl : public FileManagerImpl {
|
|||
std::map<std::string, int64_t> remote_paths_to_size_;
|
||||
};
|
||||
|
||||
using DiskANNFileManagerImplPtr = std::shared_ptr<DiskANNFileManagerImpl>;
|
||||
using DiskANNFileManagerImplPtr = std::shared_ptr<DiskFileManagerImpl>;
|
||||
|
||||
} // namespace milvus::storage
|
|
@ -42,7 +42,7 @@ class LocalChunkManager : public ChunkManager {
|
|||
static LocalChunkManager&
|
||||
GetInstance() {
|
||||
// thread-safe enough after c++ 11
|
||||
static LocalChunkManager instance(ChunkMangerConfig::GetLocalBucketName());
|
||||
static LocalChunkManager instance(ChunkMangerConfig::GetLocalRootPath());
|
||||
return instance;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
#include <fstream>
|
||||
#include <aws/core/auth/AWSCredentials.h>
|
||||
#include <aws/core/auth/AWSCredentialsProviderChain.h>
|
||||
#include <aws/core/auth/STSCredentialsProvider.h>
|
||||
#include <aws/s3/model/CreateBucketRequest.h>
|
||||
#include <aws/s3/model/DeleteBucketRequest.h>
|
||||
#include <aws/s3/model/DeleteObjectRequest.h>
|
||||
|
@ -66,7 +68,8 @@ MinioChunkManager::MinioChunkManager(const std::string& endpoint,
|
|||
const std::string& access_key,
|
||||
const std::string& access_value,
|
||||
const std::string& bucket_name,
|
||||
bool secure)
|
||||
bool secure,
|
||||
bool use_iam)
|
||||
: default_bucket_name_(bucket_name) {
|
||||
Aws::InitAPI(sdk_options_);
|
||||
Aws::Client::ClientConfiguration config;
|
||||
|
@ -80,9 +83,20 @@ MinioChunkManager::MinioChunkManager(const std::string& endpoint,
|
|||
config.verifySSL = false;
|
||||
}
|
||||
|
||||
client_ = std::make_shared<Aws::S3::S3Client>(
|
||||
Aws::Auth::AWSCredentials(ConvertToAwsString(access_key), ConvertToAwsString(access_value)), config,
|
||||
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
|
||||
if (use_iam) {
|
||||
auto provider = std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
|
||||
client_ = std::make_shared<Aws::S3::S3Client>(provider, config,
|
||||
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
|
||||
|
||||
LOG_SEGCORE_INFO_C << "use iam mode, credentials{ access_id:"
|
||||
<< provider->GetAWSCredentials().GetAWSAccessKeyId()
|
||||
<< " access_key:" << provider->GetAWSCredentials().GetAWSSecretKey()
|
||||
<< " token:" << provider->GetAWSCredentials().GetSessionToken() << "}";
|
||||
} else {
|
||||
client_ = std::make_shared<Aws::S3::S3Client>(
|
||||
Aws::Auth::AWSCredentials(ConvertToAwsString(access_key), ConvertToAwsString(access_value)), config,
|
||||
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
|
||||
}
|
||||
|
||||
LOG_SEGCORE_INFO_C << "init MinioChunkManager with parameter[endpoint: '" << endpoint << "', access_key:'"
|
||||
<< access_key << "', access_value:'" << access_value << "', default_bucket_name:'" << bucket_name
|
||||
|
|
|
@ -38,7 +38,8 @@ class MinioChunkManager : public RemoteChunkManager {
|
|||
const std::string& access_key,
|
||||
const std::string& access_value,
|
||||
const std::string& default_bucket_name,
|
||||
bool sercure = false);
|
||||
bool serure = false,
|
||||
bool use_iam = false);
|
||||
|
||||
MinioChunkManager(const MinioChunkManager&);
|
||||
MinioChunkManager&
|
||||
|
@ -52,7 +53,7 @@ class MinioChunkManager : public RemoteChunkManager {
|
|||
// thread-safe enough after c++ 11
|
||||
static MinioChunkManager instance(ChunkMangerConfig::GetAddress(), ChunkMangerConfig::GetAccessKey(),
|
||||
ChunkMangerConfig::GetAccessValue(), ChunkMangerConfig::GetBucketName(),
|
||||
ChunkMangerConfig::GetUseSSL());
|
||||
ChunkMangerConfig::GetUseSSL(), ChunkMangerConfig::GetUseIAM());
|
||||
return instance;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#include "common/Consts.h"
|
||||
#include "config/ConfigChunkManager.h"
|
||||
|
||||
#ifdef BUILD_DISK_ANN
|
||||
#include "storage/DiskFileManagerImpl.h"
|
||||
#endif
|
||||
|
||||
namespace milvus::storage {
|
||||
|
||||
StorageType
|
||||
|
@ -320,26 +324,51 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data, DataType data_typ
|
|||
|
||||
std::string
|
||||
GenLocalIndexPathPrefix(int64_t build_id, int64_t index_version) {
|
||||
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
std::to_string(build_id) + "/" + std::to_string(index_version) + "/";
|
||||
}
|
||||
|
||||
std::string
|
||||
GetLocalIndexPathPrefixWithBuildID(int64_t build_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
std::to_string(build_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
GenRawDataPathPrefix(int64_t segment_id, int64_t field_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
|
||||
GenFieldRawDataPathPrefix(int64_t segment_id, int64_t field_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
|
||||
std::to_string(segment_id) + "/" + std::to_string(field_id) + "/";
|
||||
}
|
||||
|
||||
std::string
|
||||
GetLocalRawDataPathPrefixWithBuildID(int64_t segment_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
|
||||
GetSegmentRawDataPathPrefix(int64_t segment_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
|
||||
std::to_string(segment_id);
|
||||
}
|
||||
|
||||
std::vector<IndexType>
|
||||
DISK_LIST() {
|
||||
static std::vector<IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_DISKANN,
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_disk_list(const IndexType& index_type) {
|
||||
return is_in_list<IndexType>(index_type, DISK_LIST);
|
||||
}
|
||||
|
||||
// Create the file manager that matches `index_type`.
//
// When BUILD_DISK_ANN is defined and `index_type` is in the disk list, a
// DiskFileManagerImpl built from `field_meta` and `index_meta` is returned.
// In every other case the result is nullptr.
FileManagerImplPtr
CreateFileManager(IndexType index_type, const FieldDataMeta& field_meta, const IndexMeta& index_meta) {
    // TODO :: switch case index type to create file manager
    FileManagerImplPtr file_manager = nullptr;
#ifdef BUILD_DISK_ANN
    if (is_in_disk_list(index_type)) {
        file_manager = std::make_shared<DiskFileManagerImpl>(field_meta, index_meta);
    }
#endif
    return file_manager;
}
|
||||
|
||||
} // namespace milvus::storage
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "storage/PayloadStream.h"
|
||||
#include "storage/FileManager.h"
|
||||
|
@ -62,12 +63,22 @@ std::string
|
|||
GenLocalIndexPathPrefix(int64_t build_id, int64_t index_version);
|
||||
|
||||
std::string
|
||||
GenRawDataPathPrefix(int64_t segment_id, int64_t field_id);
|
||||
GenFieldRawDataPathPrefix(int64_t segment_id, int64_t field_id);
|
||||
|
||||
std::string
|
||||
GetLocalRawDataPathPrefixWithBuildID(int64_t segment_id);
|
||||
GetSegmentRawDataPathPrefix(int64_t segment_id);
|
||||
|
||||
// Report whether `t` is an element of the list produced by `list_func`.
//
// t:         value to look for; compared against the elements with operator==.
// list_func: any nullary callable (function, lambda, std::function, ...) whose
//            result exposes begin()/end(). Accepting the callable directly
//            avoids wrapping it in a std::function on every call and lets `T`
//            be deduced; the explicit form is_in_list<T>(t, f) still works.
//
// Returns true if at least one element equals `t`, false otherwise (including
// for an empty list).
template <typename T, typename ListFunc>
inline bool
is_in_list(const T& t, ListFunc&& list_func) {
    // Binding by const reference keeps a by-value result alive for this scope
    // and avoids a copy when the callable returns a reference.
    const auto& candidates = list_func();
    return std::find(candidates.begin(), candidates.end(), t) != candidates.end();
}
|
||||
|
||||
bool
|
||||
is_in_disk_list(const IndexType& index_type);
|
||||
|
||||
FileManagerImplPtr
|
||||
CreateFileManager(knowhere::IndexType index_type, const FieldDataMeta& field_meta, const IndexMeta& index_meta);
|
||||
CreateFileManager(IndexType index_type, const FieldDataMeta& field_meta, const IndexMeta& index_meta);
|
||||
|
||||
} // namespace milvus::storage
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "storage/storage_c.h"
|
||||
#include "config/ConfigChunkManager.h"
|
||||
#include "common/CGoHelper.h"
|
||||
|
||||
#ifdef BUILD_DISK_ANN
|
||||
#include "storage/LocalChunkManager.h"
|
||||
#endif
|
||||
|
||||
// Reports, through `size`, the size of the local chunk-manager root directory.
//
// With BUILD_DISK_ANN defined, `*size` receives
// LocalChunkManager::GetSizeOfDir(ChunkMangerConfig::GetLocalRootPath()),
// or 0 when that directory does not exist.
// Without BUILD_DISK_ANN nothing is measured and `*size` is set to 0, so the
// caller never reads an unwritten value after a success status.
//
// Returns a success CStatus, or a failure CStatus (UnexpectedError) when `size`
// is null or a std::exception is thrown while querying the directory. On
// failure `*size` is left untouched.
CStatus
GetLocalUsedSize(int64_t* size) {
    if (size == nullptr) {
        return milvus::FailureCStatus(UnexpectedError, "GetLocalUsedSize: size output pointer is null");
    }

    try {
        // Accumulate into a local so the output is written only once, after every
        // call that may throw has completed.
        int64_t used_size = 0;
#ifdef BUILD_DISK_ANN
        auto& local_chunk_manager = milvus::storage::LocalChunkManager::GetInstance();
        auto dir = milvus::ChunkMangerConfig::GetLocalRootPath();
        if (local_chunk_manager.DirExist(dir)) {
            used_size = local_chunk_manager.GetSizeOfDir(dir);
        }
#endif
        *size = used_size;
        return milvus::SuccessCStatus();
    } catch (const std::exception& e) {
        return milvus::FailureCStatus(UnexpectedError, e.what());
    }
}
|
|
@ -0,0 +1,29 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "common/type_c.h"
|
||||
|
||||
CStatus
|
||||
GetLocalUsedSize(int64_t* size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
|
@ -63,7 +63,11 @@ add_subdirectory( protobuf )
|
|||
add_subdirectory( boost_ext )
|
||||
add_subdirectory( arrow )
|
||||
add_subdirectory( rocksdb )
|
||||
#add_subdirectory( aws_sdk )
|
||||
|
||||
# ******************************* Thirdparty aws sdk ********************************
|
||||
if ( LINUX )
|
||||
add_subdirectory( aws_sdk )
|
||||
endif()
|
||||
|
||||
# ******************************* Thirdparty marisa ********************************
|
||||
# TODO: support win.
|
||||
|
|
|
@ -29,9 +29,18 @@ macro(build_knowhere)
|
|||
${EP_COMMON_CMAKE_ARGS}
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_INSTALL_PREFIX=${KNOWHERE_INSTALL_PREFIX}
|
||||
-DKNOWHERE_WITH_DISKANN=false
|
||||
)
|
||||
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
set(KNOWHERE_CMAKE_ARGS
|
||||
${KNOWHERE_CMAKE_ARGS}
|
||||
-DKNOWHERE_WITH_DISKANN=true)
|
||||
else ()
|
||||
set(KNOWHERE_CMAKE_ARGS
|
||||
${KNOWHERE_CMAKE_ARGS}
|
||||
-DKNOWHERE_WITH_DISKANN=false)
|
||||
endif ()
|
||||
|
||||
externalproject_add(knowhere_ep
|
||||
# GIT_REPOSITORY "https://github.com/milvus-io/knowhere.git"
|
||||
# GIT_TAG main
|
||||
|
|
|
@ -49,6 +49,15 @@ set(MILVUS_TEST_FILES
|
|||
test_data_codec.cpp
|
||||
)
|
||||
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
set(MILVUS_TEST_FILES
|
||||
${MILVUS_TEST_FILES}
|
||||
# test_minio_chunk_manager.cpp
|
||||
# test_disk_file_manager_test.cpp
|
||||
test_local_chunk_manager.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if (LINUX OR APPLE)
|
||||
set(MILVUS_TEST_FILES
|
||||
${MILVUS_TEST_FILES}
|
||||
|
@ -74,9 +83,6 @@ if (LINUX)
|
|||
gtest_main
|
||||
milvus_segcore
|
||||
milvus_indexbuilder
|
||||
milvus_index
|
||||
milvus_log
|
||||
pthread
|
||||
)
|
||||
install(TARGETS index_builder_test DESTINATION unittest)
|
||||
endif()
|
||||
|
@ -89,9 +95,6 @@ target_link_libraries(all_tests
|
|||
gtest
|
||||
milvus_segcore
|
||||
milvus_indexbuilder
|
||||
milvus_index
|
||||
milvus_log
|
||||
milvus_storage
|
||||
pthread
|
||||
)
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
#include "pb/index_cgo_msg.pb.h"
|
||||
#include "indexbuilder/VecIndexCreator.h"
|
||||
#include "indexbuilder/index_c.h"
|
||||
#include "indexbuilder/utils.h"
|
||||
#include "test_utils/indexbuilder_test_utils.h"
|
||||
#include "common/Consts.h"
|
||||
|
||||
|
@ -64,9 +63,9 @@ IndexBuilder_build(benchmark::State& state) {
|
|||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
||||
for (auto _ : state) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
|
||||
index->BuildWithoutIds(xb_dataset);
|
||||
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
|
||||
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str());
|
||||
index->Build(xb_dataset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,10 +92,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
|
|||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
||||
for (auto _ : state) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
|
||||
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str());
|
||||
|
||||
index->BuildWithoutIds(xb_dataset);
|
||||
index->Build(xb_dataset);
|
||||
index->Serialize();
|
||||
}
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue