Support diskann index for vector field (#19349)

Signed-off-by: xige-16 <xi.ge@zilliz.com>

Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/19349/head
xige-16 2022-09-21 20:16:51 +08:00 committed by GitHub
parent 9d508dfa49
commit 428840178c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
150 changed files with 4752 additions and 2908 deletions
.github/workflows
build/docker/milvus
ubuntu18.04
ubuntu20.04
internal/core

View File

@ -59,7 +59,8 @@ jobs:
restore-keys: ubuntu20.04-go-mod-
- name: Code Check
env:
CHECK_BUILDER: "1"
# CHECK_BUILDER: "1"
OS_NAME: "ubuntu20.04"
run: |
./build/builder.sh /bin/bash -c "make check-proto-product && make verifiers"
centos:

View File

@ -15,7 +15,7 @@ FROM milvusdb/openblas:ubuntu18.04-20210428 AS openblas
FROM ubuntu:bionic-20200921
RUN apt-get update && \
apt-get install -y --no-install-recommends libtbb-dev libzstd-dev gfortran netcat iputils-ping ca-certificates && \
apt-get install -y --no-install-recommends libtbb-dev libzstd-dev gfortran netcat iputils-ping ca-certificates uuid-dev libaio-dev libboost-program-options-dev && \
apt-get remove --purge -y && \
rm -rf /var/lib/apt/lists/*

View File

@ -14,8 +14,11 @@ FROM milvusdb/openblas:ubuntu20.04-20220914-179ea77 AS openblas
#FROM alpine
FROM ubuntu:focal-20220426
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC
RUN apt-get update && \
apt-get install -y --no-install-recommends libtbb-dev gfortran netcat iputils-ping ca-certificates liblapack3 && \
apt-get install -y --no-install-recommends curl libtbb-dev gfortran netcat iputils-ping ca-certificates liblapack3 libzstd-dev uuid-dev libaio-dev libboost-program-options-dev libboost-filesystem-dev && \
apt-get remove --purge -y && \
rm -rf /var/lib/apt/lists/*

View File

@ -193,6 +193,8 @@ queryNode:
cacheSize: 32 # GB, default 32 GB, `cacheSize` is the memory used for caching data for faster query. The `cacheSize` must be less than system memory size.
port: 21123
loadMemoryUsageFactor: 3 # The multiply factor of calculating the memory usage while loading segments
enableDisk: true # enable querynode load disk index, and search on disk index
maxDiskUsagePercentage: 95
stats:
publishInterval: 1000 # Interval for querynode to report node information (milliseconds)
@ -238,6 +240,8 @@ indexCoord:
indexNode:
port: 21121
enableDisk: true # enable index node build disk vector index
maxDiskUsagePercentage: 95
scheduler:
buildParallel: 1

View File

@ -23,6 +23,7 @@ services:
# - "19530:19530"
environment:
<<: *ccache
OS_NAME: ${OS_NAME}
PULSAR_ADDRESS: ${PULSAR_ADDRESS}
ETCD_ENDPOINTS: ${ETCD_ENDPOINTS}
MINIO_ADDRESS: ${MINIO_ADDRESS}

View File

@ -234,6 +234,10 @@ if ( BUILD_UNIT_TEST STREQUAL "ON" AND BUILD_COVERAGE STREQUAL "ON" )
)
endif ()
# Forward the BUILD_DISK_ANN switch to the compiler as a preprocessor
# definition so that sources can guard disk-index code with
# `#ifdef BUILD_DISK_ANN`. Nothing is defined unless the value is exactly "ON".
if (BUILD_DISK_ANN STREQUAL "ON")
    add_definitions(-DBUILD_DISK_ANN=${BUILD_DISK_ANN})
endif()
# Warning: add_subdirectory(src) must be after append_flags("-ftest-coverage"),
# otherwise cpp code coverage tool will miss src folder
add_subdirectory( thirdparty )

189
internal/core/build.sh Executable file
View File

@ -0,0 +1,189 @@
#!/bin/bash
# Number of parallel compile jobs. Override it from the environment, e.g.
#   $ jobs=12 ./build.sh ...
# A value that is already set (even to the empty string) is left untouched.
if [[ -z ${jobs+x} ]]; then
    if command -v nproc > /dev/null 2>&1; then
        # Linux
        jobs=$(nproc)
    elif command -v sysctl > /dev/null 2>&1; then
        # macOS
        jobs=$(sysctl -n hw.logicalcpu)
    else
        # Neither tool is available: fall back to a fixed job count.
        jobs=4
    fi
fi
# Work out the directory that physically contains this script, following any
# chain of symlinks that was used to invoke it.
SOURCE="${BASH_SOURCE[0]}"
while [[ -L "$SOURCE" ]]; do
    DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
    SOURCE="$(readlink "$SOURCE")"
    # A relative link target is relative to the directory holding the link,
    # so anchor it there before the next iteration.
    case "$SOURCE" in
        /*) ;;
        *) SOURCE="$DIR/$SOURCE" ;;
    esac
done
SCRIPTS_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
# Default build settings. Most of them are overridden by the command-line
# flags handled in the getopts loop and are finally passed to cmake as -D
# options.
# Directory the build runs in (-o).
BUILD_OUTPUT_DIR="./cmake_build"
# CMAKE_BUILD_TYPE (-t).
BUILD_TYPE="Release"
# Whether unit tests are built (-u).
BUILD_UNITTEST="OFF"
# CMAKE_INSTALL_PREFIX (-p); defaults to an "output" directory next to this script.
INSTALL_PREFIX="${SCRIPTS_DIR}/output"
# When "ON" the script only runs `make clean` and exits (-r).
MAKE_CLEAN="OFF"
# Code coverage instrumentation (-c).
BUILD_COVERAGE="OFF"
# CPU profiling support (-z).
PROFILING="OFF"
# When "ON" the script runs the lint/format/tidy targets instead of building (-l).
RUN_CPPLINT="OFF"
# Passed as CMAKE_CUDA_COMPILER; no command-line flag changes it.
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
GPU_VERSION="OFF" #defaults to CPU version
# Prometheus support; -e switches it off.
WITH_PROMETHEUS="ON"
# Passed as MILVUS_CUDA_ARCH (-s).
CUDA_ARCH="DEFAULT"
# Passed as CUSTOM_THIRDPARTY_DOWNLOAD_PATH (-f).
CUSTOM_THIRDPARTY_PATH=""
# Passed as BUILD_DISK_ANN.
BUILD_DISK_ANN="OFF"
# Parse command-line flags into the build settings.
# Flags followed by ':' in the optstring take an argument (available in $OPTARG).
# Fixes relative to the previous version:
#   * 'n' is now part of the optstring, so the -n branch is reachable, and it
#     turns DiskANN support ON (it used to re-assign the default "OFF").
#   * the help text reports the real defaults and the real set of options.
# 'm' is still accepted by getopts but has no dedicated branch, so it ends up
# in the catch-all branch below and is rejected, exactly as before.
while getopts "p:t:s:f:o:ulrcghzmen" arg; do
    case $arg in
        f)
            CUSTOM_THIRDPARTY_PATH=$OPTARG
            ;;
        p)
            INSTALL_PREFIX=$OPTARG
            ;;
        o)
            BUILD_OUTPUT_DIR=$OPTARG
            ;;
        t)
            BUILD_TYPE=$OPTARG # BUILD_TYPE
            ;;
        u)
            echo "Build and run unittest cases"
            BUILD_UNITTEST="ON"
            ;;
        l)
            RUN_CPPLINT="ON"
            ;;
        r)
            # Only meaningful when the build directory already exists at the
            # moment this flag is parsed, so pass -o before -r when both are used.
            if [[ -d ${BUILD_OUTPUT_DIR} ]]; then
                MAKE_CLEAN="ON"
            fi
            ;;
        c)
            BUILD_COVERAGE="ON"
            ;;
        z)
            PROFILING="ON"
            ;;
        g)
            GPU_VERSION="ON"
            ;;
        e)
            WITH_PROMETHEUS="OFF"
            ;;
        s)
            CUDA_ARCH=$OPTARG
            ;;
        n)
            BUILD_DISK_ANN="ON"
            ;;
        h) # help
            echo "
parameter:
-f: custom paths of thirdparty downloaded files(default: NULL)
-p: install prefix(default: ${SCRIPTS_DIR}/output)
-o: build output directory(default: ./cmake_build)
-t: build type(default: Release)
-u: building unit test options(default: OFF)
-l: run cpplint, clang-format and clang-tidy(default: OFF)
-r: remove previous build directory(default: OFF)
-c: code coverage(default: OFF)
-z: profiling(default: OFF)
-g: build GPU version(default: OFF)
-e: build without prometheus(default: OFF)
-s: build with CUDA arch(default:DEFAULT), for example '-gencode=compute_61,code=sm_61;-gencode=compute_75,code=sm_75'
-n: build with DiskANN index support(default: OFF)
-h: help
usage:
./build.sh -p \${INSTALL_PREFIX} -o \${BUILD_OUTPUT_DIR} -t \${BUILD_TYPE} -s \${CUDA_ARCH} -f\${CUSTOM_THIRDPARTY_PATH} [-u] [-l] [-r] [-c] [-z] [-g] [-e] [-n] [-h]
"
            exit 0
            ;;
        ?)
            # '?' is a glob here: it matches getopts' own error marker as well
            # as any accepted option letter that has no branch above.
            echo "ERROR! unknown argument"
            exit 1
            ;;
    esac
done
# Create the build directory if needed and enter it. Abort when either step
# fails: otherwise every following make/cmake command would run in whatever
# directory the script was started from.
# The path is quoted so directories containing spaces work, and -p allows a
# nested path whose parents do not exist yet.
mkdir -p "${BUILD_OUTPUT_DIR}" || exit 1
cd "${BUILD_OUTPUT_DIR}" || exit 1
# remove make cache since build.sh -l use default variables
# force update the variables each time
# (best effort: the output and the exit status are deliberately ignored)
make rebuild_cache >/dev/null 2>&1
# -r was requested and the build directory already existed: clean and stop.
if [[ ${MAKE_CLEAN} == "ON" ]]; then
    echo "Runing make clean in ${BUILD_OUTPUT_DIR} ..."
    make clean
    exit 0
fi
# Platform-specific toolchain setup. On macOS the Homebrew LLVM toolchain is
# exported for the rest of the build; every other system is only reported.
unameOut="$(uname -s)"
if [[ "${unameOut}" == Darwin* ]]; then
    llvm_prefix="$(brew --prefix llvm)"
    export CLANG_TOOLS_PATH="${llvm_prefix}/bin"
    export CC="${llvm_prefix}/bin/clang"
    export CXX="${llvm_prefix}/bin/clang++"
    export LDFLAGS="-L${llvm_prefix}/lib -L/usr/local/opt/libomp/lib"
    export CXXFLAGS="-I${llvm_prefix}/include -I/usr/local/include -I/usr/local/opt/libomp/include"
else
    echo "==System:${unameOut}"
fi
# Assemble and run the cmake configure step.
# The command is kept in an array so that every -D option stays a single
# argument even when its value contains spaces (install prefix, paths, ...).
# KNOWHERE_GPU_VERSION used to expand ${SUPPORT_GPU}, which this script never
# sets; it now follows the -g flag (GPU_VERSION) unless SUPPORT_GPU is provided
# by the environment.
CMAKE_CMD=(
    cmake
    "-DBUILD_UNIT_TEST=${BUILD_UNITTEST}"
    "-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}"
    "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
    "-DOpenBLAS_SOURCE=AUTO"
    "-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER}"
    "-DBUILD_COVERAGE=${BUILD_COVERAGE}"
    "-DENABLE_CPU_PROFILING=${PROFILING}"
    "-DMILVUS_GPU_VERSION=${GPU_VERSION}"
    "-DMILVUS_WITH_PROMETHEUS=${WITH_PROMETHEUS}"
    "-DMILVUS_CUDA_ARCH=${CUDA_ARCH}"
    "-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH}"
    "-DKNOWHERE_GPU_VERSION=${SUPPORT_GPU:-${GPU_VERSION}}"
    "-DBUILD_DISK_ANN=${BUILD_DISK_ANN}"
    "${SCRIPTS_DIR}"
)
echo "${CMAKE_CMD[@]}"
# Stop here when configuration fails instead of running make against a
# missing or stale build tree.
"${CMAKE_CMD[@]}" || exit 1
# Either build and install, or (with -l) run the three static checks in order,
# stopping at the first one that fails.
if [[ ${RUN_CPPLINT} != "ON" ]]; then
    # compile and build
    make -j ${jobs} install || exit 1
else
    # cpplint check
    if ! make lint; then
        echo "ERROR! cpplint check failed"
        exit 1
    fi
    echo "cpplint check passed!"

    # clang-format check
    if ! make check-clang-format; then
        echo "ERROR! clang-format check failed"
        exit 1
    fi
    echo "clang-format check passed!"

    # clang-tidy check
    if ! make check-clang-tidy; then
        echo "ERROR! clang-tidy check failed"
        exit 1
    fi
    echo "clang-tidy check passed!"
fi

View File

@ -15,10 +15,6 @@
#include <string>
namespace milvus {
inline bool
IsVectorType(CDataType dtype) {
return dtype == CDataType::FloatVector || dtype == CDataType::BinaryVector;
}
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<T> || std::is_same_v<T, std::string>>>
inline CDataType

View File

@ -14,18 +14,16 @@ milvus_add_pkg_config("milvus_common")
set(COMMON_SRC
Schema.cpp
SystemProperty.cpp
vector_index_c.cpp
binary_set_c.cpp
init_c.cpp
)
add_library(milvus_common SHARED ${COMMON_SRC})
if ( MSYS )
target_link_libraries(milvus_common
milvus_utils
milvus_config
milvus_log
knowhere
milvus_proto
yaml-cpp
boost_bitset_ext
arrow
@ -33,11 +31,8 @@ if ( MSYS )
)
else()
target_link_libraries(milvus_common
milvus_utils
milvus_config
milvus_log
knowhere
milvus_proto
yaml-cpp
boost_bitset_ext
arrow

View File

@ -26,6 +26,10 @@ const milvus::PkType INVALID_PK; // of std::monostate if not set.
const int64_t START_USER_FIELDID = 100;
const char MAX_LENGTH[] = "max_length";
// const fieldID (rowID and timestamp)
const milvus::FieldId RowFieldID = milvus::FieldId(0);
const milvus::FieldId TimestampFieldID = milvus::FieldId(1);
// fill followed extra info to binlog file
const char ORIGIN_SIZE_KEY[] = "original_size";
const char INDEX_BUILD_ID_KEY[] = "indexBuildID";

View File

@ -23,13 +23,6 @@
#include "common/CDataType.h"
#include "knowhere/index/Index.h"
struct LoadIndexInfo {
int64_t field_id;
CDataType field_type;
std::map<std::string, std::string> index_params;
knowhere::IndexPtr index;
};
// NOTE: field_id can be system field
// NOTE: Refer to common/SystemProperty.cpp for details
// TODO: use arrow to pass field data instead of proto

View File

@ -0,0 +1,34 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "common/Types.h"
namespace milvus {
// Plain aggregate bundling the parameters of one search request.
// No member has a default initializer, so topk_ and round_decimal_ hold
// indeterminate values unless the object is value-initialized or the caller
// assigns them.
// NOTE(review): the per-field meanings below are inferred from the names only;
// confirm against the code that fills and reads this struct.
struct SearchInfo {
// presumably the number of results requested per query
int64_t topk_;
// presumably the number of decimal places distances are rounded to - TODO confirm
int64_t round_decimal_;
// presumably the field the search runs against
FieldId field_id_;
// presumably the distance metric used by the search
MetricType metric_type_;
// presumably index-specific search parameters; Config is a JSON object type
Config search_params_;
};
// Shared-ownership handle to a SearchInfo.
using SearchInfoPtr = std::shared_ptr<SearchInfo>;
} // namespace milvus

View File

@ -31,6 +31,9 @@
#include <variant>
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include <knowhere/index/IndexType.h>
#include "knowhere/common/BinarySet.h"
#include "knowhere/common/Dataset.h"
#include "pb/schema.pb.h"
#include "pb/segcore.pb.h"
#include "pb/plan.pb.h"
@ -109,6 +112,16 @@ using BitsetTypeOpt = std::optional<BitsetType>;
template <typename Type>
using FixedVector = boost::container::vector<Type>;
const FieldId RowFieldID = FieldId(0);
const FieldId TimestampFieldID = FieldId(1);
using Config = nlohmann::json;
using TargetBitmap = boost::dynamic_bitset<>;
using TargetBitmapPtr = std::unique_ptr<TargetBitmap>;
using BinarySet = knowhere::BinarySet;
using DatasetPtr = knowhere::DatasetPtr;
using MetricType = knowhere::MetricType;
// TODO :: type define milvus index type(vector index type and scalar index type)
using IndexType = knowhere::IndexType;
// TODO :: type define milvus index mode, add transfer func from milvus index mode to knowhere index mode
using IndexMode = knowhere::IndexMode;
} // namespace milvus

View File

@ -13,8 +13,43 @@
#include <string>
#include "exceptions/EasyAssert.h"
#include "config/ConfigChunkManager.h"
#include "common/Consts.h"
#include <google/protobuf/text_format.h>
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
namespace milvus {
// Thin wrapper: forwards all three arguments to knowhere::GenDataset and
// returns its result unchanged.
// NOTE(review): presumably nb is the row count, dim the vector dimension and
// xb the raw data buffer - confirm against knowhere's VectorAdapter.
inline DatasetPtr
GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
return knowhere::GenDataset(nb, dim, xb);
}
// Thin wrapper: returns whatever knowhere::GetDatasetDistance yields for the
// given dataset.
// NOTE(review): ownership/lifetime of the returned pointer is decided by
// knowhere - verify before holding on to it.
inline const float*
GetDatasetDistance(const DatasetPtr& dataset) {
return knowhere::GetDatasetDistance(dataset);
}
// Thin wrapper: returns whatever knowhere::GetDatasetIDs yields for the given
// dataset.
// NOTE(review): ownership/lifetime of the returned pointer is decided by
// knowhere - verify before holding on to it.
inline const int64_t*
GetDatasetIDs(const DatasetPtr& dataset) {
return knowhere::GetDatasetIDs(dataset);
}
// Thin wrapper: returns the value reported by knowhere::GetDatasetRows for the
// given dataset.
inline int64_t
GetDatasetRows(const DatasetPtr& dataset) {
return knowhere::GetDatasetRows(dataset);
}
// Thin wrapper: returns the untyped pointer produced by
// knowhere::GetDatasetTensor for the given dataset.
// NOTE(review): element type and lifetime of the buffer are not visible here -
// confirm against knowhere.
inline const void*
GetDatasetTensor(const DatasetPtr& dataset) {
return knowhere::GetDatasetTensor(dataset);
}
// Thin wrapper: returns the value reported by knowhere::GetDatasetDim for the
// given dataset.
inline int64_t
GetDatasetDim(const DatasetPtr& dataset) {
return knowhere::GetDatasetDim(dataset);
}
inline bool
PrefixMatch(const std::string& str, const std::string& prefix) {
auto ret = strncmp(str.c_str(), prefix.c_str(), prefix.length());

View File

@ -15,7 +15,7 @@
// limitations under the License.
#include "knowhere/common/BinarySet.h"
#include "common/vector_index_c.h"
#include "common/binary_set_c.h"
CStatus
NewBinarySet(CBinarySet* c_binary_set) {

View File

@ -0,0 +1,66 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/init_c.h"
#include <string>
#include "config/ConfigChunkManager.h"
// Stores the object-storage endpoint in ChunkMangerConfig.
// `address` must be a non-null, NUL-terminated C string because a std::string
// is constructed from it.
// The string built here is now the one handed to SetAddress; previously it was
// left unused and the raw pointer was converted a second time at the call.
void
MinioAddressInit(const char* address) {
    const std::string minio_address(address);
    milvus::ChunkMangerConfig::SetAddress(minio_address);
}
void
MinioAccessKeyInit(const char* key) {
std::string minio_access_key(key);
milvus::ChunkMangerConfig::SetAccessKey(minio_access_key);
}
// Stores the access secret in ChunkMangerConfig.
// `value` must be a non-null, NUL-terminated C string because a std::string is
// constructed from it.
// The string built here is now the one handed to SetAccessValue; previously it
// was left unused and the raw pointer was converted a second time at the call.
void
MinioAccessValueInit(const char* value) {
    const std::string minio_access_value(value);
    milvus::ChunkMangerConfig::SetAccessValue(minio_access_value);
}
void
MinioSSLInit(bool use_ssl) {
milvus::ChunkMangerConfig::SetUseSSL(use_ssl);
}
void
MinioUseIamInit(bool use_iam) {
milvus::ChunkMangerConfig::SetUseIAM(use_iam);
}
// Stores the bucket name in ChunkMangerConfig.
// `name` must be a non-null, NUL-terminated C string because a std::string is
// constructed from it.
// The string built here is now the one handed to SetBucketName; previously it
// was left unused and the raw pointer was converted a second time at the call.
void
MinioBucketNameInit(const char* name) {
    const std::string bucket_name(name);
    milvus::ChunkMangerConfig::SetBucketName(bucket_name);
}
// Stores the remote root path in ChunkMangerConfig.
// `name` must be a non-null, NUL-terminated C string because a std::string is
// constructed from it.
// The string built here is now the one handed to SetRemoteRootPath; previously
// it was left unused and the raw pointer was converted a second time at the call.
void
MinioRootPathInit(const char* name) {
    const std::string root_path(name);
    milvus::ChunkMangerConfig::SetRemoteRootPath(root_path);
}
void
LocalRootPathInit(const char* root_path) {
std::string local_path_root(root_path);
milvus::ChunkMangerConfig::SetLocalRootPath(local_path_root);
}

View File

@ -0,0 +1,51 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// System headers are included before the linkage block so that their
// declarations are not placed inside extern "C".
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

// C-callable setters for the chunk-manager configuration.
// Every string argument must be a non-null, NUL-terminated C string; the
// implementation builds a std::string from it.

// Sets the object-storage endpoint address.
void
MinioAddressInit(const char* address);

// Sets the object-storage access key.
void
MinioAccessKeyInit(const char* key);

// Sets the object-storage access secret.
void
MinioAccessValueInit(const char* value);

// Sets the SSL switch.
void
MinioSSLInit(bool use_ssl);

// Sets the IAM switch.
void
MinioUseIamInit(bool use_iam);

// Sets the bucket name.
void
MinioBucketNameInit(const char* name);

// Sets the remote root path.
void
MinioRootPathInit(const char* name);

// Sets the local root path.
void
LocalRootPathInit(const char* root_path);

#ifdef __cplusplus
}
#endif

View File

@ -18,51 +18,53 @@
namespace milvus::ChunkMangerConfig {
std::string MINIO_ADDRESS = "localhost:9000"; // NOLINT
std::string MINIO_ACCESS_KEY = "minioadmin"; // NOLINT
std::string MINIO_ACCESS_VALUE = "minioadmin"; // NOLINT
std::string MINIO_BUCKET_NAME = "a-bucket"; // NOLINT
std::string LOCAL_BUCKET_NAME = "/tmp/milvus"; // NOLINT
std::string REMOTE_ADDRESS = "localhost:9000"; // NOLINT
std::string REMOTE_ACCESS_KEY = "minioadmin"; // NOLINT
std::string REMOTE_ACCESS_VALUE = "minioadmin"; // NOLINT
std::string REMOTE_BUCKET_NAME = "a-bucket"; // NOLINT
std::string REMOTE_ROOT_PATH = "files"; // NOLINT
std::string LOCAL_ROOT_PATH = "/tmp/milvus"; // NOLINT
bool MINIO_USE_SSL = false;
bool MINIO_USE_IAM = false;
void
SetAddress(const std::string& address) {
MINIO_ADDRESS = address.c_str();
REMOTE_ADDRESS = address;
}
std::string
GetAddress() {
return MINIO_ADDRESS;
return REMOTE_ADDRESS;
}
void
SetAccessKey(const std::string& access_key) {
MINIO_ACCESS_KEY = access_key.c_str();
REMOTE_ACCESS_KEY = access_key;
}
std::string
GetAccessKey() {
return MINIO_ACCESS_KEY;
return REMOTE_ACCESS_KEY;
}
void
SetAccessValue(const std::string& access_value) {
MINIO_ACCESS_VALUE = access_value.c_str();
REMOTE_ACCESS_VALUE = access_value;
}
std::string
GetAccessValue() {
return MINIO_ACCESS_VALUE;
return REMOTE_ACCESS_VALUE;
}
void
SetBucketName(const std::string& bucket_name) {
MINIO_BUCKET_NAME = bucket_name.c_str();
REMOTE_BUCKET_NAME = bucket_name;
}
std::string
GetBucketName() {
return MINIO_BUCKET_NAME;
return REMOTE_BUCKET_NAME;
}
void
@ -76,13 +78,33 @@ GetUseSSL() {
}
void
SetLocalBucketName(const std::string& path_prefix) {
LOCAL_BUCKET_NAME = path_prefix.c_str();
SetUseIAM(bool use_iam) {
MINIO_USE_IAM = use_iam;
}
bool
GetUseIAM() {
return MINIO_USE_IAM;
}
void
SetRemoteRootPath(const std::string& root_path) {
REMOTE_ROOT_PATH = root_path;
}
std::string
GetLocalBucketName() {
return LOCAL_BUCKET_NAME;
GetRemoteRootPath() {
return REMOTE_ROOT_PATH;
}
void
SetLocalRootPath(const std::string& path_prefix) {
LOCAL_ROOT_PATH = path_prefix;
}
std::string
GetLocalRootPath() {
return LOCAL_ROOT_PATH;
}
} // namespace milvus::ChunkMangerConfig

View File

@ -44,6 +44,12 @@ SetUseSSL(bool use_ssl);
bool
GetUseSSL();
void
SetUseIAM(bool use_iam);
bool
GetUseIAM();
void
SetBucketName(const std::string& bucket_name);
@ -51,9 +57,15 @@ std::string
GetBucketName();
void
SetLocalBucketName(const std::string& path_prefix);
SetRemoteRootPath(const std::string& path_prefix);
std::string
GetLocalBucketName();
GetRemoteRootPath();
void
SetLocalRootPath(const std::string& path_prefix);
std::string
GetLocalRootPath();
} // namespace milvus::ChunkMangerConfig

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
@ -15,24 +20,14 @@
#include <memory>
#include "index/ScalarIndexSort.h"
namespace milvus::scalar {
namespace milvus::index {
// TODO: optimize here.
class BoolIndex : public ScalarIndexSort<bool> {
public:
void
BuildWithDataset(const DatasetPtr& dataset) override {
auto size = knowhere::GetDatasetRows(dataset);
auto data = knowhere::GetDatasetTensor(dataset);
proto::schema::BoolArray arr;
arr.ParseFromArray(data, size);
Build(arr.data().size(), arr.data().data());
}
};
using BoolIndexPtr = std::unique_ptr<BoolIndex>;
//// TODO: optimize here.
class BoolIndex : public ScalarIndexSort<bool> {};
using BoolIndexPtr = std::shared_ptr<BoolIndex>;
inline BoolIndexPtr
CreateBoolIndex() {
return std::make_unique<BoolIndex>();
}
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -9,11 +9,23 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License
aux_source_directory( ${MILVUS_ENGINE_SRC}/index INDEX_FILES )
set(INDEX_FILES
StringIndexMarisa.cpp
Utils.cpp
VectorMemIndex.cpp
IndexFactory.cpp
)
add_library( milvus_index SHARED ${INDEX_FILES} )
if ( BUILD_DISK_ANN STREQUAL "ON" )
set(INDEX_FILES
${INDEX_FILES}
VectorDiskIndex.cpp
)
endif ()
milvus_add_pkg_config("milvus_index")
add_library(milvus_index SHARED ${INDEX_FILES})
# TODO: support compile marisa on windows.
set(PLATFORM_LIBS )
if ( LINUX OR APPLE )
set(PLATFORM_LIBS marisa)
@ -23,9 +35,7 @@ if (MSYS)
endif ()
target_link_libraries(milvus_index
milvus_proto
milvus_exceptions
knowhere
milvus_storage
${PLATFORM_LIBS}
)

View File

@ -1,41 +1,51 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <knowhere/index/Index.h>
#include <knowhere/common/Dataset.h>
#include <boost/dynamic_bitset.hpp>
namespace milvus::scalar {
using Index = knowhere::Index;
using IndexPtr = std::unique_ptr<Index>;
using BinarySet = knowhere::BinarySet;
using Config = knowhere::Config;
using DatasetPtr = knowhere::DatasetPtr;
using TargetBitmap = boost::dynamic_bitset<>;
using TargetBitmapPtr = std::unique_ptr<TargetBitmap>;
#include "common/Types.h"
class IndexBase : public Index {
namespace milvus::index {
class IndexBase {
public:
virtual ~IndexBase() = default;
virtual BinarySet
Serialize(const Config& config) = 0;
virtual void
BuildWithDataset(const DatasetPtr& dataset) = 0;
Load(const BinarySet& binary_set, const Config& config = {}) = 0;
virtual const TargetBitmapPtr
Query(const DatasetPtr& dataset) = 0;
virtual void
BuildWithRawData(size_t n, const void* values, const Config& config = {}) = 0;
virtual size_t
virtual void
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) = 0;
virtual int64_t
Count() = 0;
};
using IndexBasePtr = std::unique_ptr<IndexBase>;
} // namespace milvus::scalar
protected:
IndexType index_type_ = "";
IndexMode index_mode_ = IndexMode::MODE_CPU;
};
using IndexBasePtr = std::unique_ptr<IndexBase>;
} // namespace milvus::index

View File

@ -1,37 +1,41 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "index/ScalarIndexSort.h"
#include "index/StringIndexMarisa.h"
#include "index/IndexType.h"
#include "index/BoolIndex.h"
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
inline ScalarIndexPtr<T>
IndexFactory::CreateIndex(const std::string& index_type) {
IndexFactory::CreateScalarIndex(const IndexType& index_type) {
return CreateScalarIndexSort<T>();
}
template <>
inline ScalarIndexPtr<bool>
IndexFactory::CreateIndex(const std::string& index_type) {
return CreateBoolIndex();
}
// template <>
// inline ScalarIndexPtr<bool>
// IndexFactory::CreateScalarIndex(const IndexType& index_type) {
// return CreateBoolIndex();
//}
template <>
inline ScalarIndexPtr<std::string>
IndexFactory::CreateIndex(const std::string& index_type) {
IndexFactory::CreateScalarIndex(const IndexType& index_type) {
#if defined(__linux__) || defined(__APPLE__)
return CreateStringIndexMarisa();
#else
@ -39,4 +43,4 @@ IndexFactory::CreateIndex(const std::string& index_type) {
#endif
}
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -1,48 +1,94 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "index/IndexFactory.h"
#include "index/ScalarIndexSort.h"
#include "index/StringIndexMarisa.h"
#include "index/VectorMemIndex.h"
#include "index/Utils.h"
#include "index/Meta.h"
namespace milvus::scalar {
#ifdef BUILD_DISK_ANN
#include "index/VectorDiskIndex.h"
#endif
namespace milvus::index {
IndexBasePtr
IndexFactory::CreateIndex(CDataType dtype, const std::string& index_type) {
switch (dtype) {
case Bool:
return CreateIndex<bool>(index_type);
case Int8:
return CreateIndex<int8_t>(index_type);
case Int16:
return CreateIndex<int16_t>(index_type);
case Int32:
return CreateIndex<int32_t>(index_type);
case Int64:
return CreateIndex<int64_t>(index_type);
case Float:
return CreateIndex<float>(index_type);
case Double:
return CreateIndex<double>(index_type);
IndexFactory::CreateIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager) {
if (datatype_is_vector(create_index_info.field_type)) {
return CreateVectorIndex(create_index_info, file_manager);
}
case String:
case VarChar:
return CreateIndex<std::string>(index_type);
return CreateScalarIndex(create_index_info);
}
case None:
case BinaryVector:
case FloatVector:
IndexBasePtr
IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info) {
auto data_type = create_index_info.field_type;
auto index_type = create_index_info.index_type;
switch (data_type) {
// create scalar index
case DataType::BOOL:
return CreateScalarIndex<bool>(index_type);
case DataType::INT8:
return CreateScalarIndex<int8_t>(index_type);
case DataType::INT16:
return CreateScalarIndex<int16_t>(index_type);
case DataType::INT32:
return CreateScalarIndex<int32_t>(index_type);
case DataType::INT64:
return CreateScalarIndex<int64_t>(index_type);
case DataType::FLOAT:
return CreateScalarIndex<float>(index_type);
case DataType::DOUBLE:
return CreateScalarIndex<double>(index_type);
// create string index
case DataType::STRING:
case DataType::VARCHAR:
return CreateScalarIndex<std::string>(index_type);
default:
throw std::invalid_argument(std::string("invalid data type: ") + std::to_string(dtype));
throw std::invalid_argument(std::string("invalid data type to build index: ") +
std::to_string(int(data_type)));
}
}
} // namespace milvus::scalar
// Creates the vector index described by create_index_info.
//
// When the build defines BUILD_DISK_ANN and is_in_disk_list(index_type) holds,
// a VectorDiskAnnIndex<float> is returned for DataType::VECTOR_FLOAT fields and
// std::invalid_argument is thrown for every other field type. In all remaining
// cases a VectorMemIndex is returned; file_manager is only used by the disk
// index.
IndexBasePtr
IndexFactory::CreateVectorIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager) {
    auto index_type = create_index_info.index_type;
    auto metric_type = create_index_info.metric_type;
    auto index_mode = create_index_info.index_mode;

#ifdef BUILD_DISK_ANN
    // create disk index
    if (is_in_disk_list(index_type)) {
        auto data_type = create_index_info.field_type;
        if (data_type != DataType::VECTOR_FLOAT) {
            throw std::invalid_argument(std::string("invalid data type to build disk index: ") +
                                        std::to_string(int(data_type)));
        }
        return std::make_unique<VectorDiskAnnIndex<float>>(index_type, metric_type, index_mode, file_manager);
    }
#endif

    // create mem index
    return std::make_unique<VectorMemIndex>(index_type, metric_type, index_mode);
}
} // namespace milvus::index

View File

@ -1,24 +1,40 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <mutex>
#include <shared_mutex>
#include "common/type_c.h"
#include "config/ConfigChunkManager.h"
#include "index/Index.h"
#include "index/ScalarIndex.h"
#include "index/StringIndex.h"
#include "index/VectorIndex.h"
#include "index/IndexInfo.h"
#include "storage/Types.h"
#include "storage/FileManager.h"
namespace milvus::scalar {
#ifdef BUILD_DISK_ANN
#include "storage/LocalChunkManager.h"
#include "storage/MinioChunkManager.h"
#endif
namespace milvus::index {
class IndexFactory {
public:
@ -32,17 +48,27 @@ class IndexFactory {
GetInstance() {
// thread-safe enough after c++ 11
static IndexFactory instance;
return instance;
}
IndexBasePtr
CreateIndex(CDataType dtype, const std::string& index_type);
CreateIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager);
IndexBasePtr
CreateVectorIndex(const CreateIndexInfo& create_index_info, storage::FileManagerImplPtr file_manager);
IndexBasePtr
CreateScalarIndex(const CreateIndexInfo& create_index_info);
// IndexBasePtr
// CreateIndex(DataType dtype, const IndexType& index_type, const IndexMode& index_mode = IndexMode::MODE_CPU);
private:
template <typename T>
ScalarIndexPtr<T>
CreateIndex(const std::string& index_type);
CreateScalarIndex(const IndexType& index_type);
};
} // namespace milvus::scalar
} // namespace milvus::index
#include "index/IndexFactory-inl.h"

View File

@ -0,0 +1,51 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "common/Types.h"
#include "common/type_c.h"
#include "index/Index.h"
namespace milvus::index {
// Plain data holder that bundles the identifiers, parameters, file list and
// index object associated with one index (purpose inferred from the field
// names — confirm against callers).
// NOTE(review): the integer ids and field_type have no default member
// initializers, so they are indeterminate unless the instance is
// value-initialized or every field is assigned explicitly.
struct LoadIndexInfo {
// Identifiers; presumably locate the indexed field within
// collection/partition/segment — TODO confirm.
int64_t collection_id;
int64_t partition_id;
int64_t segment_id;
int64_t field_id;
// Data type of the indexed field.
DataType field_type;
int64_t index_id;
int64_t index_build_id;
int64_t index_version;
// String key/value index parameters.
std::map<std::string, std::string> index_params;
// Index file names or paths (exact form not visible here).
std::vector<std::string> index_files;
// The index object itself, held through the IndexBasePtr smart-pointer alias.
index::IndexBasePtr index;
};
// Parameters that select which index implementation a factory should create:
// the field's data type, the index type, the metric type and the index mode.
// Only index_mode has a default (IndexMode::MODE_CPU); the other members must
// be set by the caller.
struct CreateIndexInfo {
DataType field_type;
IndexType index_type;
MetricType metric_type;
IndexMode index_mode = IndexMode::MODE_CPU;
};
} // namespace milvus::index

View File

@ -1,15 +1,20 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
struct IndexStructure {
IndexStructure() : a_(0), idx_(0) {
@ -41,4 +46,4 @@ struct IndexStructure {
T a_;
size_t idx_;
};
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -1,16 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
namespace milvus::scalar {
constexpr const char* INDEX_TYPE_MARISA = "marisa";
}

View File

@ -1,17 +1,25 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace milvus::scalar {
#include "knowhere/index/IndexType.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
namespace milvus::index {
constexpr const char* OPERATOR_TYPE = "operator_type";
constexpr const char* RANGE_VALUE = "range_value";
constexpr const char* LOWER_BOUND_VALUE = "lower_bound_value";
@ -19,9 +27,49 @@ constexpr const char* LOWER_BOUND_INCLUSIVE = "lower_bound_inclusive";
constexpr const char* UPPER_BOUND_VALUE = "upper_bound_value";
constexpr const char* UPPER_BOUND_INCLUSIVE = "upper_bound_inclusive";
constexpr const char* PREFIX_VALUE = "prefix_value";
constexpr const char* MARISA_TRIE = "marisa_trie";
// below configurations will be persistent, do not edit them.
constexpr const char* MARISA_TRIE_INDEX = "marisa_trie_index";
constexpr const char* MARISA_STR_IDS = "marisa_trie_str_ids";
constexpr const char* FLAT_STR_INDEX = "flat_str_index";
} // namespace milvus::scalar
// Keys naming the index type, index mode and metric type.
constexpr const char* INDEX_TYPE = "index_type";
constexpr const char* INDEX_MODE = "index_mode";
constexpr const char* METRIC_TYPE = "metric_type";
// scalar index type
constexpr const char* ASCENDING_SORT = "STL_SORT";
constexpr const char* MARISA_TRIE = "Trie";
// index meta
constexpr const char* COLLECTION_ID = "collection_id";
constexpr const char* PARTITION_ID = "partition_id";
constexpr const char* SEGMENT_ID = "segment_id";
constexpr const char* FIELD_ID = "field_id";
constexpr const char* INDEX_BUILD_ID = "index_build_id";
constexpr const char* INDEX_ID = "index_id";
constexpr const char* INDEX_VERSION = "index_version";
// DiskAnn build params
constexpr const char* DISK_ANN_RAW_DATA_PATH = "data_path";
constexpr const char* DISK_ANN_MAX_DEGREE = "max_degree";
constexpr const char* DISK_ANN_BUILD_LIST = "build_list";
constexpr const char* DISK_ANN_SEARCH_DRAM_BUDGET = "search_dram_budget";
constexpr const char* DISK_ANN_BUILD_DRAM_BUDGET = "build_dram_budget";
constexpr const char* DISK_ANN_BUILD_THREAD_NUM = "num_build_thread";
// NOTE(review): the identifier says "PQ" but the value reads "ps_disk_bytes";
// possibly a typo for "pq_disk_bytes". Confirm against the code that consumes
// this key before changing it, since the string is part of runtime behavior.
constexpr const char* DISK_ANN_PQ_BYTES = "ps_disk_bytes";
// DiskAnn prepare params
constexpr const char* DISK_ANN_PREPARE_THREAD_NUM = "num_prepare_thread";
constexpr const char* NUM_ROW_OF_RAW_DATA = "count";
constexpr const char* DISK_ANN_PREPARE_WARM_UP = "warm_up";
constexpr const char* DISK_ANN_PREPARE_USE_BFS_CACHE = "use_bfs_cache";
// DiskAnn query params
constexpr const char* DISK_ANN_QUERY_LIST = "search_list";
constexpr const char* DISK_ANN_QUERY_BEAMWIDTH = "beamwidth";
// DiskAnn config name
// NOTE(review): these three identifiers use Mixed_Case while every other
// constant in this block is UPPER_SNAKE_CASE.
constexpr const char* Disk_ANN_Build_Config = "diskANN_build_config";
constexpr const char* Disk_ANN_Prepare_Config = "diskANN_prepare_config";
constexpr const char* Disk_ANN_Query_Config = "diskANN_query_config";
} // namespace milvus::index

View File

@ -1,21 +1,27 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <string>
#include <vector>
#include "index/Meta.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
const TargetBitmapPtr
ScalarIndex<T>::Query(const DatasetPtr& dataset) {
@ -55,4 +61,67 @@ ScalarIndex<T>::Query(const DatasetPtr& dataset) {
throw std::invalid_argument(std::string("unsupported operator type: " + std::to_string(op)));
}
}
} // namespace milvus::scalar
// Builds a string index from a serialized proto::schema::StringArray.
//
// n      - size in bytes of the serialized buffer (it is passed to
//          ParseFromArray as the buffer length, not used as a row count).
// values - pointer to the serialized StringArray bytes.
// config - unused by this specialization.
//
// Throws std::invalid_argument when the buffer cannot be parsed, instead of
// silently building an index from an empty or partially parsed message.
template <>
inline void
ScalarIndex<std::string>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // TODO :: use arrow
    proto::schema::StringArray arr;
    if (!arr.ParseFromArray(values, static_cast<int>(n))) {
        throw std::invalid_argument("failed to parse StringArray from raw data");
    }
    // TODO :: optimize here. avoid memory copy.
    // The repeated field is copied into a contiguous vector because Build()
    // expects a plain array of std::string.
    std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
    Build(arr.data_size(), vecs.data());
}
// Builds a bool index from a serialized proto::schema::BoolArray.
//
// n      - size in bytes of the serialized buffer (it is passed to
//          ParseFromArray as the buffer length, not used as a row count).
// values - pointer to the serialized BoolArray bytes.
// config - unused by this specialization.
//
// Throws std::invalid_argument when the buffer cannot be parsed, instead of
// silently building an index from an empty or partially parsed message.
template <>
inline void
ScalarIndex<bool>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    proto::schema::BoolArray arr;
    if (!arr.ParseFromArray(values, static_cast<int>(n))) {
        throw std::invalid_argument("failed to parse BoolArray from raw data");
    }
    Build(arr.data_size(), arr.data().data());
}
// Forwards n and the raw buffer, viewed as int8_t elements, to Build().
// config is not used by this specialization.
template <>
inline void
ScalarIndex<int8_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // Build() takes a const pointer, so constness is kept while retyping the buffer.
    const auto* typed_values = static_cast<const int8_t*>(values);
    Build(n, typed_values);
}
// Forwards n and the raw buffer, viewed as int16_t elements, to Build().
// config is not used by this specialization.
template <>
inline void
ScalarIndex<int16_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // Build() takes a const pointer, so constness is kept while retyping the buffer.
    const auto* typed_values = static_cast<const int16_t*>(values);
    Build(n, typed_values);
}
// Forwards n and the raw buffer, viewed as int32_t elements, to Build().
// config is not used by this specialization.
template <>
inline void
ScalarIndex<int32_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // Build() takes a const pointer, so constness is kept while retyping the buffer.
    const auto* typed_values = static_cast<const int32_t*>(values);
    Build(n, typed_values);
}
// Forwards n and the raw buffer, viewed as int64_t elements, to Build().
// config is not used by this specialization.
template <>
inline void
ScalarIndex<int64_t>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // Build() takes a const pointer, so constness is kept while retyping the buffer.
    const auto* typed_values = static_cast<const int64_t*>(values);
    Build(n, typed_values);
}
// Forwards n and the raw buffer, viewed as float elements, to Build().
// config is not used by this specialization.
template <>
inline void
ScalarIndex<float>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // Build() takes a const pointer, so constness is kept while retyping the buffer.
    const auto* typed_values = static_cast<const float*>(values);
    Build(n, typed_values);
}
// Forwards n and the raw buffer, viewed as double elements, to Build().
// config is not used by this specialization.
template <>
inline void
ScalarIndex<double>::BuildWithRawData(size_t n, const void* values, const Config& config) {
    // Build() takes a const pointer, so constness is kept while retyping the buffer.
    const auto* typed_values = static_cast<const double*>(values);
    Build(n, typed_values);
}
} // namespace milvus::index

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
@ -17,11 +22,21 @@
#include <boost/dynamic_bitset.hpp>
#include "index/Index.h"
#include "common/Types.h"
#include "exceptions/EasyAssert.h"
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
class ScalarIndex : public IndexBase {
public:
void
BuildWithRawData(size_t n, const void* values, const Config& config = {}) override;
void
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override {
PanicInfo("scalar index don't support build index with dataset");
};
public:
virtual void
Build(size_t n, const T* values) = 0;
@ -41,13 +56,16 @@ class ScalarIndex : public IndexBase {
virtual T
Reverse_Lookup(size_t offset) const = 0;
const TargetBitmapPtr
Query(const DatasetPtr& dataset) override;
virtual const TargetBitmapPtr
Query(const DatasetPtr& dataset);
virtual int64_t
Size() = 0;
};
template <typename T>
using ScalarIndexPtr = std::unique_ptr<ScalarIndex<T>>;
} // namespace milvus::scalar
} // namespace milvus::index
#include "index/ScalarIndex-inl.h"

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <memory>
@ -19,7 +24,7 @@
#include "Meta.h"
#include "common/Utils.h"
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
inline ScalarIndexSort<T>::ScalarIndexSort() : is_built_(false), data_() {
@ -27,38 +32,24 @@ inline ScalarIndexSort<T>::ScalarIndexSort() : is_built_(false), data_() {
template <typename T>
inline ScalarIndexSort<T>::ScalarIndexSort(const size_t n, const T* values) : is_built_(false) {
ScalarIndexSort<T>::Build(n, values);
}
template <typename T>
inline void
ScalarIndexSort<T>::BuildWithDataset(const DatasetPtr& dataset) {
auto size = knowhere::GetDatasetRows(dataset);
auto data = knowhere::GetDatasetTensor(dataset);
Build(size, reinterpret_cast<const T*>(data));
ScalarIndexSort<T>::BuildWithDataset(n, values);
}
template <typename T>
inline void
ScalarIndexSort<T>::Build(const size_t n, const T* values) {
if (is_built_)
return;
if (n == 0) {
// todo: throw an exception
throw std::invalid_argument("ScalarIndexSort cannot build null values!");
}
data_.reserve(n);
idx_to_offsets_.resize(n);
T* p = const_cast<T*>(values);
for (size_t i = 0; i < n; ++i) {
data_.emplace_back(IndexStructure(*p++, i));
}
build();
}
template <typename T>
inline void
ScalarIndexSort<T>::build() {
if (is_built_)
return;
if (data_.size() == 0) {
// todo: throw an exception
throw std::invalid_argument("ScalarIndexSort cannot build null values!");
}
std::sort(data_.begin(), data_.end());
for (size_t i = 0; i < data_.size(); ++i) {
idx_to_offsets_[data_[i].idx_] = i;
@ -87,7 +78,7 @@ ScalarIndexSort<T>::Serialize(const Config& config) {
template <typename T>
inline void
ScalarIndexSort<T>::Load(const BinarySet& index_binary) {
ScalarIndexSort<T>::Load(const BinarySet& index_binary, const Config& config) {
size_t index_size;
auto index_length = index_binary.GetByName("index_length");
memcpy(&index_size, index_length->data.get(), (size_t)index_length->size);
@ -206,5 +197,4 @@ ScalarIndexSort<T>::Reverse_Lookup(size_t idx) const {
auto offset = idx_to_offsets_[idx];
return data_[offset].a_;
}
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
@ -20,7 +25,7 @@
#include "index/IndexStructure.h"
#include "index/ScalarIndex.h"
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
class ScalarIndexSort : public ScalarIndex<T> {
@ -32,12 +37,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
Serialize(const Config& config) override;
void
Load(const BinarySet& index_binary) override;
Load(const BinarySet& index_binary, const Config& config = {}) override;
void
BuildWithDataset(const DatasetPtr& dataset) override;
size_t
int64_t
Count() override {
return data_.size();
}
@ -45,9 +47,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
void
Build(size_t n, const T* values) override;
void
build();
const TargetBitmapPtr
In(size_t n, const T* values) override;
@ -63,17 +62,17 @@ class ScalarIndexSort : public ScalarIndex<T> {
T
Reverse_Lookup(size_t offset) const override;
int64_t
Size() override {
return (int64_t)data_.size();
}
public:
const std::vector<IndexStructure<T>>&
GetData() {
return data_;
}
int64_t
Size() override {
return (int64_t)data_.size();
}
bool
IsBuilt() const {
return is_built_;
@ -81,6 +80,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
private:
bool is_built_;
Config config_;
std::vector<size_t> idx_to_offsets_; // used to retrieve.
std::vector<IndexStructure<T>> data_;
};
@ -88,14 +88,14 @@ class ScalarIndexSort : public ScalarIndex<T> {
template <typename T>
using ScalarIndexSortPtr = std::unique_ptr<ScalarIndexSort<T>>;
} // namespace milvus::scalar
} // namespace milvus::index
#include "index/ScalarIndexSort-inl.h"
namespace milvus::scalar {
namespace milvus::index {
template <typename T>
inline ScalarIndexSortPtr<T>
CreateScalarIndexSort() {
return std::make_unique<ScalarIndexSort<T>>();
}
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
@ -18,30 +23,10 @@
#include "index/Meta.h"
#include <pb/schema.pb.h>
namespace milvus::scalar {
namespace milvus::index {
class StringIndex : public ScalarIndex<std::string> {
public:
void
BuildWithDataset(const DatasetPtr& dataset) override {
auto size = knowhere::GetDatasetRows(dataset);
auto data = knowhere::GetDatasetTensor(dataset);
proto::schema::StringArray arr;
arr.ParseFromArray(data, size);
{
// TODO: optimize here. avoid memory copy.
std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
Build(arr.data().size(), vecs.data());
}
{
// TODO: test this way.
// auto strs = (const std::string*)arr.data().data();
// Build(arr.data().size(), strs);
}
}
const TargetBitmapPtr
Query(const DatasetPtr& dataset) override {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
@ -56,4 +41,4 @@ class StringIndex : public ScalarIndex<std::string> {
PrefixMatch(std::string prefix) = 0;
};
using StringIndexPtr = std::unique_ptr<StringIndex>;
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
@ -22,7 +27,7 @@
#include "index/Index.h"
#include "common/Utils.h"
namespace milvus::scalar {
namespace milvus::index {
#if defined(__linux__) || defined(__APPLE__)
@ -86,7 +91,7 @@ StringIndexMarisa::Serialize(const Config& config) {
}
void
StringIndexMarisa::Load(const BinarySet& set) {
StringIndexMarisa::Load(const BinarySet& set, const Config& config) {
knowhere::Assemble(const_cast<BinarySet&>(set));
auto uuid = boost::uuids::random_generator()();
@ -289,4 +294,4 @@ StringIndexMarisa::Reverse_Lookup(size_t offset) const {
#endif
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
@ -20,7 +25,7 @@
#include <map>
#include <memory>
namespace milvus::scalar {
namespace milvus::index {
class StringIndexMarisa : public StringIndex {
public:
@ -33,9 +38,9 @@ class StringIndexMarisa : public StringIndex {
Serialize(const Config& config) override;
void
Load(const BinarySet& set) override;
Load(const BinarySet& set, const Config& config = {}) override;
size_t
int64_t
Count() override {
return str_ids_.size();
}
@ -76,6 +81,7 @@ class StringIndexMarisa : public StringIndex {
prefix_match(const std::string& prefix);
private:
Config config_;
marisa::Trie trie_;
std::vector<size_t> str_ids_; // used to retrieve.
std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
@ -89,6 +95,6 @@ CreateStringIndexMarisa() {
return std::make_unique<StringIndexMarisa>();
}
} // namespace milvus::scalar
} // namespace milvus::index
#endif

View File

@ -1,13 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <vector>
@ -18,30 +23,10 @@
#include "index/StringIndex.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
namespace milvus::scalar {
namespace milvus::index {
// TODO: should inherit from StringIndex?
class StringIndexSort : public ScalarIndexSort<std::string> {
public:
void
BuildWithDataset(const DatasetPtr& dataset) override {
auto size = knowhere::GetDatasetRows(dataset);
auto data = knowhere::GetDatasetTensor(dataset);
proto::schema::StringArray arr;
arr.ParseFromArray(data, size);
{
// TODO: optimize here. avoid memory copy.
std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
Build(arr.data().size(), vecs.data());
}
{
// TODO: test this way.
// auto strs = (const std::string*)arr.data().data();
// Build(arr.data().size(), strs);
}
}
const TargetBitmapPtr
Query(const DatasetPtr& dataset) override {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
@ -70,4 +55,4 @@ inline StringIndexSortPtr
CreateStringIndexSort() {
return std::make_unique<StringIndexSort>();
}
} // namespace milvus::scalar
} // namespace milvus::index

View File

@ -0,0 +1,205 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/stat.h>
#include <algorithm>
#include <functional>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>
#include "index/Utils.h"
#include "index/Meta.h"
#include "pb/index_cgo_msg.pb.h"
#include <google/protobuf/text_format.h>
#include "exceptions/EasyAssert.h"
namespace milvus::index {
// Returns the size in bytes of the file referred to by the open descriptor `fd`.
//
// Throws std::runtime_error when fstat() fails (for example on an invalid
// descriptor). Previously the return value of fstat() was ignored, so a failure
// resulted in reading an uninitialized `struct stat`.
size_t
get_file_size(int fd) {
    struct stat file_info {};
    if (fstat(fd, &file_info) != 0) {
        throw std::runtime_error(std::string("failed to stat file descriptor: ") + std::strerror(errno));
    }
    return static_cast<size_t>(file_info.st_size);
}
// Returns a copy of the list of index types consulted by is_in_nm_list().
// The list is built once and currently holds only INDEX_FAISS_IVFFLAT.
std::vector<IndexType>
NM_List() {
    static const std::vector<IndexType> nm_index_types = {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT};
    return nm_index_types;
}
std::vector<IndexType>
BIN_List() {
static std::vector<IndexType> ret{
knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
};
return ret;
}
// Returns a copy of the list of index types consulted by is_in_disk_list().
// The list is built once and currently holds only INDEX_DISKANN.
std::vector<IndexType>
DISK_LIST() {
    static const std::vector<IndexType> disk_index_types = {knowhere::IndexEnum::INDEX_DISKANN};
    return disk_index_types;
}
std::vector<std::tuple<IndexType, MetricType>>
unsupported_index_combinations() {
static std::vector<std::tuple<IndexType, MetricType>> ret{
std::make_tuple(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::L2),
};
return ret;
}
// True when `index_type` appears in the list returned by BIN_List().
bool
is_in_bin_list(const IndexType& index_type) {
    const bool listed = is_in_list<IndexType>(index_type, BIN_List);
    return listed;
}
// True when `index_type` appears in the list returned by NM_List().
bool
is_in_nm_list(const IndexType& index_type) {
    const bool listed = is_in_list<IndexType>(index_type, NM_List);
    return listed;
}
// True when `index_type` appears in the list returned by DISK_LIST().
bool
is_in_disk_list(const IndexType& index_type) {
    const bool listed = is_in_list<IndexType>(index_type, DISK_LIST);
    return listed;
}
bool
is_unsupported(const IndexType& index_type, const MetricType& metric_type) {
return is_in_list<std::tuple<IndexType, MetricType>>(std::make_tuple(index_type, metric_type),
unsupported_index_combinations);
}
// Reports whether `cfg` has an entry stored under `key`.
bool
CheckKeyInConfig(const Config& cfg, const std::string& key) {
    const bool present = cfg.contains(key);
    return present;
}
// Fills `params` by parsing `str` with protobuf's TextFormat parser.
// Asserts (via AssertInfo) when the parser reports failure.
void
ParseFromString(google::protobuf::Message& params, const std::string& str) {
    const bool ok = google::protobuf::TextFormat::ParseFromString(str, &params);
    AssertInfo(ok, "failed to parse params from string");
}
// Returns the vector dimension stored in `config` under the key "dim".
//
// The entry is read as a std::string and converted with std::stoi, so it must
// still be in string form when this is called. Asserts (AssertInfo) when the
// key is missing.
int64_t
GetDimFromConfig(const Config& config) {
auto dimension = GetValueFromConfig<std::string>(config, "dim");
AssertInfo(dimension.has_value(), "dimension not exist in config");
return (std::stoi(dimension.value()));
}
// Returns the string stored in `config` under the key "metric_type".
// Asserts (AssertInfo) when the key is missing.
std::string
GetMetricTypeFromConfig(const Config& config) {
auto metric_type = GetValueFromConfig<std::string>(config, "metric_type");
AssertInfo(metric_type.has_value(), "metric_type not exist in config");
return metric_type.value();
}
// Returns the string stored in `config` under the key "index_type".
// Asserts (AssertInfo) when the key is missing.
std::string
GetIndexTypeFromConfig(const Config& config) {
auto index_type = GetValueFromConfig<std::string>(config, "index_type");
AssertInfo(index_type.has_value(), "index_type not exist in config");
return index_type.value();
}
// Resolves the index mode requested by `config`.
// When the INDEX_MODE key is absent the mode defaults to MODE_CPU; otherwise
// the stored string is translated by GetIndexMode().
IndexMode
GetIndexModeFromConfig(const Config& config) {
    const auto mode_name = GetValueFromConfig<std::string>(config, INDEX_MODE);
    if (!mode_name.has_value()) {
        return knowhere::IndexMode::MODE_CPU;
    }
    return GetIndexMode(mode_name.value());
}
// Translates the textual index mode into the IndexMode enum.
//
//   "CPU" -> IndexMode::MODE_CPU
//   "GPU" -> IndexMode::MODE_GPU
//
// Any other value is rejected with PanicInfo.
//
// The previous implementation tested `compare(...) != 0`, which inverted the
// mapping: "CPU" resolved to MODE_GPU, every other string (including "GPU")
// resolved to MODE_CPU, and the panic for unsupported modes was unreachable.
IndexMode
GetIndexMode(const std::string index_mode) {
    if (index_mode == "CPU") {
        return IndexMode::MODE_CPU;
    }
    if (index_mode == "GPU") {
        return IndexMode::MODE_GPU;
    }
    PanicInfo("unsupported index mode");
}
// TODO :: too ugly
// Builds a storage::FieldDataMeta from the collection, partition, segment and
// field ids stored in `config`.
//
// Each id is read as a std::string (keys index::COLLECTION_ID, PARTITION_ID,
// SEGMENT_ID, FIELD_ID) and converted with std::stol. Asserts (AssertInfo) on
// the first key that is missing.
storage::FieldDataMeta
GetFieldDataMetaFromConfig(const Config& config) {
storage::FieldDataMeta field_data_meta;
// set collection id
auto collection_id = index::GetValueFromConfig<std::string>(config, index::COLLECTION_ID);
AssertInfo(collection_id.has_value(), "collection id not exist in index config");
field_data_meta.collection_id = std::stol(collection_id.value());
// set partition id
auto partition_id = index::GetValueFromConfig<std::string>(config, index::PARTITION_ID);
AssertInfo(partition_id.has_value(), "partition id not exist in index config");
field_data_meta.partition_id = std::stol(partition_id.value());
// set segment id
auto segment_id = index::GetValueFromConfig<std::string>(config, index::SEGMENT_ID);
AssertInfo(segment_id.has_value(), "segment id not exist in index config");
field_data_meta.segment_id = std::stol(segment_id.value());
// set field id
auto field_id = index::GetValueFromConfig<std::string>(config, index::FIELD_ID);
AssertInfo(field_id.has_value(), "field id not exist in index config");
field_data_meta.field_id = std::stol(field_id.value());
return field_data_meta;
}
// Builds a storage::IndexMeta from the segment id, field id, index version and
// build id stored in `config`.
//
// Each value is read as a std::string (keys index::SEGMENT_ID, FIELD_ID,
// INDEX_VERSION, INDEX_BUILD_ID) and converted with std::stol. Asserts
// (AssertInfo) on the first key that is missing.
storage::IndexMeta
GetIndexMetaFromConfig(const Config& config) {
storage::IndexMeta index_meta;
// set segment id
auto segment_id = index::GetValueFromConfig<std::string>(config, index::SEGMENT_ID);
AssertInfo(segment_id.has_value(), "segment id not exist in index config");
index_meta.segment_id = std::stol(segment_id.value());
// set field id
auto field_id = index::GetValueFromConfig<std::string>(config, index::FIELD_ID);
AssertInfo(field_id.has_value(), "field id not exist in index config");
index_meta.field_id = std::stol(field_id.value());
// set index version
auto index_version = index::GetValueFromConfig<std::string>(config, index::INDEX_VERSION);
AssertInfo(index_version.has_value(), "index_version id not exist in index config");
index_meta.index_version = std::stol(index_version.value());
// set build id (read from the INDEX_BUILD_ID key)
auto build_id = index::GetValueFromConfig<std::string>(config, index::INDEX_BUILD_ID);
AssertInfo(build_id.has_value(), "build id not exist in index config");
index_meta.build_id = std::stol(build_id.value());
return index_meta;
}
// Copies every (name, value) pair of `index_params` into a fresh Config,
// keeping the values as strings.
Config
ParseConfigFromIndexParams(const std::map<std::string, std::string>& index_params) {
    Config config;
    for (const auto& [param_name, param_value] : index_params) {
        config[param_name] = param_value;
    }
    return config;
}
} // namespace milvus::index

View File

@ -1,13 +1,20 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include <stdio.h>
@ -15,14 +22,105 @@
#include <iostream>
#include <fcntl.h>
#include <sys/stat.h>
#include <tuple>
#include <map>
#include <string>
namespace milvus::scalar {
#include "common/Types.h"
#include "index/IndexInfo.h"
#include "storage/Types.h"
namespace milvus::index {
size_t
get_file_size(int fd) {
struct stat s;
fstat(fd, &s);
return s.st_size;
get_file_size(int fd);
std::vector<IndexType>
NM_List();
std::vector<IndexType>
BIN_List();
std::vector<std::tuple<IndexType, MetricType>>
unsupported_index_combinations();
// Returns true when `t` compares equal to any element of the vector produced
// by `list_func`. The provider is invoked exactly once per call.
template <typename T>
inline bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
    const auto candidates = list_func();
    for (const auto& candidate : candidates) {
        if (candidate == t) {
            return true;
        }
    }
    return false;
}
} // namespace milvus::scalar
bool
is_in_bin_list(const IndexType& index_type);
bool
is_in_nm_list(const IndexType& index_type);
bool
is_in_disk_list(const IndexType& index_type);
bool
is_unsupported(const IndexType& index_type, const MetricType& metric_type);
bool
CheckKeyInConfig(const Config& cfg, const std::string& key);
void
ParseFromString(google::protobuf::Message& params, const std::string& str);
// Normalizes the entry stored under `key` in `conf`.
//
// - Key present: the stored value is passed through `fn` and the result is
//   written back under the same key.
// - Key absent and `default_v` holds a value: that default is stored.
// - Key absent and no default: `conf` is left untouched.
template <typename T>
void inline CheckParameter(Config& conf,
                           const std::string& key,
                           std::function<T(std::string)> fn,
                           std::optional<T> default_v) {
    if (conf.contains(key)) {
        auto stored = conf[key];
        conf[key] = fn(stored);
        return;
    }
    if (default_v.has_value()) {
        conf[key] = default_v.value();
    }
}
// Looks up `key` in `cfg` and returns its value converted to T, or
// std::nullopt when the key is not present.
template <typename T>
inline std::optional<T>
GetValueFromConfig(const Config& cfg, const std::string& key) {
    if (!cfg.contains(key)) {
        return std::nullopt;
    }
    return cfg.at(key).get<T>();
}
// Stores `value` in `cfg` under `key`, replacing any existing entry.
template <typename T>
inline void
SetValueToConfig(Config& cfg, const std::string& key, const T value) {
    auto& slot = cfg[key];
    slot = value;
}
int64_t
GetDimFromConfig(const Config& config);
std::string
GetMetricTypeFromConfig(const Config& config);
std::string
GetIndexTypeFromConfig(const Config& config);
IndexMode
GetIndexModeFromConfig(const Config& config);
IndexMode
GetIndexMode(const std::string index_mode);
storage::FieldDataMeta
GetFieldDataMetaFromConfig(const Config& config);
storage::IndexMeta
GetIndexMetaFromConfig(const Config& config);
Config
ParseConfigFromIndexParams(const std::map<std::string, std::string>& index_params);
} // namespace milvus::index

View File

@ -0,0 +1,263 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "index/VectorDiskIndex.h"
#include "index/Meta.h"
#include "index/Utils.h"
#include "storage/LocalChunkManager.h"
#include "config/ConfigKnowhere.h"
#include "storage/Util.h"
#include "common/Utils.h"
namespace milvus::index {
#ifdef BUILD_DISK_ANN
// Creates a DiskANN-backed vector index.
//
// `file_manager` must point to a storage::DiskFileManagerImpl; it is
// down-cast and kept in `file_manager_`. The local index directory reported by
// the file manager must not exist yet; it is created here and handed to the
// underlying knowhere::IndexDiskANN together with the metric type and the
// original file manager pointer.
//
// Asserts (AssertInfo) when the file manager has the wrong dynamic type (or is
// null) and when the local index directory already exists.
template <typename T>
VectorDiskAnnIndex<T>::VectorDiskAnnIndex(const IndexType& index_type,
                                          const MetricType& metric_type,
                                          const IndexMode& index_mode,
                                          storage::FileManagerImplPtr file_manager)
    : VectorIndex(index_type, index_mode, metric_type) {
    file_manager_ = std::dynamic_pointer_cast<storage::DiskFileManagerImpl>(file_manager);
    // dynamic_pointer_cast yields nullptr on a type mismatch; fail loudly
    // instead of dereferencing a null pointer below.
    AssertInfo(file_manager_ != nullptr, "file manager of disk ann index is not a DiskFileManagerImpl");
    auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
    auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
    AssertInfo(!local_chunk_manager.Exist(local_index_path_prefix),
               "local index path " + local_index_path_prefix + " has been exist");
    local_chunk_manager.CreateDir(local_index_path_prefix);
    index_ = std::make_unique<knowhere::IndexDiskANN<T>>(local_index_path_prefix, metric_type, file_manager);
}
// Prepares the DiskANN index for searching.
//
// `binary_set` is ignored. The index files named by the "index_files" entry of
// `config` are cached to local disk through the file manager, the underlying
// index is prepared with the parameters derived from `config`, and the
// dimension reported by the index is recorded. Asserts when "index_files" is
// missing.
template <typename T>
void
VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */, const Config& config) {
    auto prepare_params = parse_prepare_config(config);
    knowhere::Config knowhere_cfg;
    knowhere::DiskANNPrepareConfig::Set(knowhere_cfg, prepare_params);

    auto index_files = GetValueFromConfig<std::vector<std::string>>(config, "index_files");
    AssertInfo(index_files.has_value(), "index file paths is empty when load disk ann index data");
    file_manager_->CacheIndexToDisk(index_files.value());

    index_->Prepare(knowhere_cfg);
    SetDim(index_->Dim());
}
// Builds the DiskANN index from an in-memory dataset.
//
// The dataset is first written to a local raw-data file laid out as
//   [uint32 row count][uint32 dimension][row count * dimension floats]
// whose path is passed to the builder through `build_config.data_path`. After
// the build, the segment's local raw-data directory is removed.
//
// The payload size is computed in size_t: multiplying the two uint32_t values
// directly would wrap around for datasets of 2^32 or more elements and write a
// truncated file.
template <typename T>
void
VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset, const Config& config) {
    auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
    auto build_config = parse_build_config(config);
    auto segment_id = file_manager_->GetFileDataMeta().segment_id;
    auto field_id = file_manager_->GetFileDataMeta().field_id;
    auto local_data_path = storage::GenFieldRawDataPathPrefix(segment_id, field_id) + "raw_data";
    build_config.data_path = local_data_path;
    if (!local_chunk_manager.Exist(local_data_path)) {
        local_chunk_manager.CreateFile(local_data_path);
    }

    // Header: row count followed by dimension, both as 32-bit unsigned values.
    int64_t offset = 0;
    auto num = uint32_t(milvus::GetDatasetRows(dataset));
    local_chunk_manager.Write(local_data_path, offset, &num, sizeof(num));
    offset += sizeof(num);
    auto dim = uint32_t(milvus::GetDatasetDim(dataset));
    local_chunk_manager.Write(local_data_path, offset, &dim, sizeof(dim));
    offset += sizeof(dim);

    // Payload: widen before multiplying so the byte count cannot wrap at 32 bits.
    auto data_size = static_cast<size_t>(num) * dim * sizeof(float);
    auto raw_data = const_cast<void*>(milvus::GetDatasetTensor(dataset));
    local_chunk_manager.Write(local_data_path, offset, raw_data, data_size);

    knowhere::Config cfg;
    knowhere::DiskANNBuildConfig::Set(cfg, build_config);
    // The builder is given no dataset; the data path was set in build_config above.
    index_->BuildAll(nullptr, cfg);
    local_chunk_manager.RemoveDir(storage::GetSegmentRawDataPathPrefix(segment_id));
    // TODO ::
    // SetDim(index_->Dim());
}
// Runs a top-k search against the DiskANN index.
//
// - The metric type of `search_info` must match the index's metric type.
// - The DISK_ANN_QUERY_LIST search parameter is mandatory and must satisfy
//   topk < search_list < min(topk * 10, 65535).
// - DISK_ANN_QUERY_BEAMWIDTH is optional and defaults to 16.
//
// Returns a SearchResult holding num_queries * topk ids and distances copied
// from the knowhere result. When `round_decimal_` is not -1 the distances are
// rounded to that many decimal places first.
template <typename T>
std::unique_ptr<SearchResult>
VectorDiskAnnIndex<T>::Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) {
AssertInfo(GetMetricType() == search_info.metric_type_,
"Metric type of field index isn't the same with search info");
auto num_queries = milvus::GetDatasetRows(dataset);
auto topk = search_info.topk_;
knowhere::DiskANNQueryConfig query_config;
query_config.k = topk;
// set search list
auto search_list_size = GetValueFromConfig<uint32_t>(search_info.search_params_, DISK_ANN_QUERY_LIST);
AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_QUERY_LIST) + "is empty");
query_config.search_list_size = search_list_size.value();
// both bounds are exclusive
AssertInfo(query_config.search_list_size > topk, "search_list should be greater than topk");
AssertInfo(query_config.search_list_size < std::min(uint32_t(topk * 10), uint32_t(65535)),
"search_list should less than min(topk*10, 65535)");
// set beamwidth
query_config.beamwidth = 16;
auto beam_width = GetValueFromConfig<uint32_t>(search_info.search_params_, DISK_ANN_QUERY_BEAMWIDTH);
if (beam_width.has_value()) {
query_config.beamwidth = beam_width.value();
}
knowhere::Config cfg;
knowhere::DiskANNQueryConfig::Set(cfg, query_config);
auto final_result = index_->Query(dataset, cfg, bitset);
auto ids = milvus::GetDatasetIDs(final_result);
float* distances = (float*)milvus::GetDatasetDistance(final_result);
auto round_decimal = search_info.round_decimal_;
auto total_num = num_queries * topk;
// -1 disables rounding; otherwise distances are rounded in place, in the
// buffer obtained from final_result, before being copied out below
if (round_decimal != -1) {
const float multiplier = pow(10.0, round_decimal);
for (int i = 0; i < total_num; i++) {
distances[i] = round(distances[i] * multiplier) / multiplier;
}
}
auto result = std::make_unique<SearchResult>();
result->seg_offsets_.resize(total_num);
result->distances_.resize(total_num);
result->total_nq_ = num_queries;
result->unity_topK_ = topk;
std::copy_n(ids, total_num, result->seg_offsets_.data());
std::copy_n(distances, total_num, result->distances_.data());
return result;
}
// Removes the local index directory and the local raw-data directory that the
// file manager reports for this index.
template <typename T>
void
VectorDiskAnnIndex<T>::CleanLocalData() {
    auto& chunk_manager = storage::LocalChunkManager::GetInstance();

    const auto index_dir = file_manager_->GetLocalIndexObjectPrefix();
    chunk_manager.RemoveDir(index_dir);

    const auto raw_data_dir = file_manager_->GetLocalRawDataObjectPrefix();
    chunk_manager.RemoveDir(raw_data_dir);
}
// Translates the generic index `config` into a knowhere::DiskANNBuildConfig.
//
// A copy of `config` is first normalized by parse_config(), which converts
// string values to numbers and fills in defaults. Every build parameter is then
// required to be present; a missing one trips an AssertInfo.
template <typename T>
knowhere::DiskANNBuildConfig
VectorDiskAnnIndex<T>::parse_build_config(const Config& config) {
Config build_config = config;
parse_config(build_config);
// set disk ann build config
knowhere::DiskANNBuildConfig build_disk_ann_config;
// set max degree
auto max_degree = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_MAX_DEGREE);
AssertInfo(max_degree.has_value(), "param " + std::string(DISK_ANN_MAX_DEGREE) + "is empty");
build_disk_ann_config.max_degree = max_degree.value();
// set build list
auto search_list_size = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_BUILD_LIST);
AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_BUILD_LIST) + "is empty");
build_disk_ann_config.search_list_size = search_list_size.value();
// set search dram budget (stored in the pq_code_budget_gb field)
auto search_dram_budget_gb = GetValueFromConfig<float>(build_config, DISK_ANN_SEARCH_DRAM_BUDGET);
AssertInfo(search_dram_budget_gb.has_value(), "param " + std::string(DISK_ANN_SEARCH_DRAM_BUDGET) + "is empty");
build_disk_ann_config.pq_code_budget_gb = search_dram_budget_gb.value();
// set build dram budget
auto build_dram_budget_gb = GetValueFromConfig<float>(build_config, DISK_ANN_BUILD_DRAM_BUDGET);
AssertInfo(build_dram_budget_gb.has_value(), "param " + std::string(DISK_ANN_BUILD_DRAM_BUDGET) + "is empty");
build_disk_ann_config.build_dram_budget_gb = build_dram_budget_gb.value();
// set num build thread
auto num_threads = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_BUILD_THREAD_NUM);
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
build_disk_ann_config.num_threads = num_threads.value();
// set pq bytes (stored in the disk_pq_dims field)
auto pq_disk_bytes = GetValueFromConfig<uint32_t>(build_config, DISK_ANN_PQ_BYTES);
AssertInfo(pq_disk_bytes.has_value(), "param " + std::string(DISK_ANN_PQ_BYTES) + "is empty");
build_disk_ann_config.disk_pq_dims = pq_disk_bytes.value();
return build_disk_ann_config;
}
// Translates the generic index `config` into a knowhere::DiskANNPrepareConfig
// used when loading the index.
//
// Warm-up and the BFS cache are disabled. The search cache budget (in GB) is
// derived from the dimension, the max degree and the number of raw-data rows.
// Asserts when a required parameter is missing.
template <typename T>
knowhere::DiskANNPrepareConfig
VectorDiskAnnIndex<T>::parse_prepare_config(const Config& config) {
Config prepare_config = config;
// The dimension must be read before parse_config(): GetDimFromConfig reads
// "dim" as a string, while parse_config() runs knowhere::meta::DIM through an
// integer conversion (presumably the same key - confirm).
auto dim = GetDimFromConfig(prepare_config);
parse_config(prepare_config);
knowhere::DiskANNPrepareConfig prepare_disk_ann_config;
prepare_disk_ann_config.warm_up = false;
prepare_disk_ann_config.use_bfs_cache = false;
// set prepare thread num
auto num_threads = GetValueFromConfig<uint32_t>(prepare_config, DISK_ANN_PREPARE_THREAD_NUM);
AssertInfo(num_threads.has_value(), "param " + std::string(DISK_ANN_PREPARE_THREAD_NUM) + "is empty");
prepare_disk_ann_config.num_threads = num_threads.value();
// get max degree
auto max_degree = GetValueFromConfig<uint32_t>(prepare_config, DISK_ANN_MAX_DEGREE);
AssertInfo(max_degree.has_value(), "param " + std::string(DISK_ANN_MAX_DEGREE) + "is empty");
// set prepare cached node
auto num_rows = GetValueFromConfig<int>(prepare_config, NUM_ROW_OF_RAW_DATA);
AssertInfo(num_rows.has_value(), "param " + std::string(NUM_ROW_OF_RAW_DATA) + "is empty");
// budget in GB = (dim + max_degree + 1) * sizeof(float) * 1.2 * rows * 0.1 / 1024^3
prepare_disk_ann_config.search_cache_budget_gb =
(dim + max_degree.value() + 1) * sizeof(float) * 1.2 * num_rows.value() * 0.1 / 1024 / 1024 / 1024;
return prepare_disk_ann_config;
}
// Normalizes the DiskANN-related entries of `config` in place: values that are
// present are converted from strings to numbers, and the parameters that have
// a default get it when absent (see CheckParameter).
template <typename T>
void
VectorDiskAnnIndex<T>::parse_config(Config& config) {
    auto stoi_closure = [](const std::string& s) -> uint32_t { return std::stoi(s); };
    auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };

    // Thin wrappers so every parameter reads as "key, default".
    auto as_int = [&](const std::string& key, std::optional<int> default_v) {
        CheckParameter<int>(config, key, stoi_closure, default_v);
    };
    auto as_float = [&](const std::string& key, std::optional<float> default_v) {
        CheckParameter<float>(config, key, stof_closure, default_v);
    };

    /***************************** meta *******************************/
    as_int(knowhere::meta::SLICE_SIZE, std::optional{config::KnowhereGetIndexSliceSize()});
    as_int(knowhere::meta::DIM, std::nullopt);
    as_int(knowhere::meta::TOPK, std::nullopt);

    /************************** DiskAnn build Params ************************/
    as_int(DISK_ANN_MAX_DEGREE, std::optional{48});
    as_int(DISK_ANN_BUILD_LIST, std::optional{128});
    as_float(DISK_ANN_SEARCH_DRAM_BUDGET, std::optional{0.03});
    as_float(DISK_ANN_BUILD_DRAM_BUDGET, std::optional{32});
    as_int(DISK_ANN_BUILD_THREAD_NUM, std::optional{8});
    as_int(DISK_ANN_PQ_BYTES, std::optional{0});

    /************************** DiskAnn prepare Params ************************/
    as_int(DISK_ANN_PREPARE_THREAD_NUM, std::optional{8});
    as_int(NUM_ROW_OF_RAW_DATA, std::nullopt);

    /************************** DiskAnn query Params ************************/
    // CheckParameter<int>(config, DISK_ANN_QUERY_LIST, stoi_closure, std::nullopt);
    // CheckParameter<int>(config, DISK_ANN_QUERY_BEAMWIDTH, stoi_closure, std::optional{16});
}
template class VectorDiskAnnIndex<float>;
#endif
} // namespace milvus::index

View File

@ -0,0 +1,84 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "index/VectorIndex.h"
#include "storage/DiskFileManagerImpl.h"
#include "knowhere/index/vector_index/IndexDiskANN.h"
#include "knowhere/index/vector_index/IndexDiskANNConfig.h"
namespace milvus::index {
#ifdef BUILD_DISK_ANN
// Vector index backed by knowhere::IndexDiskANN<T>. Index files are handled
// through a storage::DiskFileManagerImpl.
template <typename T>
class VectorDiskAnnIndex : public VectorIndex {
 public:
    explicit VectorDiskAnnIndex(const IndexType& index_type,
                                const MetricType& metric_type,
                                const IndexMode& index_mode,
                                storage::FileManagerImplPtr file_manager);

    // Returns a BinarySet with one entry per remote index file: the entry name
    // is the remote path, the data pointer is null and the size is the file
    // size reported by the file manager. `config` is not used.
    BinarySet
    Serialize(const Config& config) override {
        BinarySet remote_files;
        auto path_sizes = file_manager_->GetRemotePathsToFileSize();
        for (auto& path_and_size : path_sizes) {
            remote_files.Append(path_and_size.first, nullptr, path_and_size.second);
        }
        return remote_files;
    }

    // Forwards to the underlying index's Count().
    int64_t
    Count() override {
        return index_->Count();
    }

    void
    Load(const BinarySet& binary_set /* not used */, const Config& config = {}) override;

    void
    BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override;

    std::unique_ptr<SearchResult>
    Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) override;

    void
    CleanLocalData() override;

 private:
    knowhere::DiskANNBuildConfig
    parse_build_config(const Config& config);

    knowhere::DiskANNPrepareConfig
    parse_prepare_config(const Config& config);

    void
    parse_config(Config& config);

 private:
    std::unique_ptr<knowhere::IndexDiskANN<T>> index_;
    std::shared_ptr<storage::DiskFileManagerImpl> file_manager_;
};
template <typename T>
using VectorDiskAnnIndexPtr = std::unique_ptr<VectorDiskAnnIndex<T>>;
#endif
} // namespace milvus::index

View File

@ -0,0 +1,85 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <boost/dynamic_bitset.hpp>
#include "knowhere/index/VecIndex.h"
#include "index/Index.h"
#include "common/Types.h"
#include "common/BitsetView.h"
#include "common/QueryResult.h"
#include "common/QueryInfo.h"
namespace milvus::index {
// Common base class of the vector indexes. It stores the index type, index
// mode, metric type and vector dimension, and declares the search entry point
// that concrete indexes implement.
class VectorIndex : public IndexBase {
 public:
    explicit VectorIndex(const IndexType& index_type, const IndexMode& index_mode, const MetricType& metric_type)
        : index_type_(index_type), index_mode_(index_mode), metric_type_(metric_type) {
    }

 public:
    // Vector indexes are built from datasets only; this overload always panics.
    void
    BuildWithRawData(size_t n, const void* values, const Config& config = {}) override {
        PanicInfo("vector index don't support build index with raw data");
    }

    // Searches the index with the query vectors in `dataset`, using
    // `search_info` for the search parameters and `bitset` as passed by the caller.
    virtual std::unique_ptr<SearchResult>
    Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) = 0;

    IndexType
    GetIndexType() const {
        return index_type_;
    }

    MetricType
    GetMetricType() const {
        return metric_type_;
    }

    IndexMode
    GetIndexMode() const {
        return index_mode_;
    }

    // Returns the dimension recorded by SetDim(), or 0 when it was never set.
    int64_t
    GetDim() const {
        return dim_;
    }

    void
    SetDim(int64_t dim) {
        dim_ = dim;
    }

    // Hook for removing files an index keeps on local disk; no-op by default.
    virtual void
    CleanLocalData() {
    }

 private:
    IndexType index_type_;
    IndexMode index_mode_;
    MetricType metric_type_;
    // Zero-initialized so GetDim() is well defined on code paths that never
    // call SetDim().
    int64_t dim_ = 0;
};
using VectorIndexPtr = std::unique_ptr<VectorIndex>;
} // namespace milvus::index

View File

@ -0,0 +1,218 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "index/VectorMemIndex.h"
#include "index/Meta.h"
#include "index/Utils.h"
#include "exceptions/EasyAssert.h"
#include "config/ConfigKnowhere.h"
#include "knowhere/index/VecIndexFactory.h"
#include "knowhere/common/Timer.h"
#include "common/BitsetView.h"
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "pb/index_cgo_msg.pb.h"
namespace milvus::index {
// Creates an in-memory vector index of the given type.
//
// Asserts when the (index type, metric type) pair is rejected by
// is_unsupported(), and when the knowhere factory returns a null index for the
// requested type and mode.
VectorMemIndex::VectorMemIndex(const IndexType& index_type, const MetricType& metric_type, const IndexMode& index_mode)
: VectorIndex(index_type, index_mode, metric_type) {
AssertInfo(!is_unsupported(index_type, metric_type), index_type + " doesn't support metric: " + metric_type);
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(GetIndexType(), index_mode);
AssertInfo(index_ != nullptr, "[VecIndexCreator]Index is null after create index");
}
// Serializes the index into a BinarySet.
//
// For index types in the NM list, the raw vectors kept in `raw_data_` are
// appended under RAW_DATA through a non-owning shared_ptr (the buffer stays
// owned by `raw_data_`), and the set is then passed to knowhere::Disassemble.
BinarySet
VectorMemIndex::Serialize(const Config& config) {
    knowhere::Config knowhere_cfg = config;
    parse_config(knowhere_cfg);

    auto binaries = index_->Serialize(knowhere_cfg);
    if (!is_in_nm_list(GetIndexType())) {
        return binaries;
    }

    // No-op deleter: raw_data_ keeps ownership, so the shared_ptr must not free it.
    auto noop_deleter = [](uint8_t*) {};
    std::shared_ptr<uint8_t[]> raw_view(static_cast<uint8_t*>(raw_data_.data()), noop_deleter);
    binaries.Append(RAW_DATA, raw_view, raw_data_.size());
    // Disassemble will only divide the raw vectors, other keys were already divided
    knowhere::Disassemble(binaries, knowhere_cfg);
    return binaries;
}
// Loads the index from `binary_set`.
//
// When the set has a RAW_DATA entry, its bytes are copied into `raw_data_`
// first. The whole set is then handed to the underlying index and the
// dimension it reports is recorded. `config` is not used.
void
VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) {
    for (const auto& entry : binary_set.binary_map_) {
        if (entry.first != RAW_DATA) {
            continue;
        }
        const auto byte_count = entry.second->size;
        raw_data_.clear();
        raw_data_.resize(byte_count);
        memcpy(raw_data_.data(), entry.second->data.get(), byte_count);
        break;  // only the first RAW_DATA entry is copied
    }

    index_->Load(binary_set);
    SetDim(index_->Dim());
}
// Builds the index from `dataset` using the parameters in `config`.
//
// The parameters are normalized with parse_config(), the row count is added,
// IVF_PQ gets nbits = 8 when the caller did not supply it, and the result is
// validated by the index type's conf adapter before building. For index types
// in the NM list the raw vectors are additionally kept via store_raw_data().
void
VectorMemIndex::BuildWithDataset(const DatasetPtr& dataset, const Config& config) {
    knowhere::Config index_config;
    index_config.update(config);
    parse_config(index_config);

    SetDim(knowhere::GetDatasetDim(dataset));
    knowhere::SetMetaRows(index_config, knowhere::GetDatasetRows(dataset));

    const bool needs_default_nbits = GetIndexType() == knowhere::IndexEnum::INDEX_FAISS_IVFPQ &&
                                     !config.contains(knowhere::indexparam::NBITS);
    if (needs_default_nbits) {
        knowhere::SetIndexParamNbits(index_config, 8);
    }

    auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(GetIndexType());
    AssertInfo(conf_adapter->CheckTrain(index_config, GetIndexMode()), "something wrong in index parameters!");

    knowhere::TimeRecorder rc("BuildWithoutIds", 1);
    index_->BuildAll(dataset, index_config);
    rc.RecordSection("TrainAndAdd");

    if (is_in_nm_list(GetIndexType())) {
        store_raw_data(dataset);
        rc.RecordSection("store_raw_data");
    }
    rc.ElapseFromBegin("Done");

    SetDim(index_->Dim());
}
// Runs a top-k search against the in-memory index.
//
// For index types in the NM list, the raw vectors are attached to the index
// exactly once (guarded by `raw_data_loaded_`) before the first search. The
// search parameters are completed with topk and the index's metric type and
// validated by the conf adapter; an exception thrown by that check is turned
// into an assertion failure carrying the exception message.
//
// Returns a SearchResult holding num_queries * topk ids and distances copied
// from the knowhere result. When `round_decimal_` is not -1 the distances are
// rounded to that many decimal places first.
std::unique_ptr<SearchResult>
VectorMemIndex::Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) {
// AssertInfo(GetMetricType() == search_info.metric_type_,
// "Metric type of field index isn't the same with search info");
auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer
auto index_type = GetIndexType();
if (is_in_nm_list(index_type)) {
std::call_once(raw_data_loaded_, load_raw_data_closure);
}
auto num_queries = knowhere::GetDatasetRows(dataset);
Config search_conf = search_info.search_params_;
auto topk = search_info.topk_;
// TODO :: check dim of search data
// immediately-invoked lambda: validate the search config, then run the query
auto final = [&] {
knowhere::SetMetaTopk(search_conf, topk);
knowhere::SetMetaMetricType(search_conf, GetMetricType());
auto index_type = GetIndexType();
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
try {
adapter->CheckSearch(search_conf, index_type, GetIndexMode());
} catch (std::exception& e) {
AssertInfo(false, e.what());
}
return index_->Query(dataset, search_conf, bitset);
}();
auto ids = knowhere::GetDatasetIDs(final);
float* distances = (float*)knowhere::GetDatasetDistance(final);
auto round_decimal = search_info.round_decimal_;
auto total_num = num_queries * topk;
// -1 disables rounding; otherwise distances are rounded in place, in the
// buffer obtained from the result dataset, before being copied out below
if (round_decimal != -1) {
const float multiplier = pow(10.0, round_decimal);
for (int i = 0; i < total_num; i++) {
distances[i] = round(distances[i] * multiplier) / multiplier;
}
}
auto result = std::make_unique<SearchResult>();
result->seg_offsets_.resize(total_num);
result->distances_.resize(total_num);
result->total_nq_ = num_queries;
result->unity_topK_ = topk;
std::copy_n(ids, total_num, result->seg_offsets_.data());
std::copy_n(distances, total_num, result->distances_.data());
return result;
}
// Copies the dataset's raw vectors into `raw_data_` for index types in the NM
// list; does nothing for other index types.
//
// The byte count is dim * rows * sizeof(float), or dim / 8 * rows when the
// index type is also in the binary list.
void
VectorMemIndex::store_raw_data(const knowhere::DatasetPtr& dataset) {
    const auto index_type = GetIndexType();
    if (!is_in_nm_list(index_type)) {
        return;
    }

    auto tensor = knowhere::GetDatasetTensor(dataset);
    auto row_num = knowhere::GetDatasetRows(dataset);
    auto dim = knowhere::GetDatasetDim(dataset);

    int64_t byte_count = dim * row_num * sizeof(float);
    if (is_in_bin_list(index_type)) {
        byte_count = dim / 8 * row_num;
    }

    raw_data_.resize(byte_count);
    memcpy(raw_data_.data(), tensor, byte_count);
}
// Re-attaches the raw vectors held in `raw_data_` to the underlying index, for
// index types in the NM list only.
//
// The index is serialized, a RAW_DATA entry pointing at `raw_data_` (through a
// non-owning shared_ptr) is appended, and the combined set is loaded back.
void
VectorMemIndex::LoadRawData() {
    if (!is_in_nm_list(GetIndexType())) {
        return;
    }

    auto serialized = index_->Serialize(Config{knowhere::meta::SLICE_SIZE, config::KnowhereGetIndexSliceSize()});

    // No-op deleter: raw_data_ keeps ownership of the buffer.
    auto noop_deleter = [](uint8_t*) {};
    auto raw_binary = std::make_shared<knowhere::Binary>();
    raw_binary->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), noop_deleter);
    raw_binary->size = raw_data_.size();

    serialized.Append(RAW_DATA, raw_binary);
    index_->Load(serialized);
}
// Normalizes the integer-valued entries of `config` in place: values that are
// present are converted from strings to ints (see CheckParameter). Only the
// slice size has a default; the other keys are left absent when missing.
void
VectorMemIndex::parse_config(Config& config) {
    auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };

    /***************************** meta *******************************/
    CheckParameter<int>(config, knowhere::meta::SLICE_SIZE, stoi_closure,
                        std::optional{config::KnowhereGetIndexSliceSize()});

    // Keys without a default, processed in the same order as before.
    const std::string optional_int_keys[] = {
        /* meta */
        knowhere::meta::DIM,
        knowhere::meta::TOPK,
        /* IVF Params */
        knowhere::indexparam::NPROBE,
        knowhere::indexparam::NLIST,
        knowhere::indexparam::M,
        knowhere::indexparam::NBITS,
        /* PQ Params */
        knowhere::indexparam::PQ_M,
        /* HNSW Params */
        knowhere::indexparam::EFCONSTRUCTION,
        knowhere::indexparam::HNSW_M,
        knowhere::indexparam::EF,
        /* Annoy Params */
        knowhere::indexparam::N_TREES,
        knowhere::indexparam::SEARCH_K,
    };
    for (const auto& key : optional_int_keys) {
        CheckParameter<int>(config, key, stoi_closure, std::nullopt);
    }
}
} // namespace milvus::index

View File

@ -0,0 +1,68 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include <boost/dynamic_bitset.hpp>
#include "index/VectorIndex.h"
namespace milvus::index {
// Vector index implementation that wraps a knowhere::VecIndexPtr and exposes it
// through the VectorIndex interface (serialize, load, build, count, query).
class VectorMemIndex : public VectorIndex {
public:
// Constructs the index for the given index type, metric type and index mode.
explicit VectorMemIndex(const IndexType& index_type, const MetricType& metric_type, const IndexMode& index_mode);
// Serializes the index into a BinarySet using `config`.
BinarySet
Serialize(const Config& config) override;
// Restores the index from `binary_set`; `config` is optional.
void
Load(const BinarySet& binary_set, const Config& config = {}) override;
// Builds the index from `dataset`; `config` is optional.
void
BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override;
// Returns the count reported by the wrapped knowhere index.
// NOTE(review): dereferences index_ without a null check - confirm index_ is always set by the constructor.
int64_t
Count() override {
return index_->Count();
}
// Runs a search for `dataset` using `search_info`; `bitset` is forwarded as a filter view.
std::unique_ptr<SearchResult>
Query(const DatasetPtr dataset, const SearchInfo& search_info, const BitsetView& bitset) override;
private:
// NOTE(review): implementation not shown here; presumably copies the raw vectors of `dataset` into raw_data_ - confirm.
void
store_raw_data(const knowhere::DatasetPtr& dataset);
// Normalizes integer-valued entries of `config` (slice size, dim, topk and
// index-specific parameters) in place via CheckParameter<int>.
void
parse_config(Config& config);
// NOTE(review): presumably re-attaches raw_data_ to the wrapped index before querying - confirm against the definition.
void
LoadRawData();
private:
// Configuration kept by this index instance.
Config config_;
// Wrapped knowhere index; null until assigned.
knowhere::VecIndexPtr index_ = nullptr;
// Byte buffer holding raw vector data.
std::vector<uint8_t> raw_data_;
// NOTE(review): presumably guards a one-time LoadRawData() call via std::call_once - confirm.
std::once_flag raw_data_loaded_;
};
using VectorMemIndexPtr = std::unique_ptr<VectorMemIndex>;
} // namespace milvus::index

View File

@ -0,0 +1,9 @@
# pkg-config metadata template for the milvus_index library.
# The @...@ placeholders are CMake-style substitution variables and are expected
# to be filled in when this template is configured; ${libdir} and ${includedir}
# are pkg-config variables defined on the first two lines.
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: Milvus index
Description: index modules for Milvus
Version: @MILVUS_VERSION@
Libs: -L${libdir} -lmilvus_index
Cflags: -I${includedir}

View File

@ -14,7 +14,6 @@ set(INDEXBUILDER_FILES
VecIndexCreator.cpp
index_c.cpp
init_c.cpp
utils.cpp
ScalarIndexCreator.cpp
)
@ -30,11 +29,8 @@ endif ()
# link order matters
target_link_libraries(milvus_indexbuilder
milvus_index
milvus_common
knowhere
${TBB}
${PLATFORM_LIBS}
pthread
)
install(TARGETS milvus_indexbuilder DESTINATION "${CMAKE_INSTALL_LIBDIR}")

View File

@ -11,10 +11,8 @@
#pragma once
#include "knowhere/common/Dataset.h"
#include "knowhere/common/BinarySet.h"
#include <memory>
#include <knowhere/index/Index.h>
#include "common/Types.h"
namespace milvus::indexbuilder {
class IndexCreatorBase {
@ -22,17 +20,14 @@ class IndexCreatorBase {
virtual ~IndexCreatorBase() = default;
virtual void
Build(const knowhere::DatasetPtr& dataset) = 0;
Build(const milvus::DatasetPtr& dataset) = 0;
virtual knowhere::BinarySet
virtual milvus::BinarySet
Serialize() = 0;
// used for test.
virtual void
Load(const knowhere::BinarySet&) = 0;
// virtual knowhere::IndexPtr
// GetIndex() = 0;
Load(const milvus::BinarySet&) = 0;
};
using IndexCreatorBasePtr = std::unique_ptr<IndexCreatorBase>;

View File

@ -40,28 +40,24 @@ class IndexFactory {
IndexCreatorBasePtr
CreateIndex(CDataType dtype, const char* type_params, const char* index_params) {
auto real_dtype = proto::schema::DataType(dtype);
auto invalid_dtype_msg = std::string("invalid data type: ") + std::to_string(real_dtype);
auto real_dtype = DataType(dtype);
auto invalid_dtype_msg = std::string("invalid data type: ") + std::to_string(int(real_dtype));
switch (real_dtype) {
case proto::schema::Bool:
case proto::schema::Int8:
case proto::schema::Int16:
case proto::schema::Int32:
case proto::schema::Int64:
case proto::schema::Float:
case proto::schema::Double:
case proto::schema::VarChar:
case proto::schema::String:
return CreateScalarIndex(dtype, type_params, index_params);
case DataType::BOOL:
case DataType::INT8:
case DataType::INT16:
case DataType::INT32:
case DataType::INT64:
case DataType::FLOAT:
case DataType::DOUBLE:
case DataType::VARCHAR:
case DataType::STRING:
return CreateScalarIndex(real_dtype, type_params, index_params);
case proto::schema::BinaryVector:
case proto::schema::FloatVector:
return std::make_unique<VecIndexCreator>(type_params, index_params);
case proto::schema::None:
case proto::schema::DataType_INT_MIN_SENTINEL_DO_NOT_USE_:
case proto::schema::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
case DataType::VECTOR_FLOAT:
case DataType::VECTOR_BINARY:
return std::make_unique<VecIndexCreator>(real_dtype, type_params, index_params);
default:
throw std::invalid_argument(invalid_dtype_msg);
}

View File

@ -9,35 +9,53 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "indexbuilder/helper.h"
#include "indexbuilder/ScalarIndexCreator.h"
#include "index/IndexFactory.h"
#include "index/IndexInfo.h"
#include "index/Meta.h"
#include "index/Utils.h"
#include <string>
namespace milvus::indexbuilder {
ScalarIndexCreator::ScalarIndexCreator(CDataType dtype, const char* type_params, const char* index_params) {
dtype_ = dtype;
ScalarIndexCreator::ScalarIndexCreator(DataType dtype, const char* type_params, const char* index_params)
: dtype_(dtype) {
// TODO: move parse-related logic to a common interface.
Helper::ParseFromString(type_params_, std::string(type_params));
Helper::ParseFromString(index_params_, std::string(index_params));
// TODO: create index according to the params.
index_ = scalar::IndexFactory::GetInstance().CreateIndex(dtype_, index_type());
milvus::index::ParseFromString(type_params_, std::string(type_params));
milvus::index::ParseFromString(index_params_, std::string(index_params));
for (auto i = 0; i < type_params_.params_size(); ++i) {
const auto& param = type_params_.params(i);
config_[param.key()] = param.value();
}
for (auto i = 0; i < index_params_.params_size(); ++i) {
const auto& param = index_params_.params(i);
config_[param.key()] = param.value();
}
milvus::index::CreateIndexInfo index_info;
index_info.field_type = dtype_;
index_info.index_type = index_type();
index_info.index_mode = IndexMode::MODE_CPU;
index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, nullptr);
}
void
ScalarIndexCreator::Build(const knowhere::DatasetPtr& dataset) {
index_->BuildWithDataset(dataset);
ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
auto size = knowhere::GetDatasetRows(dataset);
auto data = knowhere::GetDatasetTensor(dataset);
index_->BuildWithRawData(size, data);
}
knowhere::BinarySet
milvus::BinarySet
ScalarIndexCreator::Serialize() {
return index_->Serialize(config_);
}
void
ScalarIndexCreator::Load(const knowhere::BinarySet& binary_set) {
ScalarIndexCreator::Load(const milvus::BinarySet& binary_set) {
index_->Load(binary_set);
}

View File

@ -23,33 +23,33 @@ namespace milvus::indexbuilder {
class ScalarIndexCreator : public IndexCreatorBase {
public:
ScalarIndexCreator(CDataType dtype, const char* type_params, const char* index_params);
ScalarIndexCreator(DataType data_type, const char* type_params, const char* index_params);
void
Build(const knowhere::DatasetPtr& dataset) override;
Build(const milvus::DatasetPtr& dataset) override;
knowhere::BinarySet
milvus::BinarySet
Serialize() override;
void
Load(const knowhere::BinarySet&) override;
Load(const milvus::BinarySet&) override;
private:
std::string
index_type();
private:
scalar::IndexBasePtr index_ = nullptr;
index::IndexBasePtr index_ = nullptr;
proto::indexcgo::TypeParams type_params_;
proto::indexcgo::IndexParams index_params_;
knowhere::Config config_;
CDataType dtype_;
Config config_;
DataType dtype_;
};
using ScalarIndexCreatorPtr = std::unique_ptr<ScalarIndexCreator>;
inline ScalarIndexCreatorPtr
CreateScalarIndex(CDataType dtype, const char* type_params, const char* index_params) {
CreateScalarIndex(DataType dtype, const char* type_params, const char* index_params) {
return std::make_unique<ScalarIndexCreator>(dtype, type_params, index_params);
}

View File

@ -9,323 +9,85 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <exception>
#include <map>
#include <google/protobuf/text_format.h>
#include "exceptions/EasyAssert.h"
#include "indexbuilder/VecIndexCreator.h"
#include "indexbuilder/utils.h"
#include "knowhere/common/Timer.h"
#include "knowhere/index/VecIndex.h"
#include "knowhere/index/VecIndexFactory.h"
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "pb/index_cgo_msg.pb.h"
#include "index/Utils.h"
#include "index/IndexFactory.h"
#ifdef BUILD_DISK_ANN
#include "storage/DiskFileManagerImpl.h"
#endif
namespace milvus::indexbuilder {
VecIndexCreator::VecIndexCreator(const char* serialized_type_params, const char* serialized_index_params) {
type_params_ = std::string(serialized_type_params);
index_params_ = std::string(serialized_index_params);
VecIndexCreator::VecIndexCreator(DataType data_type,
const char* serialized_type_params,
const char* serialized_index_params)
: data_type_(data_type) {
milvus::index::ParseFromString(type_params_, std::string(serialized_type_params));
milvus::index::ParseFromString(index_params_, std::string(serialized_index_params));
parse();
for (auto i = 0; i < type_params_.params_size(); ++i) {
const auto& param = type_params_.params(i);
config_[param.key()] = param.value();
}
auto index_mode = get_index_mode();
auto index_type = get_index_type();
auto metric_type = get_metric_type();
AssertInfo(!is_unsupported(index_type, metric_type), index_type + " doesn't support metric: " + metric_type);
for (auto i = 0; i < index_params_.params_size(); ++i) {
const auto& param = index_params_.params(i);
config_[param.key()] = param.value();
}
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(get_index_type(), index_mode);
index::CreateIndexInfo index_info;
index_info.field_type = data_type_;
index_info.index_mode = index::GetIndexModeFromConfig(config_);
index_info.index_type = index::GetIndexTypeFromConfig(config_);
index_info.metric_type = index::GetMetricTypeFromConfig(config_);
std::shared_ptr<storage::FileManagerImpl> file_manager = nullptr;
#ifdef BUILD_DISK_ANN
if (index::is_in_disk_list(index_info.index_type)) {
// For now, only support diskann index
file_manager = std::make_shared<storage::DiskFileManagerImpl>(index::GetFieldDataMetaFromConfig(config_),
index::GetIndexMetaFromConfig(config_));
}
#endif
index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, file_manager);
AssertInfo(index_ != nullptr, "[VecIndexCreator]Index is null after create index");
}
template <typename ParamsT>
// ugly here, ParamsT will just be MapParams later
void
VecIndexCreator::parse_impl(const std::string& serialized_params_str, knowhere::Config& conf) {
bool deserialized_success;
ParamsT params;
deserialized_success = google::protobuf::TextFormat::ParseFromString(serialized_params_str, &params);
AssertInfo(deserialized_success, "[VecIndexCreator]Deserialize params failed");
for (auto i = 0; i < params.params_size(); ++i) {
const auto& param = params.params(i);
const auto& key = param.key();
const auto& value = param.value();
conf[key] = value;
}
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
/***************************** meta *******************************/
check_parameter<int>(conf, knowhere::meta::SLICE_SIZE, stoi_closure, std::optional{4});
check_parameter<int>(conf, knowhere::meta::DIM, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::meta::TOPK, stoi_closure, std::nullopt);
/***************************** IVF Params *******************************/
check_parameter<int>(conf, knowhere::indexparam::NPROBE, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::indexparam::NLIST, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::indexparam::M, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::indexparam::NBITS, stoi_closure, std::nullopt);
/************************** PQ Params *****************************/
check_parameter<int>(conf, knowhere::indexparam::PQ_M, stoi_closure, std::nullopt);
/************************** HNSW Params *****************************/
check_parameter<int>(conf, knowhere::indexparam::EFCONSTRUCTION, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::indexparam::HNSW_M, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::indexparam::EF, stoi_closure, std::nullopt);
/************************** Annoy Params *****************************/
check_parameter<int>(conf, knowhere::indexparam::N_TREES, stoi_closure, std::nullopt);
check_parameter<int>(conf, knowhere::indexparam::SEARCH_K, stoi_closure, std::nullopt);
}
void
VecIndexCreator::parse() {
namespace indexcgo = milvus::proto::indexcgo;
parse_impl<indexcgo::TypeParams>(type_params_, type_config_);
parse_impl<indexcgo::IndexParams>(index_params_, index_config_);
config_.update(type_config_); // just like dict().update in Python, amazing
config_.update(index_config_);
}
template <typename T>
void
VecIndexCreator::check_parameter(knowhere::Config& conf,
const std::string& key,
std::function<T(std::string)> fn,
std::optional<T> default_v) {
if (!conf.contains(key)) {
if (default_v.has_value()) {
conf[key] = default_v.value();
}
} else {
auto value = conf[key];
conf[key] = fn(value);
}
}
template <typename T>
std::optional<T>
VecIndexCreator::get_config_by_name(const std::string& name) {
if (config_.contains(name)) {
return knowhere::GetValueFromConfig<T>(config_, name);
}
return std::nullopt;
}
int64_t
VecIndexCreator::dim() {
auto dimension = get_config_by_name<int64_t>(knowhere::meta::DIM);
AssertInfo(dimension.has_value(), "[VecIndexCreator]Dimension doesn't have value");
return (dimension.value());
return index::GetDimFromConfig(config_);
}
void
VecIndexCreator::BuildWithoutIds(const knowhere::DatasetPtr& dataset) {
auto index_type = get_index_type();
auto index_mode = get_index_mode();
knowhere::SetMetaRows(config_, knowhere::GetDatasetRows(dataset));
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
if (!config_.contains(knowhere::indexparam::NBITS)) {
knowhere::SetIndexParamNbits(config_, 8);
}
}
auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
// TODO: Use easylogging instead, if you really need to keep this log.
// std::cout << "Konwhere BuildWithoutIds config_ is " << config_ << std::endl;
AssertInfo(conf_adapter->CheckTrain(config_, index_mode), "something wrong in index parameters!");
if (is_in_need_id_list(index_type)) {
PanicInfo(std::string(index_type) + " doesn't support build without ids yet!");
}
knowhere::TimeRecorder rc("BuildWithoutIds", 1);
// if (is_in_need_build_all_list(index_type)) {
// index_->BuildAll(dataset, config_);
// } else {
// index_->Train(dataset, config_);
// index_->AddWithoutIds(dataset, config_);
// }
index_->BuildAll(dataset, config_);
rc.RecordSection("TrainAndAdd");
if (is_in_nm_list(index_type)) {
StoreRawData(dataset);
rc.RecordSection("StoreRawData");
}
rc.ElapseFromBegin("Done");
VecIndexCreator::Build(const milvus::DatasetPtr& dataset) {
index_->BuildWithDataset(dataset, config_);
}
void
VecIndexCreator::BuildWithIds(const knowhere::DatasetPtr& dataset) {
auto index_type = get_index_type();
auto index_mode = get_index_mode();
knowhere::SetMetaRows(config_, knowhere::GetDatasetRows(dataset));
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
if (!config_.contains(knowhere::indexparam::NBITS)) {
knowhere::SetIndexParamNbits(config_, 8);
}
}
auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
AssertInfo(conf_adapter->CheckTrain(config_, index_mode), "something wrong in index parameters!");
// index_->Train(dataset, config_);
// index_->Add(dataset, config_);
index_->BuildAll(dataset, config_);
if (is_in_nm_list(get_index_type())) {
StoreRawData(dataset);
}
}
void
VecIndexCreator::StoreRawData(const knowhere::DatasetPtr& dataset) {
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
auto tensor = knowhere::GetDatasetTensor(dataset);
auto row_num = knowhere::GetDatasetRows(dataset);
auto dim = knowhere::GetDatasetDim(dataset);
int64_t data_size;
if (is_in_bin_list(index_type)) {
data_size = dim / 8 * row_num;
} else {
data_size = dim * row_num * sizeof(float);
}
raw_data_.resize(data_size);
memcpy(raw_data_.data(), tensor, data_size);
}
}
knowhere::BinarySet
milvus::BinarySet
VecIndexCreator::Serialize() {
auto ret = index_->Serialize(config_);
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
std::shared_ptr<uint8_t[]> raw_data(new uint8_t[raw_data_.size()], std::default_delete<uint8_t[]>());
memcpy(raw_data.get(), raw_data_.data(), raw_data_.size());
ret.Append(RAW_DATA, raw_data, raw_data_.size());
// https://github.com/milvus-io/milvus/issues/6421
// Disassemble will only divide the raw vectors, other keys were already divided
knowhere::Disassemble(ret, config_);
}
return ret;
return index_->Serialize(config_);
}
void
VecIndexCreator::Load(const knowhere::BinarySet& binary_set) {
auto& map_ = binary_set.binary_map_;
for (auto it = map_.begin(); it != map_.end(); ++it) {
if (it->first == RAW_DATA) {
raw_data_.clear();
auto data_size = it->second->size;
raw_data_.resize(data_size);
memcpy(raw_data_.data(), it->second->data.get(), data_size);
break;
}
}
index_->Load(binary_set);
VecIndexCreator::Load(const milvus::BinarySet& binary_set) {
index_->Load(binary_set, config_);
}
std::string
VecIndexCreator::get_index_type() {
// return index_->index_type();
// knowhere bug here
// the index_type of all ivf-based index will change to ivf flat after loaded
auto type = get_config_by_name<std::string>("index_type");
return type.has_value() ? type.value() : knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
}
std::string
VecIndexCreator::get_metric_type() {
auto type = get_config_by_name<std::string>(knowhere::meta::METRIC_TYPE);
if (type.has_value()) {
return type.value();
} else {
auto index_type = get_index_type();
if (is_in_bin_list(index_type)) {
return knowhere::metric::JACCARD;
} else {
return knowhere::metric::L2;
}
}
}
// Resolves the "index_mode" config entry ("CPU" or "GPU") to a knowhere::IndexMode.
//
// Returns MODE_CPU when the entry is absent. An unrecognized mode name also
// falls back to MODE_CPU instead of being inserted into the lookup table:
// the previous operator[] lookup grew the function-static map on every unknown
// name, which mutated shared state and was unsafe under concurrent callers.
// NOTE(review): the old code returned a value-initialized IndexMode for unknown
// names; this assumes that value is MODE_CPU - confirm against the enum.
knowhere::IndexMode
VecIndexCreator::get_index_mode() {
    // const so that lookups can never modify the shared table.
    static const std::map<std::string, knowhere::IndexMode> mode_map = {
        {"CPU", knowhere::IndexMode::MODE_CPU},
        {"GPU", knowhere::IndexMode::MODE_GPU},
    };
    auto mode = get_config_by_name<std::string>("index_mode");
    if (!mode.has_value()) {
        return knowhere::IndexMode::MODE_CPU;
    }
    auto found = mode_map.find(mode.value());
    return found != mode_map.end() ? found->second : knowhere::IndexMode::MODE_CPU;
}
int64_t
VecIndexCreator::get_index_file_slice_size() {
if (knowhere::CheckKeyInConfig(config_, knowhere::meta::SLICE_SIZE)) {
return knowhere::GetMetaSliceSize(config_);
}
return knowhere::index_file_slice_size; // by default
}
std::unique_ptr<VecIndexCreator::QueryResult>
VecIndexCreator::Query(const knowhere::DatasetPtr& dataset) {
return std::move(QueryImpl(dataset, config_));
}
std::unique_ptr<VecIndexCreator::QueryResult>
VecIndexCreator::QueryWithParam(const knowhere::DatasetPtr& dataset, const char* serialized_search_params) {
namespace indexcgo = milvus::proto::indexcgo;
knowhere::Config search_conf;
parse_impl<indexcgo::MapParams>(std::string(serialized_search_params), search_conf);
return std::move(QueryImpl(dataset, search_conf));
}
std::unique_ptr<VecIndexCreator::QueryResult>
VecIndexCreator::QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Config& conf) {
auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
std::call_once(raw_data_loaded_, load_raw_data_closure);
}
auto res = index_->Query(dataset, conf, nullptr);
auto ids = knowhere::GetDatasetIDs(res);
auto distances = knowhere::GetDatasetDistance(res);
auto nq = knowhere::GetDatasetRows(dataset);
auto k = knowhere::GetMetaTopk(config_);
auto query_res = std::make_unique<VecIndexCreator::QueryResult>();
query_res->nq = nq;
query_res->topk = k;
query_res->ids.resize(nq * k);
query_res->distances.resize(nq * k);
memcpy(query_res->ids.data(), ids, sizeof(int64_t) * nq * k);
memcpy(query_res->distances.data(), distances, sizeof(float) * nq * k);
return std::move(query_res);
// Runs a vector search on the underlying index.
//
// index_ is stored as a generic IndexBasePtr, so it is downcast to
// index::VectorIndex before querying. The cast result is asserted to be
// non-null: a failed dynamic_cast (or a null index_) would otherwise be
// dereferenced, which is undefined behavior.
//
// Returns the SearchResult produced by the vector index.
// Fails the assertion when the held index is not an index::VectorIndex.
std::unique_ptr<SearchResult>
VecIndexCreator::Query(const milvus::DatasetPtr& dataset, const SearchInfo& search_info, const BitsetView& bitset) {
    auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());
    AssertInfo(vector_index != nullptr, "[VecIndexCreator]Index is not a vector index, unable to query");
    return vector_index->Query(dataset, search_info, bitset);
}
void
VecIndexCreator::LoadRawData() {
auto index_type = get_index_type();
if (is_in_nm_list(index_type)) {
auto bs = index_->Serialize(config_);
auto bptr = std::make_shared<knowhere::Binary>();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
bptr->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
bptr->size = raw_data_.size();
bs.Append(RAW_DATA, bptr);
index_->Load(bs);
}
VecIndexCreator::CleanLocalData() {
    // index_ is held as a generic IndexBasePtr; CleanLocalData lives on
    // index::VectorIndex, so downcast first and refuse to continue when the
    // cast fails instead of dereferencing a null pointer.
    auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());
    AssertInfo(vector_index != nullptr, "[VecIndexCreator]Index is not a vector index, unable to clean local data");
    vector_index->CleanLocalData();
}
} // namespace milvus::indexbuilder

View File

@ -17,101 +17,44 @@
#include <vector>
#include "indexbuilder/IndexCreatorBase.h"
#include "knowhere/common/BinarySet.h"
#include "knowhere/index/VecIndex.h"
#include "index/VectorIndex.h"
#include "index/IndexInfo.h"
#include "pb/index_cgo_msg.pb.h"
namespace milvus::indexbuilder {
// TODO: better to distinguish binary vec & float vec.
class VecIndexCreator : public IndexCreatorBase {
public:
explicit VecIndexCreator(const char* serialized_type_params, const char* serialized_index_params);
explicit VecIndexCreator(DataType data_type,
const char* serialized_type_params,
const char* serialized_index_params);
void
Build(const knowhere::DatasetPtr& dataset) override {
BuildWithoutIds(dataset);
}
Build(const milvus::DatasetPtr& dataset) override;
knowhere::BinarySet
milvus::BinarySet
Serialize() override;
void
Load(const knowhere::BinarySet& binary_set) override;
Load(const milvus::BinarySet& binary_set) override;
int64_t
dim();
public:
// used for tests
struct QueryResult {
std::vector<knowhere::IDType> ids;
std::vector<float> distances;
int64_t nq;
int64_t topk;
};
std::unique_ptr<QueryResult>
Query(const knowhere::DatasetPtr& dataset);
std::unique_ptr<QueryResult>
QueryWithParam(const knowhere::DatasetPtr& dataset, const char* serialized_search_params);
private:
void
parse();
std::string
get_index_type();
std::string
get_metric_type();
knowhere::IndexMode
get_index_mode();
int64_t
get_index_file_slice_size();
template <typename T>
std::optional<T>
get_config_by_name(const std::string& name);
void
StoreRawData(const knowhere::DatasetPtr& dataset);
void
LoadRawData();
template <typename T>
void
check_parameter(knowhere::Config& conf,
const std::string& key,
std::function<T(std::string)> fn,
std::optional<T> default_v = std::nullopt);
template <typename ParamsT>
void
parse_impl(const std::string& serialized_params_str, knowhere::Config& conf);
std::unique_ptr<QueryResult>
QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Config& conf);
std::unique_ptr<SearchResult>
Query(const milvus::DatasetPtr& dataset, const SearchInfo& search_info, const BitsetView& bitset);
public:
void
BuildWithIds(const knowhere::DatasetPtr& dataset);
void
BuildWithoutIds(const knowhere::DatasetPtr& dataset);
CleanLocalData();
private:
knowhere::VecIndexPtr index_ = nullptr;
std::string type_params_;
std::string index_params_;
knowhere::Config type_config_;
knowhere::Config index_config_;
knowhere::Config config_;
std::vector<uint8_t> raw_data_;
std::once_flag raw_data_loaded_;
milvus::index::IndexBasePtr index_ = nullptr;
proto::indexcgo::TypeParams type_params_;
proto::indexcgo::IndexParams index_params_;
Config config_;
DataType data_type_;
};
} // namespace milvus::indexbuilder

View File

@ -1,38 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include "pb/index_cgo_msg.pb.h"
#include "exceptions/EasyAssert.h"
#include <google/protobuf/text_format.h>
#include <string>
#include <map>
namespace milvus::indexbuilder {
using MapParams = std::map<std::string, std::string>;
// Stateless helpers that decode protobuf parameter messages and assert on failure.
struct Helper {
    // Parses `str` as protobuf text format into `params`.
    // Fails the assertion when the text cannot be parsed.
    static void
    ParseFromString(google::protobuf::Message& params, const std::string& str) {
        auto ok = google::protobuf::TextFormat::ParseFromString(str, &params);
        AssertInfo(ok, "failed to parse params from string");
    }

    // Parses the `size` bytes starting at `data` as a binary-encoded message into `params`.
    // Fails the assertion when the buffer is too large or cannot be parsed.
    static void
    ParseParams(google::protobuf::Message& params, const void* data, const size_t size) {
        // ParseFromArray takes an int length; reject sizes that would be
        // truncated or turn negative instead of parsing a wrong byte count.
        const int length = static_cast<int>(size);
        AssertInfo(length >= 0 && static_cast<size_t>(length) == size, "params buffer is too large to parse");
        auto ok = params.ParseFromArray(data, length);
        AssertInfo(ok, "failed to parse params from array");
    }
};
} // namespace milvus::indexbuilder

View File

@ -159,146 +159,13 @@ LoadIndexFromBinarySet(CIndex index, CBinarySet c_binary_set) {
}
CStatus
QueryOnFloatVecIndex(CIndex index, int64_t float_value_num, const float* vectors, CIndexQueryResult* res) {
CleanLocalData(CIndex index) {
auto status = CStatus();
try {
auto cIndex = (milvus::indexbuilder::VecIndexCreator*)index;
auto dim = cIndex->dim();
auto row_nums = float_value_num / dim;
auto query_ds = knowhere::GenDataset(row_nums, dim, vectors);
auto query_res = cIndex->Query(query_ds);
*res = query_res.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}
CStatus
QueryOnFloatVecIndexWithParam(CIndex index,
int64_t float_value_num,
const float* vectors,
const char* serialized_search_params,
CIndexQueryResult* res) {
auto status = CStatus();
try {
auto cIndex = (milvus::indexbuilder::VecIndexCreator*)index;
auto dim = cIndex->dim();
auto row_nums = float_value_num / dim;
auto query_ds = knowhere::GenDataset(row_nums, dim, vectors);
auto query_res = cIndex->QueryWithParam(query_ds, serialized_search_params);
*res = query_res.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}
CStatus
QueryOnBinaryVecIndex(CIndex index, int64_t data_size, const uint8_t* vectors, CIndexQueryResult* res) {
auto status = CStatus();
try {
auto cIndex = (milvus::indexbuilder::VecIndexCreator*)index;
auto dim = cIndex->dim();
auto row_nums = (data_size * 8) / dim;
auto query_ds = knowhere::GenDataset(row_nums, dim, vectors);
auto query_res = cIndex->Query(query_ds);
*res = query_res.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}
CStatus
QueryOnBinaryVecIndexWithParam(CIndex index,
int64_t data_size,
const uint8_t* vectors,
const char* serialized_search_params,
CIndexQueryResult* res) {
auto status = CStatus();
try {
auto cIndex = (milvus::indexbuilder::VecIndexCreator*)index;
auto dim = cIndex->dim();
auto row_nums = (data_size * 8) / dim;
auto query_ds = knowhere::GenDataset(row_nums, dim, vectors);
auto query_res = cIndex->QueryWithParam(query_ds, serialized_search_params);
*res = query_res.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}
CStatus
CreateQueryResult(CIndexQueryResult* res) {
auto status = CStatus();
try {
auto query_result = std::make_unique<milvus::indexbuilder::VecIndexCreator::QueryResult>();
*res = query_result.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}
int64_t
NqOfQueryResult(CIndexQueryResult res) {
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
return c_res->nq;
}
int64_t
TopkOfQueryResult(CIndexQueryResult res) {
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
return c_res->topk;
}
void
GetIdsOfQueryResult(CIndexQueryResult res, int64_t* ids) {
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
auto nq = c_res->nq;
auto k = c_res->topk;
// TODO: how could we avoid memory copy whenever this called
memcpy(ids, c_res->ids.data(), sizeof(int64_t) * nq * k);
}
void
GetDistancesOfQueryResult(CIndexQueryResult res, float* distances) {
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
auto nq = c_res->nq;
auto k = c_res->topk;
// TODO: how could we avoid memory copy whenever this called
memcpy(distances, c_res->distances.data(), sizeof(float) * nq * k);
}
CStatus
DeleteIndexQueryResult(CIndexQueryResult res) {
auto status = CStatus();
try {
auto c_res = (milvus::indexbuilder::VecIndexCreator::QueryResult*)res;
delete c_res;
AssertInfo(index, "failed to build float vector index, passed index was null");
auto real_index = reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
auto cIndex = dynamic_cast<milvus::indexbuilder::VecIndexCreator*>(real_index);
cIndex->CleanLocalData();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {

View File

@ -17,7 +17,7 @@ extern "C" {
#include <stdint.h>
#include "common/type_c.h"
#include "common/vector_index_c.h"
#include "common/binary_set_c.h"
#include "indexbuilder/type_c.h"
CStatus
@ -50,42 +50,7 @@ CStatus
LoadIndexFromBinarySet(CIndex index, CBinarySet c_binary_set);
CStatus
QueryOnFloatVecIndex(CIndex index, int64_t float_value_num, const float* vectors, CIndexQueryResult* res);
CStatus
QueryOnFloatVecIndexWithParam(CIndex index,
int64_t float_value_num,
const float* vectors,
const char* serialized_search_params,
CIndexQueryResult* res);
CStatus
QueryOnBinaryVecIndex(CIndex index, int64_t data_size, const uint8_t* vectors, CIndexQueryResult* res);
CStatus
QueryOnBinaryVecIndexWithParam(CIndex index,
int64_t data_size,
const uint8_t* vectors,
const char* serialized_search_params,
CIndexQueryResult* res);
CStatus
CreateQueryResult(CIndexQueryResult* res);
int64_t
NqOfQueryResult(CIndexQueryResult res);
int64_t
TopkOfQueryResult(CIndexQueryResult res);
void
GetIdsOfQueryResult(CIndexQueryResult res, int64_t* ids);
void
GetDistancesOfQueryResult(CIndexQueryResult res, float* distances);
CStatus
DeleteIndexQueryResult(CIndexQueryResult res);
CleanLocalData(CIndex index);
#ifdef __cplusplus
};

View File

@ -1,83 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "indexbuilder/utils.h"
#include <algorithm>
#include <string>
#include <tuple>
#include <vector>
namespace milvus::indexbuilder {
std::vector<knowhere::IndexType>
NM_List() {
static std::vector<knowhere::IndexType> ret{
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
};
return ret;
}
std::vector<knowhere::IndexType>
BIN_List() {
static std::vector<knowhere::IndexType> ret{
knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
};
return ret;
}
std::vector<knowhere::IndexType>
Need_ID_List() {
static std::vector<knowhere::IndexType> ret{
// knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
};
return ret;
}
// Returns the (index type, metric type) pairs that is_unsupported() rejects.
// The only pair listed is INDEX_FAISS_BIN_IVFFLAT together with the L2 metric.
// The table is built once; every call hands back a copy of it.
std::vector<std::tuple<knowhere::IndexType, knowhere::MetricType>>
unsupported_index_combinations() {
    using Combination = std::tuple<knowhere::IndexType, knowhere::MetricType>;
    static const std::vector<Combination> kUnsupported{
        std::make_tuple(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::L2),
    };
    return kUnsupported;
}
// Generic membership test.
//   t         - value to look for (compared with operator==).
//   list_func - callable producing the candidate values; invoked exactly once.
// Returns true when some element produced by list_func equals t.
template <typename T>
bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
    const std::vector<T> candidates = list_func();
    return std::any_of(candidates.cbegin(), candidates.cend(),
                       [&t](const T& candidate) { return candidate == t; });
}
// True when index_type is one of the entries returned by BIN_List().
bool
is_in_bin_list(const knowhere::IndexType& index_type) {
    const bool listed = is_in_list<knowhere::IndexType>(index_type, BIN_List);
    return listed;
}
// True when index_type is one of the entries returned by NM_List().
bool
is_in_nm_list(const knowhere::IndexType& index_type) {
    const bool listed = is_in_list<knowhere::IndexType>(index_type, NM_List);
    return listed;
}
// True when index_type is one of the entries returned by Need_ID_List().
bool
is_in_need_id_list(const knowhere::IndexType& index_type) {
    const bool listed = is_in_list<knowhere::IndexType>(index_type, Need_ID_List);
    return listed;
}
// True when the (index_type, metric_type) pair appears in the table returned
// by unsupported_index_combinations().
bool
is_unsupported(const knowhere::IndexType& index_type, const knowhere::MetricType& metric_type) {
    using Combination = std::tuple<knowhere::IndexType, knowhere::MetricType>;
    const Combination requested = std::make_tuple(index_type, metric_type);
    return is_in_list<Combination>(requested, unsupported_index_combinations);
}
} // namespace milvus::indexbuilder

View File

@ -1,54 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <algorithm>
#include <string>
#include <tuple>
#include <vector>
#include <functional>
#include <knowhere/common/Typedef.h>
#include "knowhere/index/IndexType.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
// Lookup tables and predicates used to classify knowhere index types.
namespace milvus::indexbuilder {
// Index types on the "NM" list; returned by value.
std::vector<knowhere::IndexType>
NM_List();
// Index types on the "BIN" list; returned by value.
std::vector<knowhere::IndexType>
BIN_List();
// Index types on the "need id" list; returned by value.
std::vector<knowhere::IndexType>
Need_ID_List();
// (index type, metric type) pairs that is_unsupported() reports as unsupported.
std::vector<std::tuple<knowhere::IndexType, knowhere::MetricType>>
unsupported_index_combinations();
// Generic membership test: true when `t` equals an element of the vector
// produced by `list_func`.
// NOTE(review): only the declaration lives in this header, so callers presumably
// rely on instantiations made where the template is defined - confirm.
template <typename T>
bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func);
// True when index_type is in BIN_List().
bool
is_in_bin_list(const knowhere::IndexType& index_type);
// True when index_type is in NM_List().
bool
is_in_nm_list(const knowhere::IndexType& index_type);
// True when index_type is in Need_ID_List().
bool
is_in_need_id_list(const knowhere::IndexType& index_type);
// True when (index_type, metric_type) is in unsupported_index_combinations().
bool
is_unsupported(const knowhere::IndexType& index_type, const knowhere::MetricType& metric_type);
} // namespace milvus::indexbuilder

View File

@ -31,4 +31,4 @@ set(MILVUS_QUERY_SRCS
PlanProto.cpp
)
add_library(milvus_query ${MILVUS_QUERY_SRCS})
target_link_libraries(milvus_query milvus_index milvus_common)
target_link_libraries(milvus_query milvus_index)

View File

@ -17,7 +17,8 @@
#include <vector>
#include <string>
#include "Expr.h"
#include "common/QueryInfo.h"
#include "query/Expr.h"
#include "knowhere/common/Config.h"
namespace milvus::query {
@ -34,14 +35,6 @@ struct PlanNode {
using PlanNodePtr = std::unique_ptr<PlanNode>;
// Bundle of parameters describing one vector search; stored by value in
// VectorPlanNode as search_info_.
struct SearchInfo {
// Requested top-k (presumably the number of results per query - confirm).
int64_t topk_;
// Rounding setting for returned distances.
// NOTE(review): -1 appears to mean "do not round" - confirm against callers.
int64_t round_decimal_;
// Id of the field being searched.
FieldId field_id_;
// Metric used to compare vectors.
knowhere::MetricType metric_type_;
// Additional search parameters, held as a knowhere::Config.
knowhere::Config search_params_;
};
struct VectorPlanNode : PlanNode {
std::optional<ExprPtr> predicate_;
SearchInfo search_info_;

View File

@ -22,22 +22,22 @@
namespace milvus::query {
template <typename T>
inline scalar::ScalarIndexPtr<T>
inline index::ScalarIndexPtr<T>
generate_scalar_index(Span<T> data) {
auto indexing = std::make_unique<scalar::ScalarIndexSort<T>>();
auto indexing = std::make_unique<index::ScalarIndexSort<T>>();
indexing->Build(data.row_count(), data.data());
return indexing;
}
template <>
inline scalar::ScalarIndexPtr<std::string>
inline index::ScalarIndexPtr<std::string>
generate_scalar_index(Span<std::string> data) {
auto indexing = scalar::CreateStringIndexSort();
auto indexing = index::CreateStringIndexSort();
indexing->Build(data.row_count(), data.data());
return indexing;
}
inline std::unique_ptr<knowhere::Index>
inline index::IndexBasePtr
generate_scalar_index(SpanBase data, DataType data_type) {
Assert(!datatype_is_vector(data_type));
switch (data_type) {

View File

@ -10,6 +10,7 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "common/BitsetView.h"
#include "common/QueryInfo.h"
#include "SearchOnGrowing.h"
#include "query/SearchBruteForce.h"
#include "query/SearchOnIndex.h"
@ -22,7 +23,7 @@ namespace milvus::query {
// - Query::ExecWithoutPredicate
int32_t
FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
const query::SearchInfo& info,
const SearchInfo& info,
const void* query_data,
int64_t num_queries,
int64_t ins_barrier,
@ -44,7 +45,9 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
if (indexing_record.is_in(vecfield_id)) {
auto max_indexed_id = indexing_record.get_finished_ack();
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_id);
auto search_conf = field_indexing.get_search_params(info.topk_);
auto search_params = field_indexing.get_search_params(info.topk_);
SearchInfo search_conf(info);
search_conf.search_params_ = search_params;
AssertInfo(vec_ptr->get_size_per_chunk() == field_indexing.get_size_per_chunk(),
"[FloatSearch]Chunk size of vector not equal to chunk size of field index");
@ -56,7 +59,8 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
auto sub_view = bitset.subview(chunk_id * size_per_chunk, size_per_chunk);
auto sub_qr = SearchOnIndex(search_dataset, *indexing, search_conf, sub_view);
auto vec_index = (index::VectorIndex*)(indexing);
auto sub_qr = SearchOnIndex(search_dataset, *vec_index, search_conf, sub_view);
// convert chunk uid to segment uid
for (auto& x : sub_qr.mutable_seg_offsets()) {
@ -74,7 +78,7 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
void
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
const query::SearchInfo& info,
const SearchInfo& info,
const void* query_data,
int64_t num_queries,
Timestamp timestamp,

View File

@ -18,7 +18,7 @@ namespace milvus::query {
void
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
const query::SearchInfo& info,
const SearchInfo& info,
const void* query_data,
int64_t num_queries,
Timestamp timestamp,

View File

@ -15,8 +15,8 @@
namespace milvus::query {
SubSearchResult
SearchOnIndex(const dataset::SearchDataset& search_dataset,
const knowhere::VecIndex& indexing,
const knowhere::Config& search_conf,
const index::VectorIndex& indexing,
const SearchInfo& search_conf,
const BitsetView& bitset) {
auto num_queries = search_dataset.num_queries;
auto topK = search_dataset.topk;
@ -27,15 +27,12 @@ SearchOnIndex(const dataset::SearchDataset& search_dataset,
// NOTE: VecIndex Query API forget to add const qualifier
// NOTE: use const_cast as a workaround
auto& indexing_nonconst = const_cast<knowhere::VecIndex&>(indexing);
auto& indexing_nonconst = const_cast<index::VectorIndex&>(indexing);
auto ans = indexing_nonconst.Query(dataset, search_conf, bitset);
auto dis = knowhere::GetDatasetDistance(ans);
auto uids = knowhere::GetDatasetIDs(ans);
SubSearchResult sub_qr(num_queries, topK, metric_type, round_decimal);
std::copy_n(dis, num_queries * topK, sub_qr.get_distances());
std::copy_n(uids, num_queries * topK, sub_qr.get_seg_offsets());
std::copy_n(ans->distances_.data(), num_queries * topK, sub_qr.get_distances());
std::copy_n(ans->seg_offsets_.data(), num_queries * topK, sub_qr.get_seg_offsets());
sub_qr.round_values();
return sub_qr;
}

View File

@ -15,13 +15,15 @@
#include "knowhere/index/VecIndex.h"
#include "query/SubSearchResult.h"
#include "query/helper.h"
#include "common/QueryInfo.h"
#include "index/VectorIndex.h"
namespace milvus::query {
SubSearchResult
SearchOnIndex(const dataset::SearchDataset& search_dataset,
const knowhere::VecIndex& indexing,
const knowhere::Config& search_conf,
const index::VectorIndex& indexing,
const SearchInfo& search_conf,
const BitsetView& bitset);
} // namespace milvus::query

View File

@ -11,11 +11,7 @@
#include <cmath>
#include "knowhere/index/VecIndex.h"
#include "knowhere/index/vector_index/ConfAdapter.h"
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "common/QueryInfo.h"
#include "query/SearchBruteForce.h"
#include "query/SearchOnSealed.h"
#include "query/helper.h"
@ -49,18 +45,13 @@ SearchOnSealedIndex(const Schema& schema,
auto conf = search_info.search_params_;
knowhere::SetMetaTopk(conf, search_info.topk_);
knowhere::SetMetaMetricType(conf, field_indexing->metric_type_);
auto index_type = field_indexing->indexing_->index_type();
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);
try {
adapter->CheckSearch(conf, index_type, field_indexing->indexing_->index_mode());
} catch (std::exception& e) {
AssertInfo(false, e.what());
}
return field_indexing->indexing_->Query(ds, conf, bitset);
auto vec_index = dynamic_cast<index::VectorIndex*>(field_indexing->indexing_.get());
auto index_type = vec_index->GetIndexType();
return vec_index->Query(ds, search_info, bitset);
}();
auto ids = knowhere::GetDatasetIDs(final);
float* distances = (float*)knowhere::GetDatasetDistance(final);
auto ids = final->seg_offsets_.data();
float* distances = final->distances_.data();
auto total_num = num_queries * topk;
if (round_decimal != -1) {

View File

@ -20,7 +20,7 @@ namespace milvus::query {
class SubSearchResult {
public:
SubSearchResult(int64_t num_queries, int64_t topk, const knowhere::MetricType& metric_type, int64_t round_decimal)
SubSearchResult(int64_t num_queries, int64_t topk, const MetricType& metric_type, int64_t round_decimal)
: num_queries_(num_queries),
topk_(topk),
round_decimal_(round_decimal),
@ -40,12 +40,12 @@ class SubSearchResult {
public:
static float
init_value(const knowhere::MetricType& metric_type) {
init_value(const MetricType& metric_type) {
return (is_descending(metric_type) ? -1 : 1) * std::numeric_limits<float>::max();
}
static bool
is_descending(const knowhere::MetricType& metric_type) {
is_descending(const MetricType& metric_type) {
// TODO(dog): more types
if (metric_type == knowhere::metric::IP) {
return true;

View File

@ -154,7 +154,7 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id, IndexFunc index_func, El
auto num_chunk = upper_div(row_count_, size_per_chunk);
std::deque<BitsetType> results;
using Index = scalar::ScalarIndex<T>;
using Index = index::ScalarIndex<T>;
for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
const Index& indexing = segment_.chunk_scalar_index<T>(field_id, chunk_id);
// NOTE: knowhere is not const-ready
@ -211,7 +211,7 @@ ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, IndexFunc index_func
// if sealed segment has loaded scalar index for this field, then index_barrier = 1 and data_barrier = 0
// in this case, sealed segment execute expr plan using scalar index
using Index = scalar::ScalarIndex<T>;
using Index = index::ScalarIndex<T>;
for (auto chunk_id = data_barrier; chunk_id < indexing_barrier; ++chunk_id) {
auto& indexing = segment_.chunk_scalar_index<T>(field_id, chunk_id);
auto this_size = const_cast<Index*>(&indexing)->Count();
@ -233,7 +233,7 @@ template <typename T>
auto
ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> BitsetType {
auto& expr = static_cast<UnaryRangeExprImpl<T>&>(expr_raw);
using Index = scalar::ScalarIndex<T>;
using Index = index::ScalarIndex<T>;
auto op = expr.op_type_;
auto val = expr.value_;
switch (op) {
@ -270,8 +270,8 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> Bi
case OpType::PrefixMatch: {
auto index_func = [val](Index* index) {
auto dataset = std::make_unique<knowhere::Dataset>();
dataset->Set(scalar::OPERATOR_TYPE, OpType::PrefixMatch);
dataset->Set(scalar::PREFIX_VALUE, val);
dataset->Set(milvus::index::OPERATOR_TYPE, OpType::PrefixMatch);
dataset->Set(milvus::index::PREFIX_VALUE, val);
return index->Query(std::move(dataset));
};
auto elem_func = [val, op](T x) { return Match(x, val, op); };
@ -291,7 +291,7 @@ template <typename T>
auto
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
auto& expr = static_cast<BinaryArithOpEvalRangeExprImpl<T>&>(expr_raw);
using Index = scalar::ScalarIndex<T>;
using Index = index::ScalarIndex<T>;
auto arith_op = expr.arith_op_;
auto right_operand = expr.right_operand_;
auto op = expr.op_type_;
@ -409,7 +409,7 @@ template <typename T>
auto
ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) -> BitsetType {
auto& expr = static_cast<BinaryRangeExprImpl<T>&>(expr_raw);
using Index = scalar::ScalarIndex<T>;
using Index = index::ScalarIndex<T>;
bool lower_inclusive = expr.lower_inclusive_;
bool upper_inclusive = expr.upper_inclusive_;
T val1 = expr.lower_value_;
@ -824,7 +824,7 @@ auto
ExecExprVisitor::ExecTermVisitorImpl<std::string>(TermExpr& expr_raw) -> BitsetType {
using T = std::string;
auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
using Index = scalar::ScalarIndex<T>;
using Index = index::ScalarIndex<T>;
const auto& terms = expr.terms_;
auto n = terms.size();
std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());

View File

@ -11,7 +11,6 @@
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/ConfAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "query/generated/VerifyPlanNodeVisitor.h"
namespace milvus::query {
@ -29,13 +28,14 @@ class VerifyPlanNodeVisitor : PlanNodeVisitor {
};
} // namespace impl
static knowhere::IndexType
static IndexType
InferIndexType(const Json& search_params) {
// ivf -> nprobe
// hnsw -> ef
// annoy -> search_k
static const std::map<std::string, knowhere::IndexType> key_list = [] {
std::map<std::string, knowhere::IndexType> list;
// ngtpanng / ngtonng -> max_search_edges / epsilon
static const std::map<std::string, IndexType> key_list = [] {
std::map<std::string, IndexType> list;
namespace ip = knowhere::indexparam;
namespace ie = knowhere::IndexEnum;
list.emplace(ip::NPROBE, ie::INDEX_FAISS_IVFFLAT);
@ -53,7 +53,7 @@ InferIndexType(const Json& search_params) {
PanicCodeInfo(ErrorCodeEnum::IllegalArgument, "failed to infer index type");
}
static knowhere::IndexType
static IndexType
InferBinaryIndexType(const Json& search_params) {
namespace ip = knowhere::indexparam;
namespace ie = knowhere::IndexEnum;

View File

@ -46,9 +46,7 @@ endif ()
target_link_libraries(milvus_segcore
milvus_query
milvus_common
${PLATFORM_LIBS}
pthread
${TBB}
${OpenMP_CXX_FLAGS}
# gperftools

View File

@ -15,9 +15,8 @@
#include "index/StringIndexSort.h"
#include "common/SystemProperty.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "segcore/FieldIndexing.h"
#include "index/VectorMemIndex.h"
namespace milvus::segcore {
@ -34,11 +33,10 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec
data_.grow_to_at_least(ack_end);
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
const auto& chunk = source->get_chunk(chunk_id);
// build index for chunk
auto indexing = std::make_unique<knowhere::IVF>();
auto indexing = std::make_unique<index::VectorMemIndex>(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2, IndexMode::MODE_CPU);
auto dataset = knowhere::GenDataset(source->get_size_per_chunk(), dim, chunk.data());
indexing->Train(dataset, conf);
indexing->AddWithoutIds(dataset, conf);
indexing->BuildWithDataset(dataset, conf);
data_[chunk_id] = std::move(indexing);
}
}
@ -53,8 +51,8 @@ VectorFieldIndexing::get_build_params() const {
auto base_params = config.build_params;
AssertInfo(base_params.count("nlist"), "Can't get nlist from index params");
knowhere::SetMetaDim(base_params, field_meta_.get_dim());
knowhere::SetMetaMetricType(base_params, metric_type);
base_params[knowhere::meta::DIM] = std::to_string(field_meta_.get_dim());
base_params[knowhere::meta::METRIC_TYPE] = metric_type;
return base_params;
}
@ -111,11 +109,11 @@ ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const
// build index for chunk
// TODO
if constexpr (std::is_same_v<T, std::string>) {
auto indexing = scalar::CreateStringIndexSort();
auto indexing = index::CreateStringIndexSort();
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
data_[chunk_id] = std::move(indexing);
} else {
auto indexing = scalar::CreateScalarIndexSort<T>();
auto indexing = index::CreateScalarIndexSort<T>();
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
data_[chunk_id] = std::move(indexing);
}

View File

@ -22,8 +22,8 @@
#include "AckResponder.h"
#include "InsertRecord.h"
#include "common/Schema.h"
#include "knowhere/index/VecIndex.h"
#include "segcore/SegcoreConfig.h"
#include "index/VectorIndex.h"
namespace milvus::segcore {
@ -53,7 +53,7 @@ class FieldIndexing {
return segcore_config_.get_chunk_rows();
}
virtual knowhere::Index*
virtual index::IndexBase*
get_chunk_indexing(int64_t chunk_id) const = 0;
protected:
@ -71,14 +71,14 @@ class ScalarFieldIndexing : public FieldIndexing {
BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;
// concurrent
scalar::ScalarIndex<T>*
index::ScalarIndex<T>*
get_chunk_indexing(int64_t chunk_id) const override {
Assert(!field_meta_.is_vector());
return data_.at(chunk_id).get();
}
private:
tbb::concurrent_vector<scalar::ScalarIndexPtr<T>> data_;
tbb::concurrent_vector<index::ScalarIndexPtr<T>> data_;
};
class VectorFieldIndexing : public FieldIndexing {
@ -89,7 +89,7 @@ class VectorFieldIndexing : public FieldIndexing {
BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;
// concurrent
knowhere::VecIndex*
index::IndexBase*
get_chunk_indexing(int64_t chunk_id) const override {
Assert(field_meta_.is_vector());
return data_.at(chunk_id).get();
@ -102,7 +102,7 @@ class VectorFieldIndexing : public FieldIndexing {
get_search_params(int top_k) const;
private:
tbb::concurrent_vector<std::unique_ptr<knowhere::VecIndex>> data_;
tbb::concurrent_vector<std::unique_ptr<index::VectorIndex>> data_;
};
std::unique_ptr<FieldIndexing>

View File

@ -20,22 +20,22 @@
#include "common/Types.h"
#include "exceptions/EasyAssert.h"
#include "knowhere/index/VecIndex.h"
#include "index/VectorIndex.h"
namespace milvus::segcore {
struct SealedIndexingEntry {
knowhere::MetricType metric_type_;
knowhere::VecIndexPtr indexing_;
MetricType metric_type_;
index::IndexBasePtr indexing_;
};
using SealedIndexingEntryPtr = std::unique_ptr<SealedIndexingEntry>;
struct SealedIndexingRecord {
void
append_field_indexing(FieldId field_id, const knowhere::MetricType& metric_type, knowhere::VecIndexPtr indexing) {
append_field_indexing(FieldId field_id, const MetricType& metric_type, index::IndexBasePtr indexing) {
auto ptr = std::make_unique<SealedIndexingEntry>();
ptr->indexing_ = indexing;
ptr->indexing_ = std::move(indexing);
ptr->metric_type_ = metric_type;
std::unique_lock lck(mutex_);
field_indexings_[field_id] = std::move(ptr);

View File

@ -31,7 +31,7 @@ class SegcoreConfig {
SegcoreConfig() {
// hard code configurations for small index
SmallIndexConf sub_conf;
sub_conf.build_params["nlist"] = nlist_;
sub_conf.build_params["nlist"] = std::to_string(nlist_);
sub_conf.search_params["nprobe"] = nprobe_;
sub_conf.index_type = "IVF";
table_[knowhere::metric::L2] = sub_conf;
@ -50,7 +50,7 @@ class SegcoreConfig {
parse_from(const std::string& string_path);
const SmallIndexConf&
at(const knowhere::MetricType& metric_type) const {
at(const MetricType& metric_type) const {
Assert(table_.count(metric_type));
return table_.at(metric_type);
}
@ -76,7 +76,7 @@ class SegcoreConfig {
}
void
set_small_index_config(const knowhere::MetricType& metric_type, const SmallIndexConf& small_index_conf) {
set_small_index_config(const MetricType& metric_type, const SmallIndexConf& small_index_conf) {
table_[metric_type] = small_index_conf;
}

View File

@ -16,11 +16,8 @@
#include <boost/iterator/counting_iterator.hpp>
#include "common/Consts.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "query/PlanNode.h"
#include "query/SearchOnSealed.h"
#include "query/generated/ExecPlanNodeVisitor.h"
#include "segcore/Reduce.h"
#include "segcore/SegmentGrowingImpl.h"
#include "segcore/Utils.h"
@ -176,7 +173,7 @@ SegmentGrowingImpl::num_chunk() const {
}
void
SegmentGrowingImpl::vector_search(query::SearchInfo& search_info,
SegmentGrowingImpl::vector_search(SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,

View File

@ -109,7 +109,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
}
// deprecated
const knowhere::Index*
const index::IndexBase*
chunk_index_impl(FieldId field_id, int64_t chunk_id) const final {
return indexing_record_.get_field_indexing(field_id).get_chunk_indexing(chunk_id);
}
@ -174,7 +174,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const override;
void
vector_search(query::SearchInfo& search_info,
vector_search(SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,

View File

@ -27,10 +27,12 @@
#include "common/LoadInfo.h"
#include "common/BitsetView.h"
#include "common/QueryResult.h"
#include "common/QueryInfo.h"
#include "query/Plan.h"
#include "query/PlanNode.h"
#include "pb/schema.pb.h"
#include "pb/segcore.pb.h"
#include "index/IndexInfo.h"
namespace milvus::segcore {
@ -91,10 +93,10 @@ class SegmentInternalInterface : public SegmentInterface {
}
template <typename T>
const scalar::ScalarIndex<T>&
const index::ScalarIndex<T>&
chunk_scalar_index(FieldId field_id, int64_t chunk_id) const {
static_assert(IsScalar<T>);
using IndexType = scalar::ScalarIndex<T>;
using IndexType = index::ScalarIndex<T>;
auto base_ptr = chunk_index_impl(field_id, chunk_id);
auto ptr = dynamic_cast<const IndexType*>(base_ptr);
AssertInfo(ptr, "entry mismatch");
@ -129,7 +131,7 @@ class SegmentInternalInterface : public SegmentInterface {
public:
virtual void
vector_search(query::SearchInfo& search_info,
vector_search(SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,
@ -176,7 +178,7 @@ class SegmentInternalInterface : public SegmentInterface {
chunk_data_impl(FieldId field_id, int64_t chunk_id) const = 0;
// internal API: return chunk_index in span, support scalar index only
virtual const knowhere::Index*
virtual const index::IndexBase*
chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0;
// TODO remove system fields

View File

@ -23,7 +23,7 @@ namespace milvus::segcore {
class SegmentSealed : public SegmentInternalInterface {
public:
virtual void
LoadIndex(const LoadIndexInfo& info) = 0;
LoadIndex(const index::LoadIndexInfo& info) = 0;
virtual void
LoadSegmentMeta(const milvus::proto::segcore::LoadSegmentMeta& meta) = 0;
virtual void

View File

@ -39,31 +39,7 @@ SegmentSealedImpl::PreDelete(int64_t size) {
}
// Debug helper: writes every entry of `m` to std::cout as "key: value",
// one entry per line in the map's key order, flushing after each line.
void
print(const std::map<std::string, std::string>& m) {
    for (auto entry = m.cbegin(); entry != m.cend(); ++entry) {
        std::cout << entry->first << ": " << entry->second << std::endl;
    }
}
// Debug helper: dumps a LoadIndexInfo to std::cout between two banner lines.
// Prints field_id and field_type, then the index_params map via the map
// overload of print().
void
print(const LoadIndexInfo& info) {
    const char* const banner = "------------------LoadIndexInfo----------------------";
    std::cout << banner << std::endl;
    std::cout << "field_id: " << info.field_id << std::endl;
    std::cout << "field_type: " << info.field_type << std::endl;
    std::cout << "index_params:" << std::endl;
    print(info.index_params);
    std::cout << banner << std::endl;
}
// Debug helper: dumps a LoadFieldDataInfo to std::cout between two banner
// lines. Only field_id is printed.
void
print(const LoadFieldDataInfo& info) {
    const char* const banner = "------------------LoadFieldDataInfo----------------------";
    std::cout << banner << std::endl;
    std::cout << "field_id: " << info.field_id << std::endl;
    std::cout << banner << std::endl;
}
void
SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
SegmentSealedImpl::LoadIndex(const index::LoadIndexInfo& info) {
// print(info);
// NOTE: lock only when data is ready to avoid starvation
auto field_id = FieldId(info.field_id);
@ -77,15 +53,14 @@ SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
}
void
SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
SegmentSealedImpl::LoadVecIndex(const index::LoadIndexInfo& info) {
// NOTE: lock only when data is ready to avoid starvation
auto field_id = FieldId(info.field_id);
auto& field_meta = schema_->operator[](field_id);
auto index = std::dynamic_pointer_cast<knowhere::VecIndex>(info.index);
AssertInfo(info.index_params.count("metric_type"), "Can't get metric_type in index_params");
auto metric_type = info.index_params.at("metric_type");
auto row_count = index->Count();
auto row_count = info.index->Count();
AssertInfo(row_count > 0, "Index count is 0");
std::unique_lock lck(mutex_);
@ -101,7 +76,8 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
std::to_string(row_count_opt_.value()) + ")");
}
AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready");
vector_indexings_.append_field_indexing(field_id, metric_type, index);
vector_indexings_.append_field_indexing(field_id, metric_type,
std::move(const_cast<index::LoadIndexInfo&>(info).index));
set_bit(index_ready_bitset_, field_id, true);
update_row_count(row_count);
@ -109,13 +85,12 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
}
void
SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
SegmentSealedImpl::LoadScalarIndex(const index::LoadIndexInfo& info) {
// NOTE: lock only when data is ready to avoid starvation
auto field_id = FieldId(info.field_id);
auto& field_meta = schema_->operator[](field_id);
auto index = std::dynamic_pointer_cast<scalar::IndexBase>(info.index);
auto row_count = index->Count();
auto row_count = info.index->Count();
AssertInfo(row_count > 0, "Index count is 0");
std::unique_lock lck(mutex_);
@ -131,21 +106,21 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
std::to_string(row_count_opt_.value()) + ")");
}
scalar_indexings_[field_id] = index;
scalar_indexings_[field_id] = std::move(const_cast<index::LoadIndexInfo&>(info).index);
// reverse pk from scalar index and set pks to offset
if (schema_->get_primary_field_id() == field_id) {
AssertInfo(field_id.get() != -1, "Primary key is -1");
AssertInfo(insert_record_.empty_pks(), "already exists");
switch (field_meta.get_data_type()) {
case DataType::INT64: {
auto int64_index = std::dynamic_pointer_cast<scalar::ScalarIndex<int64_t>>(info.index);
auto int64_index = dynamic_cast<index::ScalarIndex<int64_t>*>(scalar_indexings_[field_id].get());
for (int i = 0; i < row_count; ++i) {
insert_record_.insert_pk(int64_index->Reverse_Lookup(i), i);
}
break;
}
case DataType::VARCHAR: {
auto string_index = std::dynamic_pointer_cast<scalar::ScalarIndex<std::string>>(info.index);
auto string_index = dynamic_cast<index::ScalarIndex<std::string>*>(scalar_indexings_[field_id].get());
for (int i = 0; i < row_count; ++i) {
insert_record_.insert_pk(string_index->Reverse_Lookup(i), i);
}
@ -300,7 +275,7 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
return field_data->get_span_base(0);
}
const knowhere::Index*
const index::IndexBase*
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(),
"Cannot find scalar_indexing with field_id: " + std::to_string(field_id.get()));
@ -349,7 +324,7 @@ SegmentSealedImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Tim
}
void
SegmentSealedImpl::vector_search(query::SearchInfo& search_info,
SegmentSealedImpl::vector_search(SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,

View File

@ -27,6 +27,7 @@
#include "SealedIndexingRecord.h"
#include "SegmentSealed.h"
#include "TimestampIndex.h"
#include "index/ScalarIndex.h"
namespace milvus::segcore {
@ -34,7 +35,7 @@ class SegmentSealedImpl : public SegmentSealed {
public:
explicit SegmentSealedImpl(SchemaPtr schema, int64_t segment_id);
void
LoadIndex(const LoadIndexInfo& info) override;
LoadIndex(const index::LoadIndexInfo& info) override;
void
LoadFieldData(const LoadFieldDataInfo& info) override;
void
@ -97,7 +98,7 @@ class SegmentSealedImpl : public SegmentSealed {
SpanBase
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
const knowhere::Index*
const index::IndexBase*
chunk_index_impl(FieldId field_id, int64_t chunk_id) const override;
// Calculate: output[i] = Vec[seg_offset[i]],
@ -141,7 +142,7 @@ class SegmentSealedImpl : public SegmentSealed {
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const override;
void
vector_search(query::SearchInfo& search_info,
vector_search(SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,
@ -171,10 +172,10 @@ class SegmentSealedImpl : public SegmentSealed {
search_ids(const BitsetType& view, Timestamp timestamp) const override;
void
LoadVecIndex(const LoadIndexInfo& info);
LoadVecIndex(const index::LoadIndexInfo& info);
void
LoadScalarIndex(const LoadIndexInfo& info);
LoadScalarIndex(const index::LoadIndexInfo& info);
private:
// segment loading state
@ -187,7 +188,7 @@ class SegmentSealedImpl : public SegmentSealed {
std::optional<int64_t> row_count_opt_;
// scalar field index
std::unordered_map<FieldId, knowhere::IndexPtr> scalar_indexings_;
std::unordered_map<FieldId, index::IndexBasePtr> scalar_indexings_;
// vector field index
SealedIndexingRecord vector_indexings_;

View File

@ -11,11 +11,11 @@
#pragma once
#include <knowhere/index/vector_index/helpers/IndexParameter.h>
#include "common/Types.h"
namespace milvus::segcore {
static inline bool
PositivelyRelated(const knowhere::MetricType& metric_type) {
PositivelyRelated(const MetricType& metric_type) {
return metric_type == knowhere::metric::IP;
}
} // namespace milvus::segcore

View File

@ -259,7 +259,7 @@ MergeDataArray(std::vector<std::pair<milvus::SearchResult*, int64_t>>& result_of
// TODO: split scalar IndexBase with knowhere::Index
std::unique_ptr<DataArray>
ReverseDataFromIndex(const knowhere::Index* index,
ReverseDataFromIndex(const index::IndexBase* index,
const int64_t* seg_offsets,
int64_t count,
const FieldMeta& field_meta) {
@ -271,7 +271,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
auto scalar_array = data_array->mutable_scalars();
switch (data_type) {
case DataType::BOOL: {
using IndexType = scalar::ScalarIndex<bool>;
using IndexType = index::ScalarIndex<bool>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<bool> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -282,7 +282,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::INT8: {
using IndexType = scalar::ScalarIndex<int8_t>;
using IndexType = index::ScalarIndex<int8_t>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<int8_t> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -293,7 +293,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::INT16: {
using IndexType = scalar::ScalarIndex<int16_t>;
using IndexType = index::ScalarIndex<int16_t>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<int16_t> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -304,7 +304,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::INT32: {
using IndexType = scalar::ScalarIndex<int32_t>;
using IndexType = index::ScalarIndex<int32_t>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<int32_t> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -315,7 +315,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::INT64: {
using IndexType = scalar::ScalarIndex<int64_t>;
using IndexType = index::ScalarIndex<int64_t>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<int64_t> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -326,7 +326,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::FLOAT: {
using IndexType = scalar::ScalarIndex<float>;
using IndexType = index::ScalarIndex<float>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<float> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -337,7 +337,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::DOUBLE: {
using IndexType = scalar::ScalarIndex<double>;
using IndexType = index::ScalarIndex<double>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<double> raw_data(count);
for (int64_t i = 0; i < count; ++i) {
@ -348,7 +348,7 @@ ReverseDataFromIndex(const knowhere::Index* index,
break;
}
case DataType::VARCHAR: {
using IndexType = scalar::ScalarIndex<std::string>;
using IndexType = index::ScalarIndex<std::string>;
auto ptr = dynamic_cast<const IndexType*>(index);
std::vector<std::string> raw_data(count);
for (int64_t i = 0; i < count; ++i) {

View File

@ -18,9 +18,9 @@
#include <vector>
#include "common/QueryResult.h"
#include "knowhere/index/Index.h"
#include "segcore/DeletedRecord.h"
#include "segcore/InsertRecord.h"
#include "index/Index.h"
namespace milvus::segcore {
@ -56,7 +56,7 @@ get_deleted_bitmap(int64_t del_barrier,
Timestamp query_timestamp);
std::unique_ptr<DataArray>
ReverseDataFromIndex(const knowhere::Index* index,
ReverseDataFromIndex(const index::IndexBase* index,
const int64_t* seg_offsets,
int64_t count,
const FieldMeta& field_meta);

View File

@ -10,17 +10,19 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "common/CDataType.h"
#include "common/LoadInfo.h"
#include "exceptions/EasyAssert.h"
#include "common/FieldMeta.h"
#include "common/Utils.h"
#include "index/Meta.h"
#include "index/Utils.h"
#include "index/IndexFactory.h"
#include "knowhere/common/BinarySet.h"
#include "knowhere/index/VecIndexFactory.h"
#include "storage/Util.h"
#include "segcore/load_index_c.h"
#include "pb/index_cgo_msg.pb.h"
CStatus
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info) {
try {
auto load_index_info = std::make_unique<LoadIndexInfo>();
auto load_index_info = std::make_unique<milvus::index::LoadIndexInfo>();
*c_load_index_info = load_index_info.release();
auto status = CStatus();
status.error_code = Success;
@ -36,14 +38,14 @@ NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info) {
void
DeleteLoadIndexInfo(CLoadIndexInfo c_load_index_info) {
auto info = (LoadIndexInfo*)c_load_index_info;
auto info = (milvus::index::LoadIndexInfo*)c_load_index_info;
delete info;
}
CStatus
AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* c_index_key, const char* c_index_value) {
try {
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
std::string index_key(c_index_key);
std::string index_value(c_index_value);
load_index_info->index_params[index_key] = index_value;
@ -61,11 +63,19 @@ AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* c_index_key, cons
}
CStatus
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataType field_type) {
AppendFieldInfo(CLoadIndexInfo c_load_index_info,
int64_t collection_id,
int64_t partition_id,
int64_t segment_id,
int64_t field_id,
enum CDataType field_type) {
try {
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
load_index_info->collection_id = collection_id;
load_index_info->partition_id = partition_id;
load_index_info->segment_id = segment_id;
load_index_info->field_id = field_id;
load_index_info->field_type = field_type;
load_index_info->field_type = milvus::DataType(field_type);
auto status = CStatus();
status.error_code = Success;
@ -82,23 +92,39 @@ AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataTy
CStatus
appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
try {
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
auto binary_set = (knowhere::BinarySet*)c_binary_set;
auto& index_params = load_index_info->index_params;
bool find_index_type = index_params.count("index_type") > 0 ? true : false;
bool find_index_mode = index_params.count("index_mode") > 0 ? true : false;
AssertInfo(find_index_type == true, "Can't find index type in index_params");
knowhere::IndexMode mode;
if (find_index_mode) {
std::string index_mode = index_params["index_mode"];
mode = (index_mode == "CPU" || index_mode == "cpu") ? knowhere::IndexMode::MODE_CPU
: knowhere::IndexMode::MODE_GPU;
} else {
mode = knowhere::IndexMode::MODE_CPU;
milvus::index::CreateIndexInfo index_info;
index_info.field_type = load_index_info->field_type;
// get index type
AssertInfo(index_params.find("index_type") != index_params.end(), "index type is empty");
index_info.index_type = index_params.at("index_type");
// get metric type
AssertInfo(index_params.find("metric_type") != index_params.end(), "metric type is empty");
index_info.metric_type = index_params.at("metric_type");
// set default index mode
index_info.index_mode = milvus::IndexMode::MODE_CPU;
if (index_params.count("index_mode")) {
index_info.index_mode = milvus::index::GetIndexMode(index_params["index_mode"]);
}
load_index_info->index =
knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_params["index_type"], mode);
load_index_info->index->Load(*binary_set);
// init file manager
milvus::storage::FieldDataMeta field_meta{load_index_info->collection_id, load_index_info->partition_id,
load_index_info->segment_id, load_index_info->field_id};
milvus::storage::IndexMeta index_meta{load_index_info->segment_id, load_index_info->field_id,
load_index_info->index_build_id, load_index_info->index_version};
auto file_manager = milvus::storage::CreateFileManager(index_info.index_type, field_meta, index_meta);
auto config = milvus::index::ParseConfigFromIndexParams(load_index_info->index_params);
config["index_files"] = load_index_info->index_files;
load_index_info->index = milvus::index::IndexFactory::GetInstance().CreateIndex(index_info, file_manager);
load_index_info->index->Load(*binary_set, config);
auto status = CStatus();
status.error_code = Success;
status.error_msg = "";
@ -114,14 +140,23 @@ appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
CStatus
appendScalarIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
try {
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
auto field_type = load_index_info->field_type;
auto binary_set = (knowhere::BinarySet*)c_binary_set;
auto& index_params = load_index_info->index_params;
bool find_index_type = index_params.count("index_type") > 0 ? true : false;
AssertInfo(find_index_type == true, "Can't find index type in index_params");
load_index_info->index =
milvus::scalar::IndexFactory::GetInstance().CreateIndex(field_type, index_params["index_type"]);
milvus::index::CreateIndexInfo index_info;
index_info.field_type = milvus::DataType(field_type);
index_info.index_type = index_params["index_type"];
// set default index mode
index_info.index_mode = milvus::IndexMode::MODE_CPU;
if (index_params.count("index_mode")) {
index_info.index_mode = milvus::index::GetIndexMode(index_params["index_mode"]);
}
load_index_info->index = milvus::index::IndexFactory::GetInstance().CreateIndex(index_info, nullptr);
load_index_info->index->Load(*binary_set);
auto status = CStatus();
status.error_code = Success;
@ -137,10 +172,78 @@ appendScalarIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
CStatus
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
auto field_type = load_index_info->field_type;
if (milvus::IsVectorType(field_type)) {
if (milvus::datatype_is_vector(field_type)) {
return appendVecIndex(c_load_index_info, c_binary_set);
}
return appendScalarIndex(c_load_index_info, c_binary_set);
}
// Appends one index file path to the load-index descriptor's `index_files` list.
//
// c_load_index_info: opaque handle, cast to milvus::index::LoadIndexInfo.
// c_file_path:       NUL-terminated path to record; must not be null.
//
// Returns a status with error_code Success and an empty message, or
// UnexpectedError with a strdup'ed message when the path is null or a
// std::exception is thrown while recording it.
CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) {
    // Building a std::string from a null pointer is undefined behaviour rather
    // than an exception, so the try/catch below cannot guard against it.
    if (c_file_path == nullptr) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup("index file path is null");
        return status;
    }

    try {
        auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
        // Construct the string in place instead of copying a temporary.
        load_index_info->index_files.emplace_back(c_file_path);

        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}
// Stores the index identifiers on the load-index descriptor and merges the
// serialized index parameters into its `index_params` map.
//
// index_id / build_id / version: copied to index_id, index_build_id and
//                                index_version respectively.
// c_index_params: serialized milvus::proto::indexcgo::IndexParams, decoded with
//                 milvus::index::ParseFromString; each key/value pair overwrites
//                 any existing entry with the same key.
//
// Returns Success with an empty message, or UnexpectedError with a strdup'ed
// message when a std::exception is thrown.
CStatus
AppendIndexInfo(
    CLoadIndexInfo c_load_index_info, int64_t index_id, int64_t build_id, int64_t version, const char* c_index_params) {
    auto result = CStatus();
    try {
        auto info = (milvus::index::LoadIndexInfo*)c_load_index_info;
        info->index_id = index_id;
        info->index_build_id = build_id;
        info->index_version = version;

        milvus::proto::indexcgo::IndexParams parsed_params;
        milvus::index::ParseFromString(parsed_params, c_index_params);

        for (const auto& entry : parsed_params.params()) {
            info->index_params[entry.key()] = entry.value();
        }

        result.error_code = Success;
        result.error_msg = "";
    } catch (std::exception& e) {
        result.error_code = UnexpectedError;
        result.error_msg = strdup(e.what());
    }
    return result;
}
// Removes the local on-disk directory of a loaded index.
//
// The directory prefix is derived from the descriptor's index_build_id and
// index_version. The directory is only removed when the code is compiled with
// BUILD_DISK_ANN; otherwise the prefix is computed and nothing is deleted.
//
// Returns Success with an empty message, or UnexpectedError with a strdup'ed
// message when a std::exception is thrown.
CStatus
CleanLoadedIndex(CLoadIndexInfo c_load_index_info) {
    auto result = CStatus();
    try {
        auto info = (milvus::index::LoadIndexInfo*)c_load_index_info;
        auto local_prefix = milvus::storage::GenLocalIndexPathPrefix(info->index_build_id, info->index_version);
#ifdef BUILD_DISK_ANN
        milvus::storage::LocalChunkManager::GetInstance().RemoveDir(local_prefix);
#endif
        result.error_code = Success;
        result.error_msg = "";
    } catch (std::exception& e) {
        result.error_code = UnexpectedError;
        result.error_msg = strdup(e.what());
    }
    return result;
}

View File

@ -17,7 +17,7 @@ extern "C" {
#include <stdint.h>
#include <stdlib.h>
#include "common/vector_index_c.h"
#include "common/binary_set_c.h"
#include "common/type_c.h"
#include "segcore/collection_c.h"
@ -33,11 +33,26 @@ CStatus
AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* index_key, const char* index_value);
CStatus
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataType field_type);
AppendFieldInfo(CLoadIndexInfo c_load_index_info,
int64_t collection_id,
int64_t partition_id,
int64_t segment_id,
int64_t field_id,
enum CDataType field_type);
CStatus
AppendIndexInfo(
CLoadIndexInfo c_load_index_info, int64_t index_id, int64_t build_id, int64_t version, const char* index_params);
CStatus
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set);
CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path);
CStatus
CleanLoadedIndex(CLoadIndexInfo c_load_index_info);
#ifdef __cplusplus
}
#endif

View File

@ -20,6 +20,7 @@
#include "segcore/SegmentSealedImpl.h"
#include "segcore/SimilarityCorelation.h"
#include "segcore/segment_c.h"
#include "index/IndexInfo.h"
#include "google/protobuf/text_format.h"
////////////////////////////// common interfaces //////////////////////////////
@ -238,7 +239,7 @@ UpdateSealedSegmentIndex(CSegmentInterface c_segment, CLoadIndexInfo c_load_inde
auto segment_interface = reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
auto segment = dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
AssertInfo(segment != nullptr, "segment conversion failed");
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
auto load_index_info = (milvus::index::LoadIndexInfo*)c_load_index_info;
segment->LoadIndex(*load_index_info);
return milvus::SuccessCStatus();
} catch (std::exception& e) {

View File

@ -34,10 +34,23 @@ set(STORAGE_FILES
IndexData.cpp
InsertData.cpp
Event.cpp
)
storage_c.cpp)
# The chunk managers and the disk file manager are only compiled into
# milvus_storage when the build is configured with BUILD_DISK_ANN=ON.
if ( BUILD_DISK_ANN STREQUAL "ON" )
    list( APPEND STORAGE_FILES
        LocalChunkManager.cpp
        MinioChunkManager.cpp
        DiskFileManagerImpl.cpp
        )
endif ()
add_library(milvus_storage SHARED ${STORAGE_FILES})
#target_link_libraries( milvus_storage PUBLIC milvus_common boost_system boost_filesystem aws-cpp-sdk-s3 pthread)
target_link_libraries( milvus_storage PUBLIC milvus_common pthread)
# milvus_common and pthread are always linked. The boost and AWS S3 libraries
# are added between them only when the build is configured with BUILD_DISK_ANN=ON.
set( MILVUS_STORAGE_DISK_ANN_LIBS )
if ( BUILD_DISK_ANN STREQUAL "ON" )
    set( MILVUS_STORAGE_DISK_ANN_LIBS boost_system boost_filesystem aws-cpp-sdk-s3 )
endif()
target_link_libraries( milvus_storage
    PUBLIC
        milvus_common
        ${MILVUS_STORAGE_DISK_ANN_LIBS}
        pthread
    )
if(NOT CMAKE_INSTALL_PREFIX)
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR})

View File

@ -19,14 +19,14 @@
#include <mutex>
#include "common/Consts.h"
#include "storage/DiskANNFileManagerImpl.h"
#include "log/Log.h"
#include "config/ConfigKnowhere.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/LocalChunkManager.h"
#include "storage/MinioChunkManager.h"
#include "storage/Exception.h"
#include "log/Log.h"
#include "storage/FieldData.h"
#include "storage/IndexData.h"
#include "config/ConfigKnowhere.h"
#include "storage/Util.h"
#define FILEMANAGER_TRY try {
@ -58,22 +58,22 @@ using WriteLock = std::lock_guard<std::shared_mutex>;
namespace milvus::storage {
DiskANNFileManagerImpl::DiskANNFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta)
DiskFileManagerImpl::DiskFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta)
: field_meta_(field_mata), index_meta_(index_meta) {
}
DiskANNFileManagerImpl::~DiskANNFileManagerImpl() {
DiskFileManagerImpl::~DiskFileManagerImpl() {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
local_chunk_manager.RemoveDir(GetLocalIndexPathPrefixWithBuildID(index_meta_.build_id));
}
bool
DiskANNFileManagerImpl::LoadFile(const std::string& file) noexcept {
DiskFileManagerImpl::LoadFile(const std::string& file) noexcept {
return true;
}
bool
DiskANNFileManagerImpl::AddFile(const std::string& file) noexcept {
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto& remote_chunk_manager = MinioChunkManager::GetInstance();
FILEMANAGER_TRY
@ -119,7 +119,7 @@ DiskANNFileManagerImpl::AddFile(const std::string& file) noexcept {
} // namespace knowhere
void
DiskANNFileManagerImpl::CacheIndexToDisk(std::vector<std::string> remote_files) {
DiskFileManagerImpl::CacheIndexToDisk(std::vector<std::string> remote_files) {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto& remote_chunk_manager = MinioChunkManager::GetInstance();
@ -157,30 +157,30 @@ DiskANNFileManagerImpl::CacheIndexToDisk(std::vector<std::string> remote_files)
}
std::string
DiskANNFileManagerImpl::GetFileName(const std::string& localfile) {
DiskFileManagerImpl::GetFileName(const std::string& localfile) {
boost::filesystem::path localPath(localfile);
return localPath.filename().string();
}
std::string
DiskANNFileManagerImpl::GetRemoteIndexObjectPrefix() {
return "files/" + std::string(INDEX_ROOT_PATH) + "/" + std::to_string(index_meta_.build_id) + "/" +
std::to_string(index_meta_.index_version) + "/" + std::to_string(field_meta_.partition_id) + "/" +
std::to_string(field_meta_.segment_id);
DiskFileManagerImpl::GetRemoteIndexObjectPrefix() {
return ChunkMangerConfig::GetRemoteRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
std::to_string(index_meta_.build_id) + "/" + std::to_string(index_meta_.index_version) + "/" +
std::to_string(field_meta_.partition_id) + "/" + std::to_string(field_meta_.segment_id);
}
std::string
DiskANNFileManagerImpl::GetLocalIndexObjectPrefix() {
DiskFileManagerImpl::GetLocalIndexObjectPrefix() {
return GenLocalIndexPathPrefix(index_meta_.build_id, index_meta_.index_version);
}
std::string
DiskANNFileManagerImpl::GetLocalRawDataObjectPrefix() {
return GenRawDataPathPrefix(field_meta_.segment_id, field_meta_.field_id);
DiskFileManagerImpl::GetLocalRawDataObjectPrefix() {
return GenFieldRawDataPathPrefix(field_meta_.segment_id, field_meta_.field_id);
}
bool
DiskANNFileManagerImpl::RemoveFile(const std::string& file) noexcept {
DiskFileManagerImpl::RemoveFile(const std::string& file) noexcept {
// remove local file
bool localExist = false;
auto& local_chunk_manager = LocalChunkManager::GetInstance();
@ -213,7 +213,7 @@ DiskANNFileManagerImpl::RemoveFile(const std::string& file) noexcept {
}
std::optional<bool>
DiskANNFileManagerImpl::IsExisted(const std::string& file) noexcept {
DiskFileManagerImpl::IsExisted(const std::string& file) noexcept {
bool isExist = false;
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto& remote_chunk_manager = MinioChunkManager::GetInstance();

View File

@ -27,11 +27,11 @@
namespace milvus::storage {
class DiskANNFileManagerImpl : public FileManagerImpl {
class DiskFileManagerImpl : public FileManagerImpl {
public:
explicit DiskANNFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta);
explicit DiskFileManagerImpl(const FieldDataMeta& field_mata, const IndexMeta& index_meta);
virtual ~DiskANNFileManagerImpl();
virtual ~DiskFileManagerImpl();
virtual bool
LoadFile(const std::string& filename) noexcept;
@ -48,7 +48,7 @@ class DiskANNFileManagerImpl : public FileManagerImpl {
public:
virtual std::string
GetName() const {
return "DiskANNFileManagerImpl";
return "DiskFileManagerImpl";
}
std::string
@ -61,7 +61,7 @@ class DiskANNFileManagerImpl : public FileManagerImpl {
GetLocalRawDataObjectPrefix();
std::map<std::string, int64_t>
GetRemotePaths() const {
GetRemotePathsToFileSize() const {
return remote_paths_to_size_;
}
@ -101,6 +101,6 @@ class DiskANNFileManagerImpl : public FileManagerImpl {
std::map<std::string, int64_t> remote_paths_to_size_;
};
using DiskANNFileManagerImplPtr = std::shared_ptr<DiskANNFileManagerImpl>;
using DiskANNFileManagerImplPtr = std::shared_ptr<DiskFileManagerImpl>;
} // namespace milvus::storage

View File

@ -42,7 +42,7 @@ class LocalChunkManager : public ChunkManager {
static LocalChunkManager&
GetInstance() {
// thread-safe enough after c++ 11
static LocalChunkManager instance(ChunkMangerConfig::GetLocalBucketName());
static LocalChunkManager instance(ChunkMangerConfig::GetLocalRootPath());
return instance;
}

View File

@ -16,6 +16,8 @@
#include <fstream>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/core/auth/AWSCredentialsProviderChain.h>
#include <aws/core/auth/STSCredentialsProvider.h>
#include <aws/s3/model/CreateBucketRequest.h>
#include <aws/s3/model/DeleteBucketRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
@ -66,7 +68,8 @@ MinioChunkManager::MinioChunkManager(const std::string& endpoint,
const std::string& access_key,
const std::string& access_value,
const std::string& bucket_name,
bool secure)
bool secure,
bool use_iam)
: default_bucket_name_(bucket_name) {
Aws::InitAPI(sdk_options_);
Aws::Client::ClientConfiguration config;
@ -80,9 +83,20 @@ MinioChunkManager::MinioChunkManager(const std::string& endpoint,
config.verifySSL = false;
}
client_ = std::make_shared<Aws::S3::S3Client>(
Aws::Auth::AWSCredentials(ConvertToAwsString(access_key), ConvertToAwsString(access_value)), config,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
if (use_iam) {
auto provider = std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
client_ = std::make_shared<Aws::S3::S3Client>(provider, config,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
LOG_SEGCORE_INFO_C << "use iam mode, credentials{ access_id:"
<< provider->GetAWSCredentials().GetAWSAccessKeyId()
<< " access_key:" << provider->GetAWSCredentials().GetAWSSecretKey()
<< " token:" << provider->GetAWSCredentials().GetSessionToken() << "}";
} else {
client_ = std::make_shared<Aws::S3::S3Client>(
Aws::Auth::AWSCredentials(ConvertToAwsString(access_key), ConvertToAwsString(access_value)), config,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
}
LOG_SEGCORE_INFO_C << "init MinioChunkManager with parameter[endpoint: '" << endpoint << "', access_key:'"
<< access_key << "', access_value:'" << access_value << "', default_bucket_name:'" << bucket_name

View File

@ -38,7 +38,8 @@ class MinioChunkManager : public RemoteChunkManager {
const std::string& access_key,
const std::string& access_value,
const std::string& default_bucket_name,
bool sercure = false);
bool serure = false,
bool use_iam = false);
MinioChunkManager(const MinioChunkManager&);
MinioChunkManager&
@ -52,7 +53,7 @@ class MinioChunkManager : public RemoteChunkManager {
// thread-safe enough after c++ 11
static MinioChunkManager instance(ChunkMangerConfig::GetAddress(), ChunkMangerConfig::GetAccessKey(),
ChunkMangerConfig::GetAccessValue(), ChunkMangerConfig::GetBucketName(),
ChunkMangerConfig::GetUseSSL());
ChunkMangerConfig::GetUseSSL(), ChunkMangerConfig::GetUseIAM());
return instance;
}

View File

@ -19,6 +19,10 @@
#include "common/Consts.h"
#include "config/ConfigChunkManager.h"
#ifdef BUILD_DISK_ANN
#include "storage/DiskFileManagerImpl.h"
#endif
namespace milvus::storage {
StorageType
@ -320,26 +324,51 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data, DataType data_typ
std::string
GenLocalIndexPathPrefix(int64_t build_id, int64_t index_version) {
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
std::to_string(build_id) + "/" + std::to_string(index_version) + "/";
}
std::string
GetLocalIndexPathPrefixWithBuildID(int64_t build_id) {
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
std::to_string(build_id);
}
std::string
GenRawDataPathPrefix(int64_t segment_id, int64_t field_id) {
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
GenFieldRawDataPathPrefix(int64_t segment_id, int64_t field_id) {
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
std::to_string(segment_id) + "/" + std::to_string(field_id) + "/";
}
std::string
GetLocalRawDataPathPrefixWithBuildID(int64_t segment_id) {
return milvus::ChunkMangerConfig::GetLocalBucketName() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
GetSegmentRawDataPathPrefix(int64_t segment_id) {
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
std::to_string(segment_id);
}
// Returns the index types that are treated as disk-based indexes.
// The list is built once on first use; each call hands back a copy.
std::vector<IndexType>
DISK_LIST() {
    static std::vector<IndexType> disk_index_types = {knowhere::IndexEnum::INDEX_DISKANN};
    return disk_index_types;
}
// Reports whether `index_type` is one of the types returned by DISK_LIST().
bool
is_in_disk_list(const IndexType& index_type) {
    const auto disk_index_types = DISK_LIST();
    const auto match = std::find(disk_index_types.begin(), disk_index_types.end(), index_type);
    return match != disk_index_types.end();
}
// Creates the file manager that matches `index_type`.
//
// When compiled with BUILD_DISK_ANN and the index type is in the disk list, a
// DiskFileManagerImpl built from `field_meta` and `index_meta` is returned.
// In every other case the result is a null pointer.
FileManagerImplPtr
CreateFileManager(IndexType index_type, const FieldDataMeta& field_meta, const IndexMeta& index_meta) {
    // TODO :: switch case index type to create file manager
    FileManagerImplPtr file_manager = nullptr;
#ifdef BUILD_DISK_ANN
    if (is_in_disk_list(index_type)) {
        file_manager = std::make_shared<DiskFileManagerImpl>(field_meta, index_meta);
    }
#endif
    return file_manager;
}
} // namespace milvus::storage

View File

@ -18,6 +18,7 @@
#include <memory>
#include <string>
#include <vector>
#include "storage/PayloadStream.h"
#include "storage/FileManager.h"
@ -62,12 +63,22 @@ std::string
GenLocalIndexPathPrefix(int64_t build_id, int64_t index_version);
std::string
GenRawDataPathPrefix(int64_t segment_id, int64_t field_id);
GenFieldRawDataPathPrefix(int64_t segment_id, int64_t field_id);
std::string
GetLocalRawDataPathPrefixWithBuildID(int64_t segment_id);
GetSegmentRawDataPathPrefix(int64_t segment_id);
// Reports whether `t` compares equal to any element of the list produced by
// `list_func`. The provider is invoked exactly once per call.
template <typename T>
inline bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
    const std::vector<T> candidates = list_func();
    for (const auto& candidate : candidates) {
        if (candidate == t) {
            return true;
        }
    }
    return false;
}
bool
is_in_disk_list(const IndexType& index_type);
FileManagerImplPtr
CreateFileManager(knowhere::IndexType index_type, const FieldDataMeta& field_meta, const IndexMeta& index_meta);
CreateFileManager(IndexType index_type, const FieldDataMeta& field_meta, const IndexMeta& index_meta);
} // namespace milvus::storage

View File

@ -0,0 +1,41 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "storage/storage_c.h"
#include "config/ConfigChunkManager.h"
#include "common/CGoHelper.h"
#ifdef BUILD_DISK_ANN
#include "storage/LocalChunkManager.h"
#endif
// Reports how many bytes the local chunk manager's root directory occupies.
//
// size: output parameter; always written before a success status is returned.
//       It receives the size reported by LocalChunkManager::GetSizeOfDir for
//       the configured local root path, or 0 when that directory does not
//       exist or the code is compiled without BUILD_DISK_ANN.
//
// Returns milvus::SuccessCStatus(), or a failure status with UnexpectedError
// and the exception message when a std::exception is thrown.
CStatus
GetLocalUsedSize(int64_t* size) {
    try {
        // Start from a defined value so callers never read an indeterminate
        // result when disk index support is compiled out.
        *size = 0;
#ifdef BUILD_DISK_ANN
        auto& local_chunk_manager = milvus::storage::LocalChunkManager::GetInstance();
        auto dir = milvus::ChunkMangerConfig::GetLocalRootPath();
        if (local_chunk_manager.DirExist(dir)) {
            *size = local_chunk_manager.GetSizeOfDir(dir);
        }
#endif
        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(UnexpectedError, e.what());
    }
}

View File

@ -0,0 +1,29 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifdef __cplusplus
extern "C" {
#endif

#include "common/type_c.h"

// Writes the size of the local storage root directory to `*size`.
// The returned CStatus tells whether the query succeeded.
CStatus
GetLocalUsedSize(int64_t* size);

#ifdef __cplusplus
}
#endif

View File

@ -63,7 +63,11 @@ add_subdirectory( protobuf )
add_subdirectory( boost_ext )
add_subdirectory( arrow )
add_subdirectory( rocksdb )
#add_subdirectory( aws_sdk )
# ******************************* Thirdparty aws sdk ********************************
# Add the bundled aws_sdk third-party directory only when LINUX is set.
# NOTE(review): CMake itself only defines LINUX from version 3.25 on; presumably
# this project sets it elsewhere for older versions — confirm.
if ( LINUX )
add_subdirectory( aws_sdk )
endif()
# ******************************* Thirdparty marisa ********************************
# TODO: support win.

View File

@ -29,9 +29,18 @@ macro(build_knowhere)
${EP_COMMON_CMAKE_ARGS}
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${KNOWHERE_INSTALL_PREFIX}
-DKNOWHERE_WITH_DISKANN=false
)
# Forward the DiskANN switch to the knowhere external project so it matches
# this build's BUILD_DISK_ANN setting.
if ( BUILD_DISK_ANN STREQUAL "ON" )
    list( APPEND KNOWHERE_CMAKE_ARGS -DKNOWHERE_WITH_DISKANN=true )
else ()
    list( APPEND KNOWHERE_CMAKE_ARGS -DKNOWHERE_WITH_DISKANN=false )
endif ()
externalproject_add(knowhere_ep
# GIT_REPOSITORY "https://github.com/milvus-io/knowhere.git"
# GIT_TAG main

View File

@ -49,6 +49,15 @@ set(MILVUS_TEST_FILES
test_data_codec.cpp
)
# Disk-index storage tests are only built when BUILD_DISK_ANN=ON.
# The two commented-out entries are currently excluded from the build.
if ( BUILD_DISK_ANN STREQUAL "ON" )
    list( APPEND MILVUS_TEST_FILES
        # test_minio_chunk_manager.cpp
        # test_disk_file_manager_test.cpp
        test_local_chunk_manager.cpp
        )
endif()
if (LINUX OR APPLE)
set(MILVUS_TEST_FILES
${MILVUS_TEST_FILES}
@ -74,9 +83,6 @@ if (LINUX)
gtest_main
milvus_segcore
milvus_indexbuilder
milvus_index
milvus_log
pthread
)
install(TARGETS index_builder_test DESTINATION unittest)
endif()
@ -89,9 +95,6 @@ target_link_libraries(all_tests
gtest
milvus_segcore
milvus_indexbuilder
milvus_index
milvus_log
milvus_storage
pthread
)

View File

@ -19,7 +19,6 @@
#include "pb/index_cgo_msg.pb.h"
#include "indexbuilder/VecIndexCreator.h"
#include "indexbuilder/index_c.h"
#include "indexbuilder/utils.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "common/Consts.h"
@ -64,9 +63,9 @@ IndexBuilder_build(benchmark::State& state) {
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
for (auto _ : state) {
auto index =
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
index->BuildWithoutIds(xb_dataset);
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str());
index->Build(xb_dataset);
}
}
@ -93,10 +92,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
for (auto _ : state) {
auto index =
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str());
index->BuildWithoutIds(xb_dataset);
index->Build(xb_dataset);
index->Serialize();
}
}

Some files were not shown because too many files have changed in this diff Show More