Former-commit-id: 26e0e72b1da4f7b63d27f8ff7a96915a11fcbace
pull/191/head
zhenwu 2019-10-13 15:17:15 +08:00
commit 7a9c96b6a9
1250 changed files with 43495 additions and 114631 deletions

27
.clang-format Normal file
View File

@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
BasedOnStyle: Google
DerivePointerAlignment: false
ColumnLimit: 120
IndentWidth: 4
AccessModifierOffset: -3
AlwaysBreakAfterReturnType: All
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AlignTrailingComments: true

33
.clang-tidy Normal file
View File

@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
Checks: 'clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,google-*,modernize-*,readability-*'
# produce HeaderFilterRegex from cpp/build-support/lint_exclusions.txt with:
# echo -n '^('; sed -e 's/*/\.*/g' cpp/build-support/lint_exclusions.txt | tr '\n' '|'; echo ')$'
HeaderFilterRegex: '^(.*cmake-build-debug.*|.*cmake-build-release.*|.*cmake_build.*|.*src/core/thirdparty.*|.*thirdparty.*|.*easylogging++.*|.*SqliteMetaImpl.cpp|.*src/grpc.*|.*src/core.*|.*src/wrapper.*)$'
AnalyzeTemporaryDtors: true
ChainedConditionalReturn: 1
ChainedConditionalAssignment: 1
CheckOptions:
- key: google-readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-readability-function-size.StatementThreshold
value: '800'
- key: google-readability-namespace-comments.ShortNamespaceLines
value: '10'
- key: google-readability-namespace-comments.SpacesBeforeComments
value: '2'

19
.clang-tidy-ignore Normal file
View File

@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ipc-adapter-test.cc
# memory-pool-test.cc

View File

@ -5,7 +5,7 @@ try {
dir ("milvus-helm") {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
dir ("milvus/milvus-cluster") {
sh "helm install --wait --timeout 300 --set roServers.image.tag=${DOCKER_VERSION} --set woServers.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP -f ci/values.yaml --name ${env.JOB_NAME}-${env.BUILD_NUMBER}-cluster --namespace milvus-cluster --version 0.4.0 . "
sh "helm install --wait --timeout 300 --set roServers.image.tag=${DOCKER_VERSION} --set woServers.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP -f ci/values.yaml --name ${env.JOB_NAME}-${env.BUILD_NUMBER}-cluster --namespace milvus-cluster --version 0.5.0 . "
}
}
/*

View File

@ -5,7 +5,7 @@ try {
dir ("milvus-helm") {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
dir ("milvus/milvus-gpu") {
sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/values.yaml --namespace milvus-1 --version 0.4.0 ."
sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/values.yaml --namespace milvus-1 --version 0.5.0 ."
}
}
} catch (exc) {

View File

@ -5,7 +5,7 @@ try {
dir ("milvus-helm") {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
dir ("milvus/milvus-gpu") {
sh "helm install --wait --timeout 300 --set engine.image.repository=\"zilliz.azurecr.cn/milvus/engine\" --set engine.image.tag=${DOCKER_VERSION} --set expose.type=loadBalancer --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/values.yaml --namespace milvus-1 --version 0.4.0 ."
sh "helm install --wait --timeout 300 --set engine.image.repository=\"zilliz.azurecr.cn/milvus/engine\" --set engine.image.tag=${DOCKER_VERSION} --set expose.type=loadBalancer --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/values.yaml --namespace milvus-1 --version 0.5.0 ."
}
}
} catch (exc) {

View File

@ -3,9 +3,9 @@ timeout(time: 30, unit: 'MINUTES') {
dir ("${PROJECT_NAME}_test") {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
sh 'python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com'
sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local"
sh 'python3 -m pip install -r requirements.txt'
sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local"
}
// mysql database backend test
load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy"
@ -16,7 +16,7 @@ timeout(time: 30, unit: 'MINUTES') {
}
dir ("milvus-helm") {
dir ("milvus/milvus-gpu") {
sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/db_backend/mysql_values.yaml --namespace milvus-2 --version 0.4.0 ."
sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/db_backend/mysql_values.yaml --namespace milvus-2 --version 0.5.0 ."
}
}
dir ("${PROJECT_NAME}_test") {

View File

@ -5,17 +5,13 @@ container('milvus-build-env') {
try {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
/*
dir ("cpp/thirdparty/knowhere") {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/knowhere.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
sh "./build.sh -t ${params.BUILD_TYPE} -p ${knowhere_build_dir} -j"
}
*/
dir ("cpp") {
sh "git config --global user.email \"test@zilliz.com\""
sh "git config --global user.name \"test\""
sh "./build.sh -t ${params.BUILD_TYPE} -j -u -c"
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
sh "./build.sh -l"
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -j -u -c"
}
}
} catch (exc) {
updateGitlabCommitStatus name: 'Build Engine', state: 'failed'

View File

@ -5,17 +5,13 @@ container('milvus-build-env') {
try {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
/*
dir ("cpp/thirdparty/knowhere") {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/knowhere.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
sh "./build.sh -t ${params.BUILD_TYPE} -p ${knowhere_build_dir} -j"
}
*/
dir ("cpp") {
sh "git config --global user.email \"test@zilliz.com\""
sh "git config --global user.name \"test\""
sh "./build.sh -t ${params.BUILD_TYPE} -j"
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
sh "./build.sh -l"
sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -j"
}
}
} catch (exc) {
updateGitlabCommitStatus name: 'Build Engine', state: 'failed'

View File

@ -17,7 +17,7 @@ timeout(time: 40, unit: 'MINUTES') {
}
dir ("milvus-helm") {
dir ("milvus/milvus-gpu") {
sh "helm install --wait --timeout 300 --set engine.image.repository=\"zilliz.azurecr.cn/milvus/engine\" --set engine.image.tag=${DOCKER_VERSION} --set expose.type=loadBalancer --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/db_backend/mysql_values.yaml --namespace milvus-2 --version 0.4.0 ."
sh "helm install --wait --timeout 300 --set engine.image.repository=\"zilliz.azurecr.cn/milvus/engine\" --set engine.image.tag=${DOCKER_VERSION} --set expose.type=loadBalancer --name ${env.JOB_NAME}-${env.BUILD_NUMBER} -f ci/db_backend/mysql_values.yaml --namespace milvus-2 --version 0.5.0 ."
}
}
dir ("${PROJECT_NAME}_test") {

View File

@ -33,12 +33,30 @@ pipeline {
cloud 'build-kubernetes'
label 'build'
defaultContainer 'jnlp'
containerTemplate {
name 'milvus-build-env'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.12'
ttyEnabled true
command 'cat'
}
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-build-env
labels:
app: milvus
componet: build-env
spec:
containers:
- name: milvus-build-env
image: registry.zilliz.com/milvus/milvus-build-env:v0.13
command:
- cat
tty: true
resources:
limits:
memory: "28Gi"
cpu: "10.0"
nvidia.com/gpu: 1
requests:
memory: "14Gi"
cpu: "5.0"
"""
}
}
stages {

View File

@ -33,12 +33,30 @@ pipeline {
cloud 'build-kubernetes'
label 'build'
defaultContainer 'jnlp'
containerTemplate {
name 'milvus-build-env'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.12'
ttyEnabled true
command 'cat'
}
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-build-env
labels:
app: milvus
componet: build-env
spec:
containers:
- name: milvus-build-env
image: registry.zilliz.com/milvus/milvus-build-env:v0.13
command:
- cat
tty: true
resources:
limits:
memory: "28Gi"
cpu: "10.0"
nvidia.com/gpu: 1
requests:
memory: "14Gi"
cpu: "5.0"
"""
}
}
stages {

View File

@ -33,12 +33,30 @@ pipeline {
cloud 'build-kubernetes'
label 'build'
defaultContainer 'jnlp'
containerTemplate {
name 'milvus-build-env'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.12'
ttyEnabled true
command 'cat'
}
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-build-env
labels:
app: milvus
componet: build-env
spec:
containers:
- name: milvus-build-env
image: registry.zilliz.com/milvus/milvus-build-env:v0.13
command:
- cat
tty: true
resources:
limits:
memory: "28Gi"
cpu: "10.0"
nvidia.com/gpu: 1
requests:
memory: "14Gi"
cpu: "5.0"
"""
}
}
stages {

59
cmake-format.py Normal file
View File

@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cmake-format configuration file
# Use run-cmake-format.py to reformat all cmake files in the source tree
# How wide to allow formatted cmake files
line_width = 90
# How many spaces to tab for indent
tab_size = 2
# If arglists are longer than this, break them always
max_subargs_per_line = 4
# If true, separate flow control names from their parentheses with a space
separate_ctrl_name_with_space = False
# If true, separate function names from parentheses with a space
separate_fn_name_with_space = False
# If a statement is wrapped to more than one line, than dangle the closing
# parenthesis on it's own line
dangle_parens = False
# What style line endings to use in the output.
line_ending = 'unix'
# Format command names consistently as 'lower' or 'upper' case
command_case = 'lower'
# Format keywords consistently as 'lower' or 'upper' case
keyword_case = 'unchanged'
# enable comment markup parsing and reflow
enable_markup = False
# If comment markup is enabled, don't reflow the first comment block in
# eachlistfile. Use this to preserve formatting of your
# copyright/licensestatements.
first_comment_is_literal = False
# If comment markup is enabled, don't reflow any comment block which matchesthis
# (regex) pattern. Default is `None` (disabled).
literal_comment_pattern = None

1
cpp/.gitignore vendored
View File

@ -8,3 +8,4 @@ output.info
output_new.info
server.info
thirdparty/knowhere/
*.pyc

View File

@ -5,12 +5,55 @@ Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.5.0 (TODO)
## Bug
- MS-568 - Fix gpuresource free error
- MS-572 - Milvus crash when get SIGINT
- MS-577 - Unittest Query randomly hung
- MS-587 - Count get wrong result after adding vectors and index built immediately
- MS-599 - search wrong result when table created with metric_type: IP
- MS-601 - Docker logs error caused by get CPUTemperature error
- MS-622 - Delete vectors should be failed if date range is invalid
- MS-620 - Get table row counts display wrong error code
- MS-637 - out of memory when load too many tasks
- MS-640 - Cache object size calculate incorrect
- MS-641 - Segment fault(signal 11) in PickToLoad
## Improvement
- MS-552 - Add and change the easylogging library
- MS-553 - Refine cache code
- MS-555 - Remove old scheduler
- MS-556 - Add Job Definition in Scheduler
- MS-557 - Merge Log.h
- MS-558 - Refine status code
- MS-562 - Add JobMgr and TaskCreator in Scheduler
- MS-566 - Refactor cmake
- MS-574 - Milvus configuration refactor
- MS-578 - Make sure milvus5.0 don't crack 0.3.1 data
- MS-585 - Update namespace in scheduler
- MS-606 - Speed up result reduce
- MS-608 - Update TODO names
- MS-609 - Update task construct function
- MS-611 - Add resources validity check in ResourceMgr
- MS-619 - Add optimizer class in scheduler
- MS-614 - Preload table at startup
- MS-626 - Refactor DataObj to support cache any type data
## New Feature
- MS-627 - Integrate new index: IVFSQHybrid
- MS-631 - IVFSQ8H Index support
- MS-636 - Add optimizer in scheduler for FAISS_IVFSQ8H
## Task
- MS-554 - Change license to Apache 2.0
- MS-561 - Add contributing guidelines, code of conduct and README docs
- MS-567 - Add NOTICE.md
- MS-569 - Complete the NOTICE.md
- MS-575 - Add Clang-format & Clang-tidy & Cpplint
- MS-586 - Remove BUILD_FAISS_WITH_MKL option
- MS-590 - Refine cmake code to support cpplint
- MS-600 - Reconstruct unittest code
- MS-602 - Remove zilliz namespace
- MS-610 - Change error code base value from hex to decimal
- MS-635 - Add compile option to support customized faiss
# Milvus 0.4.0 (2019-09-12)
@ -45,6 +88,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-510 - unittest out of memory and crashed
- MS-507 - Dataset 10m-512, index type sq8performance in-normal when set CPU_CACHE to 16 or 64
- MS-543 - SearchTask fail without exception
- MS-582 - grafana displays changes frequently
## Improvement
- MS-327 - Clean code for milvus
@ -129,6 +173,9 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-523 - Config file validation
- MS-539 - Remove old task code
- MS-546 - Add simple mode resource_config
- MS-570 - Add prometheus docker-compose file
- MS-576 - Scheduler refactor
- MS-592 - Change showtables stream transport to unary
## New Feature
- MS-343 - Implement ResourceMgr

View File

@ -1,12 +1,28 @@
#-------------------------------------------------------------------------------
# Copyright (Zilliz) - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited.
# Proprietary and confidential.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http:#www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#-------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.14)
message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
MACRO (GET_CURRENT_TIME CURRENT_TIME)
execute_process(COMMAND "date" +"%Y-%m-%d %H:%M.%S" OUTPUT_VARIABLE ${CURRENT_TIME})
ENDMACRO (GET_CURRENT_TIME)
@ -27,24 +43,21 @@ endif()
set(MILVUS_VERSION "${GIT_BRANCH_NAME}")
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]" MILVUS_VERSION "${MILVUS_VERSION}")
find_package(ClangTools)
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(BUILD_TYPE "release")
set(BUILD_TYPE "Release")
else()
set(BUILD_TYPE "debug")
set(BUILD_TYPE "Debug")
endif()
message(STATUS "Build type = ${BUILD_TYPE}")
add_definitions(-DNEW_SCHEDULER)
project(milvus VERSION "${MILVUS_VERSION}")
project(milvus_engine LANGUAGES CUDA CXX)
# Ensure that a default make is set
if("${MAKE}" STREQUAL "")
if(NOT MSVC)
find_program(MAKE make)
endif()
endif()
unset(CMAKE_EXPORT_COMPILE_COMMANDS CACHE)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(MILVUS_VERSION_MAJOR "${milvus_VERSION_MAJOR}")
set(MILVUS_VERSION_MINOR "${milvus_VERSION_MINOR}")
@ -54,7 +67,7 @@ if(MILVUS_VERSION_MAJOR STREQUAL ""
OR MILVUS_VERSION_MINOR STREQUAL ""
OR MILVUS_VERSION_PATCH STREQUAL "")
message(WARNING "Failed to determine Milvus version from git branch name")
set(MILVUS_VERSION "0.4.0")
set(MILVUS_VERSION "0.5.0")
endif()
message(STATUS "Build version = ${MILVUS_VERSION}")
@ -64,86 +77,82 @@ message(STATUS "Milvus version: "
"${MILVUS_VERSION_MAJOR}.${MILVUS_VERSION_MINOR}.${MILVUS_VERSION_PATCH} "
"(full: '${MILVUS_VERSION}')")
set(MILVUS_SOURCE_DIR ${PROJECT_SOURCE_DIR})
set(MILVUS_BINARY_DIR ${PROJECT_BINARY_DIR})
find_package(CUDA)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES -arch sm_60 --expt-extended-lambda")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0 -g")
message(STATUS "CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}")
message(STATUS "CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED on)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
message(STATUS "building milvus_engine on x86 architecture")
message(STATUS "Building milvus_engine on x86 architecture")
set(MILVUS_BUILD_ARCH x86_64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc)")
message(STATUS "building milvus_engine on ppc architecture")
message(STATUS "Building milvus_engine on ppc architecture")
set(MILVUS_BUILD_ARCH ppc64le)
else()
message(WARNING "unknown processor type")
message(WARNING "Unknown processor type")
message(WARNING "CMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}")
set(MILVUS_BUILD_ARCH unknown)
endif()
find_package (Python COMPONENTS Interpreter Development)
find_package(CUDA)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES --expt-extended-lambda")
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -DELPP_THREAD_SAFE -fopenmp")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O3")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fPIC -DELPP_THREAD_SAFE -fopenmp")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0 -g")
endif()
set(ALLOW_DUPLICATE_CUSTOM_TARGETS TRUE)
# Ensure that a default make is set
if("${MAKE}" STREQUAL "")
if(NOT MSVC)
find_program(MAKE make)
endif()
endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
find_path(MYSQL_INCLUDE_DIR
NAMES "mysql.h"
PATH_SUFFIXES "mysql")
if (${MYSQL_INCLUDE_DIR} STREQUAL "MYSQL_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Could not found MySQL include directory")
else()
include_directories(${MYSQL_INCLUDE_DIR})
endif()
set(MILVUS_SOURCE_DIR ${PROJECT_SOURCE_DIR})
set(MILVUS_BINARY_DIR ${PROJECT_BINARY_DIR})
set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
if (CUSTOMIZATION)
add_definitions(-DCUSTOMIZATION)
endif (CUSTOMIZATION)
include(ExternalProject)
include(DefineOptions)
include(BuildUtils)
include(ThirdPartyPackages)
include_directories(${MILVUS_SOURCE_DIR})
link_directories(${MILVUS_BINARY_DIR})
## Following should be check
set(MILVUS_ENGINE_INCLUDE ${PROJECT_SOURCE_DIR}/include)
set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
add_compile_definitions(PROFILER=${PROFILER})
message(STATUS "MILVUS_ENABLE_PROFILING = ${MILVUS_ENABLE_PROFILING}")
if (MILVUS_ENABLE_PROFILING STREQUAL "ON")
ADD_DEFINITIONS(-DMILVUS_ENABLE_PROFILING)
endif()
include_directories(${MILVUS_ENGINE_INCLUDE})
include_directories(${MILVUS_ENGINE_SRC})
link_directories(${CMAKE_CURRRENT_BINARY_DIR})
config_summary()
add_subdirectory(src)
if (BUILD_COVERAGE STREQUAL "ON")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
endif()
if (BUILD_UNIT_TEST STREQUAL "ON")
if (BUILD_COVERAGE STREQUAL "ON")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/unittest)
endif()
add_custom_target(Clean-All COMMAND ${CMAKE_BUILD_TOOL} clean)
if("${MILVUS_DB_PATH}" STREQUAL "")
set(MILVUS_DB_PATH "/tmp/milvus")
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/server_config.template ${CMAKE_CURRENT_SOURCE_DIR}/conf/server_config.yaml)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.template ${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.conf)
#install
install(DIRECTORY scripts/
DESTINATION scripts
FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
@ -155,12 +164,99 @@ install(FILES
conf/log_config.conf
DESTINATION
conf)
install(FILES
./Milvus-EULA-cn.md
./Milvus-EULA-en.md
DESTINATION
license
)
config_summary()
#
# "make lint" target
#
if(NOT MILVUS_VERBOSE_LINT)
set(MILVUS_LINT_QUIET "--quiet")
endif()
if(NOT LINT_EXCLUSIONS_FILE)
# source files matching a glob from a line in this file
# will be excluded from linting (cpplint, clang-tidy, clang-format)
set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt)
endif()
find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
message(STATUS "Found cpplint executable at ${CPPLINT_BIN}")
#
# "make lint" targets
#
add_custom_target(lint
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_cpplint.py
--cpplint_binary
${CPPLINT_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}
${MILVUS_LINT_QUIET})
#
# "make clang-format" and "make check-clang-format" targets
#
if(${CLANG_FORMAT_FOUND})
# runs clang format and updates files in place.
add_custom_target(clang-format
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_clang_format.py
--clang_format_binary
${CLANG_FORMAT_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}/src
--fix
${MILVUS_LINT_QUIET})
# runs clang format and exits with a non-zero exit code if any files need to be reformatted
add_custom_target(check-clang-format
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_clang_format.py
--clang_format_binary
${CLANG_FORMAT_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}/src
${MILVUS_LINT_QUIET})
endif()
#
# "make clang-tidy" and "make check-clang-tidy" targets
#
if(${CLANG_TIDY_FOUND})
# runs clang-tidy and attempts to fix any warning automatically
add_custom_target(clang-tidy
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_clang_tidy.py
--clang_tidy_binary
${CLANG_TIDY_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--compile_commands
${CMAKE_BINARY_DIR}/compile_commands.json
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}/src
--fix
${MILVUS_LINT_QUIET})
# runs clang-tidy and exits with a non-zero exit code if any errors are found.
add_custom_target(check-clang-tidy
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_clang_tidy.py
--clang_tidy_binary
${CLANG_TIDY_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--compile_commands
${CMAKE_BINARY_DIR}/compile_commands.json
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}/src
${MILVUS_LINT_QUIET})
endif()

48
cpp/CODE_OF_CONDUCT.md Normal file
View File

@ -0,0 +1,48 @@
# Milvus Code of Conduct
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment include:
- Using welcoming and inclusive language.
- Being respectful of differing viewpoints and experiences.
- Gracefully accepting constructive criticism.
- Focusing on what is best for the community.
- Showing empathy towards other community members.
Examples of unacceptable behavior by participants include:
- The use of sexualized language or imagery and unwelcome sexual attention or advances.
- Trolling, insulting/derogatory comments, and personal or political attacks.
- Public or private harassment.
- Publishing others' private information, such as a physical or electronic address, without explicit permission.
- Conduct which could reasonably be considered inappropriate for the forum in which it occurs.
All Milvus forums and spaces are meant for professional interactions, and any behavior which could reasonably be considered inappropriate in a professional setting is unacceptable.
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies to all content on milvus.io, Milvuss GitHub organization, or any other official Milvus web presence allowing for community interactions, as well as at all official Milvus events, whether offline or online.
The Code of Conduct also applies within all project spaces and in public spaces whenever an individual is representing Milvus or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed or de facto representative at an online or offline event. Representation of Milvus may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at support@zilliz.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq

65
cpp/CONTRIBUTING.md Normal file
View File

@ -0,0 +1,65 @@
# Contributing to Milvus
First of all, thanks for taking the time to contribute to Milvus! It's people like you that help Milvus come to fruition.
The following are a set of guidelines for contributing to Milvus. Following these guidelines helps contributing to this project easy and transparent. These are mostly guideline, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request.
As for everything else in the project, the contributions to Milvus are governed by our [Code of Conduct](CODE OF CONDUCT.md).
TOC
## Contribution Checklist
Before you make any contributions, make sure you follow this list.
- Read [Contributing to Milvus](CONTRIBUTING.md).
- Check if the changes are consistent with the [coding style](CONTRIBUTING.md#coding-style).
- Run [unit tests](CONTRIBUTING.md#run-unit-test).
## What contributions can I make?
Contributions to Milvus fall into the following categories.
1. To report a bug or a problem with documentation, please file an [issue](https://github.com/milvus-io/milvus/issues/new/choose) providing the details of the problem. If you believe the issue needs priority attention, please comment on the issue to notify the team.
2. To propose a new feature, please file a new feature request [issue](https://github.com/milvus-io/milvus/issues/new/choose). Describe the intended feature and discuss the design and implementation with the team and community. Once the team agrees that the plan looks good, go ahead and implement it, following the [Contributing code](CONTRIBUTING.md#contributing-code).
3. To implement a feature or bug-fix for an existing outstanding issue, follow the [Contributing code](CONTRIBUTING.md#contributing-code). If you need more context on a particular issue, comment on the issue to let people know.
## How can I contribute?
### Contributing code
If you have improvements to Milvus, send us your pull requests! For those just getting started, GitHub has a [how-to](https://help.github.com/en/articles/about-pull-requests).
The Milvus team members will review your pull requests, and once it is accepted, it will be given a `ready to merge` label. This means we are working on submitting your pull request to the internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub.
### General guidelines
Before sending your pull requests for review, make sure your changes are consistent with the guidelines and follow the Milvus coding style.
- Include unit tests when you contribute new features, as they help to prove that your code works correctly, and also guard against future breaking changes to lower the maintenance cost.
- Bug fixes also require unit tests, because the presence of bugs usually indicates insufficient test coverage.
- Keep API compatibility in mind when you change code in Milvus. Reviewers of your pull request will comment on any API compatibility issues.
- When you contribute a new feature to Milvus, the maintenance burden is (by default) transferred to the Milvus team. This means that the benefit of the contribution must be compared against the cost of maintaining the feature.
## Coding Style
The coding style used in Milvus generally follow [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
And we made the following changes based on the guide:
- 4 spaces for indentation
- Adopt .cpp file extension instead of .cc extension
- 120-character line length
- Camel-Cased file names
## Run unit test
We use Google Test framework for test running.
To run unit test for Milvus under C++, please use the following command:
```shell
# Run unit test for Milvus
$ ./build.sh -u
```

29
cpp/NOTICE.md Normal file
View File

@ -0,0 +1,29 @@
| Name | License |
| ------------- | ------------------------------------------------------------ |
| Apache Arrow | [Apache License 2.0](https://github.com/apache/arrow/blob/master/LICENSE.txt) |
| Boost | [Boost Software License](https://github.com/boostorg/boost/blob/master/LICENSE_1_0.txt) |
| BZip2 | [BZip2](http://www.bzip.org/) |
| FAISS | [MIT](https://github.com/facebookresearch/faiss/blob/master/LICENSE) |
| Gtest | [BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
| LAPACK | [LAPACK](https://github.com/Reference-LAPACK/lapack/blob/master/LICENSE) |
| LZ4 | [BSD 2-Clause](https://github.com/Blosc/c-blosc/blob/master/LICENSES/LZ4.txt) |
| MySQLPP | [LGPL 2.1](https://tangentsoft.com/mysqlpp/artifact/b128a66dab867923) |
| OpenBLAS | [BSD 3-Clause](https://github.com/xianyi/OpenBLAS/blob/develop/LICENSE) |
| Prometheus | [Apache License 2.0](https://github.com/prometheus/prometheus/blob/master/LICENSE) |
| Snappy | [BSD](https://github.com/google/snappy/blob/master/COPYING) |
| SQLite | [Public Domain](https://www.sqlite.org/copyright.html) |
| SQLite-ORM | [BSD 3-Clause](https://github.com/fnc12/sqlite_orm/blob/master/LICENSE) |
| yaml-cpp | [MIT](https://github.com/jbeder/yaml-cpp/blob/master/LICENSE) |
| ZLIB | [ZLIB](http://zlib.net/zlib_license.html) |
| ZSTD | [BSD](https://github.com/facebook/zstd/blob/dev/LICENSE) |
| libunwind | [MIT](https://github.com/libunwind/libunwind/blob/master/LICENSE) |
| gperftools | [BSD 3-Clause](https://github.com/gperftools/gperftools/blob/master/COPYING) |
| grpc | [Apache 2.0](https://github.com/grpc/grpc/blob/master/LICENSE) |
| EASYLOGGINGPP | [MIT](https://github.com/zuhd-org/easyloggingpp/blob/master/LICENSEhttps://github.com/zuhd-org/easyloggingpp/blob/master/LICENSE) |

View File

@ -1,72 +1,238 @@
### Compilation
#### Step 1: install necessery tools
- [Slack Community](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk)
- [Blog](https://www.milvus.io/blog/)
centos7 :
yum install gfortran qt4 flex bison mysql-devel mysql
ubuntu16.04 :
sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client
cd scripts && sudo ./requirements.sh
# Welcome to Milvus
If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link:
Firstly, welcome, and thanks for your interest in [Milvus](https://milvus.io)! No matter who you are, what you do, we greatly appreciate your contribution to help us reinvent data science with Milvus.
```
sudo ln -s /path/to/libmysqlclient.so /path/to/libmysqlclient_r.so
## What is Milvus
Milvus is an open source vector search engine that supports similarity search of large-scale vectors. Built on optimized indexing algorithm, it is compatible with major AI/ML models.
Milvus was developed by ZILLIZ, a tech startup that intends to reinvent data science, with the purpose of providing enterprises with efficient and scalable similarity search and analysis of feature vectors and unstructured data.
Milvus provides stable Python, C++ and Java APIs.
Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/Releases/v0.4.0/).
- GPU-accelerated search engine
Milvus is designed for the largest scale of vector index. CPU/GPU heterogeneous computing architecture allows you to process data at a speed 1000 times faster.
- Intelligent index
With a “Decide Your Own Algorithm” approach, you can embed machine learning and advanced algorithms into Milvus without the headache of complex data engineering or migrating data between disparate systems. Milvus is built on optimized indexing algorithm based on quantization indexing, tree-based and graph indexing methods.
- Strong scalability
The data is stored and computed on a distributed architecture. This lets you scale data sizes up and down without redesigning the system.
## Architecture
![Milvus_arch](https://milvus.io/docs/assets/milvus_arch.png)
## Get started
### Install and start Milvus server
#### Use Docker
Use Docker to install Milvus is a breeze. See the [Milvus install guide](https://milvus.io/docs/en/userguide/install_milvus/) for details.
#### Use source code
##### Compilation
###### Step 1 Install necessary tools
```shell
# Install tools
Centos7 :
$ yum install gfortran qt4 flex bison
$ yum install mysql-devel mysql
Ubuntu 16.04 or 18.04:
$ sudo apt-get install gfortran qt4-qmake flex bison
$ sudo apt-get install libmysqlclient-dev mysql-client
```
#### Step 2: build(output to cmake_build folder)
Verify the existence of `libmysqlclient_r.so`:
cmake_build/src/milvus_server is the server
```shell
# Verify existence
$ locate libmysqlclient_r.so
```
cd [sourcecode path]/cpp/thirdparty
git clone git@192.168.1.105:megasearch/knowhere.git
cd knowhere
./build.sh -t Debug
or ./build.sh -t Release
If not, you need to create a symbolic link:
cd [sourcecode path]/cpp
./build.sh -t Debug
or ./build.sh -t Release
```shell
# Locate libmysqlclient.so
$ sudo updatedb
$ locate libmysqlclient.so
If you encounter the following error when building:
# Create symbolic link
$ sudo ln -s /path/to/libmysqlclient.so /path/to/libmysqlclient_r.so
```
###### Step 2 Build
```shell
$ cd [Milvus sourcecode path]/cpp
$ ./build.sh -t Debug
or
$ ./build.sh -t Release
```
When the build is completed, all the stuff that you need in order to run Milvus will be installed under `[Milvus root path]/cpp/milvus`.
If you encounter the following error message,
`protocol https not supported or disabled in libcurl`
1. Install libcurl4-openssl-dev
please reinstall CMake with curl:
2. Install cmake 3.14:
```
./bootstrap --system-curl
make
sudo make install
1. Install curl development files:
```shell
CentOS 7:
$ yum install curl-devel
Ubuntu 16.04 or 18.04:
$ sudo apt-get install libcurl4-openssl-dev
```
#### To build unittest:
2. Install [CMake 3.14](https://github.com/Kitware/CMake/releases/download/v3.14.6/cmake-3.14.6.tar.gz):
```shell
$ ./bootstrap --system-curl
$ make
$ sudo make install
```
./build.sh -u
or
./build.sh --unittest
#### To run code coverage
apt-get install lcov
./build.sh -u -c
### Launch server
Set config in cpp/conf/server_config.yaml
Add milvus/lib to LD_LIBRARY_PATH
##### code format and linting
Install clang-format and clang-tidy
```shell
CentOS 7:
$ yum install clang
Ubuntu 16.04:
$ sudo apt-get install clang-tidy
$ sudo su
$ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
$ apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-6.0 main"
$ apt-get update
$ apt-get install clang-format-6.0
Ubuntu 18.04:
$ sudo apt-get install clang-tidy clang-format
$ rm cmake_build/CMakeCache.txt
```
Check code style
```shell
$ ./build.sh -l
```
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib
To format the code
```shell
$ cd cmake_build
$ make clang-format
```
Then launch server with config:
cd [build output path]
start_server.sh
stop_server.sh
##### Run unit test
### Launch test_client(only for debug)
If you want to test remote api, you can run sdk example.
[build output path]/sdk/examples/grpcsimple/sdk_simple
```shell
$ ./build.sh -u
```
##### Run code coverage
Install lcov
```shell
CentOS 7:
$ yum install lcov
Ubuntu 16.04 or 18.04:
$ sudo apt-get install lcov
```
```shell
$ ./build.sh -u -c
```
##### Launch Milvus server
```shell
$ cd [Milvus root path]/cpp/milvus
```
Add `lib/` directory to `LD_LIBRARY_PATH`
```
$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib
```
Then start Milvus server:
```
$ cd scripts
$ ./start_server.sh
```
To stop Milvus server, run:
```shell
$ ./stop_server.sh
```
To edit Milvus settings in `conf/server_config.yaml` and `conf/log_config.conf`, please read [Milvus Configuration](https://www.milvus-io/docs/master/reference/milvus_config.md).
### Try your first Milvus program
#### Run Python example code
Make sure [Python 3.4](https://www.python.org/downloads/) or higher is already installed and in use.
Install Milvus Python SDK.
```shell
# Install Milvus Python SDK
$ pip install pymilvus==0.2.0
```
Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/branch-0.3.1/examples/AdvancedExample.py) to it.
Run the example code.
```python
# Run Milvus Python example
$ python3 example.py
```
#### Run C++ example code
```shell
# Run Milvus C++ example
$ cd [Milvus root path]/cpp/milvus/bin
$ ./sdk_simple
```
## Contribution guidelines
Contributions are welcomed and greatly appreciated. If you want to contribute to Milvus, please read our [contribution guidelines](CONTRIBUTING.md). This project adheres to the [code of conduct](CODE OF CONDUCT.md) of Milvus. By participating, you are expected to uphold this code.
We use [GitHub issues](https://github.com/milvus-io/milvus/issues) to track issues and bugs. For general questions and public discussions, please join our community.
## Join the Milvus community
To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk).
## Milvus Roadmap
Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features.
## Resources
[Milvus official website](https://www.milvus.io)
[Milvus docs](https://www.milvus.io/docs/en/QuickStart/)
[Milvus blog](https://www.milvus.io/blog/)
[Milvus CSDN](https://mp.csdn.net/mdeditor/100041006#)
[Milvus roadmap](https://milvus.io/docs/en/roadmap/)
## License
[Apache 2.0 license](milvus-io/milvus/LICENSE.md)

View File

@ -0,0 +1,38 @@
<code_scheme name="milvus" version="173">
<Objective-C>
<option name="INDENT_NAMESPACE_MEMBERS" value="0" />
<option name="INDENT_VISIBILITY_KEYWORDS" value="1" />
<option name="KEEP_STRUCTURES_IN_ONE_LINE" value="true" />
<option name="KEEP_CASE_EXPRESSIONS_IN_ONE_LINE" value="true" />
<option name="FUNCTION_NON_TOP_AFTER_RETURN_TYPE_WRAP" value="0" />
<option name="FUNCTION_TOP_AFTER_RETURN_TYPE_WRAP" value="2" />
<option name="FUNCTION_PARAMETERS_WRAP" value="5" />
<option name="FUNCTION_CALL_ARGUMENTS_WRAP" value="5" />
<option name="TEMPLATE_CALL_ARGUMENTS_WRAP" value="5" />
<option name="TEMPLATE_CALL_ARGUMENTS_ALIGN_MULTILINE" value="true" />
<option name="CLASS_CONSTRUCTOR_INIT_LIST_WRAP" value="5" />
<option name="ALIGN_INIT_LIST_IN_COLUMNS" value="false" />
<option name="SPACE_BEFORE_PROTOCOLS_BRACKETS" value="false" />
<option name="SPACE_BEFORE_POINTER_IN_DECLARATION" value="false" />
<option name="SPACE_AFTER_POINTER_IN_DECLARATION" value="true" />
<option name="SPACE_BEFORE_REFERENCE_IN_DECLARATION" value="false" />
<option name="SPACE_AFTER_REFERENCE_IN_DECLARATION" value="true" />
<option name="KEEP_BLANK_LINES_BEFORE_END" value="1" />
</Objective-C>
<codeStyleSettings language="ObjectiveC">
<option name="KEEP_BLANK_LINES_IN_DECLARATIONS" value="1" />
<option name="KEEP_BLANK_LINES_IN_CODE" value="1" />
<option name="KEEP_BLANK_LINES_BEFORE_RBRACE" value="1" />
<option name="BLANK_LINES_AROUND_CLASS" value="0" />
<option name="BLANK_LINES_AROUND_METHOD_IN_INTERFACE" value="0" />
<option name="BLANK_LINES_AFTER_CLASS_HEADER" value="1" />
<option name="SPACE_AFTER_TYPE_CAST" value="false" />
<option name="BINARY_OPERATION_SIGN_ON_NEXT_LINE" value="true" />
<option name="KEEP_SIMPLE_BLOCKS_IN_ONE_LINE" value="false" />
<option name="FOR_STATEMENT_WRAP" value="1" />
<option name="ASSIGNMENT_WRAP" value="1" />
<indentOptions>
<option name="CONTINUATION_INDENT_SIZE" value="4" />
</indentOptions>
</codeStyleSettings>
</code_scheme>

6476
cpp/build-support/cpplint.py vendored Executable file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
*cmake-build-debug*
*cmake-build-release*
*cmake_build*
*src/core/thirdparty*
*thirdparty*
*easylogging++*
*SqliteMetaImpl.cpp
*src/grpc*
*milvus/include*

110
cpp/build-support/lintutils.py Executable file
View File

@ -0,0 +1,110 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import multiprocessing as mp
import os
from fnmatch import fnmatch
from subprocess import Popen
def chunk(seq, n):
"""
divide a sequence into equal sized chunks
(the last chunk may be smaller, but won't be empty)
"""
chunks = []
some = []
for element in seq:
if len(some) == n:
chunks.append(some)
some = []
some.append(element)
if len(some) > 0:
chunks.append(some)
return chunks
def dechunk(chunks):
"flatten chunks into a single list"
seq = []
for chunk in chunks:
seq.extend(chunk)
return seq
def run_parallel(cmds, **kwargs):
"""
Run each of cmds (with shared **kwargs) using subprocess.Popen
then wait for all of them to complete.
Runs batches of multiprocessing.cpu_count() * 2 from cmds
returns a list of tuples containing each process'
returncode, stdout, stderr
"""
complete = []
for cmds_batch in chunk(cmds, mp.cpu_count() * 2):
procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch]
for proc in procs_batch:
stdout, stderr = proc.communicate()
complete.append((proc.returncode, stdout, stderr))
return complete
_source_extensions = '''
.h
.cc
.cpp
'''.split()
def get_sources(source_dir, exclude_globs=[]):
sources = []
for directory, subdirs, basenames in os.walk(source_dir):
for path in [os.path.join(directory, basename)
for basename in basenames]:
# filter out non-source files
if os.path.splitext(path)[1] not in _source_extensions:
continue
path = os.path.abspath(path)
# filter out files that match the globs in the globs file
if any([fnmatch(path, glob) for glob in exclude_globs]):
continue
sources.append(path)
return sources
def stdout_pathcolonline(completed_process, filenames):
"""
given a completed process which may have reported some files as problematic
by printing the path name followed by ':' then a line number, examine
stdout and return the set of actually reported file names
"""
returncode, stdout, stderr = completed_process
bfilenames = set()
for filename in filenames:
bfilenames.add(filename.encode('utf-8') + b':')
problem_files = set()
for line in stdout.splitlines():
for filename in bfilenames:
if line.startswith(filename):
problem_files.add(filename.decode('utf-8'))
bfilenames.remove(filename)
break
return problem_files, stdout

View File

@ -0,0 +1,142 @@
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import lintutils
from subprocess import PIPE
import argparse
import difflib
import multiprocessing as mp
import sys
from functools import partial
# examine the output of clang-format and if changes are
# present assemble a (unified)patch of the difference
def _check_one_file(completed_processes, filename):
with open(filename, "rb") as reader:
original = reader.read()
returncode, stdout, stderr = completed_processes[filename]
formatted = stdout
if formatted != original:
# Run the equivalent of diff -u
diff = list(difflib.unified_diff(
original.decode('utf8').splitlines(True),
formatted.decode('utf8').splitlines(True),
fromfile=filename,
tofile="{} (after clang format)".format(
filename)))
else:
diff = None
return filename, diff
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Runs clang-format on all of the source "
"files. If --fix is specified enforce format by "
"modifying in place, otherwise compare the output "
"with the existing file and output any necessary "
"changes as a patch in unified diff format")
parser.add_argument("--clang_format_binary",
required=True,
help="Path to the clang-format binary")
parser.add_argument("--exclude_globs",
help="Filename containing globs for files "
"that should be excluded from the checks")
parser.add_argument("--source_dir",
required=True,
help="Root directory of the source code")
parser.add_argument("--fix", default=False,
action="store_true",
help="If specified, will re-format the source "
"code instead of comparing the re-formatted "
"output, defaults to %(default)s")
parser.add_argument("--quiet", default=False,
action="store_true",
help="If specified, only print errors")
arguments = parser.parse_args()
exclude_globs = []
if arguments.exclude_globs:
for line in open(arguments.exclude_globs):
exclude_globs.append(line.strip())
formatted_filenames = []
for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
formatted_filenames.append(str(path))
if arguments.fix:
if not arguments.quiet:
print("\n".join(map(lambda x: "Formatting {}".format(x),
formatted_filenames)))
# Break clang-format invocations into chunks: each invocation formats
# 16 files. Wait for all processes to complete
results = lintutils.run_parallel([
[arguments.clang_format_binary, "-i"] + some
for some in lintutils.chunk(formatted_filenames, 16)
])
for returncode, stdout, stderr in results:
# if any clang-format reported a parse error, bubble it
if returncode != 0:
sys.exit(returncode)
else:
# run an instance of clang-format for each source file in parallel,
# then wait for all processes to complete
results = lintutils.run_parallel([
[arguments.clang_format_binary, filename]
for filename in formatted_filenames
], stdout=PIPE, stderr=PIPE)
for returncode, stdout, stderr in results:
# if any clang-format reported a parse error, bubble it
if returncode != 0:
sys.exit(returncode)
error = False
checker = partial(_check_one_file, {
filename: result
for filename, result in zip(formatted_filenames, results)
})
pool = mp.Pool()
try:
# check the output from each invocation of clang-format in parallel
for filename, diff in pool.imap(checker, formatted_filenames):
if not arguments.quiet:
print("Checking {}".format(filename))
if diff:
print("{} had clang-format style issues".format(filename))
# Print out the diff to stderr
error = True
# pad with a newline
print(file=sys.stderr)
diff_out = []
for diff_str in diff:
diff_out.append(diff_str.encode('raw_unicode_escape'))
sys.stderr.writelines(diff_out)
except Exception:
error = True
raise
finally:
pool.terminate()
pool.join()
sys.exit(1 if error else 0)

View File

@ -0,0 +1,126 @@
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import argparse
import multiprocessing as mp
import lintutils
from subprocess import PIPE
import sys
from functools import partial
def _get_chunk_key(filenames):
# lists are not hashable so key on the first filename in a chunk
return filenames[0]
# clang-tidy outputs complaints in '/path:line_number: complaint' format,
# so we can scan its output to get a list of files to fix
def _check_some_files(completed_processes, filenames):
result = completed_processes[_get_chunk_key(filenames)]
return lintutils.stdout_pathcolonline(result, filenames)
def _check_all(cmd, filenames):
# each clang-tidy instance will process 16 files
chunks = lintutils.chunk(filenames, 16)
cmds = [cmd + some for some in chunks]
results = lintutils.run_parallel(cmds, stderr=PIPE, stdout=PIPE)
error = False
# record completed processes (keyed by the first filename in the input
# chunk) for lookup in _check_some_files
completed_processes = {
_get_chunk_key(some): result
for some, result in zip(chunks, results)
}
checker = partial(_check_some_files, completed_processes)
pool = mp.Pool()
try:
# check output of completed clang-tidy invocations in parallel
for problem_files, stdout in pool.imap(checker, chunks):
if problem_files:
msg = "clang-tidy suggested fixes for {}"
print("\n".join(map(msg.format, problem_files)))
print(stdout)
error = True
except Exception:
error = True
raise
finally:
pool.terminate()
pool.join()
if error:
sys.exit(1)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Runs clang-tidy on all ")
parser.add_argument("--clang_tidy_binary",
required=True,
help="Path to the clang-tidy binary")
parser.add_argument("--exclude_globs",
help="Filename containing globs for files "
"that should be excluded from the checks")
parser.add_argument("--compile_commands",
required=True,
help="compile_commands.json to pass clang-tidy")
parser.add_argument("--source_dir",
required=True,
help="Root directory of the source code")
parser.add_argument("--fix", default=False,
action="store_true",
help="If specified, will attempt to fix the "
"source code instead of recommending fixes, "
"defaults to %(default)s")
parser.add_argument("--quiet", default=False,
action="store_true",
help="If specified, only print errors")
arguments = parser.parse_args()
exclude_globs = []
if arguments.exclude_globs:
for line in open(arguments.exclude_globs):
exclude_globs.append(line.strip())
linted_filenames = []
for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
linted_filenames.append(path)
if not arguments.quiet:
msg = 'Tidying {}' if arguments.fix else 'Checking {}'
print("\n".join(map(msg.format, linted_filenames)))
cmd = [
arguments.clang_tidy_binary,
'-p',
arguments.compile_commands
]
if arguments.fix:
cmd.append('-fix')
results = lintutils.run_parallel(
[cmd + some for some in lintutils.chunk(linted_filenames, 16)])
for returncode, stdout, stderr in results:
if returncode != 0:
sys.exit(returncode)
else:
_check_all(cmd, linted_filenames)

132
cpp/build-support/run_cpplint.py Executable file
View File

@ -0,0 +1,132 @@
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import lintutils
from subprocess import PIPE, STDOUT
import argparse
import multiprocessing as mp
import sys
import platform
from functools import partial
# NOTE(wesm):
#
# * readability/casting is disabled as it aggressively warns about functions
# with names like "int32", so "int32(x)", where int32 is a function name,
# warns with
_filters = '''
-whitespace/comments
-readability/casting
-readability/todo
-readability/alt_tokens
-build/header_guard
-build/c++11
-runtime/references
-build/include_order
'''.split()
def _get_chunk_key(filenames):
# lists are not hashable so key on the first filename in a chunk
return filenames[0]
def _check_some_files(completed_processes, filenames):
# cpplint outputs complaints in '/path:line_number: complaint' format,
# so we can scan its output to get a list of files to fix
result = completed_processes[_get_chunk_key(filenames)]
return lintutils.stdout_pathcolonline(result, filenames)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Runs cpplint on all of the source files.")
parser.add_argument("--cpplint_binary",
required=True,
help="Path to the cpplint binary")
parser.add_argument("--exclude_globs",
help="Filename containing globs for files "
"that should be excluded from the checks")
parser.add_argument("--source_dir",
required=True,
help="Root directory of the source code")
parser.add_argument("--quiet", default=False,
action="store_true",
help="If specified, only print errors")
arguments = parser.parse_args()
exclude_globs = []
if arguments.exclude_globs:
for line in open(arguments.exclude_globs):
exclude_globs.append(line.strip())
linted_filenames = []
for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
linted_filenames.append(str(path))
cmd = [
arguments.cpplint_binary,
'--verbose=2',
'--linelength=120',
'--filter=' + ','.join(_filters)
]
if (arguments.cpplint_binary.endswith('.py') and
platform.system() == 'Windows'):
# Windows doesn't support executable scripts; execute with
# sys.executable
cmd.insert(0, sys.executable)
if arguments.quiet:
cmd.append('--quiet')
else:
print("\n".join(map(lambda x: "Linting {}".format(x),
linted_filenames)))
# lint files in chunks: each invocation of cpplint will process 16 files
chunks = lintutils.chunk(linted_filenames, 16)
cmds = [cmd + some for some in chunks]
results = lintutils.run_parallel(cmds, stdout=PIPE, stderr=STDOUT)
error = False
# record completed processes (keyed by the first filename in the input
# chunk) for lookup in _check_some_files
completed_processes = {
_get_chunk_key(filenames): result
for filenames, result in zip(chunks, results)
}
checker = partial(_check_some_files, completed_processes)
pool = mp.Pool()
try:
# scan the outputs of various cpplint invocations in parallel to
# distill a list of problematic files
for problem_files, stdout in pool.imap(checker, chunks):
if problem_files:
if isinstance(stdout, bytes):
stdout = stdout.decode('utf8')
print(stdout, file=sys.stderr)
error = True
except Exception:
error = True
raise
finally:
pool.terminate()
pool.join()
sys.exit(1 if error else 0)

View File

@ -7,12 +7,22 @@ MAKE_CLEAN="OFF"
BUILD_COVERAGE="OFF"
DB_PATH="/opt/milvus"
PROFILING="OFF"
BUILD_FAISS_WITH_MKL="OFF"
USE_JFROG_CACHE="OFF"
RUN_CPPLINT="OFF"
CUSTOMIZATION="ON"
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
while getopts "p:d:t:uhrcgmj" arg
wget -q --method HEAD
while getopts "p:d:t:ulrcgjhx" arg
do
case $arg in
p)
INSTALL_PREFIX=$OPTARG
;;
d)
DB_PATH=$OPTARG
;;
t)
BUILD_TYPE=$OPTARG # BUILD_TYPE
;;
@ -20,11 +30,8 @@ do
echo "Build and run unittest cases" ;
BUILD_UNITTEST="ON";
;;
p)
INSTALL_PREFIX=$OPTARG
;;
d)
DB_PATH=$OPTARG
l)
RUN_CPPLINT="ON"
;;
r)
if [[ -d cmake_build ]]; then
@ -38,74 +45,102 @@ do
g)
PROFILING="ON"
;;
m)
BUILD_FAISS_WITH_MKL="ON"
;;
j)
USE_JFROG_CACHE="ON"
;;
x)
CUSTOMIZATION="OFF"
;;
h) # help
echo "
parameter:
-t: build type(default: Debug)
-u: building unit test options(default: OFF)
-p: install prefix(default: $(pwd)/milvus)
-d: db path(default: /opt/milvus)
-t: build type(default: Debug)
-u: building unit test options(default: OFF)
-l: run cpplint, clang-format and clang-tidy(default: OFF)
-r: remove previous build directory(default: OFF)
-c: code coverage(default: OFF)
-g: profiling(default: OFF)
-m: build faiss with MKL(default: OFF)
-j: use jfrog cache build directory
-j: use jfrog cache build directory(default: OFF)
-h: help
usage:
./build.sh -t \${BUILD_TYPE} [-u] [-h] [-g] [-r] [-c] [-k] [-m] [-j]
./build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} [-u] [-l] [-r] [-c] [-g] [-j] [-h]
"
exit 0
;;
?)
echo "unknown argument"
echo "ERROR! unknown argument"
exit 1
;;
esac
done
if [[ ! -d cmake_build ]]; then
mkdir cmake_build
MAKE_CLEAN="ON"
mkdir cmake_build
fi
cd cmake_build
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
CMAKE_CMD="cmake \
-DBUILD_UNIT_TEST=${BUILD_UNITTEST} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \
-DBUILD_COVERAGE=${BUILD_COVERAGE} \
-DMILVUS_DB_PATH=${DB_PATH} \
-DMILVUS_ENABLE_PROFILING=${PROFILING} \
-DUSE_JFROG_CACHE=${USE_JFROG_CACHE} \
-DCUSTOMIZATION=${CUSTOMIZATION} \
../"
echo ${CMAKE_CMD}
${CMAKE_CMD}
if [[ ${MAKE_CLEAN} == "ON" ]]; then
CMAKE_CMD="cmake -DBUILD_UNIT_TEST=${BUILD_UNITTEST} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \
-DBUILD_COVERAGE=${BUILD_COVERAGE} \
-DMILVUS_DB_PATH=${DB_PATH} \
-DMILVUS_ENABLE_PROFILING=${PROFILING} \
-DBUILD_FAISS_WITH_MKL=${BUILD_FAISS_WITH_MKL} \
-DUSE_JFROG_CACHE=${USE_JFROG_CACHE} \
../"
echo ${CMAKE_CMD}
${CMAKE_CMD}
make clean
fi
make -j 4 || exit 1
if [[ ${RUN_CPPLINT} == "ON" ]]; then
# cpplint check
make lint
if [ $? -ne 0 ]; then
echo "ERROR! cpplint check failed"
exit 1
fi
echo "cpplint check passed!"
if [[ ${BUILD_TYPE} != "Debug" ]]; then
strip src/milvus_server
fi
# clang-format check
make check-clang-format
if [ $? -ne 0 ]; then
echo "ERROR! clang-format check failed"
exit 1
fi
echo "clang-format check passed!"
make install || exit 1
# # clang-tidy check
# make check-clang-tidy
# if [ $? -ne 0 ]; then
# echo "ERROR! clang-tidy check failed"
# exit 1
# fi
# echo "clang-tidy check passed!"
else
# compile and build
make -j 4 || exit 1
if [[ ${BUILD_COVERAGE} == "ON" ]]; then
cd -
bash `pwd`/coverage.sh
cd -
fi
# strip binary symbol
if [[ ${BUILD_TYPE} != "Debug" ]]; then
strip src/milvus_server
fi
make install || exit 1
# evaluate code coverage
if [[ ${BUILD_COVERAGE} == "ON" ]]; then
cd -
bash `pwd`/coverage.sh
cd -
fi
fi

View File

@ -55,8 +55,6 @@ define_option_string(MILVUS_DEPENDENCY_SOURCE
define_option(MILVUS_VERBOSE_THIRDPARTY_BUILD
"Show output from ExternalProjects rather than just logging to files" ON)
define_option(MILVUS_WITH_ARROW "Build with ARROW" OFF)
define_option(MILVUS_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
Note that this requires linking Boost statically" ON)
@ -66,22 +64,10 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON)
define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON)
define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF)
define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF)
define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF)
define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON)
define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF)
define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF)
define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)
define_option(MILVUS_WITH_ROCKSDB "Build with RocksDB library" OFF)
define_option(MILVUS_WITH_SNAPPY "Build with Snappy compression" ON)
define_option(MILVUS_WITH_SQLITE "Build with SQLite library" ON)
@ -94,10 +80,6 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON)
define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON)
define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" OFF)
#define_option(MILVUS_ENABLE_PROFILING "Build with profiling" ON)
if(CMAKE_VERSION VERSION_LESS 3.7)
set(MILVUS_WITH_ZSTD_DEFAULT OFF)
else()
@ -106,13 +88,13 @@ else()
endif()
define_option(MILVUS_WITH_ZSTD "Build with zstd compression" ${MILVUS_WITH_ZSTD_DEFAULT})
define_option(MILVUS_WITH_AWS "Build with AWS SDK" ON)
if (MILVUS_ENABLE_PROFILING STREQUAL "ON")
define_option(MILVUS_WITH_LIBUNWIND "Build with libunwind" ON)
define_option(MILVUS_WITH_GPERFTOOLS "Build with gperftools" ON)
endif()
define_option(MILVUS_WITH_GRPC "Build with GRPC" ON)
#----------------------------------------------------------------------
if(MSVC)
set_option_category("MSVC")
@ -128,6 +110,7 @@ endif()
#----------------------------------------------------------------------
set_option_category("Test and benchmark")
unset(MILVUS_BUILD_TESTS CACHE)
if (BUILD_UNIT_TEST)
define_option(MILVUS_BUILD_TESTS "Build the MILVUS googletest unit tests" ON)
else()

View File

@ -0,0 +1,109 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Tries to find the clang-tidy and clang-format modules
#
# Usage of this module as follows:
#
# find_package(ClangTools)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# ClangToolsBin_HOME -
# When set, this path is inspected instead of standard library binary locations
# to find clang-tidy and clang-format
#
# This module defines
# CLANG_TIDY_BIN, The path to the clang tidy binary
# CLANG_TIDY_FOUND, Whether clang tidy was found
# CLANG_FORMAT_BIN, The path to the clang format binary
# CLANG_TIDY_FOUND, Whether clang format was found
find_program(CLANG_TIDY_BIN
NAMES
clang-tidy-7.0
clang-tidy-6.0
clang-tidy-5.0
clang-tidy-4.0
clang-tidy-3.9
clang-tidy-3.8
clang-tidy-3.7
clang-tidy-3.6
clang-tidy
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
NO_DEFAULT_PATH
)
if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
set(CLANG_TIDY_FOUND 0)
message("clang-tidy not found")
else()
set(CLANG_TIDY_FOUND 1)
message("clang-tidy found at ${CLANG_TIDY_BIN}")
endif()
if (CLANG_FORMAT_VERSION)
find_program(CLANG_FORMAT_BIN
NAMES clang-format-${CLANG_FORMAT_VERSION}
PATHS
${ClangTools_PATH}
$ENV{CLANG_TOOLS_PATH}
/usr/local/bin /usr/bin
NO_DEFAULT_PATH
)
# If not found yet, search alternative locations
if (("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND") AND APPLE)
# Homebrew ships older LLVM versions in /usr/local/opt/llvm@version/
STRING(REGEX REPLACE "^([0-9]+)\\.[0-9]+" "\\1" CLANG_FORMAT_MAJOR_VERSION "${CLANG_FORMAT_VERSION}")
STRING(REGEX REPLACE "^[0-9]+\\.([0-9]+)" "\\1" CLANG_FORMAT_MINOR_VERSION "${CLANG_FORMAT_VERSION}")
if ("${CLANG_FORMAT_MINOR_VERSION}" STREQUAL "0")
find_program(CLANG_FORMAT_BIN
NAMES clang-format
PATHS /usr/local/opt/llvm@${CLANG_FORMAT_MAJOR_VERSION}/bin
NO_DEFAULT_PATH
)
else()
find_program(CLANG_FORMAT_BIN
NAMES clang-format
PATHS /usr/local/opt/llvm@${CLANG_FORMAT_VERSION}/bin
NO_DEFAULT_PATH
)
endif()
endif()
else()
find_program(CLANG_FORMAT_BIN
NAMES
clang-format-7.0
clang-format-6.0
clang-format-5.0
clang-format-4.0
clang-format-3.9
clang-format-3.8
clang-format-3.7
clang-format-3.6
clang-format
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
NO_DEFAULT_PATH
)
endif()
if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
set(CLANG_FORMAT_FOUND 0)
message("clang-format not found")
else()
set(CLANG_FORMAT_FOUND 1)
message("clang-format found at ${CLANG_FORMAT_BIN}")
endif()

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
* GLOBAL:
FORMAT = "%datetime | %level | %logger | %msg"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%H:%m}-global.log"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log"
ENABLED = true
TO_FILE = true
TO_STANDARD_OUTPUT = false
@ -8,12 +8,12 @@
PERFORMANCE_TRACKING = false
MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB
* DEBUG:
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%H:%m}-debug.log"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log"
ENABLED = true
* WARNING:
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%H:%m}-warning.log"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log"
* TRACE:
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%H:%m}-trace.log"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log"
* VERBOSE:
FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
TO_FILE = false
@ -21,7 +21,7 @@
## Error logs
* ERROR:
ENABLED = true
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%H:%m}-error.log"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log"
* FATAL:
ENABLED = true
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%H:%m}-fatal.log"
FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log"

View File

@ -1,42 +1,42 @@
# Default values are used when you make no changes to the following parameters.
server_config:
address: 0.0.0.0 # milvus server ip address (IPv4)
port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534
mode: single # milvus deployment type: single, cluster, read_only
time_zone: UTC+8 # Use the UTC-x or UTC+x to specify a time zone. eg. UTC+8 for China Standard Time
address: 0.0.0.0 # milvus server ip address (IPv4)
port: 19530 # port range: 1025 ~ 65534
deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable
time_zone: UTC+8
db_config:
db_path: @MILVUS_DB_PATH@ # milvus data storage path
db_slave_path: # secondry data storage path, split by semicolon
primary_path: @MILVUS_DB_PATH@ # path used to store data and meta
secondary_path: # path used to store data only, split by semicolon
# URI format: dialect://username:password@host:port/database
# All parts except dialect are optional, but you MUST include the delimiters
# Currently dialect supports mysql or sqlite
db_backend_url: sqlite://:@:/
backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database
# Keep 'dialect://:@:/', and replace other texts with real values
# Replace 'dialect' with 'mysql' or 'sqlite'
archive_disk_threshold: 0 # triger archive action if storage size exceed this value, 0 means no limit, unit: GB
archive_days_threshold: 0 # files older than x days will be archived, 0 means no limit, unit: day
insert_buffer_size: 4 # maximum insert buffer size allowed, default: 4, unit: GB, should be at least 1 GB.
# the sum of insert_buffer_size and cpu_cache_capacity should be less than total memory, unit: GB
build_index_gpu: 0 # which gpu is used to build index, default: 0, range: 0 ~ gpu number - 1
insert_buffer_size: 4 # GB, maximum insert buffer size allowed
# sum of insert_buffer_size and cpu_cache_capacity cannot exceed total memory
build_index_gpu: 0 # gpu id used for building index
preload_table: # preload data at startup, '*' means load all tables, empty value means no preload
# you can specify preload tables like this: table1,table2,table3
metric_config:
is_startup: off # if monitoring start: on, off
collector: prometheus # metrics collector: prometheus
prometheus_config: # following are prometheus configure
port: 8080 # the port prometheus use to fetch metrics
push_gateway_ip_address: 127.0.0.1 # push method configure: push gateway ip address
push_gateway_port: 9091 # push method configure: push gateway port
enable_monitor: false # enable monitoring or not
collector: prometheus # prometheus
prometheus_config:
port: 8080 # port prometheus uses to fetch metrics
cache_config:
cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory
cpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0
insert_cache_immediately: false # insert data will be load into cache immediately for hot query
cpu_cache_capacity: 16 # GB, CPU memory used for cache
cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered
cache_insert_data: false # whether to load inserted data into cache
engine_config:
use_blas_threshold: 20
use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times
# if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
resource_config:
mode: simple
resources:
# - cpu
resource_pool:
- cpu
- gpu0

View File

@ -70,11 +70,11 @@ fi
for test in `ls ${DIR_UNITTEST}`; do
echo $test
case ${test} in
db_test)
# set run args for db_test
test_db)
# set run args for test_db
args="mysql://${MYSQL_USER_NAME}:${MYSQL_PASSWORD}@${MYSQL_HOST}:${MYSQL_PORT}/${MYSQL_DB_NAME}"
;;
*_test)
*test_*)
args=""
;;
esac
@ -101,9 +101,12 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \
"src/core/cmake_build*" \
"src/core/thirdparty*" \
"src/grpc*"\
"src/metrics/MetricBase.h"\
"src/server/Server.cpp"\
"src/server/DBWrapper.cpp"\
"src/server/grpc_impl/GrpcMilvusServer.cpp"\
"src/server/grpc_impl/GrpcServer.cpp"\
"src/utils/easylogging++.h"\
"src/utils/easylogging++.cc"\
# gen html report
${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/

View File

@ -1,155 +1,159 @@
#-------------------------------------------------------------------------------
# Copyright (Zilliz) - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited.
# Proprietary and confidential.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http:#www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#-------------------------------------------------------------------------------
include_directories(${MILVUS_SOURCE_DIR})
include_directories(${MILVUS_ENGINE_SRC})
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)
include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-status)
include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-milvus)
#this statement must put here, since the CORE_INCLUDE_DIRS is defined in code/CMakeList.txt
add_subdirectory(core)
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
foreach(dir ${CORE_INCLUDE_DIRS})
foreach (dir ${CORE_INCLUDE_DIRS})
include_directories(${dir})
endforeach()
endforeach ()
aux_source_directory(cache cache_files)
aux_source_directory(config config_files)
aux_source_directory(server server_files)
aux_source_directory(server/grpc_impl grpcserver_files)
aux_source_directory(utils utils_files)
aux_source_directory(db db_main_files)
aux_source_directory(db/engine db_engine_files)
aux_source_directory(db/insert db_insert_files)
aux_source_directory(db/meta db_meta_files)
aux_source_directory(metrics metrics_files)
aux_source_directory(wrapper/knowhere knowhere_files)
aux_source_directory(scheduler/action scheduler_action_files)
aux_source_directory(scheduler/event scheduler_event_files)
aux_source_directory(scheduler/resource scheduler_resource_files)
aux_source_directory(scheduler/task scheduler_task_files)
aux_source_directory(scheduler scheduler_root_files)
set(scheduler_srcs
${scheduler_action_files}
${scheduler_event_files}
${scheduler_resource_files}
${scheduler_task_files}
${scheduler_root_files}
)
aux_source_directory(db/scheduler scheduler_files)
aux_source_directory(db/scheduler/context scheduler_context_files)
aux_source_directory(db/scheduler/task scheduler_task_files)
set(db_scheduler_files
${scheduler_files}
${scheduler_context_files}
${scheduler_task_files}
)
set(license_check_files
license/LicenseLibrary.cpp
license/LicenseCheck.cpp
)
set(license_generator_files
license/LicenseGenerator.cpp
license/LicenseLibrary.cpp
)
aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/metrics metrics_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/db db_main_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/db/engine db_engine_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/db/insert db_insert_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/db/meta db_meta_files)
set(grpc_service_files
metrics/SystemInfo.cpp
metrics/SystemInfo.h
grpc/gen-milvus/milvus.grpc.pb.cc
grpc/gen-milvus/milvus.pb.cc
grpc/gen-status/status.grpc.pb.cc
grpc/gen-status/status.pb.cc
scheduler/Utils.h)
${MILVUS_ENGINE_SRC}/grpc/gen-milvus/milvus.grpc.pb.cc
${MILVUS_ENGINE_SRC}/grpc/gen-milvus/milvus.pb.cc
${MILVUS_ENGINE_SRC}/grpc/gen-status/status.grpc.pb.cc
${MILVUS_ENGINE_SRC}/grpc/gen-status/status.pb.cc
)
set(db_files
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler scheduler_main_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/action scheduler_action_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/event scheduler_event_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/job scheduler_job_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/optimizer scheduler_optimizer_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/resource scheduler_resource_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/task scheduler_task_files)
set(scheduler_files
${scheduler_main_files}
${scheduler_action_files}
${scheduler_event_files}
${scheduler_job_files}
${scheduler_optimizer_files}
${scheduler_resource_files}
${scheduler_task_files}
)
aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_files)
set(engine_files
${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
${cache_files}
${db_main_files}
${db_engine_files}
${db_insert_files}
${db_meta_files}
${db_scheduler_files}
${metrics_files}
${knowhere_files}
${utils_files}
${wrapper_files}
)
set(s3_client_files
storage/s3/S3ClientWrapper.cpp
storage/s3/S3ClientWrapper.h)
include_directories(/usr/include)
include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
include_directories(/usr/include/mysql)
include_directories(grpc/gen-status)
include_directories(grpc/gen-milvus)
set(client_grpc_lib
grpcpp_channelz
grpc++
grpc
grpc_protobuf
grpc_protoc)
grpc_protoc
)
set(third_party_libs
knowhere
easyloggingpp
sqlite
${client_grpc_lib}
yaml-cpp
set(prometheus_lib
prometheus-cpp-push
prometheus-cpp-pull
prometheus-cpp-core
)
set(boost_lib
boost_system_static
boost_filesystem_static
boost_serialization_static
)
set(cuda_lib
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
cudart
cublas
)
set(third_party_libs
sqlite
${client_grpc_lib}
yaml-cpp
${prometheus_lib}
${boost_lib}
bzip2
lz4
snappy
zlib
zstd
cudart
cublas
${cuda_lib}
mysqlpp
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
cudart
)
if (MILVUS_ENABLE_PROFILING STREQUAL "ON")
set(third_party_libs ${third_party_libs}
gperftools
libunwind)
endif()
gperftools
libunwind
)
endif ()
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
set(engine_libs
pthread
libgomp.a
libgfortran.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
set(engine_libs
${engine_libs}
libquadmath.a
)
${engine_libs}
libquadmath.a
)
endif ()
cuda_add_library(milvus_engine STATIC ${db_files})
target_link_libraries(milvus_engine ${engine_libs} ${third_party_libs})
cuda_add_library(milvus_engine STATIC ${engine_files})
target_link_libraries(milvus_engine
knowhere
${engine_libs}
${third_party_libs}
)
add_library(metrics STATIC ${metrics_files})
set(metrics_lib
easyloggingpp
yaml-cpp
prometheus-cpp-push
prometheus-cpp-pull
prometheus-cpp-core
${prometheus_lib}
)
target_link_libraries(metrics ${metrics_lib})
@ -161,21 +165,19 @@ set(server_libs
metrics
)
set(knowhere_libs
knowhere
)
add_executable(milvus_server
${config_files}
${server_files}
${grpcserver_files}
${utils_files}
${grpc_service_files}
${metrics_files}
${scheduler_srcs}
${scheduler_files}
${server_files}
${grpc_server_files}
${grpc_service_files}
${utils_files}
)
target_link_libraries(milvus_server ${server_libs} ${knowhere_libs} ${third_party_libs})
target_link_libraries(milvus_server
${server_libs}
)
install(TARGETS milvus_server DESTINATION bin)

View File

@ -1,212 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "Cache.h"
#include "utils/Log.h"
#include <set>
namespace zilliz {
namespace milvus {
namespace cache {
constexpr double DEFAULT_THRESHHOLD_PERCENT = 0.85;
Cache::Cache(int64_t capacity, uint64_t cache_max_count)
: usage_(0),
capacity_(capacity),
freemem_percent_(DEFAULT_THRESHHOLD_PERCENT),
lru_(cache_max_count) {
// AGENT_LOG_DEBUG << "Construct Cache with capacity " << std::to_string(mem_capacity)
}
void Cache::set_capacity(int64_t capacity) {
if(capacity > 0) {
capacity_ = capacity;
free_memory();
}
}
size_t Cache::size() const {
std::lock_guard<std::mutex> lock(mutex_);
return lru_.size();
}
bool Cache::exists(const std::string& key) {
std::lock_guard<std::mutex> lock(mutex_);
return lru_.exists(key);
}
DataObjPtr Cache::get(const std::string& key) {
std::lock_guard<std::mutex> lock(mutex_);
if(!lru_.exists(key)){
return nullptr;
}
const CacheObjPtr& cache_obj = lru_.get(key);
return cache_obj->data_;
}
void Cache::insert(const std::string& key, const DataObjPtr& data_ptr) {
{
std::lock_guard<std::mutex> lock(mutex_);
/* if key already exist, over-write old data */
if (lru_.exists(key)) {
CacheObjPtr obj_ptr = lru_.get(key);
usage_ -= obj_ptr->data_->size();
obj_ptr->data_ = data_ptr;
usage_ += data_ptr->size();
} else {
CacheObjPtr obj_ptr(new CacheObj(data_ptr));
lru_.put(key, obj_ptr);
usage_ += data_ptr->size();
}
SERVER_LOG_DEBUG << "Insert " << key << " size:" << data_ptr->size()
<< " bytes into cache, usage: " << usage_ << " bytes";
}
if (usage_ > capacity_) {
SERVER_LOG_DEBUG << "Current usage " << usage_
<< " exceeds cache capacity " << capacity_
<< ", start free memory";
free_memory();
}
}
void Cache::erase(const std::string& key) {
std::lock_guard<std::mutex> lock(mutex_);
if(!lru_.exists(key)){
return;
}
const CacheObjPtr& obj_ptr = lru_.get(key);
const DataObjPtr& data_ptr = obj_ptr->data_;
usage_ -= data_ptr->size();
SERVER_LOG_DEBUG << "Erase " << key << " size: " << data_ptr->size();
lru_.erase(key);
}
void Cache::clear() {
std::lock_guard<std::mutex> lock(mutex_);
lru_.clear();
usage_ = 0;
SERVER_LOG_DEBUG << "Clear cache !";
}
#if 0 /* caiyd 20190221, need more testing before enable */
void Cache::flush_to_file(const std::string& key, const CacheObjPtr& obj_ptr) {
if (!this->swap_enabled_) return;
const DataObjPtr data_ptr = obj_ptr->data();
if (data_ptr == nullptr || data_ptr->size() == 0) return;
if (data_ptr->ptr() == nullptr) return;
std::string name = std::to_string(reinterpret_cast<int64_t>(data_ptr.get()));
filesys::CreateDirectory(this->swap_path_);
/* write cache data to file */
obj_ptr->set_file_path(this->swap_path_ + "/" + name);
std::shared_ptr<arrow::io::OutputStream> outfile = nullptr;
filesys::OpenWritableFile(obj_ptr->file_path(), false, &outfile);
filesys::WriteFile(outfile, data_ptr->ptr().get(), data_ptr->size());
(void)outfile->Close();
AGENT_LOG_DEBUG << "Flush cache data: " << key << ", to file: " << obj_ptr->file_path();
/* free cache memory */
data_ptr->ptr().reset();
usage_ -= data_ptr->size();
}
void Cache::restore_from_file(const std::string& key, const CacheObjPtr& obj_ptr) {
if (!this->swap_enabled_) return;
const DataObjPtr data_ptr = obj_ptr->data();
if (data_ptr == nullptr || data_ptr->size() == 0) return;
std::shared_ptr<arrow::io::RandomAccessFile> infile = nullptr;
int64_t file_size, bytes_read;
/* load cache data from file */
if (!filesys::FileExist(obj_ptr->file_path())) {
THROW_AGENT_UNEXPECTED_ERROR("File not exist: " + obj_ptr->file_path());
}
filesys::OpenReadableFile(obj_ptr->file_path(), &infile);
infile->GetSize(&file_size);
if (data_ptr->size() != file_size) {
THROW_AGENT_UNEXPECTED_ERROR("File size not match: " + obj_ptr->file_path());
}
data_ptr->set_ptr(lib::gpu::MakeShared<char>(data_ptr->size(), lib::gpu::MallocHint::kUnifiedGlobal));
infile->Read(file_size, &bytes_read, data_ptr->ptr().get());
infile->Close();
AGENT_LOG_DEBUG << "Restore cache data: " << key << ", from file: " << obj_ptr->file_path();
/* clear file path */
obj_ptr->set_file_path("");
usage_ += data_ptr->size();
}
#endif
/* free memory space when CACHE occupation exceed its capacity */
void Cache::free_memory() {
if (usage_ <= capacity_) return;
int64_t threshhold = capacity_ * freemem_percent_;
int64_t delta_size = usage_ - threshhold;
if(delta_size <= 0) {
delta_size = 1;//ensure at least one item erased
}
std::set<std::string> key_array;
int64_t released_size = 0;
{
std::lock_guard<std::mutex> lock(mutex_);
auto it = lru_.rbegin();
while (it != lru_.rend() && released_size < delta_size) {
auto& key = it->first;
auto& obj_ptr = it->second;
const auto& data_ptr = obj_ptr->data_;
key_array.emplace(key);
released_size += data_ptr->size();
++it;
}
}
SERVER_LOG_DEBUG << "to be released memory size: " << released_size;
for (auto& key : key_array) {
erase(key);
}
print();
}
void Cache::print() {
size_t cache_count = 0;
{
std::lock_guard<std::mutex> lock(mutex_);
cache_count = lru_.size();
}
SERVER_LOG_DEBUG << "[Cache item count]: " << cache_count;
SERVER_LOG_DEBUG << "[Cache usage]: " << usage_ << " bytes";
SERVER_LOG_DEBUG << "[Cache capacity]: " << capacity_ << " bytes";
}
} // cache
} // milvus
} // zilliz

118
cpp/src/cache/Cache.h vendored
View File

@ -1,73 +1,91 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include <mutex>
#include <atomic>
#include "LRU.h"
#include "DataObj.h"
#include "utils/Log.h"
#include <atomic>
#include <mutex>
#include <set>
#include <string>
namespace zilliz {
namespace milvus {
namespace cache {
const std::string SWAP_DIR = ".CACHE";
template <typename ItemObj>
class Cache {
private:
class CacheObj {
public:
CacheObj() = delete;
CacheObj(const DataObjPtr& data)
: data_(data) {
}
public:
DataObjPtr data_ = nullptr;
};
using CacheObjPtr = std::shared_ptr<CacheObj>;
public:
//mem_capacity, units:GB
public:
// mem_capacity, units:GB
Cache(int64_t capacity_gb, uint64_t cache_max_count);
~Cache() = default;
int64_t usage() const { return usage_; }
int64_t capacity() const { return capacity_; } //unit: BYTE
void set_capacity(int64_t capacity); //unit: BYTE
int64_t
usage() const {
return usage_;
}
double freemem_percent() const { return freemem_percent_; };
void set_freemem_percent(double percent) { freemem_percent_ = percent; }
int64_t
capacity() const {
return capacity_;
} // unit: BYTE
void
set_capacity(int64_t capacity); // unit: BYTE
size_t size() const;
bool exists(const std::string& key);
DataObjPtr get(const std::string& key);
void insert(const std::string& key, const DataObjPtr& data);
void erase(const std::string& key);
void print();
void clear();
void free_memory();
double
freemem_percent() const {
return freemem_percent_;
}
private:
void
set_freemem_percent(double percent) {
freemem_percent_ = percent;
}
size_t
size() const;
bool
exists(const std::string& key);
ItemObj
get(const std::string& key);
void
insert(const std::string& key, const ItemObj& item);
void
erase(const std::string& key);
void
print();
void
clear();
private:
void
free_memory();
private:
int64_t usage_;
int64_t capacity_;
double freemem_percent_;
LRU<std::string, CacheObjPtr> lru_;
LRU<std::string, ItemObj> lru_;
mutable std::mutex mutex_;
};
using CachePtr = std::shared_ptr<Cache>;
} // cache
} // milvus
} // zilliz
} // namespace cache
} // namespace milvus
#include "cache/Cache.inl"

194
cpp/src/cache/Cache.inl vendored Normal file
View File

@ -0,0 +1,194 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace milvus {
namespace cache {
constexpr double DEFAULT_THRESHHOLD_PERCENT = 0.85;
template<typename ItemObj>
Cache<ItemObj>::Cache(int64_t capacity, uint64_t cache_max_count)
: usage_(0),
capacity_(capacity),
freemem_percent_(DEFAULT_THRESHHOLD_PERCENT),
lru_(cache_max_count) {
// AGENT_LOG_DEBUG << "Construct Cache with capacity " << std::to_string(mem_capacity)
}
template<typename ItemObj>
void
Cache<ItemObj>::set_capacity(int64_t capacity) {
if (capacity > 0) {
capacity_ = capacity;
free_memory();
}
}
template<typename ItemObj>
size_t
Cache<ItemObj>::size() const {
std::lock_guard<std::mutex> lock(mutex_);
return lru_.size();
}
template<typename ItemObj>
bool
Cache<ItemObj>::exists(const std::string &key) {
std::lock_guard<std::mutex> lock(mutex_);
return lru_.exists(key);
}
template<typename ItemObj>
ItemObj
Cache<ItemObj>::get(const std::string &key) {
std::lock_guard<std::mutex> lock(mutex_);
if (!lru_.exists(key)) {
return nullptr;
}
return lru_.get(key);
}
template<typename ItemObj>
void
Cache<ItemObj>::insert(const std::string &key, const ItemObj &item) {
if (item == nullptr) {
return;
}
// if(item->size() > capacity_) {
// SERVER_LOG_ERROR << "Item size " << item->size()
// << " is too large to insert into cache, capacity " << capacity_;
// return;
// }
//calculate usage
{
std::lock_guard<std::mutex> lock(mutex_);
//if key already exist, subtract old item size
if (lru_.exists(key)) {
const ItemObj &old_item = lru_.get(key);
usage_ -= old_item->Size();
}
//plus new item size
usage_ += item->Size();
}
//if usage exceed capacity, free some items
if (usage_ > capacity_) {
SERVER_LOG_DEBUG << "Current usage " << usage_
<< " exceeds cache capacity " << capacity_
<< ", start free memory";
free_memory();
}
//insert new item
{
std::lock_guard<std::mutex> lock(mutex_);
lru_.put(key, item);
SERVER_LOG_DEBUG << "Insert " << key << " size:" << item->Size()
<< " bytes into cache, usage: " << usage_ << " bytes";
}
}
template<typename ItemObj>
void
Cache<ItemObj>::erase(const std::string &key) {
std::lock_guard<std::mutex> lock(mutex_);
if (!lru_.exists(key)) {
return;
}
const ItemObj &old_item = lru_.get(key);
usage_ -= old_item->Size();
SERVER_LOG_DEBUG << "Erase " << key << " size: " << old_item->Size();
lru_.erase(key);
}
template<typename ItemObj>
void
Cache<ItemObj>::clear() {
std::lock_guard<std::mutex> lock(mutex_);
lru_.clear();
usage_ = 0;
SERVER_LOG_DEBUG << "Clear cache !";
}
/* free memory space when CACHE occupation exceed its capacity */
template<typename ItemObj>
void
Cache<ItemObj>::free_memory() {
if (usage_ <= capacity_) return;
int64_t threshhold = capacity_ * freemem_percent_;
int64_t delta_size = usage_ - threshhold;
if (delta_size <= 0) {
delta_size = 1;//ensure at least one item erased
}
std::set<std::string> key_array;
int64_t released_size = 0;
{
std::lock_guard<std::mutex> lock(mutex_);
auto it = lru_.rbegin();
while (it != lru_.rend() && released_size < delta_size) {
auto &key = it->first;
auto &obj_ptr = it->second;
key_array.emplace(key);
released_size += obj_ptr->Size();
++it;
}
}
SERVER_LOG_DEBUG << "to be released memory size: " << released_size;
for (auto &key : key_array) {
erase(key);
}
print();
}
template<typename ItemObj>
void
Cache<ItemObj>::print() {
size_t cache_count = 0;
{
std::lock_guard<std::mutex> lock(mutex_);
cache_count = lru_.size();
}
SERVER_LOG_DEBUG << "[Cache item count]: " << cache_count;
SERVER_LOG_DEBUG << "[Cache usage]: " << usage_ << " bytes";
SERVER_LOG_DEBUG << "[Cache capacity]: " << capacity_ << " bytes";
}
} // namespace cache
} // namespace milvus

View File

@ -1,135 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "utils/Log.h"
#include "CacheMgr.h"
#include "metrics/Metrics.h"
namespace zilliz {
namespace milvus {
namespace cache {
CacheMgr::CacheMgr() {
}
CacheMgr::~CacheMgr() {
}
uint64_t CacheMgr::ItemCount() const {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return 0;
}
return (uint64_t)(cache_->size());
}
bool CacheMgr::ItemExists(const std::string& key) {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return false;
}
return cache_->exists(key);
}
DataObjPtr CacheMgr::GetItem(const std::string& key) {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return nullptr;
}
server::Metrics::GetInstance().CacheAccessTotalIncrement();
return cache_->get(key);
}
engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) {
DataObjPtr obj = GetItem(key);
if(obj != nullptr) {
return obj->data();
}
return nullptr;
}
void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->insert(key, data);
server::Metrics::GetInstance().CacheAccessTotalIncrement();
}
void CacheMgr::InsertItem(const std::string& key, const engine::VecIndexPtr& index) {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
DataObjPtr obj = std::make_shared<DataObj>(index);
cache_->insert(key, obj);
server::Metrics::GetInstance().CacheAccessTotalIncrement();
}
void CacheMgr::EraseItem(const std::string& key) {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->erase(key);
server::Metrics::GetInstance().CacheAccessTotalIncrement();
}
void CacheMgr::PrintInfo() {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->print();
}
void CacheMgr::ClearCache() {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->clear();
}
int64_t CacheMgr::CacheUsage() const {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return 0;
}
return cache_->usage();
}
int64_t CacheMgr::CacheCapacity() const {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return 0;
}
return cache_->capacity();
}
void CacheMgr::SetCapacity(int64_t capacity) {
if(cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->set_capacity(capacity);
}
}
}
}

View File

@ -1,48 +1,73 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "Cache.h"
#include "metrics/Metrics.h"
#include "utils/Log.h"
#include <memory>
#include <string>
namespace zilliz {
namespace milvus {
namespace cache {
template <typename ItemObj>
class CacheMgr {
public:
virtual uint64_t ItemCount() const;
public:
virtual uint64_t
ItemCount() const;
virtual bool ItemExists(const std::string& key);
virtual bool
ItemExists(const std::string& key);
virtual DataObjPtr GetItem(const std::string& key);
virtual engine::VecIndexPtr GetIndex(const std::string& key);
virtual ItemObj
GetItem(const std::string& key);
virtual void InsertItem(const std::string& key, const DataObjPtr& data);
virtual void InsertItem(const std::string& key, const engine::VecIndexPtr& index);
virtual void
InsertItem(const std::string& key, const ItemObj& data);
virtual void EraseItem(const std::string& key);
virtual void
EraseItem(const std::string& key);
virtual void PrintInfo();
virtual void
PrintInfo();
virtual void ClearCache();
virtual void
ClearCache();
int64_t CacheUsage() const;
int64_t CacheCapacity() const;
void SetCapacity(int64_t capacity);
int64_t
CacheUsage() const;
int64_t
CacheCapacity() const;
void
SetCapacity(int64_t capacity);
protected:
protected:
CacheMgr();
virtual ~CacheMgr();
protected:
protected:
using CachePtr = std::shared_ptr<Cache<ItemObj>>;
CachePtr cache_;
};
} // namespace cache
} // namespace milvus
}
}
}
#include "cache/CacheMgr.inl"

145
cpp/src/cache/CacheMgr.inl vendored Normal file
View File

@ -0,0 +1,145 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace milvus {
namespace cache {
template<typename ItemObj>
CacheMgr<ItemObj>::CacheMgr() {
}
template<typename ItemObj>
CacheMgr<ItemObj>::~CacheMgr() {
}
template<typename ItemObj>
uint64_t
CacheMgr<ItemObj>::ItemCount() const {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return 0;
}
return (uint64_t) (cache_->size());
}
template<typename ItemObj>
bool
CacheMgr<ItemObj>::ItemExists(const std::string &key) {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return false;
}
return cache_->exists(key);
}
template<typename ItemObj>
ItemObj
CacheMgr<ItemObj>::GetItem(const std::string &key) {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return nullptr;
}
server::Metrics::GetInstance().CacheAccessTotalIncrement();
return cache_->get(key);
}
template<typename ItemObj>
void
CacheMgr<ItemObj>::InsertItem(const std::string &key, const ItemObj &data) {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->insert(key, data);
server::Metrics::GetInstance().CacheAccessTotalIncrement();
}
template<typename ItemObj>
void
CacheMgr<ItemObj>::EraseItem(const std::string &key) {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->erase(key);
server::Metrics::GetInstance().CacheAccessTotalIncrement();
}
template<typename ItemObj>
void
CacheMgr<ItemObj>::PrintInfo() {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->print();
}
template<typename ItemObj>
void
CacheMgr<ItemObj>::ClearCache() {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->clear();
}
template<typename ItemObj>
int64_t
CacheMgr<ItemObj>::CacheUsage() const {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return 0;
}
return cache_->usage();
}
template<typename ItemObj>
int64_t
CacheMgr<ItemObj>::CacheCapacity() const {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return 0;
}
return cache_->capacity();
}
template<typename ItemObj>
void
CacheMgr<ItemObj>::SetCapacity(int64_t capacity) {
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->set_capacity(capacity);
}
} // namespace cache
} // namespace milvus

View File

@ -1,36 +1,69 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "CpuCacheMgr.h"
#include "server/ServerConfig.h"
#include "cache/CpuCacheMgr.h"
#include "server/Config.h"
#include "utils/Log.h"
namespace zilliz {
#include <utility>
namespace milvus {
namespace cache {
namespace {
constexpr int64_t unit = 1024 * 1024 * 1024;
constexpr int64_t unit = 1024 * 1024 * 1024;
}
CpuCacheMgr::CpuCacheMgr() {
server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
int64_t cap = config.GetInt64Value(server::CONFIG_CPU_CACHE_CAPACITY, 16);
cap *= unit;
cache_ = std::make_shared<Cache>(cap, 1UL<<32);
server::Config& config = server::Config::GetInstance();
Status s;
double free_percent = config.GetDoubleValue(server::CACHE_FREE_PERCENT, 0.85);
if(free_percent > 0.0 && free_percent <= 1.0) {
cache_->set_freemem_percent(free_percent);
int64_t cpu_cache_cap;
s = config.GetCacheConfigCpuCacheCapacity(cpu_cache_cap);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
int64_t cap = cpu_cache_cap * unit;
cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32);
float cpu_cache_threshold;
s = config.GetCacheConfigCpuCacheThreshold(cpu_cache_threshold);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
if (cpu_cache_threshold > 0.0 && cpu_cache_threshold <= 1.0) {
cache_->set_freemem_percent(cpu_cache_threshold);
} else {
SERVER_LOG_ERROR << "Invalid cache_free_percent: " << free_percent <<
", defaultly set to " << cache_->freemem_percent();
SERVER_LOG_ERROR << "Invalid cpu_cache_threshold: " << cpu_cache_threshold << ", by default set to "
<< cache_->freemem_percent();
}
}
CpuCacheMgr*
CpuCacheMgr::GetInstance() {
static CpuCacheMgr s_mgr;
return &s_mgr;
}
DataObjPtr
CpuCacheMgr::GetIndex(const std::string& key) {
DataObjPtr obj = GetItem(key);
return obj;
}
}
} // namespace cache
} // namespace milvus

View File

@ -1,29 +1,43 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "CacheMgr.h"
#include "DataObj.h"
#include <memory>
#include <string>
namespace zilliz {
namespace milvus {
namespace cache {
class CpuCacheMgr : public CacheMgr {
private:
class CpuCacheMgr : public CacheMgr<DataObjPtr> {
private:
CpuCacheMgr();
public:
//TODO: use smart pointer instead
static CacheMgr* GetInstance() {
static CpuCacheMgr s_mgr;
return &s_mgr;
}
public:
// TODO(myh): use smart pointer instead
static CpuCacheMgr*
GetInstance();
DataObjPtr
GetIndex(const std::string& key);
};
}
}
}
} // namespace cache
} // namespace milvus

View File

@ -1,52 +1,34 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "wrapper/knowhere/vec_index.h"
#include <memory>
namespace zilliz {
namespace milvus {
namespace cache {
class DataObj {
public:
DataObj(const engine::VecIndexPtr& index)
: index_(index)
{}
DataObj(const engine::VecIndexPtr& index, int64_t size)
: index_(index),
size_(size)
{}
engine::VecIndexPtr data() { return index_; }
const engine::VecIndexPtr& data() const { return index_; }
int64_t size() const {
if(index_ == nullptr) {
return 0;
}
if(size_ > 0) {
return size_;
}
return index_->Count() * index_->Dimension() * sizeof(float);
}
private:
engine::VecIndexPtr index_ = nullptr;
int64_t size_ = 0;
public:
virtual int64_t
Size() = 0;
};
using DataObjPtr = std::shared_ptr<DataObj>;
}
}
}
} // namespace cache
} // namespace milvus

View File

@ -1,15 +1,27 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "cache/GpuCacheMgr.h"
#include "server/Config.h"
#include "utils/Log.h"
#include <sstream>
#include "utils/Log.h"
#include "GpuCacheMgr.h"
#include "server/ServerConfig.h"
#include <utility>
namespace zilliz {
namespace milvus {
namespace cache {
@ -17,26 +29,36 @@ std::mutex GpuCacheMgr::mutex_;
std::unordered_map<uint64_t, GpuCacheMgrPtr> GpuCacheMgr::instance_;
namespace {
constexpr int64_t G_BYTE = 1024 * 1024 * 1024;
constexpr int64_t G_BYTE = 1024 * 1024 * 1024;
}
GpuCacheMgr::GpuCacheMgr() {
server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
server::Config& config = server::Config::GetInstance();
Status s;
int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 0);
cap *= G_BYTE;
cache_ = std::make_shared<Cache>(cap, 1UL<<32);
int64_t gpu_cache_cap;
s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
int64_t cap = gpu_cache_cap * G_BYTE;
cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32);
double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85);
if (free_percent > 0.0 && free_percent <= 1.0) {
cache_->set_freemem_percent(free_percent);
float gpu_mem_threshold;
s = config.GetCacheConfigGpuCacheThreshold(gpu_mem_threshold);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
if (gpu_mem_threshold > 0.0 && gpu_mem_threshold <= 1.0) {
cache_->set_freemem_percent(gpu_mem_threshold);
} else {
SERVER_LOG_ERROR << "Invalid gpu_cache_free_percent: " << free_percent <<
", defaultly set to " << cache_->freemem_percent();
SERVER_LOG_ERROR << "Invalid gpu_mem_threshold: " << gpu_mem_threshold << ", by default set to "
<< cache_->freemem_percent();
}
}
CacheMgr* GpuCacheMgr::GetInstance(uint64_t gpu_id) {
GpuCacheMgr*
GpuCacheMgr::GetInstance(uint64_t gpu_id) {
if (instance_.find(gpu_id) == instance_.end()) {
std::lock_guard<std::mutex> lock(mutex_);
if (instance_.find(gpu_id) == instance_.end()) {
@ -49,16 +71,11 @@ CacheMgr* GpuCacheMgr::GetInstance(uint64_t gpu_id) {
}
}
void GpuCacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) {
//TODO: copy data to gpu
if (cache_ == nullptr) {
SERVER_LOG_ERROR << "Cache doesn't exist";
return;
}
cache_->insert(key, data);
DataObjPtr
GpuCacheMgr::GetIndex(const std::string& key) {
DataObjPtr obj = GetItem(key);
return obj;
}
}
}
}
} // namespace cache
} // namespace milvus

View File

@ -1,33 +1,47 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "CacheMgr.h"
#include <unordered_map>
#include <memory>
#include "DataObj.h"
#include <memory>
#include <string>
#include <unordered_map>
namespace zilliz {
namespace milvus {
namespace cache {
class GpuCacheMgr;
using GpuCacheMgrPtr = std::shared_ptr<GpuCacheMgr>;
class GpuCacheMgr : public CacheMgr {
public:
class GpuCacheMgr : public CacheMgr<DataObjPtr> {
public:
GpuCacheMgr();
static CacheMgr* GetInstance(uint64_t gpu_id);
static GpuCacheMgr*
GetInstance(uint64_t gpu_id);
void InsertItem(const std::string& key, const DataObjPtr& data) override;
DataObjPtr
GetIndex(const std::string& key);
private:
private:
static std::mutex mutex_;
static std::unordered_map<uint64_t, GpuCacheMgrPtr> instance_;
};
}
}
}
} // namespace cache
} // namespace milvus

132
cpp/src/cache/LRU.h vendored
View File

@ -1,102 +1,122 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <unordered_map>
#include <list>
#include <cstddef>
#include <list>
#include <stdexcept>
#include <unordered_map>
#include <utility>
namespace zilliz {
namespace milvus {
namespace cache {
template<typename key_t, typename value_t>
template <typename key_t, typename value_t>
class LRU {
public:
public:
typedef typename std::pair<key_t, value_t> key_value_pair_t;
typedef typename std::list<key_value_pair_t>::iterator list_iterator_t;
typedef typename std::list<key_value_pair_t>::reverse_iterator reverse_list_iterator_t;
LRU(size_t max_size) : _max_size(max_size) {}
explicit LRU(size_t max_size) : max_size_(max_size) {
}
void put(const key_t& key, const value_t& value) {
auto it = _cache_items_map.find(key);
_cache_items_list.push_front(key_value_pair_t(key, value));
if (it != _cache_items_map.end()) {
_cache_items_list.erase(it->second);
_cache_items_map.erase(it);
void
put(const key_t& key, const value_t& value) {
auto it = cache_items_map_.find(key);
cache_items_list_.push_front(key_value_pair_t(key, value));
if (it != cache_items_map_.end()) {
cache_items_list_.erase(it->second);
cache_items_map_.erase(it);
}
_cache_items_map[key] = _cache_items_list.begin();
cache_items_map_[key] = cache_items_list_.begin();
if (_cache_items_map.size() > _max_size) {
auto last = _cache_items_list.end();
if (cache_items_map_.size() > max_size_) {
auto last = cache_items_list_.end();
last--;
_cache_items_map.erase(last->first);
_cache_items_list.pop_back();
cache_items_map_.erase(last->first);
cache_items_list_.pop_back();
}
}
const value_t& get(const key_t& key) {
auto it = _cache_items_map.find(key);
if (it == _cache_items_map.end()) {
const value_t&
get(const key_t& key) {
auto it = cache_items_map_.find(key);
if (it == cache_items_map_.end()) {
throw std::range_error("There is no such key in cache");
} else {
_cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
cache_items_list_.splice(cache_items_list_.begin(), cache_items_list_, it->second);
return it->second->second;
}
}
void erase(const key_t& key) {
auto it = _cache_items_map.find(key);
if (it != _cache_items_map.end()) {
_cache_items_list.erase(it->second);
_cache_items_map.erase(it);
void
erase(const key_t& key) {
auto it = cache_items_map_.find(key);
if (it != cache_items_map_.end()) {
cache_items_list_.erase(it->second);
cache_items_map_.erase(it);
}
}
bool exists(const key_t& key) const {
return _cache_items_map.find(key) != _cache_items_map.end();
bool
exists(const key_t& key) const {
return cache_items_map_.find(key) != cache_items_map_.end();
}
size_t size() const {
return _cache_items_map.size();
size_t
size() const {
return cache_items_map_.size();
}
list_iterator_t begin() {
_iter = _cache_items_list.begin();
return _iter;
list_iterator_t
begin() {
iter_ = cache_items_list_.begin();
return iter_;
}
list_iterator_t end() {
return _cache_items_list.end();
list_iterator_t
end() {
return cache_items_list_.end();
}
reverse_list_iterator_t rbegin() {
return _cache_items_list.rbegin();
reverse_list_iterator_t
rbegin() {
return cache_items_list_.rbegin();
}
reverse_list_iterator_t rend() {
return _cache_items_list.rend();
reverse_list_iterator_t
rend() {
return cache_items_list_.rend();
}
void clear() {
_cache_items_list.clear();
_cache_items_map.clear();
void
clear() {
cache_items_list_.clear();
cache_items_map_.clear();
}
private:
std::list<key_value_pair_t> _cache_items_list;
std::unordered_map<key_t, list_iterator_t> _cache_items_map;
size_t _max_size;
list_iterator_t _iter;
private:
std::list<key_value_pair_t> cache_items_list_;
std::unordered_map<key_t, list_iterator_t> cache_items_map_;
size_t max_size_;
list_iterator_t iter_;
};
} // cache
} // milvus
} // zilliz
} // namespace cache
} // namespace milvus

View File

@ -1,20 +1,31 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "ConfigMgr.h"
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "config/ConfigMgr.h"
#include "YamlConfigMgr.h"
namespace zilliz {
namespace milvus {
namespace server {
ConfigMgr * ConfigMgr::GetInstance() {
ConfigMgr*
ConfigMgr::GetInstance() {
static YamlConfigMgr mgr;
return &mgr;
}
}
}
}
} // namespace server
} // namespace milvus

View File

@ -1,14 +1,27 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "utils/Error.h"
#include "ConfigNode.h"
#include "utils/Error.h"
#include <string>
namespace zilliz {
namespace milvus {
namespace server {
@ -28,16 +41,21 @@ namespace server {
class ConfigMgr {
public:
static ConfigMgr* GetInstance();
static ConfigMgr*
GetInstance();
virtual ErrorCode LoadConfigFile(const std::string &filename) = 0;
virtual void Print() const = 0;//will be deleted
virtual std::string DumpString() const = 0;
virtual ErrorCode
LoadConfigFile(const std::string& filename) = 0;
virtual void
Print() const = 0; // will be deleted
virtual std::string
DumpString() const = 0;
virtual const ConfigNode& GetRootNode() const = 0;
virtual ConfigNode& GetRootNode() = 0;
virtual const ConfigNode&
GetRootNode() const = 0;
virtual ConfigNode&
GetRootNode() = 0;
};
}
}
}
} // namespace server
} // namespace milvus

View File

@ -1,38 +1,50 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "ConfigNode.h"
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "config/ConfigNode.h"
#include "utils/Error.h"
#include "utils/Log.h"
#include <algorithm>
#include <sstream>
#include <string>
#include <algorithm>
namespace zilliz {
namespace milvus {
namespace server {
void ConfigNode::Combine(const ConfigNode& target) {
void
ConfigNode::Combine(const ConfigNode& target) {
const std::map<std::string, std::string>& kv = target.GetConfig();
for(auto itr = kv.begin(); itr != kv.end(); ++itr){
for (auto itr = kv.begin(); itr != kv.end(); ++itr) {
config_[itr->first] = itr->second;
}
const std::map<std::string, std::vector<std::string> >& sequences = target.GetSequences();
for(auto itr = sequences.begin(); itr != sequences.end(); ++itr){
for (auto itr = sequences.begin(); itr != sequences.end(); ++itr) {
sequences_[itr->first] = itr->second;
}
const std::map<std::string, ConfigNode>& children = target.GetChildren();
for(auto itr = children.begin(); itr != children.end(); ++itr){
for (auto itr = children.begin(); itr != children.end(); ++itr) {
children_[itr->first] = itr->second;
}
}
//key/value pair config
// key/value pair config
void
ConfigNode::SetValue(const std::string& key, const std::string& value) {
config_[key] = value;
@ -41,16 +53,16 @@ ConfigNode::SetValue(const std::string& key, const std::string& value) {
std::string
ConfigNode::GetValue(const std::string& param_key, const std::string& default_val) const {
auto ref = config_.find(param_key);
if(ref != config_.end()) {
if (ref != config_.end()) {
return ref->second;
}
//THROW_UNEXPECTED_ERROR("Can't find parameter key: " + param_key);
// THROW_UNEXPECTED_ERROR("Can't find parameter key: " + param_key);
return default_val;
}
bool
ConfigNode::GetBoolValue(const std::string &param_key, bool default_val) const {
ConfigNode::GetBoolValue(const std::string& param_key, bool default_val) const {
std::string val = GetValue(param_key);
if (!val.empty()) {
std::transform(val.begin(), val.end(), val.begin(), ::tolower);
@ -61,7 +73,7 @@ ConfigNode::GetBoolValue(const std::string &param_key, bool default_val) const {
}
int32_t
ConfigNode::GetInt32Value(const std::string &param_key, int32_t default_val) const {
ConfigNode::GetInt32Value(const std::string& param_key, int32_t default_val) const {
std::string val = GetValue(param_key);
if (!val.empty()) {
return (int32_t)std::strtol(val.c_str(), nullptr, 10);
@ -71,7 +83,7 @@ ConfigNode::GetInt32Value(const std::string &param_key, int32_t default_val) con
}
int64_t
ConfigNode::GetInt64Value(const std::string &param_key, int64_t default_val) const {
ConfigNode::GetInt64Value(const std::string& param_key, int64_t default_val) const {
std::string val = GetValue(param_key);
if (!val.empty()) {
return std::strtol(val.c_str(), nullptr, 10);
@ -81,7 +93,7 @@ ConfigNode::GetInt64Value(const std::string &param_key, int64_t default_val) con
}
float
ConfigNode::GetFloatValue(const std::string &param_key, float default_val) const {
ConfigNode::GetFloatValue(const std::string& param_key, float default_val) const {
std::string val = GetValue(param_key);
if (!val.empty()) {
return std::strtof(val.c_str(), nullptr);
@ -91,7 +103,7 @@ ConfigNode::GetFloatValue(const std::string &param_key, float default_val) const
}
double
ConfigNode::GetDoubleValue(const std::string &param_key, double default_val) const {
ConfigNode::GetDoubleValue(const std::string& param_key, double default_val) const {
std::string val = GetValue(param_key);
if (!val.empty()) {
return std::strtod(val.c_str(), nullptr);
@ -103,13 +115,14 @@ ConfigNode::GetDoubleValue(const std::string &param_key, double default_val) con
const std::map<std::string, std::string>&
ConfigNode::GetConfig() const {
return config_;
};
}
void ConfigNode::ClearConfig() {
void
ConfigNode::ClearConfig() {
config_.clear();
}
//key/object config
// key/object config
void
ConfigNode::AddChild(const std::string& type_name, const ConfigNode& config) {
children_[type_name] = config;
@ -118,7 +131,7 @@ ConfigNode::AddChild(const std::string& type_name, const ConfigNode& config) {
ConfigNode
ConfigNode::GetChild(const std::string& type_name) const {
auto ref = children_.find(type_name);
if(ref != children_.end()) {
if (ref != children_.end()) {
return ref->second;
}
@ -127,14 +140,14 @@ ConfigNode::GetChild(const std::string& type_name) const {
}
ConfigNode&
ConfigNode::GetChild(const std::string &type_name) {
ConfigNode::GetChild(const std::string& type_name) {
return children_[type_name];
}
void
ConfigNode::GetChildren(ConfigNodeArr& arr) const {
arr.clear();
for(auto ref : children_){
for (auto ref : children_) {
arr.push_back(ref.second);
}
}
@ -144,20 +157,21 @@ ConfigNode::GetChildren() const {
return children_;
}
void ConfigNode::ClearChildren() {
void
ConfigNode::ClearChildren() {
children_.clear();
}
//key/sequence config
// key/sequence config
void
ConfigNode::AddSequenceItem(const std::string &key, const std::string &item) {
ConfigNode::AddSequenceItem(const std::string& key, const std::string& item) {
sequences_[key].push_back(item);
}
std::vector<std::string>
ConfigNode::GetSequence(const std::string &key) const {
ConfigNode::GetSequence(const std::string& key) const {
auto itr = sequences_.find(key);
if(itr != sequences_.end()) {
if (itr != sequences_.end()) {
return itr->second;
} else {
std::vector<std::string> temp;
@ -170,45 +184,46 @@ ConfigNode::GetSequences() const {
return sequences_;
}
void ConfigNode::ClearSequences() {
void
ConfigNode::ClearSequences() {
sequences_.clear();
}
void
ConfigNode::PrintAll(const std::string& prefix) const {
for(auto& elem : config_) {
for (auto& elem : config_) {
SERVER_LOG_INFO << prefix << elem.first + ": " << elem.second;
}
for(auto& elem : sequences_) {
for (auto& elem : sequences_) {
SERVER_LOG_INFO << prefix << elem.first << ": ";
for(auto& str : elem.second) {
for (auto& str : elem.second) {
SERVER_LOG_INFO << prefix << " - " << str;
}
}
for(auto& elem : children_) {
for (auto& elem : children_) {
SERVER_LOG_INFO << prefix << elem.first << ": ";
elem.second.PrintAll(prefix + " ");
}
}
std::string
ConfigNode::DumpString(const std::string &prefix) const {
ConfigNode::DumpString(const std::string& prefix) const {
std::stringstream str_buffer;
const std::string endl = "\n";
for(auto& elem : config_) {
for (auto& elem : config_) {
str_buffer << prefix << elem.first << ": " << elem.second << endl;
}
for(auto& elem : sequences_) {
for (auto& elem : sequences_) {
str_buffer << prefix << elem.first << ": " << endl;
for(auto& str : elem.second) {
for (auto& str : elem.second) {
str_buffer << prefix + " - " << str << endl;
}
}
for(auto& elem : children_) {
for (auto& elem : children_) {
str_buffer << prefix << elem.first << ": " << endl;
str_buffer << elem.second.DumpString(prefix + " ") << endl;
}
@ -216,6 +231,5 @@ ConfigNode::DumpString(const std::string &prefix) const {
return str_buffer.str();
}
}
}
}
} // namespace server
} // namespace milvus

View File

@ -1,15 +1,26 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <vector>
#include <string>
#include <map>
#include <string>
#include <vector>
namespace zilliz {
namespace milvus {
namespace server {
@ -18,39 +29,61 @@ typedef std::vector<ConfigNode> ConfigNodeArr;
class ConfigNode {
public:
void Combine(const ConfigNode& target);
void
Combine(const ConfigNode& target);
//key/value pair config
void SetValue(const std::string &key, const std::string &value);
// key/value pair config
void
SetValue(const std::string& key, const std::string& value);
std::string GetValue(const std::string &param_key, const std::string &default_val = "") const;
bool GetBoolValue(const std::string &param_key, bool default_val = false) const;
int32_t GetInt32Value(const std::string &param_key, int32_t default_val = 0) const;
int64_t GetInt64Value(const std::string &param_key, int64_t default_val = 0) const;
float GetFloatValue(const std::string &param_key, float default_val = 0.0) const;
double GetDoubleValue(const std::string &param_key, double default_val = 0.0) const;
std::string
GetValue(const std::string& param_key, const std::string& default_val = "") const;
bool
GetBoolValue(const std::string& param_key, bool default_val = false) const;
int32_t
GetInt32Value(const std::string& param_key, int32_t default_val = 0) const;
int64_t
GetInt64Value(const std::string& param_key, int64_t default_val = 0) const;
float
GetFloatValue(const std::string& param_key, float default_val = 0.0) const;
double
GetDoubleValue(const std::string& param_key, double default_val = 0.0) const;
const std::map<std::string, std::string>& GetConfig() const;
void ClearConfig();
const std::map<std::string, std::string>&
GetConfig() const;
void
ClearConfig();
//key/object config
void AddChild(const std::string &type_name, const ConfigNode &config);
ConfigNode GetChild(const std::string &type_name) const;
ConfigNode& GetChild(const std::string &type_name);
void GetChildren(ConfigNodeArr &arr) const;
// key/object config
void
AddChild(const std::string& type_name, const ConfigNode& config);
ConfigNode
GetChild(const std::string& type_name) const;
ConfigNode&
GetChild(const std::string& type_name);
void
GetChildren(ConfigNodeArr& arr) const;
const std::map<std::string, ConfigNode>& GetChildren() const;
void ClearChildren();
const std::map<std::string, ConfigNode>&
GetChildren() const;
void
ClearChildren();
//key/sequence config
void AddSequenceItem(const std::string &key, const std::string &item);
std::vector<std::string> GetSequence(const std::string &key) const;
// key/sequence config
void
AddSequenceItem(const std::string& key, const std::string& item);
std::vector<std::string>
GetSequence(const std::string& key) const;
const std::map<std::string, std::vector<std::string> >& GetSequences() const;
void ClearSequences();
const std::map<std::string, std::vector<std::string> >&
GetSequences() const;
void
ClearSequences();
void PrintAll(const std::string &prefix = "") const;
std::string DumpString(const std::string &prefix = "") const;
void
PrintAll(const std::string& prefix = "") const;
std::string
DumpString(const std::string& prefix = "") const;
private:
std::map<std::string, std::string> config_;
@ -58,6 +91,5 @@ class ConfigNode {
std::map<std::string, std::vector<std::string> > sequences_;
};
}
}
}
} // namespace server
} // namespace milvus

View File

@ -1,18 +1,30 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "YamlConfigMgr.h"
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "config/YamlConfigMgr.h"
#include "utils/Log.h"
#include <sys/stat.h>
namespace zilliz {
namespace milvus {
namespace server {
ErrorCode YamlConfigMgr::LoadConfigFile(const std::string &filename) {
ErrorCode
YamlConfigMgr::LoadConfigFile(const std::string& filename) {
struct stat directoryStat;
int statOK = stat(filename.c_str(), &directoryStat);
if (statOK != 0) {
@ -23,37 +35,38 @@ ErrorCode YamlConfigMgr::LoadConfigFile(const std::string &filename) {
try {
node_ = YAML::LoadFile(filename);
LoadConfigNode(node_, config_);
}
catch (YAML::Exception& e) {
SERVER_LOG_ERROR << "Failed to load config file: " << std::string(e.what ());
} catch (YAML::Exception& e) {
SERVER_LOG_ERROR << "Failed to load config file: " << std::string(e.what());
return SERVER_UNEXPECTED_ERROR;
}
return SERVER_SUCCESS;
}
void YamlConfigMgr::Print() const {
void
YamlConfigMgr::Print() const {
SERVER_LOG_INFO << "System config content:";
config_.PrintAll();
}
std::string YamlConfigMgr::DumpString() const {
std::string
YamlConfigMgr::DumpString() const {
return config_.DumpString("");
}
const ConfigNode& YamlConfigMgr::GetRootNode() const {
const ConfigNode&
YamlConfigMgr::GetRootNode() const {
return config_;
}
ConfigNode& YamlConfigMgr::GetRootNode() {
ConfigNode&
YamlConfigMgr::GetRootNode() {
return config_;
}
bool
YamlConfigMgr::SetConfigValue(const YAML::Node& node,
const std::string& key,
ConfigNode& config) {
if(node[key].IsDefined ()) {
YamlConfigMgr::SetConfigValue(const YAML::Node& node, const std::string& key, ConfigNode& config) {
if (node[key].IsDefined()) {
config.SetValue(key, node[key].as<std::string>());
return true;
}
@ -61,10 +74,8 @@ YamlConfigMgr::SetConfigValue(const YAML::Node& node,
}
bool
YamlConfigMgr::SetChildConfig(const YAML::Node& node,
const std::string& child_name,
ConfigNode& config) {
if(node[child_name].IsDefined ()) {
YamlConfigMgr::SetChildConfig(const YAML::Node& node, const std::string& child_name, ConfigNode& config) {
if (node[child_name].IsDefined()) {
ConfigNode sub_config;
LoadConfigNode(node[child_name], sub_config);
config.AddChild(child_name, sub_config);
@ -74,12 +85,10 @@ YamlConfigMgr::SetChildConfig(const YAML::Node& node,
}
bool
YamlConfigMgr::SetSequence(const YAML::Node &node,
const std::string &child_name,
ConfigNode &config) {
if(node[child_name].IsDefined ()) {
YamlConfigMgr::SetSequence(const YAML::Node& node, const std::string& child_name, ConfigNode& config) {
if (node[child_name].IsDefined()) {
size_t cnt = node[child_name].size();
for(size_t i = 0; i < cnt; i++){
for (size_t i = 0; i < cnt; i++) {
config.AddSequenceItem(child_name, node[child_name][i].as<std::string>());
}
return true;
@ -91,19 +100,18 @@ void
YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) {
std::string key;
for (YAML::const_iterator it = node.begin(); it != node.end(); ++it) {
if(!it->first.IsNull()){
if (!it->first.IsNull()) {
key = it->first.as<std::string>();
}
if(node[key].IsScalar()) {
if (node[key].IsScalar()) {
SetConfigValue(node, key, config);
} else if(node[key].IsMap()){
} else if (node[key].IsMap()) {
SetChildConfig(node, key, config);
} else if(node[key].IsSequence()){
} else if (node[key].IsSequence()) {
SetSequence(node, key, config);
}
}
}
}
}
}
} // namespace server
} // namespace milvus

View File

@ -1,8 +1,20 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "ConfigMgr.h"
@ -10,43 +22,42 @@
#include "utils/Error.h"
#include <yaml-cpp/yaml.h>
#include <string>
namespace zilliz {
namespace milvus {
namespace server {
class YamlConfigMgr : public ConfigMgr {
public:
virtual ErrorCode LoadConfigFile(const std::string &filename);
virtual void Print() const;
virtual std::string DumpString() const;
virtual ErrorCode
LoadConfigFile(const std::string& filename);
virtual void
Print() const;
virtual std::string
DumpString() const;
virtual const ConfigNode& GetRootNode() const;
virtual ConfigNode& GetRootNode();
virtual const ConfigNode&
GetRootNode() const;
virtual ConfigNode&
GetRootNode();
private:
bool SetConfigValue(const YAML::Node& node,
const std::string& key,
ConfigNode& config);
bool SetChildConfig(const YAML::Node& node,
const std::string &name,
ConfigNode &config);
bool
SetConfigValue(const YAML::Node& node, const std::string& key, ConfigNode& config);
bool
SetSequence(const YAML::Node &node,
const std::string &child_name,
ConfigNode &config);
SetChildConfig(const YAML::Node& node, const std::string& child_name, ConfigNode& config);
void LoadConfigNode(const YAML::Node& node, ConfigNode& config);
bool
SetSequence(const YAML::Node& node, const std::string& child_name, ConfigNode& config);
void
LoadConfigNode(const YAML::Node& node, ConfigNode& config);
private:
YAML::Node node_;
ConfigNode config_;
};
}
}
}
} // namespace server
} // namespace milvus

View File

@ -1,9 +1,23 @@
#-------------------------------------------------------------------------------
# Copyright (Zilliz) - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited.
# Proprietary and confidential.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http:#www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#-------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.14)
message(STATUS "---------------core--------------")
message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
@ -32,9 +46,11 @@ if(NOT CMAKE_BUILD_TYPE)
endif(NOT CMAKE_BUILD_TYPE)
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -DELPP_THREAD_SAFE -fopenmp")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O3")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fPIC -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fPIC -DELPP_THREAD_SAFE -fopenmp")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0 -g")
endif()
MESSAGE(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
@ -70,7 +86,7 @@ include(DefineOptionsCore)
include(BuildUtilsCore)
include(ThirdPartyPackagesCore)
add_subdirectory(src)
add_subdirectory(knowhere)
if (BUILD_COVERAGE STREQUAL "ON")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
@ -79,7 +95,7 @@ endif()
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
if(BUILD_UNIT_TEST STREQUAL "ON")
add_subdirectory(test)
add_subdirectory(unittest)
endif()
config_summary()

0
cpp/src/core/build.sh Executable file → Normal file
View File

View File

@ -123,9 +123,7 @@ if(NOT DEFINED USE_JFROG_CACHE)
set(USE_JFROG_CACHE "OFF")
endif()
if(USE_JFROG_CACHE STREQUAL "ON")
set(JFROG_ARTFACTORY_CACHE_URL "http://192.168.1.201:80/artifactory/generic-local/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${KNOWHERE_BUILD_ARCH}/${BUILD_TYPE}")
set(JFROG_USER_NAME "test")
set(JFROG_PASSWORD "Fantast1c")
set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/generic-local/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${KNOWHERE_BUILD_ARCH}/${BUILD_TYPE}")
set(THIRDPARTY_PACKAGE_CACHE "${THIRDPARTY_DIR}/cache")
endif()
@ -230,15 +228,24 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
set(${_LIB_NAME} "${_LIB_VERSION}")
endforeach()
if(DEFINED ENV{KNOWHERE_FAISS_URL})
set(FAISS_SOURCE_URL "$ENV{KNOWHERE_FAISS_URL}")
else()
if(CUSTOMIZATION)
set(FAISS_SOURCE_URL "http://192.168.1.105:6060/jinhai/faiss/-/archive/${FAISS_VERSION}/faiss-${FAISS_VERSION}.tar.gz")
message(STATUS "FAISS URL = ${FAISS_SOURCE_URL}")
# set(FAISS_MD5 "a589663865a8558205533c8ac414278c")
# set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0
# set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0
# set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
execute_process(COMMAND wget -q --method HEAD ${FAISS_SOURCE_URL} RESULT_VARIABLE return_code)
message(STATUS "Check the remote cache file ${FAISS_SOURCE_URL}. return code = ${return_code}")
if (NOT return_code EQUAL 0)
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")
endif()
else()
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")
set(FAISS_MD5 "0bc12737b23def156f6a1eb782050135")
endif()
# set(FAISS_MD5 "a589663865a8558205533c8ac414278c")
# set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c
set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39
message(STATUS "FAISS URL = ${FAISS_SOURCE_URL}")
if(DEFINED ENV{KNOWHERE_ARROW_URL})
set(ARROW_SOURCE_URL "$ENV{KNOWHERE_ARROW_URL}")

View File

@ -1,18 +0,0 @@
#pragma once
#include <memory>
#include <knowhere/common/array.h>
namespace zilliz {
namespace knowhere {
ArrayPtr
CopyArray(const ArrayPtr &origin);
SchemaPtr
CopySchema(const SchemaPtr &origin);
} // namespace knowhere
} // namespace zilliz

View File

@ -1,20 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
namespace zilliz {
namespace knowhere {
#define GETTENSOR(dataset) \
auto tensor = dataset->tensor()[0]; \
auto p_data = tensor->raw_data(); \
auto dim = tensor->shape()[1]; \
auto rows = tensor->shape()[0]; \
}
}

View File

@ -1,24 +0,0 @@
#pragma once
#include <memory>
#include <knowhere/common/dataset.h>
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
namespace zilliz {
namespace knowhere {
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr &dataset);
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr &dataset);
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr &dataset, const Config &config);
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results);
} // namespace knowhere
} // namespace zilliz

View File

@ -1,42 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <memory>
#include <knowhere/common/dataset.h>
namespace zilliz {
namespace knowhere {
extern ArrayPtr
ConstructInt64ArraySmart(uint8_t *data, int64_t size);
extern ArrayPtr
ConstructFloatArraySmart(uint8_t *data, int64_t size);
extern TensorPtr
ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector<int64_t> shape);
extern ArrayPtr
ConstructInt64Array(uint8_t *data, int64_t size);
extern ArrayPtr
ConstructFloatArray(uint8_t *data, int64_t size);
extern TensorPtr
ConstructFloatTensor(uint8_t *data, int64_t size, std::vector<int64_t> shape);
extern FieldPtr
ConstructInt64Field(const std::string &name);
extern FieldPtr
ConstructFloatField(const std::string &name);
}
}

View File

@ -1,35 +0,0 @@
#pragma once
#include "arrow/array.h"
#include "knowhere/common/schema.h"
namespace zilliz {
namespace knowhere {
using ArrayData = arrow::ArrayData;
using ArrayDataPtr = std::shared_ptr<ArrayData>;
using Array = arrow::Array;
using ArrayPtr = std::shared_ptr<Array>;
using BooleanArray = arrow::BooleanArray;
using BooleanArrayPtr = std::shared_ptr<arrow::BooleanArray>;
template<typename DType>
using NumericArray = arrow::NumericArray<DType>;
template<typename DType>
using NumericArrayPtr = std::shared_ptr<arrow::NumericArray<DType>>;
using BinaryArray = arrow::BinaryArray;
using BinaryArrayPtr = std::shared_ptr<arrow::BinaryArray>;
using FixedSizeBinaryArray = arrow::FixedSizeBinaryArray;
using FixedSizeBinaryArrayPtr = std::shared_ptr<arrow::FixedSizeBinaryArray>;
using Decimal128Array = arrow::Decimal128Array;
using Decimal128ArrayPtr = std::shared_ptr<arrow::Decimal128Array>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,62 +0,0 @@
#pragma once
#include <map>
#include <string>
#include <vector>
#include <memory>
#include "knowhere/common/id.h"
namespace zilliz {
namespace knowhere {
struct Binary {
ID id;
std::shared_ptr<uint8_t> data;
int64_t size = 0;
};
using BinaryPtr = std::shared_ptr<Binary>;
class BinarySet {
public:
BinaryPtr
GetByName(const std::string &name) const {
return binary_map_.at(name);
}
void
Append(const std::string &name, BinaryPtr binary) {
binary_map_[name] = std::move(binary);
}
void
Append(const std::string &name, std::shared_ptr<uint8_t> data, int64_t size) {
auto binary = std::make_shared<Binary>();
binary->data = data;
binary->size = size;
binary_map_[name] = std::move(binary);
}
//void
//Append(const std::string &name, void *data, int64_t size, ID id) {
// Binary binary;
// binary.data = data;
// binary.size = size;
// binary.id = id;
// binary_map_[name] = binary;
//}
void clear() {
binary_map_.clear();
}
public:
std::map<std::string, BinaryPtr> binary_map_;
};
} // namespace knowhere
} // namespace zilliz

View File

@ -1,45 +0,0 @@
#pragma once
#include <memory>
#include "arrow/buffer.h"
namespace zilliz {
namespace knowhere {
using Buffer = arrow::Buffer;
using BufferPtr = std::shared_ptr<Buffer>;
using MutableBuffer = arrow::MutableBuffer;
using MutableBufferPtr = std::shared_ptr<MutableBuffer>;
namespace internal {
struct BufferDeleter {
void operator()(Buffer *buffer) {
free((void *) buffer->data());
}
};
}
inline BufferPtr
MakeBufferSmart(uint8_t *data, const int64_t size) {
return BufferPtr(new Buffer(data, size), internal::BufferDeleter());
}
inline MutableBufferPtr
MakeMutableBufferSmart(uint8_t *data, const int64_t size) {
return MutableBufferPtr(new MutableBuffer(data, size), internal::BufferDeleter());
}
inline BufferPtr
MakeBuffer(uint8_t *data, const int64_t size) {
return std::make_shared<Buffer>(data, size);
}
inline MutableBufferPtr
MakeMutableBuffer(uint8_t *data, const int64_t size) {
return std::make_shared<MutableBuffer>(data, size);
}
} // namespace knowhere
} // namespace zilliz

View File

@ -1,14 +0,0 @@
#pragma once
#include <jsoncons/json.hpp>
namespace zilliz {
namespace knowhere {
using Config = jsoncons::json;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,10 +0,0 @@
#pragma once
namespace zilliz {
namespace sched {
namespace master {
} // namespace master
} // namespace sched
} // namespace zilliz

View File

@ -1,40 +0,0 @@
#pragma once
#include <cstdint>
#include "zlibrary/error/error.h"
namespace zilliz {
namespace knowhere {
using Error = zilliz::lib::ErrorCode;
constexpr Error STORE_SUCCESS = zilliz::lib::SUCCESS_CODE;
constexpr Error ERROR_CODE_BASE = 0x36000;
constexpr Error ERROR_CODE_END = 0x37000;
constexpr Error
ToGlobalErrorCode(const Error error_code) {
return zilliz::lib::ToGlobalErrorCode(error_code, ERROR_CODE_BASE);
}
class Exception : public zilliz::lib::Exception {
public:
Exception(const Error error_code,
const std::string &message = nullptr)
: zilliz::lib::Exception(error_code, "KNOWHERE", message) {}
};
constexpr Error UNEXPECTED = ToGlobalErrorCode(0x001);
constexpr Error UNSUPPORTED = ToGlobalErrorCode(0x002);
constexpr Error NULL_POINTER = ToGlobalErrorCode(0x003);
constexpr Error OVERFLOW = ToGlobalErrorCode(0x004);
constexpr Error INVALID_ARGUMENT = ToGlobalErrorCode(0x005);
constexpr Error UNSUPPORTED_TYPE = ToGlobalErrorCode(0x006);
} // namespace store
} // namespace zilliz
using Error = zilliz::store::Error;

View File

@ -1,48 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <exception>
#include <string>
namespace zilliz {
namespace knowhere {
class KnowhereException : public std::exception {
public:
explicit KnowhereException(const std::string &msg);
KnowhereException(const std::string &msg, const char *funName,
const char *file, int line);
const char *what() const noexcept override;
std::string msg;
};
#define KNOHWERE_ERROR_MSG(MSG)\
printf("%s", KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__).what())
#define KNOWHERE_THROW_MSG(MSG)\
do {\
throw KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__);\
} while (false)
#define KNOHERE_THROW_FORMAT(FMT, ...)\
do { \
std::string __s;\
int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__);\
__s.resize(__size + 1);\
snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__);\
throw faiss::FaissException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__);\
} while (false)
}
}

View File

@ -1,42 +0,0 @@
#pragma once
//#include "zcommon/id/id.h"
//using ID = zilliz::common::ID;
#include <stdint.h>
#include <string>
namespace zilliz {
namespace knowhere {
class ID {
public:
constexpr static int64_t kIDSize = 20;
public:
const int32_t *
data() const { return content_; }
int32_t *
mutable_data() { return content_; }
bool
IsValid() const;
std::string
ToString() const;
bool
operator==(const ID &that) const;
bool
operator<(const ID &that) const;
protected:
int32_t content_[5] = {};
};
} // namespace knowhere
} // namespace zilliz

View File

@ -1,21 +0,0 @@
#pragma once
#include <memory>
#include "arrow/type.h"
namespace zilliz {
namespace knowhere {
using DataType = arrow::DataType;
using Field = arrow::Field;
using FieldPtr = std::shared_ptr<arrow::Field>;
using Schema = arrow::Schema;
using SchemaPtr = std::shared_ptr<Schema>;
using SchemaConstPtr = std::shared_ptr<const Schema>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,16 +0,0 @@
#pragma once
#include <memory>
#include "arrow/tensor.h"
namespace zilliz {
namespace knowhere {
using Tensor = arrow::Tensor;
using TensorPtr = std::shared_ptr<Tensor>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,41 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <string>
#include <chrono>
namespace zilliz {
namespace knowhere {
class TimeRecorder {
using stdclock = std::chrono::high_resolution_clock;
public:
TimeRecorder(const std::string &header,
int64_t log_level = 0);
~TimeRecorder();//trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5
double RecordSection(const std::string &msg);
double ElapseFromBegin(const std::string &msg);
static std::string GetTimeSpanStr(double span);
private:
void PrintTimeRecord(const std::string &msg, double span);
private:
std::string header_;
stdclock::time_point start_;
stdclock::time_point last_;
int64_t log_level_;
};
}
}

View File

@ -1,49 +0,0 @@
#pragma once
#include <memory>
#include "knowhere/common/binary_set.h"
#include "knowhere/common/dataset.h"
#include "knowhere/index/index_type.h"
#include "knowhere/index/index_model.h"
#include "knowhere/index/preprocessor/preprocessor.h"
namespace zilliz {
namespace knowhere {
class Index {
public:
virtual BinarySet
Serialize() = 0;
virtual void
Load(const BinarySet &index_binary) = 0;
// @throw
virtual DatasetPtr
Search(const DatasetPtr &dataset, const Config &config) = 0;
public:
IndexType
idx_type() const { return idx_type_; }
void
set_idx_type(IndexType idx_type) { idx_type_ = idx_type; }
virtual void
set_preprocessor(PreprocessorPtr preprocessor) {}
virtual void
set_index_model(IndexModelPtr model) {}
private:
IndexType idx_type_;
};
using IndexPtr = std::shared_ptr<Index>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,24 +0,0 @@
#pragma once
#include <memory>
#include "knowhere/common/binary_set.h"
namespace zilliz {
namespace knowhere {
class IndexModel {
public:
virtual BinarySet
Serialize() = 0;
virtual void
Load(const BinarySet &binary) = 0;
};
using IndexModelPtr = std::shared_ptr<IndexModel>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,17 +0,0 @@
#pragma once
namespace zilliz {
namespace knowhere {
enum class IndexType {
kUnknown = 0,
kVecIdxBegin = 100,
kVecIVFFlat = kVecIdxBegin,
kVecIdxEnd,
};
} // namespace knowhere
} // namespace zilliz

View File

@ -1,26 +0,0 @@
//#pragma once
//
//#include <memory>
//#include "preprocessor.h"
//
//
//namespace zilliz {
//namespace knowhere {
//
//class NormalizePreprocessor : public Preprocessor {
// public:
// DatasetPtr
// Preprocess(const DatasetPtr &input) override;
//
// private:
//
// void
// Normalize(float *arr, int64_t dimension);
//};
//
//
//using NormalizePreprocessorPtr = std::shared_ptr<NormalizePreprocessor>;
//
//
//} // namespace knowhere
//} // namespace zilliz

View File

@ -1,22 +0,0 @@
#pragma once
#include <memory>
#include "knowhere/common/dataset.h"
namespace zilliz {
namespace knowhere {
class Preprocessor {
public:
virtual DatasetPtr
Preprocess(const DatasetPtr &input) = 0;
};
using PreprocessorPtr = std::shared_ptr<Preprocessor>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,23 +0,0 @@
/*******************************************************************************
* Copyright (Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "vector_index.h"
namespace zilliz {
namespace knowhere {
// TODO(linxj): rename CopyToGpu
extern VectorIndexPtr
CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config);
extern VectorIndexPtr
CopyGpuToCpu(const VectorIndexPtr &index, const Config &config);
}
}

View File

@ -1,70 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
#include "knowhere/index/vector_index/vector_index.h"
#include "knowhere/index/index_model.h"
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
namespace zilliz {
namespace knowhere {
class CPUKDTRNG : public VectorIndex {
public:
CPUKDTRNG() {
index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT,
SPTAG::VectorValueType::Float);
index_ptr_->SetParameter("DistCalcMethod", "L2");
}
public:
BinarySet
Serialize() override;
VectorIndexPtr Clone() override;
void
Load(const BinarySet &index_array) override;
public:
//PreprocessorPtr
//BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override;
int64_t Count() override;
int64_t Dimension() override;
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
void
Add(const DatasetPtr &dataset, const Config &config) override;
DatasetPtr
Search(const DatasetPtr &dataset, const Config &config) override;
void Seal() override;
private:
void
SetParameters(const Config &config);
private:
PreprocessorPtr preprocessor_;
std::shared_ptr<SPTAG::VectorIndex> index_ptr_;
};
using CPUKDTRNGPtr = std::shared_ptr<CPUKDTRNG>;
class CPUKDTRNGIndexModel : public IndexModel {
public:
BinarySet
Serialize() override;
void
Load(const BinarySet &binary) override;
private:
std::shared_ptr<SPTAG::VectorIndex> index_;
};
using CPUKDTRNGIndexModelPtr = std::shared_ptr<CPUKDTRNGIndexModel>;
} // namespace knowhere
} // namespace zilliz

View File

@ -1,14 +0,0 @@
#pragma once
#include <string>
namespace zilliz {
namespace knowhere {
#define META_ROWS ("rows")
#define META_DIM ("dimension")
#define META_K ("k")
} // namespace knowhere
} // namespace zilliz

View File

@ -1,180 +0,0 @@
#pragma once
#include <faiss/gpu/StandardGpuResources.h>
#include "ivf.h"
#include "src/utils/BlockingQueue.h"
namespace zilliz {
namespace knowhere {
struct Resource {
explicit Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r): faiss_res(r) {
static int64_t global_id = 0;
id = global_id++;
}
std::shared_ptr<faiss::gpu::StandardGpuResources> faiss_res;
int64_t id;
std::mutex mutex;
};
using ResPtr = std::shared_ptr<Resource>;
using ResWPtr = std::weak_ptr<Resource>;
class FaissGpuResourceMgr {
public:
friend class ResScope;
public:
using ResBQ = zilliz::milvus::server::BlockingQueue<ResPtr>;
struct DeviceParams {
int64_t temp_mem_size = 0;
int64_t pinned_mem_size = 0;
int64_t resource_num = 2;
};
public:
static FaissGpuResourceMgr &
GetInstance();
// Free gpu resource, avoid cudaGetDevice error when deallocate.
// this func should be invoke before main return
void
Free();
void
AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size);
void
InitDevice(int64_t device_id,
int64_t pin_mem_size = 0,
int64_t temp_mem_size = 0,
int64_t res_num = 2);
void
InitResource();
// allocate gpu memory invoke by build or copy_to_gpu
ResPtr
GetRes(const int64_t &device_id, const int64_t& alloc_size = 0);
// allocate gpu memory before search
// this func will return True if the device is idle and exists an idle resource.
//bool
//GetRes(const int64_t& device_id, ResPtr &res, const int64_t& alloc_size = 0);
void
MoveToIdle(const int64_t &device_id, const ResPtr& res);
void
Dump();
protected:
bool is_init = false;
std::map<int64_t ,std::unique_ptr<std::mutex>> mutex_cache_;
std::map<int64_t, DeviceParams> devices_params_;
std::map<int64_t, ResBQ> idle_map_;
};
class ResScope {
public:
ResScope(ResPtr &res, const int64_t& device_id, const bool& isown)
: resource(res), device_id(device_id), move(true), own(isown) {
if (isown) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock();
res->mutex.lock();
}
// specif for search
// get the ownership of gpuresource and gpu
ResScope(ResPtr &res, const int64_t &device_id)
: resource(res), device_id(device_id), move(false), own(true) {
FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock();
res->mutex.lock();
}
~ResScope() {
if (own) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->unlock();
if (move) FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource);
resource->mutex.unlock();
}
private:
ResPtr resource;
int64_t device_id;
bool move = true;
bool own = false;
};
class GPUIndex {
public:
explicit GPUIndex(const int &device_id) : gpu_id_(device_id) {}
GPUIndex(const int& device_id, ResPtr resource): gpu_id_(device_id), res_(std::move(resource)){}
virtual VectorIndexPtr CopyGpuToCpu(const Config &config) = 0;
virtual VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) = 0;
void SetGpuDevice(const int &gpu_id);
const int64_t &GetGpuDevice();
protected:
int64_t gpu_id_;
ResPtr res_ = nullptr;
};
class GPUIVF : public IVF, public GPUIndex {
public:
explicit GPUIVF(const int &device_id) : IVF(), GPUIndex(device_id) {}
explicit GPUIVF(std::shared_ptr<faiss::Index> index, const int64_t &device_id, ResPtr &resource)
: IVF(std::move(index)), GPUIndex(device_id, resource) {};
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
void Add(const DatasetPtr &dataset, const Config &config) override;
void set_index_model(IndexModelPtr model) override;
//DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
VectorIndexPtr CopyGpuToCpu(const Config &config) override;
VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override;
VectorIndexPtr Clone() final;
// TODO(linxj): Deprecated
virtual IVFIndexPtr Copy_index_gpu_to_cpu();
protected:
void search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) override;
BinarySet SerializeImpl() override;
void LoadImpl(const BinarySet &index_binary) override;
};
class GPUIVFSQ : public GPUIVF {
public:
explicit GPUIVFSQ(const int &device_id) : GPUIVF(device_id) {}
explicit GPUIVFSQ(std::shared_ptr<faiss::Index> index, const int64_t &device_id, ResPtr &resource)
: GPUIVF(std::move(index), device_id, resource) {};
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
public:
VectorIndexPtr CopyGpuToCpu(const Config &config) override;
};
class GPUIVFPQ : public GPUIVF {
public:
explicit GPUIVFPQ(const int &device_id) : GPUIVF(device_id) {}
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
public:
VectorIndexPtr CopyGpuToCpu(const Config &config) override;
protected:
// TODO(linxj): remove GenParams.
std::shared_ptr<faiss::IVFSearchParameters> GenParams(const Config &config) override;
};
} // namespace knowhere
} // namespace zilliz

View File

@ -1,71 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "ivf.h"
#include "gpu_ivf.h"
namespace zilliz {
namespace knowhere {
class IDMAP : public VectorIndex, public BasicIndex {
public:
IDMAP() : BasicIndex(nullptr) {};
explicit IDMAP(std::shared_ptr<faiss::Index> index) : BasicIndex(std::move(index)) {};
BinarySet Serialize() override;
void Load(const BinarySet &index_binary) override;
void Train(const Config &config);
DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
int64_t Count() override;
VectorIndexPtr Clone() override;
int64_t Dimension() override;
void Add(const DatasetPtr &dataset, const Config &config) override;
VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config);
void Seal() override;
virtual float *GetRawVectors();
virtual int64_t *GetRawIds();
protected:
virtual void search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg);
std::mutex mutex_;
};
using IDMAPPtr = std::shared_ptr<IDMAP>;
class GPUIDMAP : public IDMAP, public GPUIndex {
public:
explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t &device_id, ResPtr& res)
: IDMAP(std::move(index)), GPUIndex(device_id, res) {}
VectorIndexPtr CopyGpuToCpu(const Config &config) override;
float *GetRawVectors() override;
int64_t *GetRawIds() override;
VectorIndexPtr Clone() override;
VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override;
protected:
void search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) override;
BinarySet SerializeImpl() override;
void LoadImpl(const BinarySet &index_binary) override;
};
using GPUIDMAPPtr = std::shared_ptr<GPUIDMAP>;
}
}

View File

@ -1,145 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <memory>
#include <mutex>
#include <faiss/IndexIVF.h>
#include <faiss/AuxIndexStructures.h>
#include <faiss/Index.h>
#include "knowhere/index/vector_index/vector_index.h"
namespace zilliz {
namespace knowhere {
class BasicIndex {
protected:
explicit BasicIndex(std::shared_ptr<faiss::Index> index);
virtual BinarySet SerializeImpl();
virtual void LoadImpl(const BinarySet &index_binary);
virtual void
SealImpl();
protected:
std::shared_ptr<faiss::Index> index_ = nullptr;
};
using Graph = std::vector<std::vector<int64_t>>;
class IVF : public VectorIndex, protected BasicIndex {
public:
IVF() : BasicIndex(nullptr) {};
explicit IVF(std::shared_ptr<faiss::Index> index) : BasicIndex(std::move(index)) {}
VectorIndexPtr Clone() override;;
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
void set_index_model(IndexModelPtr model) override;
void Add(const DatasetPtr &dataset, const Config &config) override;
void AddWithoutIds(const DatasetPtr &dataset, const Config &config);
DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
void GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, const Config &config);
BinarySet Serialize() override;
void Load(const BinarySet &index_binary) override;
int64_t Count() override;
int64_t Dimension() override;
void
Seal() override;
virtual VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config);
protected:
virtual std::shared_ptr<faiss::IVFSearchParameters> GenParams(const Config &config);
virtual VectorIndexPtr Clone_impl(const std::shared_ptr<faiss::Index> &index);
virtual void search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg);
protected:
std::mutex mutex_;
};
using IVFIndexPtr = std::shared_ptr<IVF>;
class IVFSQ : public IVF {
public:
explicit IVFSQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {}
IVFSQ() = default;
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config) override;
protected:
VectorIndexPtr Clone_impl(const std::shared_ptr<faiss::Index> &index) override;
};
class IVFPQ : public IVF {
public:
explicit IVFPQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {}
IVFPQ() = default;
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
protected:
std::shared_ptr<faiss::IVFSearchParameters> GenParams(const Config &config) override;
VectorIndexPtr Clone_impl(const std::shared_ptr<faiss::Index> &index) override;
};
//class OPQIVFPQ : public IVFPQ {
// public:
// PreprocessorPtr BuildPreprocessor(const Dataset &dataset, const Config &config) override;
//};
class GPUIVF;
struct MemoryIOWriter : public faiss::IOWriter {
uint8_t *data_ = nullptr;
size_t total = 0;
size_t rp = 0;
size_t operator()(const void *ptr, size_t size, size_t nitems) override;
};
struct MemoryIOReader : public faiss::IOReader {
uint8_t *data_;
size_t rp = 0;
size_t total = 0;
size_t operator()(void *ptr, size_t size, size_t nitems) override;
};
class IVFIndexModel : public IndexModel, public BasicIndex {
friend IVF;
friend GPUIVF;
public:
explicit IVFIndexModel(std::shared_ptr<faiss::Index> index);
IVFIndexModel() : BasicIndex(nullptr) {};
BinarySet Serialize() override;
protected:
void SealImpl() override;
public:
void Load(const BinarySet &binary) override;
protected:
std::mutex mutex_;
};
using IVFIndexModelPtr = std::shared_ptr<IVFIndexModel>;
}
}

View File

@ -1,34 +0,0 @@
#pragma once
#include <string>
#include <vector>
namespace zilliz {
namespace knowhere {
using KDTParameter = std::pair<std::string, std::string>;
class KDTParameterManagement {
public:
const std::vector<KDTParameter> &
GetKDTParameters();
public:
static KDTParameterManagement &
GetInstance() {
static KDTParameterManagement instance;
return instance;
}
KDTParameterManagement(const KDTParameterManagement &) = delete;
KDTParameterManagement &operator=(const KDTParameterManagement &) = delete;
private:
KDTParameterManagement();
private:
std::vector<KDTParameter> kdt_parameters_;
};
} // namespace knowhere
} // namespace zilliz

View File

@ -1,51 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <mutex>
namespace zilliz {
namespace knowhere {
namespace algo {
using node_t = int64_t;
// TODO: search use simple neighbor
struct Neighbor {
node_t id; // offset of node in origin data
float distance;
bool has_explored;
Neighbor() = default;
explicit Neighbor(node_t id, float distance, bool f) : id{id}, distance{distance}, has_explored(f) {}
explicit Neighbor(node_t id, float distance) : id{id}, distance{distance}, has_explored(false) {}
inline bool operator<(const Neighbor &other) const {
return distance < other.distance;
}
};
//struct SimpleNeighbor {
// node_t id; // offset of node in origin data
// float distance;
//
// SimpleNeighbor() = default;
// explicit SimpleNeighbor(node_t id, float distance) : id{id}, distance{distance}{}
//
// inline bool operator<(const Neighbor &other) const {
// return distance < other.distance;
// }
//};
typedef std::lock_guard<std::mutex> LockGuard;
}
}
}

View File

@ -1,147 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <cstddef>
#include <vector>
#include <mutex>
#include <boost/dynamic_bitset.hpp>
#include "neighbor.h"
namespace zilliz {
namespace knowhere {
namespace algo {
using node_t = int64_t;
enum class MetricType {
METRIC_INNER_PRODUCT = 0,
METRIC_L2 = 1,
};
struct BuildParams {
size_t search_length;
size_t out_degree;
size_t candidate_pool_size;
};
struct SearchParams {
size_t search_length;
};
using Graph = std::vector<std::vector<node_t>>;
class NsgIndex {
public:
size_t dimension;
size_t ntotal; // totabl nb of indexed vectors
MetricType metric_type; // L2 | IP
float *ori_data_;
long *ids_; // TODO: support different type
Graph nsg; // final graph
Graph knng; // reset after build
node_t navigation_point; // offset of node in origin data
bool is_trained = false;
/*
* build and search parameter
*/
size_t search_length;
size_t candidate_pool_size; // search deepth in fullset
size_t out_degree;
public:
explicit NsgIndex(const size_t &dimension,
const size_t &n,
MetricType metric = MetricType::METRIC_L2);
NsgIndex() = default;
virtual ~NsgIndex();
void SetKnnGraph(Graph &knng);
virtual void Build_with_ids(size_t nb,
const float *data,
const long *ids,
const BuildParams &parameters);
void Search(const float *query,
const unsigned &nq,
const unsigned &dim,
const unsigned &k,
float *dist,
long *ids,
SearchParams &params);
// Not support yet.
//virtual void Add() = 0;
//virtual void Add_with_ids() = 0;
//virtual void Delete() = 0;
//virtual void Delete_with_ids() = 0;
//virtual void Rebuild(size_t nb,
// const float *data,
// const long *ids,
// const Parameters &parameters) = 0;
//virtual void Build(size_t nb,
// const float *data,
// const BuildParam &parameters);
protected:
virtual void InitNavigationPoint();
// link specify
void GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
std::vector<Neighbor> &fullset,
boost::dynamic_bitset<> &has_calculated_dist);
// FindUnconnectedNode
void GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
std::vector<Neighbor> &fullset);
// search and navigation-point
void GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
Graph &graph,
SearchParams *param = nullptr);
void Link();
void SyncPrune(size_t q,
std::vector<Neighbor> &pool,
boost::dynamic_bitset<> &has_calculated,
float *cut_graph_dist
);
void SelectEdge(unsigned &cursor,
std::vector<Neighbor> &sort_pool,
std::vector<Neighbor> &result,
bool limit = false);
void InterInsert(unsigned n, std::vector<std::mutex> &mutex_vec, float *dist);
void CheckConnectivity();
void DFS(size_t root, boost::dynamic_bitset<> &flags, int64_t &count);
void FindUnconnectedNode(boost::dynamic_bitset<> &flags, int64_t &root);
//private:
// void GetKnnGraphFromFile();
};
}
}
}

View File

@ -1,15 +0,0 @@
%module nsg
%{
#define SWIG_FILE_WITH_INIT
#include <numpy/arrayobject.h>
/* Include the header in the wrapper code */
#include "nsg.h"
%}
/* Parse the header file */
%include "index.h"

View File

@ -1,22 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "nsg.h"
#include "knowhere/index/vector_index/ivf.h"
namespace zilliz {
namespace knowhere {
namespace algo {
extern void write_index(NsgIndex* index, MemoryIOWriter& writer);
extern NsgIndex* read_index(MemoryIOReader& reader);
}
}
}

View File

@ -1,41 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "knowhere/index/vector_index/vector_index.h"
namespace zilliz {
namespace knowhere {
namespace algo {
class NsgIndex;
}
class NSG : public VectorIndex {
public:
explicit NSG(const int64_t& gpu_num):gpu_(gpu_num){}
NSG() = default;
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
void Add(const DatasetPtr &dataset, const Config &config) override;
BinarySet Serialize() override;
void Load(const BinarySet &index_binary) override;
int64_t Count() override;
int64_t Dimension() override;
VectorIndexPtr Clone() override;
void Seal() override;
private:
std::shared_ptr<algo::NsgIndex> index_;
int64_t gpu_;
};
using NSGIndexPtr = std::shared_ptr<NSG>();
}
}

View File

@ -1,45 +0,0 @@
#pragma once
#include <memory>
#include "knowhere/common/config.h"
#include "knowhere/common/dataset.h"
#include "knowhere/index/index.h"
#include "knowhere/index/preprocessor/preprocessor.h"
namespace zilliz {
namespace knowhere {
class VectorIndex;
using VectorIndexPtr = std::shared_ptr<VectorIndex>;
class VectorIndex : public Index {
public:
virtual PreprocessorPtr
BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { return nullptr; }
virtual IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) { return nullptr; }
virtual void
Add(const DatasetPtr &dataset, const Config &config) = 0;
virtual void
Seal() = 0;
virtual VectorIndexPtr
Clone() = 0;
virtual int64_t
Count() = 0;
virtual int64_t
Dimension() = 0;
};
} // namespace knowhere
} // namespace zilliz

View File

@ -5,10 +5,9 @@ include_directories(${TBB_DIR}/include)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
include_directories(${CORE_SOURCE_DIR}/include)
include_directories(${CORE_SOURCE_DIR}/knowhere)
include_directories(${CORE_SOURCE_DIR}/thirdparty)
include_directories(${CORE_SOURCE_DIR}/thirdparty/SPTAG/AnnService)
include_directories(${CORE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include)
set(SPTAG_SOURCE_DIR ${CORE_SOURCE_DIR}/thirdparty/SPTAG)
file(GLOB HDR_FILES
@ -29,25 +28,34 @@ if(NOT TARGET SPTAGLibStatic)
endif()
set(external_srcs
knowhere/adapter/sptag.cpp
knowhere/adapter/structure.cpp
knowhere/adapter/arrow.cpp
knowhere/common/exception.cpp
knowhere/common/timer.cpp
knowhere/adapter/SptagAdapter.cpp
knowhere/adapter/Structure.cpp
knowhere/adapter/ArrowAdapter.cpp
knowhere/common/Exception.cpp
knowhere/common/Timer.cpp
)
set(index_srcs
knowhere/index/preprocessor/normalize.cpp
knowhere/index/vector_index/cpu_kdt_rng.cpp
knowhere/index/vector_index/idmap.cpp
knowhere/index/vector_index/ivf.cpp
knowhere/index/vector_index/gpu_ivf.cpp
knowhere/index/vector_index/kdt_parameters.cpp
knowhere/index/vector_index/nsg_index.cpp
knowhere/index/vector_index/nsg/nsg.cpp
knowhere/index/vector_index/nsg/nsg_io.cpp
knowhere/index/vector_index/nsg/utils.cpp
knowhere/index/vector_index/cloner.cpp
knowhere/index/preprocessor/Normalize.cpp
knowhere/index/vector_index/IndexKDT.cpp
knowhere/index/vector_index/IndexIDMAP.cpp
knowhere/index/vector_index/IndexIVF.cpp
knowhere/index/vector_index/IndexGPUIVF.cpp
knowhere/index/vector_index/helpers/KDTParameterMgr.cpp
knowhere/index/vector_index/IndexNSG.cpp
knowhere/index/vector_index/nsg/NSG.cpp
knowhere/index/vector_index/nsg/NSGIO.cpp
knowhere/index/vector_index/nsg/NSGHelper.cpp
knowhere/index/vector_index/helpers/Cloner.cpp
knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp
knowhere/index/vector_index/IndexIVFSQ.cpp
knowhere/index/vector_index/IndexGPUIVFSQ.cpp
knowhere/index/vector_index/IndexIVFSQHybrid.cpp
knowhere/index/vector_index/IndexIVFPQ.cpp
knowhere/index/vector_index/IndexGPUIVFPQ.cpp
knowhere/index/vector_index/FaissBaseIndex.cpp
knowhere/index/vector_index/helpers/FaissIO.cpp
knowhere/index/vector_index/helpers/IndexParameter.cpp
)
set(depend_libs
@ -107,10 +115,9 @@ INSTALL(FILES ${CORE_SOURCE_DIR}/thirdparty/tbb/libtbb.so
)
set(CORE_INCLUDE_DIRS
${CORE_SOURCE_DIR}/include
${CORE_SOURCE_DIR}/knowhere
${CORE_SOURCE_DIR}/thirdparty
${CORE_SOURCE_DIR}/thirdparty/SPTAG/AnnService
${CORE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include
${ARROW_INCLUDE_DIR}
${FAISS_INCLUDE_DIR}
${OPENBLAS_INCLUDE_DIR}
@ -120,18 +127,16 @@ set(CORE_INCLUDE_DIRS
set(CORE_INCLUDE_DIRS ${CORE_INCLUDE_DIRS} PARENT_SCOPE)
INSTALL(DIRECTORY
${CORE_SOURCE_DIR}/include/knowhere
${CORE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include/jsoncons
${CORE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include/jsoncons_ext
${ARROW_INCLUDE_DIR}/arrow
${FAISS_PREFIX}/include/faiss
${OPENBLAS_INCLUDE_DIR}/
${CORE_SOURCE_DIR}/thirdparty/tbb/include/tbb
DESTINATION
include)
INSTALL(DIRECTORY
${SPTAG_SOURCE_DIR}/AnnService/inc/
DESTINATION
include/SPTAG/AnnService/inc)
#INSTALL(DIRECTORY
# ${CORE_SOURCE_DIR}/include/knowhere
# ${ARROW_INCLUDE_DIR}/arrow
# ${FAISS_PREFIX}/include/faiss
# ${OPENBLAS_INCLUDE_DIR}/
# ${CORE_SOURCE_DIR}/thirdparty/tbb/include/tbb
# DESTINATION
# include)
#
#INSTALL(DIRECTORY
# ${SPTAG_SOURCE_DIR}/AnnService/inc/
# DESTINATION
# include/SPTAG/AnnService/inc)

View File

@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/ArrowAdapter.h"
namespace knowhere {
ArrayPtr
CopyArray(const ArrayPtr& origin) {
ArrayPtr copy = nullptr;
auto copy_data = origin->data()->Copy();
switch (origin->type_id()) {
#define DEFINE_TYPE(type, clazz) \
case arrow::Type::type: { \
copy = std::make_shared<arrow::clazz>(copy_data); \
}
DEFINE_TYPE(BOOL, BooleanArray)
DEFINE_TYPE(BINARY, BinaryArray)
DEFINE_TYPE(FIXED_SIZE_BINARY, FixedSizeBinaryArray)
DEFINE_TYPE(DECIMAL, Decimal128Array)
DEFINE_TYPE(FLOAT, NumericArray<arrow::FloatType>)
DEFINE_TYPE(INT64, NumericArray<arrow::Int64Type>)
default:
break;
}
return copy;
}
SchemaPtr
CopySchema(const SchemaPtr& origin) {
std::vector<std::shared_ptr<Field>> fields;
for (auto& field : origin->fields()) {
auto copy = std::make_shared<Field>(field->name(), field->type(), field->nullable(), nullptr);
fields.emplace_back(copy);
}
return std::make_shared<Schema>(std::move(fields));
}
} // namespace knowhere

View File

@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "knowhere/common/Array.h"
namespace knowhere {
ArrayPtr
CopyArray(const ArrayPtr& origin);
SchemaPtr
CopySchema(const SchemaPtr& origin);
} // namespace knowhere

View File

@ -0,0 +1,122 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
namespace knowhere {
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr& dataset) {
auto array = dataset->array()[0];
auto elems = array->length();
auto p_data = array->data()->GetValues<int64_t>(1, 0);
auto p_offset = (int64_t*)malloc(sizeof(int64_t) * elems);
for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8;
std::shared_ptr<SPTAG::MetadataSet> metaset(
new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_data, elems * sizeof(int64_t), false),
SPTAG::ByteArray((std::uint8_t*)p_offset, elems * sizeof(int64_t), true), elems));
return metaset;
}
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr& dataset) {
auto tensor = dataset->tensor()[0];
auto p_data = tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
auto num_bytes = tensor->size() * sizeof(float);
SPTAG::ByteArray byte_array(p_data, num_bytes, false);
auto vectorset =
std::make_shared<SPTAG::BasicVectorSet>(byte_array, SPTAG::VectorValueType::Float, dimension, rows);
return vectorset;
}
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr& dataset, const Config& config) {
auto tensor = dataset->tensor()[0];
auto p_data = (float*)tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
std::vector<SPTAG::QueryResult> query_results(rows, SPTAG::QueryResult(nullptr, config->k, true));
for (auto i = 0; i < rows; ++i) {
query_results[i].SetTarget(&p_data[i * dimension]);
}
return query_results;
}
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
auto k = query_results[0].GetResultNum();
auto elems = query_results.size() * k;
auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems);
auto p_dist = (float*)malloc(sizeof(float) * elems);
// TODO: throw if malloc failed.
#pragma omp parallel for
for (auto i = 0; i < query_results.size(); ++i) {
auto results = query_results[i].GetResults();
auto num_result = query_results[i].GetResultNum();
for (auto j = 0; j < num_result; ++j) {
// p_id[i * k + j] = results[j].VID;
p_id[i * k + j] = *(int64_t*)query_results[i].GetMetadata(j).Data();
p_dist[i * k + j] = results[j].Dist;
}
}
auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
// TODO: magic
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
std::vector<FieldPtr> fields{field_id, field_dist};
auto schema = std::make_shared<Schema>(fields);
return std::make_shared<Dataset>(array, schema);
}
} // namespace knowhere

View File

@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
#include <memory>
#include <vector>
#include "knowhere/common/Dataset.h"
namespace knowhere {
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr& dataset);
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr& dataset);
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr& dataset, const Config& config);
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results);
} // namespace knowhere

View File

@ -1,17 +1,29 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/structure.h"
#include "knowhere/adapter/Structure.h"
#include <string>
#include <vector>
namespace zilliz {
namespace knowhere {
ArrayPtr
ConstructInt64ArraySmart(uint8_t *data, int64_t size) {
ConstructInt64ArraySmart(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBufferSmart(data, size)};
auto type = std::make_shared<arrow::Int64Type>();
@ -20,7 +32,7 @@ ConstructInt64ArraySmart(uint8_t *data, int64_t size) {
}
ArrayPtr
ConstructFloatArraySmart(uint8_t *data, int64_t size) {
ConstructFloatArraySmart(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBufferSmart(data, size)};
auto type = std::make_shared<arrow::FloatType>();
@ -29,14 +41,14 @@ ConstructFloatArraySmart(uint8_t *data, int64_t size) {
}
TensorPtr
ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector<int64_t> shape) {
ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector<int64_t> shape) {
auto buffer = MakeMutableBufferSmart(data, size);
auto float_type = std::make_shared<arrow::FloatType>();
return std::make_shared<Tensor>(float_type, buffer, shape);
}
ArrayPtr
ConstructInt64Array(uint8_t *data, int64_t size) {
ConstructInt64Array(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBuffer(data, size)};
auto type = std::make_shared<arrow::Int64Type>();
@ -45,7 +57,7 @@ ConstructInt64Array(uint8_t *data, int64_t size) {
}
ArrayPtr
ConstructFloatArray(uint8_t *data, int64_t size) {
ConstructFloatArray(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBuffer(data, size)};
auto type = std::make_shared<arrow::FloatType>();
@ -54,23 +66,22 @@ ConstructFloatArray(uint8_t *data, int64_t size) {
}
TensorPtr
ConstructFloatTensor(uint8_t *data, int64_t size, std::vector<int64_t> shape) {
ConstructFloatTensor(uint8_t* data, int64_t size, std::vector<int64_t> shape) {
auto buffer = MakeMutableBuffer(data, size);
auto float_type = std::make_shared<arrow::FloatType>();
return std::make_shared<Tensor>(float_type, buffer, shape);
}
FieldPtr
ConstructInt64Field(const std::string &name) {
ConstructInt64Field(const std::string& name) {
auto type = std::make_shared<arrow::Int64Type>();
return std::make_shared<Field>(name, type);
}
FieldPtr
ConstructFloatField(const std::string &name) {
ConstructFloatField(const std::string& name) {
auto type = std::make_shared<arrow::FloatType>();
return std::make_shared<Field>(name, type);
}
}
}
} // namespace knowhere

View File

@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "knowhere/common/Dataset.h"
namespace knowhere {
extern ArrayPtr
ConstructInt64ArraySmart(uint8_t* data, int64_t size);
extern ArrayPtr
ConstructFloatArraySmart(uint8_t* data, int64_t size);
extern TensorPtr
ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector<int64_t> shape);
extern ArrayPtr
ConstructInt64Array(uint8_t* data, int64_t size);
extern ArrayPtr
ConstructFloatArray(uint8_t* data, int64_t size);
extern TensorPtr
ConstructFloatTensor(uint8_t* data, int64_t size, std::vector<int64_t> shape);
extern FieldPtr
ConstructInt64Field(const std::string& name);
extern FieldPtr
ConstructFloatField(const std::string& name);
} // namespace knowhere

View File

@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace knowhere {
#define GETTENSOR(dataset) \
auto tensor = dataset->tensor()[0]; \
auto p_data = tensor->raw_data(); \
auto dim = tensor->shape()[1]; \
auto rows = tensor->shape()[0];
} // namespace knowhere

View File

@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <arrow/array.h>
#include <memory>
#include "Schema.h"
namespace knowhere {
using ArrayData = arrow::ArrayData;
using ArrayDataPtr = std::shared_ptr<ArrayData>;
using Array = arrow::Array;
using ArrayPtr = std::shared_ptr<Array>;
using BooleanArray = arrow::BooleanArray;
using BooleanArrayPtr = std::shared_ptr<arrow::BooleanArray>;
template <typename DType>
using NumericArray = arrow::NumericArray<DType>;
template <typename DType>
using NumericArrayPtr = std::shared_ptr<arrow::NumericArray<DType>>;
using BinaryArray = arrow::BinaryArray;
using BinaryArrayPtr = std::shared_ptr<arrow::BinaryArray>;
using FixedSizeBinaryArray = arrow::FixedSizeBinaryArray;
using FixedSizeBinaryArrayPtr = std::shared_ptr<arrow::FixedSizeBinaryArray>;
using Decimal128Array = arrow::Decimal128Array;
using Decimal128ArrayPtr = std::shared_ptr<arrow::Decimal128Array>;
} // namespace knowhere

View File

@ -0,0 +1,75 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "Id.h"
namespace knowhere {
struct Binary {
ID id;
std::shared_ptr<uint8_t> data;
int64_t size = 0;
};
using BinaryPtr = std::shared_ptr<Binary>;
class BinarySet {
public:
BinaryPtr
GetByName(const std::string& name) const {
return binary_map_.at(name);
}
void
Append(const std::string& name, BinaryPtr binary) {
binary_map_[name] = std::move(binary);
}
void
Append(const std::string& name, std::shared_ptr<uint8_t> data, int64_t size) {
auto binary = std::make_shared<Binary>();
binary->data = data;
binary->size = size;
binary_map_[name] = std::move(binary);
}
// void
// Append(const std::string &name, void *data, int64_t size, ID id) {
// Binary binary;
// binary.data = data;
// binary.size = size;
// binary.id = id;
// binary_map_[name] = binary;
//}
void
clear() {
binary_map_.clear();
}
public:
std::map<std::string, BinaryPtr> binary_map_;
};
} // namespace knowhere

Some files were not shown because too many files have changed in this diff Show More