mirror of https://github.com/milvus-io/milvus.git
Merge remote-tracking branch 'upstream/0.6.0' into 0.6.0
commit 0937d0115b
@@ -20,7 +20,11 @@ Please mark all changes in the change log and use the ticket from JIRA.
- \#440 - Query API in customization still uses old version
- \#440 - Server cannot start up with gpu_resource_config.enable=false in GPU version
- \#458 - Index data is not compatible between 0.5 and 0.6
- \#465 - Server hang caused by searching with NSG index
- \#486 - No GPU usage during index building
- \#509 - IVF_PQ index build trapped in a dead loop caused by invalid params
- \#513 - Unittest DELETE_BY_RANGE sometimes fails
- \#527 - faiss benchmark not compatible with faiss 1.6.0

## Feature
- \#12 - Pure CPU version for Milvus
@@ -7,6 +7,7 @@
[![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master)
![Release](https://img.shields.io/badge/release-v0.5.3-yellowgreen)
![Release_date](https://img.shields.io/badge/release%20date-November-yellowgreen)
[![codecov](https://codecov.io/gh/milvus-io/milvus/branch/master/graph/badge.svg)](https://codecov.io/gh/milvus-io/milvus)

[Chinese version](README_CN.md) | [Japanese version](README_JP.md)
@@ -6,6 +6,8 @@
[![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master)
![Release](https://img.shields.io/badge/release-v0.5.3-yellowgreen)
![Release_date](https://img.shields.io/badge/release_date-October-yellowgreen)
[![codecov](https://codecov.io/gh/milvus-io/milvus/branch/master/graph/badge.svg)](https://codecov.io/gh/milvus-io/milvus)

# Welcome to Milvus
@@ -4,6 +4,7 @@ timeout(time: 5, unit: 'MINUTES') {
        sh "./yaml_processor.py merge -f /opt/milvus/conf/server_config.yaml -m ../yaml/update_server_config.yaml -i && rm /opt/milvus/conf/server_config.yaml.bak"
        sh "sed -i 's/\\/tmp\\/milvus/\\/opt\\/milvus/g' /opt/milvus/conf/log_config.conf"
    }
    sh "rm -rf /opt/milvus/unittest"
    sh "tar -zcvf ./${PROJECT_NAME}-${PACKAGE_VERSION}.tar.gz -C /opt/ milvus"
    withCredentials([usernamePassword(credentialsId: "${params.JFROG_CREDENTIALS_ID}", usernameVariable: 'JFROG_USERNAME', passwordVariable: 'JFROG_PASSWORD')]) {
        def uploadStatus = sh(returnStatus: true, script: "curl -u${JFROG_USERNAME}:${JFROG_PASSWORD} -T ./${PROJECT_NAME}-${PACKAGE_VERSION}.tar.gz ${params.JFROG_ARTFACTORY_URL}/milvus/package/${PROJECT_NAME}-${PACKAGE_VERSION}.tar.gz")
@@ -31,9 +31,21 @@ GET_CURRENT_TIME(BUILD_TIME)
string(REGEX REPLACE "\n" "" BUILD_TIME ${BUILD_TIME})
message(STATUS "Build time = ${BUILD_TIME}")

if (NOT DEFINED CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.")
endif ()

set (GIT_BRANCH_NAME_REGEX "[0-9]+\\.[0-9]+\\.[0-9]")

MACRO(GET_GIT_BRANCH_NAME GIT_BRANCH_NAME)
    execute_process(COMMAND sh "-c" "git log --decorate | head -n 1 | sed 's/.*(\\(.*\\))/\\1/' | sed 's/.* \\(.*\\),.*/\\1/' | sed 's=[a-zA-Z]*\/==g'"
                    OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
    if(NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
        execute_process(COMMAND "git" rev-parse --abbrev-ref HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
    endif ()
    if(NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
        execute_process(COMMAND "git" symbolic-ref --short -q HEAD HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
    endif ()
ENDMACRO(GET_GIT_BRANCH_NAME)

GET_GIT_BRANCH_NAME(GIT_BRANCH_NAME)

@@ -43,7 +55,7 @@ if (NOT GIT_BRANCH_NAME STREQUAL "")
endif ()

set(MILVUS_VERSION "${GIT_BRANCH_NAME}")
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]" MILVUS_VERSION "${MILVUS_VERSION}")
string(REGEX MATCH "${GIT_BRANCH_NAME_REGEX}" MILVUS_VERSION "${MILVUS_VERSION}")

if (CMAKE_BUILD_TYPE STREQUAL "Release")
    set(BUILD_TYPE "Release")
@@ -56,7 +56,7 @@ while getopts "p:d:t:f:ulrcgjhxzme" arg; do
            USE_JFROG_CACHE="ON"
            ;;
        x)
            CUSTOMIZATION="OFF" # force use ori faiss
            CUSTOMIZATION="ON"
            ;;
        g)
            GPU_VERSION="ON"
@@ -105,7 +105,8 @@ DBImpl::Stop() {
    shutting_down_.store(true, std::memory_order_release);

    // make sure all memory data is serialized
    MemSerialize();
    std::set<std::string> sync_table_ids;
    SyncMemData(sync_table_ids);

    // wait for compaction/build-index to finish
    bg_timer_thread_.join();

@@ -329,7 +330,10 @@ DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) {
        return SHUTDOWN_ERROR;
    }

    Status status;
    // serialize memory data
    std::set<std::string> sync_table_ids;
    auto status = SyncMemData(sync_table_ids);

    {
        std::unique_lock<std::mutex> lock(build_index_mutex_);

@@ -588,12 +592,12 @@ DBImpl::StartMetricTask() {
}

Status
DBImpl::MemSerialize() {
DBImpl::SyncMemData(std::set<std::string>& sync_table_ids) {
    std::lock_guard<std::mutex> lck(mem_serialize_mutex_);
    std::set<std::string> temp_table_ids;
    mem_mgr_->Serialize(temp_table_ids);
    for (auto& id : temp_table_ids) {
        compact_table_ids_.insert(id);
        sync_table_ids.insert(id);
    }

    if (!temp_table_ids.empty()) {

@@ -612,7 +616,7 @@ DBImpl::StartCompactionTask() {
}

    // serialize memory data
    MemSerialize();
    SyncMemData(compact_table_ids_);

    // has compaction finished?
    {
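The MemSerialize-to-SyncMemData rename above changes the helper so that callers receive the set of table ids that were actually flushed. A minimal sketch of that pattern, using a hypothetical MemoryManager stand-in rather than the real Milvus classes:

```cpp
#include <mutex>
#include <set>
#include <string>

// Hypothetical stand-in for the memory manager used by DBImpl; the real
// interface lives in the Milvus sources.
struct MemoryManager {
    void Serialize(std::set<std::string>& flushed_ids) {
        // ... write in-memory segments to disk and report the affected tables ...
    }
};

class DBSketch {
 public:
    // Serialize all in-memory data and report which tables were flushed,
    // both to the caller (sync_table_ids) and to the compaction backlog.
    void SyncMemData(std::set<std::string>& sync_table_ids) {
        std::lock_guard<std::mutex> lock(mem_serialize_mutex_);
        std::set<std::string> temp_table_ids;
        mem_mgr_.Serialize(temp_table_ids);
        for (auto& id : temp_table_ids) {
            compact_table_ids_.insert(id);
            sync_table_ids.insert(id);
        }
    }

 private:
    std::mutex mem_serialize_mutex_;
    MemoryManager mem_mgr_;
    std::set<std::string> compact_table_ids_;
};
```

With this shape, callers such as Stop() and CreateIndex() above can act on exactly the tables they just flushed instead of the whole compaction backlog.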
@@ -150,7 +150,7 @@ class DBImpl : public DB {
    BackgroundBuildIndex();

    Status
    MemSerialize();
    SyncMemData(std::set<std::string>& sync_table_ids);

    Status
    GetFilesToBuildIndex(const std::string& table_id, const std::vector<int>& file_types,
@@ -74,7 +74,7 @@ function(ExternalProject_Use_Cache project_name package_file install_path)
    ${CMAKE_COMMAND} -E echo
    "Extracting ${package_file} to ${install_path}"
    COMMAND
    ${CMAKE_COMMAND} -E tar xzvf ${package_file} ${install_path}
    ${CMAKE_COMMAND} -E tar xzf ${package_file} ${install_path}
    WORKING_DIRECTORY ${INDEX_BINARY_DIR}
)

@@ -17,6 +17,7 @@

#include <iostream>  // TODO(linxj): using Log instead

#include "knowhere/common/Log.h"
#include "knowhere/common/Timer.h"

namespace knowhere {

@@ -51,30 +52,18 @@ TimeRecorder::PrintTimeRecord(const std::string& msg, double span) {
            std::cout << str_log << std::endl;
            break;
        }
        // case 1: {
        //     SERVER_LOG_DEBUG << str_log;
        //     break;
        // }
        case 1: {
            KNOWHERE_LOG_DEBUG << str_log;
            break;
        }
        // case 2: {
        //     SERVER_LOG_INFO << str_log;
        //     break;
        // }
        //     KNOWHERE_LOG_TRACE << str_log;
        //     break;
        // }
        // case 3: {
        //     SERVER_LOG_WARNING << str_log;
        //     break;
        // }
        // case 4: {
        //     SERVER_LOG_ERROR << str_log;
        //     break;
        // }
        // case 5: {
        //     SERVER_LOG_FATAL << str_log;
        //     break;
        // }
        // default: {
        //     SERVER_LOG_INFO << str_log;
        //     break;
        // }
        //     KNOWHERE_LOG_WARNING << str_log;
        //     break;
        // }
    }
}

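The change above routes TimeRecorder output to KNOWHERE_LOG_DEBUG instead of the commented-out SERVER_LOG_* macros. As a rough illustration of the RecordSection/ElapseFromBegin usage that appears in the NsgIndex change below, here is a self-contained std::chrono sketch; it is not the Knowhere class, only an approximation of its timing semantics:

```cpp
#include <chrono>
#include <iostream>
#include <string>

// Rough stand-in for knowhere::TimeRecorder: prints the time since the last
// checkpoint (RecordSection) or since construction (ElapseFromBegin).
class SectionTimer {
    using clock = std::chrono::steady_clock;

 public:
    explicit SectionTimer(std::string header) : header_(std::move(header)), begin_(clock::now()), last_(begin_) {}

    double RecordSection(const std::string& msg) {
        auto now = clock::now();
        double ms = std::chrono::duration<double, std::milli>(now - last_).count();
        last_ = now;
        std::cout << header_ << ": " << msg << " " << ms << " ms" << std::endl;
        return ms;
    }

    double ElapseFromBegin(const std::string& msg) {
        double ms = std::chrono::duration<double, std::milli>(clock::now() - begin_).count();
        std::cout << header_ << ": " << msg << " " << ms << " ms" << std::endl;
        return ms;
    }

 private:
    std::string header_;
    clock::time_point begin_;
    clock::time_point last_;
};

int main() {
    SectionTimer rc("NsgIndex::search");
    // ... run the search phase ...
    rc.RecordSection("search");
    // ... merge the per-query results ...
    rc.RecordSection("merge");
    rc.ElapseFromBegin("total");
}
```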
@@ -718,29 +718,38 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
                 int64_t* ids, SearchParams& params) {
    std::vector<std::vector<Neighbor>> resset(nq);

    TimeRecorder rc("search");
    if (nq == 1) {
        params.search_length = k;
    TimeRecorder rc("NsgIndex::search", 1);
    // TODO(linxj): when to use openmp
    if (nq <= 4) {
        GetNeighbors(query, resset[0], nsg, &params);
    } else {
//#pragma omp parallel for schedule(dynamic, 50)
#pragma omp parallel for
        for (unsigned int i = 0; i < nq; ++i) {
            // TODO(linxj): when to use openmp
            auto single_query = query + i * dim;
            GetNeighbors(single_query, resset[i], nsg, &params);
        }
    }
    rc.ElapseFromBegin("cost");

    rc.RecordSection("search");
    for (unsigned int i = 0; i < nq; ++i) {
        for (unsigned int j = 0; j < k; ++j) {
            // ids[i * k + j] = resset[i][j].id;

            // Fix(linxj): bug, resset[i][j] out of range
            ids[i * k + j] = ids_[resset[i][j].id];
            dist[i * k + j] = resset[i][j].distance;
        int64_t var = resset[i].size() - k;
        if (var >= 0) {
            for (unsigned int j = 0; j < k; ++j) {
                ids[i * k + j] = ids_[resset[i][j].id];
                dist[i * k + j] = resset[i][j].distance;
            }
        } else {
            for (unsigned int j = 0; j < resset[i].size(); ++j) {
                ids[i * k + j] = ids_[resset[i][j].id];
                dist[i * k + j] = resset[i][j].distance;
            }
            for (unsigned int j = resset[i].size(); j < k; ++j) {
                ids[i * k + j] = -1;
                dist[i * k + j] = -1;
            }
        }
    }
    rc.RecordSection("merge");

    //>> Debug: test single insert
    // int x_0 = resset[0].size();

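The reworked copy-out loop above is what fixes the out-of-range access (the #465 server hang): when a query returns fewer than k neighbors, the remaining output slots are padded with -1 instead of being read past the end of the result set. A condensed sketch of that padding logic, with plain vectors standing in for the Neighbor result sets and the ids_ mapping:

```cpp
#include <cstdint>
#include <vector>

struct Neighbor {
    unsigned id;
    float distance;
};

// Copy up to k neighbors per query into flat output arrays; if a query found
// fewer than k neighbors, pad the remaining slots with -1 rather than reading
// past the end of its result set.
void CopyResults(const std::vector<std::vector<Neighbor>>& resset, const std::vector<int64_t>& ids_map, unsigned k,
                 int64_t* ids, float* dist) {
    for (unsigned i = 0; i < resset.size(); ++i) {
        unsigned found = static_cast<unsigned>(resset[i].size());
        unsigned copy_n = found < k ? found : k;
        for (unsigned j = 0; j < copy_n; ++j) {
            ids[i * k + j] = ids_map[resset[i][j].id];
            dist[i * k + j] = resset[i][j].distance;
        }
        for (unsigned j = copy_n; j < k; ++j) {
            ids[i * k + j] = -1;   // sentinel: no neighbor found for this slot
            dist[i * k + j] = -1;
        }
    }
}
```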
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

#define USE_FAISS_V0_2_1 0

#include <gtest/gtest.h>

#include <hdf5.h>

@@ -26,21 +28,6 @@
#include <cstdio>
#include <vector>

#define USE_FAISS_V1_5_3 0

#if USE_FAISS_V1_5_3
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/utils.h>
#include <sys/stat.h>
#include <cstdlib>
#include <cstring>

#else
#include <faiss/gpu/GpuCloner.h>
#include <faiss/index_factory.h>
#include <faiss/utils/distances.h>
#endif

#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <faiss/IndexIVF.h>

@@ -48,8 +35,22 @@
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/index_io.h>

#if USE_FAISS_V0_2_1
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/utils.h>
#include <sys/stat.h>
#include <cstdlib>
#include <cstring>
#else
#include <faiss/gpu/GpuCloner.h>
#include <faiss/index_factory.h>
#include <faiss/utils/distances.h>
#endif

#ifdef CUSTOMIZATION
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>
#else
#include <faiss/gpu/GpuIndexIVF.h>
#endif

/*****************************************************

@@ -295,10 +296,12 @@ load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std
    cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index);
    delete gpu_index;

#ifdef CUSTOMIZATION
    faiss::IndexIVF* cpu_ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
    if (cpu_ivf_index != nullptr) {
        cpu_ivf_index->to_readonly();
    }
#endif

    printf("[%.3f s] Writing index file: %s\n", elapsed() - t0, index_file_name.c_str());
    faiss::write_index(cpu_index, index_file_name.c_str());

@@ -374,13 +377,15 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key
    faiss::Index *gpu_index, *index;
    if (query_mode != MODE_CPU) {
        faiss::gpu::GpuClonerOptions option;
#ifdef CUSTOMIZATION
        option.allInGpu = true;

        faiss::IndexComposition index_composition;
        index_composition.index = cpu_index;
        index_composition.quantizer = nullptr;

#endif
        switch (query_mode) {
#ifdef CUSTOMIZATION
            case MODE_MIX: {
                index_composition.mode = 1;  // 0: all data, 1: copy quantizer, 2: copy data

@@ -403,7 +408,9 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key
                index = cpu_index;
                break;
            }
#endif
            case MODE_GPU:
#ifdef CUSTOMIZATION
                index_composition.mode = 0;  // 0: all data, 1: copy quantizer, 2: copy data

                // warm up the transmission

@@ -412,6 +419,14 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key

                copy_time = elapsed();
                gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option);
#else
                // warm up the transmission
                gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index, &option);
                delete gpu_index;

                copy_time = elapsed();
                gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index, &option);
#endif
                copy_time = elapsed() - copy_time;
                printf("[%.3f s] Copy data completed, cost %f s\n", elapsed() - t0, copy_time);

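The benchmark changes above mostly reshuffle version-guarded includes (USE_FAISS_V0_2_1 replacing the removed USE_FAISS_V1_5_3 guard, to track faiss 1.6.0) while keeping the warm-up-then-timed-copy pattern for moving an index to the GPU. A minimal sketch of that pattern against the stock faiss GPU API is below; the CUSTOMIZATION/IndexComposition branch is Milvus-specific and omitted, the "IVF1024,Flat" factory string and device 0 are illustrative, and faiss 1.6-style headers with GPU support are assumed:

```cpp
#include <chrono>
#include <cstdio>

#include <faiss/Index.h>
#include <faiss/index_factory.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuClonerOptions.h>
#include <faiss/gpu/StandardGpuResources.h>

int main() {
    const int d = 128;             // vector dimension (assumed for this sketch)
    const int gpu_device_idx = 0;  // GPU the index is copied to

    faiss::Index* cpu_index = faiss::index_factory(d, "IVF1024,Flat");

    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuClonerOptions option;

    // Warm up the transmission path once, then time the real copy, as the
    // benchmark above does in its non-CUSTOMIZATION branch.
    faiss::Index* gpu_index = faiss::gpu::index_cpu_to_gpu(&res, gpu_device_idx, cpu_index, &option);
    delete gpu_index;

    auto t0 = std::chrono::steady_clock::now();
    gpu_index = faiss::gpu::index_cpu_to_gpu(&res, gpu_device_idx, cpu_index, &option);
    double copy_time = std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count();
    std::printf("Copy data completed, cost %f s\n", copy_time);

    delete gpu_index;
    delete cpu_index;
    return 0;
}
```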
@@ -22,6 +22,7 @@

#include <cmath>
#include <memory>
#include <vector>

// TODO(lxj): add conf checker

@@ -129,17 +130,35 @@ IVFPQConfAdapter::Match(const TempMetaConf& metaconf) {
    conf->metric_type = metaconf.metric_type;
    conf->gpu_id = metaconf.gpu_id;
    conf->nbits = 8;

    if (!(conf->d % 4))
        conf->m = conf->d / 4;  // compression ratio = 16
    else if (!(conf->d % 2))
        conf->m = conf->d / 2;  // compression ratio = 8
    else if (!(conf->d % 3))
        conf->m = conf->d / 3;  // compression ratio = 12
    else
        conf->m = conf->d;  // same as SQ8, compression ratio = 4

    MatchBase(conf);

    /*
     * Faiss 1.6
     * Only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims per sub-quantizer are currently supported with
     * no precomputed codes. Precomputed codes support any number of dimensions, but will involve memory overheads.
     */
    static std::vector<int64_t> support_dim_per_subquantizer{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1};
    static std::vector<int64_t> support_subquantizer{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1};
    std::vector<int64_t> resset;
    for (const auto& dimperquantizer : support_dim_per_subquantizer) {
        if (!(conf->d % dimperquantizer)) {
            auto subquantzier_num = conf->d / dimperquantizer;
            auto finder = std::find(support_subquantizer.begin(), support_subquantizer.end(), subquantzier_num);
            if (finder != support_subquantizer.end()) {
                resset.push_back(subquantzier_num);
            }
        }
    }

    if (resset.empty()) {
        // todo(linxj): throw exception here.
        return nullptr;
    }
    static int64_t compression_level = 1;  // 1: low, 2: high
    if (compression_level == 1) {
        conf->m = resset[int(resset.size() / 2)];
        WRAPPER_LOG_DEBUG << "PQ m = " << conf->m << ", compression ratio = " << conf->d / conf->m * 4;
    }
    return conf;
}

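The rewritten IVFPQConfAdapter::Match above replaces the old divide-by-4/2/3 heuristic with an explicit search over the sub-quantizer configurations faiss 1.6 accepts without precomputed codes, which is what resolves the #509 dead loop on invalid params. A standalone sketch of that selection step, returning the candidate m values for a given dimension d (names here are illustrative, not the Milvus API):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// For faiss 1.6 without precomputed codes, only these dims-per-sub-quantizer
// and sub-quantizer counts are usable (mirrors the tables in the diff above).
static const std::vector<int64_t> kSupportedDimPerSubquantizer{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1};
static const std::vector<int64_t> kSupportedSubquantizer{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1};

// Collect every valid PQ "m" (number of sub-quantizers) for dimension d.
std::vector<int64_t> ValidPqM(int64_t d) {
    std::vector<int64_t> result;
    for (int64_t dim_per_sub : kSupportedDimPerSubquantizer) {
        if (d % dim_per_sub != 0) continue;
        int64_t m = d / dim_per_sub;
        if (std::find(kSupportedSubquantizer.begin(), kSupportedSubquantizer.end(), m) != kSupportedSubquantizer.end()) {
            result.push_back(m);
        }
    }
    return result;
}

int main() {
    int64_t d = 128;  // example dimension
    auto candidates = ValidPqM(d);
    if (candidates.empty()) {
        std::cout << "no valid IVF_PQ configuration for d = " << d << std::endl;
        return 1;
    }
    // Like the adapter, pick the middle candidate as the low-compression default.
    int64_t m = candidates[candidates.size() / 2];
    std::cout << "d = " << d << ", m = " << m << ", compression ratio = " << d / m * 4 << std::endl;
    return 0;
}
```

For d = 128 this yields candidates {4, 8, 16, 32, 64} and picks m = 16, so the request never falls into a configuration faiss cannot train.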
@@ -1208,9 +1208,9 @@ class TestCreateIndexParamsInvalid(object):
nlist = index_params["nlist"]
logging.getLogger().info(index_params)
status, ids = connect.add_vectors(table, vectors)
# if not isinstance(index_type, int) or not isinstance(nlist, int):
with pytest.raises(Exception) as e:
if (not index_type) or (not nlist) or (not isinstance(index_type, IndexType)) or (not isinstance(nlist, int)):
    with pytest.raises(Exception) as e:
        status = connect.create_index(table, index_params)
else:
    status = connect.create_index(table, index_params)
# else:
#     status = connect.create_index(table, index_params)
#     assert not status.OK()
    assert not status.OK()
@@ -54,12 +54,12 @@ def gen_long_str(num):

def gen_invalid_ips():
    ips = [
        "255.0.0.0",
        "255.255.0.0",
        "255.255.255.0",
        "255.255.255.255",
        # "255.0.0.0",
        # "255.255.0.0",
        # "255.255.255.0",
        # "255.255.255.255",
        "127.0.0",
        "123.0.0.2",
        # "123.0.0.2",
        "12-s",
        " ",
        "12 s",

@@ -114,12 +114,12 @@ def gen_invalid_uris():

        # invalid ip
        "tcp:// :%s" % port,
        "tcp://123.0.0.1:%s" % port,
        # "tcp://123.0.0.1:%s" % port,
        "tcp://127.0.0:%s" % port,
        "tcp://255.0.0.0:%s" % port,
        "tcp://255.255.0.0:%s" % port,
        "tcp://255.255.255.0:%s" % port,
        "tcp://255.255.255.255:%s" % port,
        # "tcp://255.255.0.0:%s" % port,
        # "tcp://255.255.255.0:%s" % port,
        # "tcp://255.255.255.255:%s" % port,
        "tcp://\n:%s" % port,

    ]
@@ -263,9 +263,7 @@ def gen_invalid_index_types():

def gen_invalid_nlists():
    nlists = [
        0,
        -1,
        1000000000000001,
        # None,
        [1,2,3],
        (1,2),

@@ -549,4 +547,4 @@ if __name__ == "__main__":
        p.join()
    time.sleep(3)
    status, count = milvus.get_table_row_count(table)
    assert count == process_num * loop_num
    assert count == process_num * loop_num