From 90c4546f7558ca551c14ddcc7ee18c868bb698c6 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 14 Oct 2019 14:44:26 +0800 Subject: [PATCH 1/6] modify jenkins ci file Former-commit-id: 2c00819c479d278f12670cce25b867e3550ace2b --- ci/jenkinsfile/milvus_build.groovy | 2 +- ci/jenkinsfile/milvus_build_no_ut.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkinsfile/milvus_build.groovy b/ci/jenkinsfile/milvus_build.groovy index 6130766400..63c9850d18 100644 --- a/ci/jenkinsfile/milvus_build.groovy +++ b/ci/jenkinsfile/milvus_build.groovy @@ -5,7 +5,7 @@ container('milvus-build-env') { try { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) - dir ("cpp") { + dir ("core") { sh "git config --global user.email \"test@zilliz.com\"" sh "git config --global user.name \"test\"" withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) { diff --git a/ci/jenkinsfile/milvus_build_no_ut.groovy b/ci/jenkinsfile/milvus_build_no_ut.groovy index f72089e8c3..9947cf2c84 100644 --- a/ci/jenkinsfile/milvus_build_no_ut.groovy +++ b/ci/jenkinsfile/milvus_build_no_ut.groovy @@ -5,7 +5,7 @@ container('milvus-build-env') { try { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: 
"git@192.168.1.105:megasearch/milvus.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) - dir ("cpp") { + dir ("core") { sh "git config --global user.email \"test@zilliz.com\"" sh "git config --global user.name \"test\"" withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) { From f1d549d22f6e4c2c47b5f0e1b3d881d0505171f6 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 14 Oct 2019 15:13:50 +0800 Subject: [PATCH 2/6] fix build error Former-commit-id: bfdde4eb57a2be02a87bb989859ece66e6289915 --- core/src/db/meta/MetaTypes.h | 5 ----- .../knowhere/index/vector_index/IndexIVFSQHybrid.cpp | 8 +++++++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/core/src/db/meta/MetaTypes.h b/core/src/db/meta/MetaTypes.h index c6a6b6ae87..c973f3fdea 100644 --- a/core/src/db/meta/MetaTypes.h +++ b/core/src/db/meta/MetaTypes.h @@ -41,11 +41,6 @@ using DateT = int; const DateT EmptyDate = -1; using DatesT = std::vector; -struct DateRange { - DateT start_date_ = 0x1 << 32; - DateT end_date_ = 0; -}; - struct TableSchema { typedef enum { NORMAL, diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index 268b7fb9e3..45ef51c62a 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -256,8 +256,14 @@ void IVFSQHybrid::UnsetQuantizer() { } -void +VectorIndexPtr IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { + return nullptr; +} + +std::pair +IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { + KNOWHERE_THROW_MSG("Not yet implemented"); } IndexModelPtr From 3b73e99b28394f4610a310c5e6a91be4644c2972 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 14 Oct 2019 15:36:54 +0800 
Subject: [PATCH 3/6] check insert vector count and id count Former-commit-id: bc2c5bd8e01271fb4f043d53e9c634e42b31ee64 --- core/src/server/grpc_impl/GrpcRequestTask.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/server/grpc_impl/GrpcRequestTask.cpp b/core/src/server/grpc_impl/GrpcRequestTask.cpp index 1279cbac9f..3172e73786 100644 --- a/core/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/core/src/server/grpc_impl/GrpcRequestTask.cpp @@ -423,8 +423,8 @@ InsertTask::OnExecute() { return Status(SERVER_INVALID_ROWRECORD_ARRAY, "Row record array is empty"); } - if (!record_ids_->vector_id_array().empty()) { - if (record_ids_->vector_id_array().size() != insert_param_->row_record_array_size()) { + if (!insert_param_->row_id_array().empty()) { + if (insert_param_->row_id_array().size() != insert_param_->row_record_array_size()) { return Status(SERVER_ILLEGAL_VECTOR_ID, "Size of vector ids is not equal to row record array size"); } } From 6f74ca826048f6f9fef79db0601011697f69390a Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Mon, 14 Oct 2019 15:58:38 +0800 Subject: [PATCH 4/6] MS-648 fix ivf test Former-commit-id: dedaac89b832cd6a8cf3c0ac754f221c762e2893 --- .../knowhere/index/vector_index/IndexIVFSQHybrid.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index 268b7fb9e3..16e47735d1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -79,12 +79,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) { VectorIndexPtr IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { - auto p = CopyCpuToGpuWithQuantizer(device_id, config); - return p.first; - } else { - 
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); - } + auto p = CopyCpuToGpuWithQuantizer(device_id, config); + return p.first; } void From 766c8f99c3fb8e6589f768a88918fbf29b24a0f9 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 14 Oct 2019 16:06:46 +0800 Subject: [PATCH 5/6] MS-624 Search vectors failed if time ranges long enough Former-commit-id: ef8933fcce46e08184581e9dfb8f6dd46e65f86e --- core/CHANGELOG.md | 5 +- core/src/db/meta/Meta.h | 2 +- core/src/db/meta/MySQLMetaImpl.cpp | 6 +- core/src/db/meta/MySQLMetaImpl.h | 2 +- core/src/db/meta/SqliteMetaImpl.cpp | 90 ++++++++++++++++++++++------- core/src/db/meta/SqliteMetaImpl.h | 2 +- 6 files changed, 79 insertions(+), 28 deletions(-) diff --git a/core/CHANGELOG.md b/core/CHANGELOG.md index 6c561c0e5a..13248265d7 100644 --- a/core/CHANGELOG.md +++ b/core/CHANGELOG.md @@ -9,14 +9,15 @@ Please mark all change in change log and use the ticket from JIRA. - MS-572 - Milvus crash when get SIGINT - MS-577 - Unittest Query randomly hung - MS-587 - Count get wrong result after adding vectors and index built immediately -- MS-599 - search wrong result when table created with metric_type: IP +- MS-599 - Search wrong result when table created with metric_type: IP - MS-601 - Docker logs error caused by get CPUTemperature error - MS-622 - Delete vectors should be failed if date range is invalid - MS-620 - Get table row counts display wrong error code -- MS-637 - out of memory when load too many tasks +- MS-637 - Out of memory when load too many tasks - MS-640 - Cache object size calculate incorrect - MS-641 - Segment fault(signal 11) in PickToLoad - MS-639 - SQ8H index created failed and server hang +- MS-624 - Search vectors failed if time ranges long enough ## Improvement - MS-552 - Add and change the easylogging library diff --git a/core/src/db/meta/Meta.h b/core/src/db/meta/Meta.h index 8167834568..ec4b66916d 100644 --- a/core/src/db/meta/Meta.h +++ b/core/src/db/meta/Meta.h @@ -81,7 +81,7 @@ class 
Meta { UpdateTableFiles(TableFilesSchema& files) = 0; virtual Status - FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& partition, + FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& dates, DatePartionedTableFilesSchema& files) = 0; virtual Status diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index f9f1569a65..44594636ad 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -1088,7 +1088,7 @@ MySQLMetaImpl::FilesToIndex(TableFilesSchema& files) { } Status -MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& partition, +MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& dates, DatePartionedTableFilesSchema& files) { files.clear(); @@ -1108,9 +1108,9 @@ MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& partition, + FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& dates, DatePartionedTableFilesSchema& files) override; Status diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index dd9bb6fd30..9fb0120ddd 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -159,7 +159,7 @@ SqliteMetaImpl::Initialize() { Status SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id, const DatesT &dates) { - if (dates.size() == 0) { + if (dates.empty()) { return Status::OK(); } @@ -171,16 +171,35 @@ SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id, } try { + //sqlite_orm has a bug, 'in' statement cannot handle too many elements + //so we split one query into multi-queries, this is a work-around!! 
+ std::vector split_dates; + split_dates.push_back(DatesT()); + const size_t batch_size = 30; + for(DateT date : dates) { + DatesT& last_batch = *split_dates.rbegin(); + last_batch.push_back(date); + if(last_batch.size() > batch_size) { + split_dates.push_back(DatesT()); + } + } + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); - ConnectorPtr->update_all( - set( - c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE, - c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()), - where( - c(&TableFileSchema::table_id_) == table_id and - in(&TableFileSchema::date_, dates))); + for(auto& batch_dates : split_dates) { + if(batch_dates.empty()) { + continue; + } + + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int)TableFileSchema::TO_DELETE, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()), + where( + c(&TableFileSchema::table_id_) == table_id and + in(&TableFileSchema::date_, batch_dates))); + } ENGINE_LOG_DEBUG << "Successfully drop partitions, table id = " << table_schema.table_id_; } catch (std::exception &e) { @@ -673,7 +692,7 @@ SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, const std::vector &ids, - const DatesT &partition, + const DatesT &dates, DatePartionedTableFilesSchema &files) { files.clear(); server::MetricCollector metric; @@ -702,23 +721,54 @@ SqliteMetaImpl::FilesToSearch(const std::string &table_id, auto status = DescribeTable(table_schema); if (!status.ok()) { return status; } + //sqlite_orm has a bug, 'in' statement cannot handle too many elements + //so we split one query into multi-queries, this is a work-around!! 
+ std::vector<DatesT> split_dates; + split_dates.push_back(DatesT()); + const size_t batch_size = 30; + for(DateT date : dates) { + DatesT& last_batch = *split_dates.rbegin(); + last_batch.push_back(date); + if(last_batch.size() > batch_size) { + split_dates.push_back(DatesT()); + } + } + + //perform query decltype(ConnectorPtr->select(select_columns)) selected; - if (partition.empty() && ids.empty()) { + if (dates.empty() && ids.empty()) { auto filter = where(match_tableid and match_type); selected = ConnectorPtr->select(select_columns, filter); - } else if (partition.empty() && !ids.empty()) { + } else if (dates.empty() && !ids.empty()) { auto match_fileid = in(&TableFileSchema::id_, ids); auto filter = where(match_tableid and match_fileid and match_type); selected = ConnectorPtr->select(select_columns, filter); - } else if (!partition.empty() && ids.empty()) { - auto match_date = in(&TableFileSchema::date_, partition); - auto filter = where(match_tableid and match_date and match_type); - selected = ConnectorPtr->select(select_columns, filter); - } else if (!partition.empty() && !ids.empty()) { - auto match_fileid = in(&TableFileSchema::id_, ids); - auto match_date = in(&TableFileSchema::date_, partition); - auto filter = where(match_tableid and match_fileid and match_date and match_type); - selected = ConnectorPtr->select(select_columns, filter); + } else if (!dates.empty() && ids.empty()) { + for(auto& batch_dates : split_dates) { + if(batch_dates.empty()) { + continue; + } + auto match_date = in(&TableFileSchema::date_, batch_dates); + auto filter = where(match_tableid and match_date and match_type); + auto batch_selected = ConnectorPtr->select(select_columns, filter); + for (auto &file : batch_selected) { /* append this batch's rows; looping over 'selected' itself would discard every batch */ + selected.push_back(file); + } + } + + } else if (!dates.empty() && !ids.empty()) { + for(auto& batch_dates : split_dates) { + if(batch_dates.empty()) { + continue; + } + auto match_fileid = in(&TableFileSchema::id_, ids); + auto match_date = in(&TableFileSchema::date_, 
batch_dates); + auto filter = where(match_tableid and match_fileid and match_date and match_type); + auto batch_selected = ConnectorPtr->select(select_columns, filter); + for (auto &file : batch_selected) { /* append this batch's rows; looping over 'selected' itself would discard every batch */ + selected.push_back(file); + } + } } Status ret; diff --git a/core/src/db/meta/SqliteMetaImpl.h b/core/src/db/meta/SqliteMetaImpl.h index dc132c41ec..c8b99b358a 100644 --- a/core/src/db/meta/SqliteMetaImpl.h +++ b/core/src/db/meta/SqliteMetaImpl.h @@ -89,7 +89,7 @@ class SqliteMetaImpl : public Meta { UpdateTableFiles(TableFilesSchema& files) override; Status - FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& partition, + FilesToSearch(const std::string& table_id, const std::vector& ids, const DatesT& dates, DatePartionedTableFilesSchema& files) override; Status From afa828959f2536f8a18f953b78a318c50649db84 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 14 Oct 2019 16:41:58 +0800 Subject: [PATCH 6/6] fix jenkins build Former-commit-id: cffce6458aedadb4328fa6dc1a172de1c85a5a1b --- ci/jenkinsfile/packaged_milvus.groovy | 2 +- ci/jenkinsfile/packaged_milvus_no_ut.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkinsfile/packaged_milvus.groovy b/ci/jenkinsfile/packaged_milvus.groovy index 407b100589..1d30e21910 100644 --- a/ci/jenkinsfile/packaged_milvus.groovy +++ b/ci/jenkinsfile/packaged_milvus.groovy @@ -1,7 +1,7 @@ container('milvus-build-env') { timeout(time: 5, unit: 'MINUTES') { dir ("milvus_engine") { - dir ("cpp") { + dir ("core") { gitlabCommitStatus(name: 'Packaged Engine') { if (fileExists('milvus')) { try { diff --git a/ci/jenkinsfile/packaged_milvus_no_ut.groovy b/ci/jenkinsfile/packaged_milvus_no_ut.groovy index b6c31540a1..bc68be374a 100644 --- a/ci/jenkinsfile/packaged_milvus_no_ut.groovy +++ b/ci/jenkinsfile/packaged_milvus_no_ut.groovy @@ -1,7 +1,7 @@ container('milvus-build-env') { timeout(time: 5, unit: 'MINUTES') { dir ("milvus_engine") { - dir ("cpp") { + dir ("core") { 
gitlabCommitStatus(name: 'Packaged Engine') { if (fileExists('milvus')) { try {