diff --git a/CHANGELOG.md b/CHANGELOG.md index be8b4bdff6..6e0a7ebc02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Please mark all changes in change log and use the issue from GitHub - \#4200 The scalar field information of search result is wrong after some entities deleted - \#4205 Restful API error: When the specified partition is retrieved, the partition_tag is not valid - \#4226 The Prometheus performance metrics sent by Milvus 0.11.0 is not right +- \#4242 Fix query hang up at unittest case ## Feature - \#4163 Update C++ sdk search interface diff --git a/ci/scripts/run_unittest.sh b/ci/scripts/run_unittest.sh index 375f54eb92..6899e58366 100755 --- a/ci/scripts/run_unittest.sh +++ b/ci/scripts/run_unittest.sh @@ -98,6 +98,10 @@ fi # run unittest for test in `ls ${UNITTEST_DIR}`; do + if [[ ${test} == *".log" ]] || [[ ${test} == *".info" ]]; then + echo "skip file ${test}" + continue + fi echo $test " running..." # run unittest ${UNITTEST_DIR}/${test} diff --git a/core/.gitignore b/core/.gitignore index b0514aba6e..5476dabe52 100644 --- a/core/.gitignore +++ b/core/.gitignore @@ -5,10 +5,7 @@ conf/log_config.conf src/config.h src/version.h lcov_out/ -base.info -output.info -output_new.info -server.info +*.info *.pyc src/grpc/python_gen.h src/grpc/python/ diff --git a/core/coverage.sh b/core/coverage.sh index 507d29ed37..c8718c2c39 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -27,6 +27,10 @@ fi # run unittest for test in `ls ${UNITTEST_DIR}`; do + if [[ ${test} == *".log" ]] || [[ ${test} == *".info" ]]; then + echo "skip file ${test}" + continue + fi echo $test "running..." # run unittest ./${UNITTEST_DIR}/${test} diff --git a/core/src/db/CMakeLists.txt b/core/src/db/CMakeLists.txt index fab34cf69f..f446ce778c 100644 --- a/core/src/db/CMakeLists.txt +++ b/core/src/db/CMakeLists.txt @@ -28,6 +28,7 @@ set( DB_META_FILES ${DB_META_MAIN_FILES} aux_source_directory( ${MILVUS_ENGINE_SRC}/index/archive WRAPPER_FILES ) +aux_source_directory(${MILVUS_ENGINE_SRC}/server/context SERVER_CONTEXT_FILES) aux_source_directory( ${MILVUS_THIRDPARTY_SRC}/easyloggingpp THIRDPARTY_EASYLOGGINGPP_FILES ) aux_source_directory( ${MILVUS_THIRDPARTY_SRC}/nlohmann THIRDPARTY_NLOHMANN_FILES ) @@ -46,6 +47,7 @@ set( ENGINE_FILES ${DB_MAIN_FILES} ${DB_TRANSCRIPT_FILES} ${THIRDPARTY_FILES} ${WRAPPER_FILES} + ${SERVER_CONTEXT_FILES} ) @@ -123,7 +125,6 @@ endif () # **************************** Link Libraries with milvus engine **************************** target_link_libraries( milvus_engine PUBLIC knowhere - server segment cache codecs diff --git a/core/src/db/Types.h b/core/src/db/Types.h index 7ffe58a62d..847cbeada5 100644 --- a/core/src/db/Types.h +++ b/core/src/db/Types.h @@ -160,7 +160,7 @@ struct AttrsData { /////////////////////////////////////////////////////////////////////////////////////////////////// struct QueryResult { - uint64_t row_num_; + uint64_t row_num_ = 0; engine::ResultIds result_ids_; engine::ResultDistances result_distances_; engine::DataChunkPtr data_chunk_; diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 57adedf682..ff4d62c204 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -45,8 +45,7 @@ #include "knowhere/index/vector_index/helpers/Cloner.h" #endif -namespace milvus { -namespace engine { +namespace milvus::engine { ExecutionEngineImpl::ExecutionEngineImpl(const std::string& dir_root, const SegmentVisitorPtr& segment_visitor) : gpu_enable_(config.gpu.enable()) { @@ -292,33 +291,17 @@ ExecutionEngineImpl::Search(ExecutionEngineContext& context) { segment::DeletedDocsPtr deleted_docs_ptr; segment_reader_->LoadDeletedDocs(deleted_docs_ptr); if (deleted_docs_ptr) { - faiss::ConcurrentBitsetPtr del_bitset = std::make_shared(entity_count_); auto& del_docs = deleted_docs_ptr->GetDeletedDocs(); - for (auto& offset : del_docs) { - del_bitset->set(offset); - } - if (bitset != nullptr) { + auto del_bitset = deleted_docs_ptr->GetBlacklist(); + if (bitset != nullptr && del_bitset != nullptr) { filter_list = (*bitset) | (*del_bitset); } else { - filter_list = del_bitset; + filter_list = (bitset != nullptr) ? bitset : del_bitset; } } else { filter_list = bitset; } - // TODO(yhz): The black list is obtain from deleted docs above, - // there is no need to get blacklist from index. - // list = vec_index->GetBlacklist(); - // if (list != nullptr) { - // if (filter_list != nullptr) { - // list = (*list) | (*filter_list); - // } - // } else { - // if (filter_list != nullptr) { - // list = filter_list; - // } - // } - auto& vector_param = context.query_ptr_->vectors.at(vector_placeholder); if (!vector_param->query_vector.float_data.empty()) { vector_param->nq = vector_param->query_vector.float_data.size() / vec_index->Dim(); @@ -790,18 +773,15 @@ ExecutionEngineImpl::BuildKnowhereIndex(const std::string& field_name, const Col LOG_ENGINE_DEBUG_ << "Index config: " << conf.dump(); std::shared_ptr> uids; - ConCurrentBitsetPtr blacklist; knowhere::DatasetPtr dataset; if (from_index) { dataset = knowhere::GenDatasetWithIds(row_count, dimension, from_index->GetRawVectors(), from_index->GetRawIds()); uids = from_index->GetUids(); - blacklist = from_index->GetBlacklist(); } else if (bin_from_index) { dataset = knowhere::GenDatasetWithIds(row_count, dimension, bin_from_index->GetRawVectors(), bin_from_index->GetRawIds()); uids = bin_from_index->GetUids(); - blacklist = bin_from_index->GetBlacklist(); } try { @@ -820,10 +800,8 @@ ExecutionEngineImpl::BuildKnowhereIndex(const std::string& field_name, const Col #endif new_index->SetUids(uids); - new_index->SetBlacklist(blacklist); return Status::OK(); } -} // namespace engine -} // namespace milvus +} // namespace milvus::engine diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h index 95d2808ed6..b990e45be4 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/VecIndex.h @@ -84,16 +84,6 @@ class VecIndex : public Index { } #endif - faiss::ConcurrentBitsetPtr - GetBlacklist() { - return bitset_; - } - - void - SetBlacklist(faiss::ConcurrentBitsetPtr bitset_ptr) { - bitset_ = std::move(bitset_ptr); - } - std::shared_ptr> GetUids() const { return uids_; @@ -104,11 +94,6 @@ class VecIndex : public Index { uids_ = uids; } - size_t - BlacklistSize() { - return bitset_ ? bitset_->size() * sizeof(uint8_t) : 0; - } - size_t UidsSize() { return uids_ ? uids_->size() * sizeof(IDType) : 0; @@ -133,7 +118,7 @@ class VecIndex : public Index { int64_t Size() override { - return BlacklistSize() + UidsSize() + IndexSize(); + return UidsSize() + IndexSize(); } protected: @@ -141,9 +126,6 @@ class VecIndex : public Index { IndexMode index_mode_ = IndexMode::MODE_CPU; std::shared_ptr> uids_ = nullptr; int64_t index_size_ = -1; - - private: - faiss::ConcurrentBitsetPtr bitset_ = nullptr; }; using VecIndexPtr = std::shared_ptr; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp b/core/src/index/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp index e333eea8e6..cbc1a98304 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp @@ -28,7 +28,6 @@ namespace cloner { void CopyIndexData(const VecIndexPtr& dst_index, const VecIndexPtr& src_index) { dst_index->SetUids(src_index->GetUids()); - dst_index->SetBlacklist(src_index->GetBlacklist()); dst_index->SetIndexSize(src_index->IndexSize()); } diff --git a/core/src/index/unittest/test_annoy.cpp b/core/src/index/unittest/test_annoy.cpp index a2c90a3e9a..f0caa14d8a 100644 --- a/core/src/index/unittest/test_annoy.cpp +++ b/core/src/index/unittest/test_annoy.cpp @@ -294,8 +294,7 @@ main() { printf("----------------search xq with delete------------\n"); { // search xq with delete - index.SetBlacklist(bitset); - auto res = index.Query(query_dataset, query_conf); + auto res = index.Query(query_dataset, query_conf, bitset); auto I = res->Get(milvus::knowhere::meta::IDS); printf("I=\n"); diff --git a/core/src/index/unittest/test_hnsw.cpp b/core/src/index/unittest/test_hnsw.cpp index d5441e3dbc..525a37dece 100644 --- a/core/src/index/unittest/test_hnsw.cpp +++ b/core/src/index/unittest/test_hnsw.cpp @@ -271,8 +271,7 @@ main() { printf("----------------search xq with delete------------\n"); { // search xq with delete - index.SetBlacklist(bitset); - auto res = index.Query(query_dataset, query_conf); + auto res = index.Query(query_dataset, query_conf, bitset); auto I = res->Get(milvus::knowhere::meta::IDS); printf("I=\n"); diff --git a/core/src/metrics/Metrics.h b/core/src/metrics/Metrics.h index 008e3e0901..e122c89aba 100644 --- a/core/src/metrics/Metrics.h +++ b/core/src/metrics/Metrics.h @@ -11,8 +11,8 @@ #pragma once -#include "MetricBase.h" #include "db/Types.h" +#include "metrics/MetricBase.h" namespace milvus { namespace server { @@ -87,7 +87,7 @@ class CollectQueryMetrics : CollectMetricsBase { explicit CollectQueryMetrics(size_t nq) : nq_(nq) { } - ~CollectQueryMetrics() { + ~CollectQueryMetrics() override { if (nq_ > 0) { auto total_time = TimeFromBegine(); for (size_t i = 0; i < nq_; ++i) { diff --git a/core/src/query/GeneralQuery.h b/core/src/query/GeneralQuery.h index 01e985141e..1e777a7b41 100644 --- a/core/src/query/GeneralQuery.h +++ b/core/src/query/GeneralQuery.h @@ -70,7 +70,7 @@ struct RangeQuery { using RangeQueryPtr = std::shared_ptr; struct VectorRecord { - size_t vector_count; + size_t vector_count = 0; std::vector float_data; std::vector binary_data; }; @@ -78,10 +78,10 @@ struct VectorRecord { struct VectorQuery { std::string field_name; milvus::json extra_params = {}; - int64_t topk; - int64_t nq; + int64_t topk = 0; + int64_t nq = 0; std::string metric_type = ""; - float boost; + float boost = 0.0f; VectorRecord query_vector; }; using VectorQueryPtr = std::shared_ptr; diff --git a/core/src/segment/DeletedDocs.cpp b/core/src/segment/DeletedDocs.cpp index b085ed1da5..226cf2fd1a 100644 --- a/core/src/segment/DeletedDocs.cpp +++ b/core/src/segment/DeletedDocs.cpp @@ -38,6 +38,20 @@ DeletedDocs::GetDeletedDocs() const { // DeletedDocs::GetName() const { // return name_; //} +const faiss::ConcurrentBitsetPtr +DeletedDocs::GetBlacklist() const { + return bitset_; +} + +void +DeletedDocs::GenBlacklist(size_t size) { + if (size > 0) { + bitset_ = std::make_shared(size); + for (auto& offset : deleted_doc_offsets_) { + bitset_->set(offset); + } + } +} size_t DeletedDocs::GetCount() const { diff --git a/core/src/segment/DeletedDocs.h b/core/src/segment/DeletedDocs.h index 8335f08743..6367bdb6de 100644 --- a/core/src/segment/DeletedDocs.h +++ b/core/src/segment/DeletedDocs.h @@ -23,8 +23,7 @@ #include "cache/DataObj.h" #include "db/Types.h" -namespace milvus { -namespace segment { +namespace milvus::segment { class DeletedDocs : public cache::DataObj { public: @@ -42,6 +41,12 @@ class DeletedDocs : public cache::DataObj { // const std::string& // GetName() const; + const faiss::ConcurrentBitsetPtr + GetBlacklist() const; + + void + GenBlacklist(size_t size); + size_t GetCount() const; @@ -62,11 +67,10 @@ class DeletedDocs : public cache::DataObj { private: std::vector deleted_doc_offsets_; - // faiss::ConcurrentBitsetPtr bitset_; + faiss::ConcurrentBitsetPtr bitset_; // const std::string name_ = "deleted_docs"; }; using DeletedDocsPtr = std::shared_ptr; -} // namespace segment -} // namespace milvus +} // namespace milvus::segment diff --git a/core/src/segment/SegmentReader.cpp b/core/src/segment/SegmentReader.cpp index aa02612b3d..ddbf3b4ee7 100644 --- a/core/src/segment/SegmentReader.cpp +++ b/core/src/segment/SegmentReader.cpp @@ -161,6 +161,10 @@ SegmentReader::LoadField(const std::string& field_name, engine::BinaryDataPtr& r } auto raw_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_RAW); + if (raw_visitor->GetFile() == nullptr) { + std::string emsg = "File of field " + field_name + " is not found"; + return Status(DB_FILE_NOT_FOUND, emsg); + } std::string file_path = engine::snapshot::GetResPath(dir_collections_, raw_visitor->GetFile()); @@ -390,20 +394,7 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex return Status(DB_ERROR, "Field is not vector type"); } - // load deleted doc int64_t row_count = GetRowCount(); - faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = nullptr; - segment::DeletedDocsPtr deleted_docs_ptr; - LoadDeletedDocs(deleted_docs_ptr); - if (deleted_docs_ptr != nullptr) { - auto& deleted_docs = deleted_docs_ptr->GetDeletedDocs(); - if (!deleted_docs.empty()) { - concurrent_bitset_ptr = std::make_shared(row_count); - for (auto& offset : deleted_docs) { - concurrent_bitset_ptr->set(offset); - } - } - } recorder.RecordSection("prepare"); knowhere::BinarySet index_data; @@ -448,7 +439,6 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex index_ptr->Train(knowhere::DatasetPtr(), conf); index_ptr->AddWithoutIds(dataset, conf); index_ptr->SetUids(uids_ptr); - index_ptr->SetBlacklist(concurrent_bitset_ptr); segment_ptr_->SetVectorIndex(field_name, index_ptr); cache::CpuCacheMgr::GetInstance().InsertItem(temp_index_path, index_ptr); @@ -508,7 +498,6 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex STATUS_CHECK(LoadUids(*uids_ptr)); index_ptr->SetUids(uids_ptr); - index_ptr->SetBlacklist(concurrent_bitset_ptr); segment_ptr_->SetVectorIndex(field_name, index_ptr); cache::CpuCacheMgr::GetInstance().InsertItem(index_file_path, index_ptr); // put into cache @@ -679,6 +668,11 @@ SegmentReader::LoadDeletedDocs(segment::DeletedDocsPtr& deleted_docs_ptr) { if (data_obj == nullptr) { auto& ss_codec = codec::Codec::instance(); STATUS_CHECK(ss_codec.GetDeletedDocsFormat()->Read(fs_ptr_, file_path, deleted_docs_ptr)); + auto id = segment_visitor_->GetSegment()->GetID(); + auto sc = segment_visitor_->GetSnapshot()->GetSegmentCommitBySegmentId(id); + if (sc) { + deleted_docs_ptr->GenBlacklist(sc->GetRowCount()); + } cache::CpuCacheMgr::GetInstance().InsertItem(file_path, deleted_docs_ptr); // put into cache } else { deleted_docs_ptr = std::static_pointer_cast(data_obj); diff --git a/core/src/storage/CMakeLists.txt b/core/src/storage/CMakeLists.txt index 4397cac1a6..861c3d75a5 100644 --- a/core/src/storage/CMakeLists.txt +++ b/core/src/storage/CMakeLists.txt @@ -21,7 +21,7 @@ set( STORAGE_FILES ${STORAGE_MAIN_FILES} add_library( storage STATIC ) target_sources( storage PRIVATE ${STORAGE_FILES} ) -set ( LINK_LIBRARY fiu log ) +set ( LINK_LIBRARY stdc++fs fiu log ) if ( MILVUS_WITH_AWS ) list( APPEND LINK_LIBRARY diff --git a/core/src/utils/Error.h b/core/src/utils/Error.h index dc503acc37..fa46f74c64 100644 --- a/core/src/utils/Error.h +++ b/core/src/utils/Error.h @@ -112,6 +112,7 @@ constexpr ErrorCode DB_BLOOM_FILTER_ERROR = ToDbErrorCode(9); constexpr ErrorCode DB_PARTITION_NOT_FOUND = ToDbErrorCode(10); constexpr ErrorCode DB_OUT_OF_STORAGE = ToDbErrorCode(11); constexpr ErrorCode DB_META_QUERY_FAILED = ToDbErrorCode(12); +constexpr ErrorCode DB_FILE_NOT_FOUND = ToDbErrorCode(13); // knowhere error code constexpr ErrorCode KNOWHERE_ERROR = ToKnowhereErrorCode(1); diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index d6fe074e05..39c9ae72b8 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -52,7 +52,7 @@ set( UNITTEST_LIBS sqlite gmock gtest_main gmock_main - libstdc++fs.a + stdc++fs pthread gfortran opentracing::opentracing diff --git a/core/unittest/db/CMakeLists.txt b/core/unittest/db/CMakeLists.txt index 8b222850b5..33bc75702d 100644 --- a/core/unittest/db/CMakeLists.txt +++ b/core/unittest/db/CMakeLists.txt @@ -24,16 +24,9 @@ set( TEST_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utils.cpp add_executable( test_db ${SCHEDULER_FILES} ${TEST_FILES} - # ${grpc_server_files} - # ${grpc_service_files} - # ${web_server_files} - # ${server_delivery_files} - # ${server_files} - # ${server_init_files} ) target_link_libraries( test_db ${UNITTEST_LIBS} - server milvus_engine metrics value diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index 4e8d852918..ebc455d8d0 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -13,12 +13,12 @@ #include #include -#include #include #include #include #include +#include "cache/CpuCacheMgr.h" #include "db/SnapshotUtils.h" #include "db/SnapshotVisitor.h" #include "db/merge/MergeAdaptiveStrategy.h" @@ -233,6 +233,7 @@ BuildQueryPtr(const std::string& collection_name, int64_t n, int64_t topk, std:: for (int64_t j = 0; j < COLLECTION_DIM; j++) vector_record.float_data[COLLECTION_DIM * i + j] = drand48(); vector_record.float_data[COLLECTION_DIM * i] += i / 2000.; } + vector_record.vector_count = n; vector_query->query_vector = vector_record; vector_query->metric_type = "L2"; vector_query->extra_params = {{"nprobe", 1024}}; diff --git a/core/unittest/storage/CMakeLists.txt b/core/unittest/storage/CMakeLists.txt index 12bd72a7be..58c17d0c98 100644 --- a/core/unittest/storage/CMakeLists.txt +++ b/core/unittest/storage/CMakeLists.txt @@ -27,13 +27,9 @@ add_executable(test_storage target_link_libraries(test_storage ${UNITTEST_LIBS} stdc++ - server - milvus_engine - metrics + storage value utils - tracing - query log )