From fbe5531ba0d67fea3fb7a38502142e180761d97d Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Wed, 11 Mar 2020 12:06:15 +0800 Subject: [PATCH 1/5] Test Signed-off-by: JinHai-CN --- core/src/db/insert/MemTable.cpp | 38 ++++++++++++++++---------- sdk/examples/simple/src/ClientTest.cpp | 22 +++++++++------ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/core/src/db/insert/MemTable.cpp b/core/src/db/insert/MemTable.cpp index f0bf8c94d9..3eb43b3e20 100644 --- a/core/src/db/insert/MemTable.cpp +++ b/core/src/db/insert/MemTable.cpp @@ -179,7 +179,7 @@ MemTable::ApplyDeletes() { auto start_total = std::chrono::high_resolution_clock::now(); - auto start = std::chrono::high_resolution_clock::now(); +// auto start = std::chrono::high_resolution_clock::now(); std::vector file_types{meta::TableFileSchema::FILE_TYPE::RAW, meta::TableFileSchema::FILE_TYPE::TO_INDEX, meta::TableFileSchema::FILE_TYPE::BACKUP}; @@ -221,7 +221,7 @@ MemTable::ApplyDeletes() { OngoingFileChecker::GetInstance().MarkOngoingFiles(files_to_check); auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration diff = end - start; + std::chrono::duration diff = end - start_total; ENGINE_LOG_DEBUG << "Found " << ids_to_check_map.size() << " segment to apply deletes in " << diff.count() << " s"; meta::TableFilesSchema table_files_to_update; @@ -230,7 +230,7 @@ MemTable::ApplyDeletes() { auto& table_file = table_files[kv.first]; ENGINE_LOG_DEBUG << "Applying deletes in segment: " << table_file.segment_id_; - start = std::chrono::high_resolution_clock::now(); + auto time0 = std::chrono::high_resolution_clock::now(); std::string segment_dir; utils::GetParentPath(table_file.location_, segment_dir); @@ -274,16 +274,14 @@ MemTable::ApplyDeletes() { segment::DeletedDocsPtr deleted_docs = std::make_shared(); - end = std::chrono::high_resolution_clock::now(); - diff = end - start; + auto time1 = std::chrono::high_resolution_clock::now(); + diff = time1 - time0; ENGINE_LOG_DEBUG << "Loading uids and deleted docs took " << diff.count() << " s"; - start = std::chrono::high_resolution_clock::now(); - std::sort(ids_to_check.begin(), ids_to_check.end()); - end = std::chrono::high_resolution_clock::now(); - diff = end - start; + auto time2 = std::chrono::high_resolution_clock::now(); + diff = time2 - time1; ENGINE_LOG_DEBUG << "Sorting " << ids_to_check.size() << " ids took " << diff.count() << " s"; size_t delete_count = 0; @@ -324,11 +322,13 @@ MemTable::ApplyDeletes() { << find_diff.count() << " s in total"; ENGINE_LOG_DEBUG << "Setting deleted docs and bloom filter took " << set_diff.count() << " s in total"; + auto time3 = std::chrono::high_resolution_clock::now(); + for (auto i = 0; i < indexes.size(); ++i) { indexes[i]->SetBlacklist(blacklists[i]); } - start = std::chrono::high_resolution_clock::now(); +// start = std::chrono::high_resolution_clock::now(); segment::Segment tmp_segment; segment::SegmentWriter segment_writer(segment_dir); @@ -337,20 +337,20 @@ MemTable::ApplyDeletes() { break; } - end = std::chrono::high_resolution_clock::now(); - diff = end - start; + auto time4 = std::chrono::high_resolution_clock::now(); + diff = time4 - time3; ENGINE_LOG_DEBUG << "Appended " << deleted_docs->GetSize() << " offsets to deleted docs in segment: " << table_file.segment_id_ << " in " << diff.count() << " s"; - start = std::chrono::high_resolution_clock::now(); +// start = std::chrono::high_resolution_clock::now(); status = segment_writer.WriteBloomFilter(id_bloom_filter_ptr); if (!status.ok()) { break; } - end = std::chrono::high_resolution_clock::now(); - diff = end - start; + auto time5 = std::chrono::high_resolution_clock::now(); + diff = time5 - time4; ENGINE_LOG_DEBUG << "Updated bloom filter in segment: " << table_file.segment_id_ << " in " << diff.count() << " s"; @@ -362,8 +362,15 @@ MemTable::ApplyDeletes() { table_files_to_update.emplace_back(file); } } + auto time6 = std::chrono::high_resolution_clock::now(); + diff = time6 - time5; + ENGINE_LOG_DEBUG << "Update table file row count in vector of segment: " << table_file.segment_id_ << " in " << diff.count() + << " s"; } + auto time7 = std::chrono::high_resolution_clock::now(); + + status = meta_->UpdateTableFilesRowCount(table_files_to_update); if (!status.ok()) { @@ -375,6 +382,7 @@ MemTable::ApplyDeletes() { doc_ids_to_delete_.clear(); auto end_total = std::chrono::high_resolution_clock::now(); + ENGINE_LOG_DEBUG << "Update deletes to meta in table " << table_id_ << " in " << (end_total - time7).count() << " s"; std::chrono::duration diff_total = end_total - start_total; ENGINE_LOG_DEBUG << "Finished applying deletes in table " << table_id_ << " in " << diff_total.count() << " s"; diff --git a/sdk/examples/simple/src/ClientTest.cpp b/sdk/examples/simple/src/ClientTest.cpp index fb2e2b138d..06161166aa 100644 --- a/sdk/examples/simple/src/ClientTest.cpp +++ b/sdk/examples/simple/src/ClientTest.cpp @@ -31,7 +31,7 @@ constexpr int64_t NQ = 5; constexpr int64_t TOP_K = 10; constexpr int64_t NPROBE = 32; constexpr int64_t SEARCH_TARGET = 5000; // change this value, result is different -constexpr int64_t ADD_ENTITY_LOOP = 5; +constexpr int64_t ADD_ENTITY_LOOP = 50; constexpr milvus::IndexType INDEX_TYPE = milvus::IndexType::IVFSQ8; constexpr int32_t NLIST = 16384; @@ -245,15 +245,19 @@ ClientTest::Test() { Flush(collection_name); ShowCollectionInfo(collection_name); - GetEntityById(collection_name, search_id_array_[0]); - SearchEntities(collection_name, TOP_K, NPROBE); +// GetEntityById(collection_name, search_id_array_[0]); +// SearchEntities(collection_name, TOP_K, NPROBE); +// +// CreateIndex(collection_name, INDEX_TYPE, NLIST); +// ShowCollectionInfo(collection_name); +// +// PreloadCollection(collection_name); - CreateIndex(collection_name, INDEX_TYPE, NLIST); - ShowCollectionInfo(collection_name); - - PreloadCollection(collection_name); - - std::vector delete_ids = {search_id_array_[0], search_id_array_[1]}; + constexpr long vector_size = 2000000; + std::vector delete_ids(vector_size); // = {search_id_array_[0], search_id_array_[1]}; + for(long i= 0; i < vector_size; ++ i) { + delete_ids[i] = i; + } DeleteByIds(collection_name, delete_ids); SearchEntities(collection_name, TOP_K, NPROBE); // this line get two search error since we delete two entities From a5cb8f994ddebe3d1c34cef2dc3684517898c28a Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Wed, 11 Mar 2020 13:36:48 +0800 Subject: [PATCH 2/5] Update time print log Signed-off-by: JinHai-CN --- core/src/db/insert/MemTable.cpp | 44 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/core/src/db/insert/MemTable.cpp b/core/src/db/insert/MemTable.cpp index 3eb43b3e20..e4943ae48c 100644 --- a/core/src/db/insert/MemTable.cpp +++ b/core/src/db/insert/MemTable.cpp @@ -220,9 +220,9 @@ MemTable::ApplyDeletes() { OngoingFileChecker::GetInstance().MarkOngoingFiles(files_to_check); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration diff = end - start_total; - ENGINE_LOG_DEBUG << "Found " << ids_to_check_map.size() << " segment to apply deletes in " << diff.count() << " s"; + auto time0 = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff0 = time0 - start_total; + ENGINE_LOG_DEBUG << "Found " << ids_to_check_map.size() << " segment to apply deletes in " << diff0.count() << " s"; meta::TableFilesSchema table_files_to_update; @@ -230,7 +230,7 @@ MemTable::ApplyDeletes() { auto& table_file = table_files[kv.first]; ENGINE_LOG_DEBUG << "Applying deletes in segment: " << table_file.segment_id_; - auto time0 = std::chrono::high_resolution_clock::now(); + auto time1 = std::chrono::high_resolution_clock::now(); std::string segment_dir; utils::GetParentPath(table_file.location_, segment_dir); @@ -274,15 +274,15 @@ MemTable::ApplyDeletes() { segment::DeletedDocsPtr deleted_docs = std::make_shared(); - auto time1 = std::chrono::high_resolution_clock::now(); - diff = time1 - time0; - ENGINE_LOG_DEBUG << "Loading uids and deleted docs took " << diff.count() << " s"; + auto time2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff1 = time2 - time1; + ENGINE_LOG_DEBUG << "Loading uids and deleted docs took " << diff1.count() << " s"; std::sort(ids_to_check.begin(), ids_to_check.end()); - auto time2 = std::chrono::high_resolution_clock::now(); - diff = time2 - time1; - ENGINE_LOG_DEBUG << "Sorting " << ids_to_check.size() << " ids took " << diff.count() << " s"; + auto time3 = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff2 = time3 - time2; + ENGINE_LOG_DEBUG << "Sorting " << ids_to_check.size() << " ids took " << diff2.count() << " s"; size_t delete_count = 0; auto find_diff = std::chrono::duration::zero(); @@ -322,7 +322,7 @@ MemTable::ApplyDeletes() { << find_diff.count() << " s in total"; ENGINE_LOG_DEBUG << "Setting deleted docs and bloom filter took " << set_diff.count() << " s in total"; - auto time3 = std::chrono::high_resolution_clock::now(); + auto time4 = std::chrono::high_resolution_clock::now(); for (auto i = 0; i < indexes.size(); ++i) { indexes[i]->SetBlacklist(blacklists[i]); @@ -337,10 +337,10 @@ MemTable::ApplyDeletes() { break; } - auto time4 = std::chrono::high_resolution_clock::now(); - diff = time4 - time3; + auto time5 = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff4 = time5 - time4; ENGINE_LOG_DEBUG << "Appended " << deleted_docs->GetSize() - << " offsets to deleted docs in segment: " << table_file.segment_id_ << " in " << diff.count() + << " offsets to deleted docs in segment: " << table_file.segment_id_ << " in " << diff4.count() << " s"; // start = std::chrono::high_resolution_clock::now(); @@ -349,9 +349,9 @@ MemTable::ApplyDeletes() { if (!status.ok()) { break; } - auto time5 = std::chrono::high_resolution_clock::now(); - diff = time5 - time4; - ENGINE_LOG_DEBUG << "Updated bloom filter in segment: " << table_file.segment_id_ << " in " << diff.count() + auto time6 = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff5 = time6 - time5; + ENGINE_LOG_DEBUG << "Updated bloom filter in segment: " << table_file.segment_id_ << " in " << diff5.count() << " s"; // Update table file row count @@ -362,9 +362,10 @@ MemTable::ApplyDeletes() { table_files_to_update.emplace_back(file); } } - auto time6 = std::chrono::high_resolution_clock::now(); - diff = time6 - time5; - ENGINE_LOG_DEBUG << "Update table file row count in vector of segment: " << table_file.segment_id_ << " in " << diff.count() + auto time7 = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff6 = time7 - time6; + diff6 = time6 - time5; + ENGINE_LOG_DEBUG << "Update table file row count in vector of segment: " << table_file.segment_id_ << " in " << diff6.count() << " s"; } @@ -382,7 +383,8 @@ MemTable::ApplyDeletes() { doc_ids_to_delete_.clear(); auto end_total = std::chrono::high_resolution_clock::now(); - ENGINE_LOG_DEBUG << "Update deletes to meta in table " << table_id_ << " in " << (end_total - time7).count() << " s"; + std::chrono::duration diff7 = end_total - time7; + ENGINE_LOG_DEBUG << "Update deletes to meta in table " << table_id_ << " in " << diff7.count() << " s"; std::chrono::duration diff_total = end_total - start_total; ENGINE_LOG_DEBUG << "Finished applying deletes in table " << table_id_ << " in " << diff_total.count() << " s"; From aa0b416ef546e5cdf59d9df0c37549eba02d3a66 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Wed, 11 Mar 2020 14:03:09 +0800 Subject: [PATCH 3/5] Update code Signed-off-by: JinHai-CN --- sdk/examples/simple/src/ClientTest.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/sdk/examples/simple/src/ClientTest.cpp b/sdk/examples/simple/src/ClientTest.cpp index 06161166aa..fb2e2b138d 100644 --- a/sdk/examples/simple/src/ClientTest.cpp +++ b/sdk/examples/simple/src/ClientTest.cpp @@ -31,7 +31,7 @@ constexpr int64_t NQ = 5; constexpr int64_t TOP_K = 10; constexpr int64_t NPROBE = 32; constexpr int64_t SEARCH_TARGET = 5000; // change this value, result is different -constexpr int64_t ADD_ENTITY_LOOP = 50; +constexpr int64_t ADD_ENTITY_LOOP = 5; constexpr milvus::IndexType INDEX_TYPE = milvus::IndexType::IVFSQ8; constexpr int32_t NLIST = 16384; @@ -245,19 +245,15 @@ ClientTest::Test() { Flush(collection_name); ShowCollectionInfo(collection_name); -// GetEntityById(collection_name, search_id_array_[0]); -// SearchEntities(collection_name, TOP_K, NPROBE); -// -// CreateIndex(collection_name, INDEX_TYPE, NLIST); -// ShowCollectionInfo(collection_name); -// -// PreloadCollection(collection_name); + GetEntityById(collection_name, search_id_array_[0]); + SearchEntities(collection_name, TOP_K, NPROBE); - constexpr long vector_size = 2000000; - std::vector delete_ids(vector_size); // = {search_id_array_[0], search_id_array_[1]}; - for(long i= 0; i < vector_size; ++ i) { - delete_ids[i] = i; - } + CreateIndex(collection_name, INDEX_TYPE, NLIST); + ShowCollectionInfo(collection_name); + + PreloadCollection(collection_name); + + std::vector delete_ids = {search_id_array_[0], search_id_array_[1]}; DeleteByIds(collection_name, delete_ids); SearchEntities(collection_name, TOP_K, NPROBE); // this line get two search error since we delete two entities From 664a1ab4e1bc30edf535eb658cb728030a7bcb51 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Wed, 11 Mar 2020 14:06:47 +0800 Subject: [PATCH 4/5] Fix format Signed-off-by: JinHai-CN --- core/src/db/insert/MemTable.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/db/insert/MemTable.cpp b/core/src/db/insert/MemTable.cpp index e4943ae48c..ad4a613f5d 100644 --- a/core/src/db/insert/MemTable.cpp +++ b/core/src/db/insert/MemTable.cpp @@ -179,7 +179,7 @@ MemTable::ApplyDeletes() { auto start_total = std::chrono::high_resolution_clock::now(); -// auto start = std::chrono::high_resolution_clock::now(); + // auto start = std::chrono::high_resolution_clock::now(); std::vector file_types{meta::TableFileSchema::FILE_TYPE::RAW, meta::TableFileSchema::FILE_TYPE::TO_INDEX, meta::TableFileSchema::FILE_TYPE::BACKUP}; @@ -328,7 +328,7 @@ MemTable::ApplyDeletes() { indexes[i]->SetBlacklist(blacklists[i]); } -// start = std::chrono::high_resolution_clock::now(); + // start = std::chrono::high_resolution_clock::now(); segment::Segment tmp_segment; segment::SegmentWriter segment_writer(segment_dir); @@ -343,7 +343,7 @@ MemTable::ApplyDeletes() { << " offsets to deleted docs in segment: " << table_file.segment_id_ << " in " << diff4.count() << " s"; -// start = std::chrono::high_resolution_clock::now(); + // start = std::chrono::high_resolution_clock::now(); status = segment_writer.WriteBloomFilter(id_bloom_filter_ptr); if (!status.ok()) { @@ -365,13 +365,12 @@ MemTable::ApplyDeletes() { auto time7 = std::chrono::high_resolution_clock::now(); std::chrono::duration diff6 = time7 - time6; diff6 = time6 - time5; - ENGINE_LOG_DEBUG << "Update table file row count in vector of segment: " << table_file.segment_id_ << " in " << diff6.count() - << " s"; + ENGINE_LOG_DEBUG << "Update table file row count in vector of segment: " << table_file.segment_id_ << " in " + << diff6.count() << " s"; } auto time7 = std::chrono::high_resolution_clock::now(); - status = meta_->UpdateTableFilesRowCount(table_files_to_update); if (!status.ok()) { From 2f815a7d833b382c63bccc0e561433d94ae23426 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Wed, 11 Mar 2020 15:02:26 +0800 Subject: [PATCH 5/5] Update Changelog Signed-off-by: JinHai-CN --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6b4119fda..99da3355a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,7 @@ Please mark all change in change log and use the issue from GitHub - \#1507 set_config for insert_buffer_size is wrong - \#1510 Add set interfaces for WAL configurations - \#1511 Fix big integer cannot pass to server correctly -- \#1517 result is not correct when search vectors in multi partition, index type is RNSG +- \#1517 Result is not correct when search vectors in multi partition, index type is RNSG - \#1518 Table count did not match after deleting vectors and compact - \#1521 Make cache_insert_data take effect in-service - \#1525 Add setter API for config preload_table