From 74ac528095893c6872d04361a824d7f049ed8c43 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Tue, 14 Sep 2021 10:53:04 +0800 Subject: [PATCH] Improve segcore (#7851) Signed-off-by: yudong.cai --- internal/core/src/segcore/InsertRecord.cpp | 3 ++- internal/core/src/segcore/SegmentGrowingImpl.cpp | 4 ++-- internal/core/unittest/test_binary.cpp | 7 ++----- internal/core/unittest/test_utils/DataGen.h | 7 ++++--- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/internal/core/src/segcore/InsertRecord.cpp b/internal/core/src/segcore/InsertRecord.cpp index 460c1e6f4f..b26745acb7 100644 --- a/internal/core/src/segcore/InsertRecord.cpp +++ b/internal/core/src/segcore/InsertRecord.cpp @@ -13,7 +13,8 @@ namespace milvus::segcore { -InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk) : uids_(1), timestamps_(1) { +InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk) + : uids_(size_per_chunk), timestamps_(size_per_chunk) { for (auto& field : schema) { if (field.is_vector()) { if (field.get_data_type() == DataType::VECTOR_FLOAT) { diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index acb7f3d0af..f4de039617 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -140,7 +140,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin, } std::sort(ordering.begin(), ordering.end()); - // step 3: and convert row-base data to column base accordingly + // step 3: and convert row-based data to column-based data accordingly auto sizeof_infos = schema_->get_sizeof_infos(); std::vector offset_infos(schema_->size() + 1, 0); std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1); @@ -161,7 +161,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin, for (int fid = 0; fid < schema_->size(); ++fid) { auto len = sizeof_infos[fid]; auto offset = offset_infos[fid]; - auto src = raw_data + offset + order_index * len_per_row; + auto src = raw_data + order_index * len_per_row + offset; auto dst = entities[fid].data() + index * len; memcpy(dst, src, len); } diff --git a/internal/core/unittest/test_binary.cpp b/internal/core/unittest/test_binary.cpp index 419668fcff..6d70f7ab49 100644 --- a/internal/core/unittest/test_binary.cpp +++ b/internal/core/unittest/test_binary.cpp @@ -18,14 +18,11 @@ using namespace milvus::segcore; TEST(Binary, Insert) { int64_t N = 100000; - int64_t num_queries = 10; - int64_t topK = 5; auto schema = std::make_shared(); schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard); schema->AddDebugField("age", DataType::INT32); auto dataset = DataGen(schema, N, 10); auto segment = CreateGrowingSegment(schema); - segment->PreInsert(N); - segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); - int i = 1 + 1; + auto offset = segment->PreInsert(N); + segment->Insert(offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); } diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index bf678e3ec2..95c9e6ed4d 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -30,7 +30,7 @@ using boost::algorithm::starts_with; namespace milvus::segcore { struct GeneratedData { - std::vector rows_; + std::vector rows_; std::vector> cols_; std::vector row_ids_; std::vector timestamps_; @@ -68,13 +68,14 @@ GeneratedData::generate_rows(int64_t N, SchemaPtr schema) { int64_t len_per_row = offset_infos.back(); assert(len_per_row == schema->get_total_sizeof()); - std::vector result(len_per_row * N); + // change column-based data to row-based data + std::vector result(len_per_row * N); for (int index = 0; index < N; ++index) { for (int fid = 0; fid < schema->size(); ++fid) { auto len = sizeof_infos[fid]; auto offset = offset_infos[fid]; auto src = cols_[fid].data() + index * len; - auto dst = result.data() + offset + index * len_per_row; + auto dst = result.data() + index * len_per_row + offset; memcpy(dst, src, len); } }