mirror of https://github.com/milvus-io/milvus.git

Add sealedSegment (go&c) unittest, fix growingSegment field id check

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>

branch: pull/4973/head^2
parent: 067c30c422
commit: bff208d78c
@@ -319,14 +319,14 @@ Parser::ParseItemList(const Json& body) {
     std::vector<ExprPtr> results;
     if (body.is_object()) {
         // only one item;
-        auto new_entry = ParseAnyNode(body);
-        results.emplace_back(std::move(new_entry));
+        auto new_expr = ParseAnyNode(body);
+        results.emplace_back(std::move(new_expr));
     } else {
         // item array
         Assert(body.is_array());
         for (auto& item : body) {
-            auto new_entry = ParseAnyNode(item);
-            results.emplace_back(std::move(new_entry));
+            auto new_expr = ParseAnyNode(item);
+            results.emplace_back(std::move(new_expr));
         }
     }
     auto old_size = results.size();
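Note: the rename above (new_entry to new_expr) does not change behavior, but the dispatch it sits in is easy to miss in diff form: ParseItemList accepts either one JSON object or an array of them. A minimal sketch of that shape handling, assuming an nlohmann-style Json; parse_one is a hypothetical stand-in for ParseAnyNode.

    #include <vector>
    #include <nlohmann/json.hpp>

    using Json = nlohmann::json;

    int parse_one(const Json&) { return 0; }  // stand-in for ParseAnyNode

    std::vector<int>
    parse_item_list(const Json& body) {
        std::vector<int> results;
        if (body.is_object()) {
            // only one item
            results.emplace_back(parse_one(body));
        } else {
            // item array
            for (auto& item : body) {
                results.emplace_back(parse_one(item));
            }
        }
        return results;
    }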
@@ -24,7 +24,7 @@ SubQueryResult
 BinarySearchBruteForceFast(MetricType metric_type,
                            int64_t dim,
                            const uint8_t* binary_chunk,
-                           int64_t chunk_size,
+                           int64_t size_per_chunk,
                            int64_t topk,
                            int64_t num_queries,
                            const uint8_t* query_data,
@@ -34,7 +34,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
     idx_t* result_labels = sub_result.get_labels();

     int64_t code_size = dim / 8;
-    const idx_t block_size = chunk_size;
+    const idx_t block_size = size_per_chunk;
     bool use_heap = true;

     if (metric_type == faiss::METRIC_Jaccard || metric_type == faiss::METRIC_Tanimoto) {
@@ -50,7 +50,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
                                                result_labels + query_base_index * topk, D + query_base_index * topk};

             binary_distence_knn_hc(metric_type, &res, query_data + query_base_index * code_size, binary_chunk,
-                                   chunk_size, code_size,
+                                   size_per_chunk, code_size,
                                    /* ordered = */ true, bitset);
         }
         if (metric_type == faiss::METRIC_Tanimoto) {
@@ -67,7 +67,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
             }

             // only match ids will be chosed, not to use heap
-            binary_distence_knn_mc(metric_type, query_data + s * code_size, binary_chunk, nn, chunk_size, topk,
+            binary_distence_knn_mc(metric_type, query_data + s * code_size, binary_chunk, nn, size_per_chunk, topk,
                                    code_size, D + s * topk, result_labels + s * topk, bitset);
         }
     } else if (metric_type == faiss::METRIC_Hamming) {
@@ -82,10 +82,10 @@ BinarySearchBruteForceFast(MetricType metric_type,
             faiss::int_maxheap_array_t res = {size_t(nn), size_t(topk), result_labels + s * topk,
                                               int_distances.data() + s * topk};

-            hammings_knn_hc(&res, query_data + s * code_size, binary_chunk, chunk_size, code_size,
+            hammings_knn_hc(&res, query_data + s * code_size, binary_chunk, size_per_chunk, code_size,
                             /* ordered = */ true, bitset);
         } else {
-            hammings_knn_mc(query_data + s * code_size, binary_chunk, nn, chunk_size, topk, code_size,
+            hammings_knn_mc(query_data + s * code_size, binary_chunk, nn, size_per_chunk, topk, code_size,
                             int_distances.data() + s * topk, result_labels + s * topk, bitset);
         }
     }
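Note: after the Jaccard branch above, a METRIC_Tanimoto request post-processes the same distances (the conversion loop itself is elided from this hunk). The usual relation, assumed here rather than shown in the diff, is that Tanimoto distance is -log2 of the Jaccard similarity:

    #include <cmath>

    // Assumed conversion (not visible in this hunk): Jaccard distance d_j has
    // similarity 1 - d_j, and Tanimoto distance is -log2 of that similarity.
    float
    tanimoto_from_jaccard(float jaccard_distance) {
        return -std::log2(1.0f - jaccard_distance);
    }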
@@ -101,7 +101,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
 SubQueryResult
 FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
                       const float* chunk_data,
-                      int64_t chunk_size,
+                      int64_t size_per_chunk,
                       const faiss::BitsetView& bitset) {
     auto metric_type = query_dataset.metric_type;
     auto num_queries = query_dataset.num_queries;
@@ -111,11 +111,11 @@ FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,

     if (metric_type == MetricType::METRIC_L2) {
         faiss::float_maxheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_labels(), sub_qr.get_values()};
-        faiss::knn_L2sqr(query_dataset.query_data, chunk_data, dim, num_queries, chunk_size, &buf, bitset);
+        faiss::knn_L2sqr(query_dataset.query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset);
         return sub_qr;
     } else {
         faiss::float_minheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_labels(), sub_qr.get_values()};
-        faiss::knn_inner_product(query_dataset.query_data, chunk_data, dim, num_queries, chunk_size, &buf, bitset);
+        faiss::knn_inner_product(query_dataset.query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset);
         return sub_qr;
     }
 }
@@ -123,10 +123,10 @@ FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
 SubQueryResult
 BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
                        const uint8_t* binary_chunk,
-                       int64_t chunk_size,
+                       int64_t size_per_chunk,
                        const faiss::BitsetView& bitset) {
     // TODO: refactor the internal function
-    return BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.dim, binary_chunk, chunk_size,
+    return BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.dim, binary_chunk, size_per_chunk,
                                       query_dataset.topk, query_dataset.num_queries, query_dataset.query_data, bitset);
 }
 }  // namespace milvus::query
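Note: one detail worth spelling out in FloatSearchBruteForce above: METRIC_L2 fills a float_maxheap_array_t while inner product fills a float_minheap_array_t. For L2 the best hits have the smallest distance, so a max-heap of size topk keeps the current worst candidate on top for cheap eviction; for inner product larger scores are better, so the roles flip. A standalone sketch of the L2 case:

    #include <cstddef>
    #include <queue>
    #include <vector>

    // Keep the topk smallest distances; top() is always the worst kept hit,
    // so a better (smaller) distance evicts it in O(log topk).
    std::vector<float>
    topk_smallest(const std::vector<float>& distances, std::size_t topk) {
        std::priority_queue<float> heap;  // max-heap
        for (float d : distances) {
            if (heap.size() < topk) {
                heap.push(d);
            } else if (d < heap.top()) {
                heap.pop();
                heap.push(d);
            }
        }
        std::vector<float> result;
        for (; !heap.empty(); heap.pop()) {
            result.push_back(heap.top());  // worst-to-best order
        }
        return result;
    }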
@@ -21,13 +21,13 @@ namespace milvus::query {
 SubQueryResult
 BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
                        const uint8_t* binary_chunk,
-                       int64_t chunk_size,
+                       int64_t size_per_chunk,
                        const faiss::BitsetView& bitset);

 SubQueryResult
 FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
                       const float* chunk_data,
-                      int64_t chunk_size,
+                      int64_t size_per_chunk,
                       const faiss::BitsetView& bitset);

 }  // namespace milvus::query
@@ -72,46 +72,46 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
     dataset::FloatQueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};

     auto max_indexed_id = indexing_record.get_finished_ack();
-    const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset);
-    auto search_conf = indexing_entry.get_search_conf(topK);
+    const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
+    auto search_conf = field_indexing.get_search_conf(topK);

     for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
-        auto chunk_size = indexing_entry.get_chunk_size();
-        auto indexing = indexing_entry.get_indexing(chunk_id);
+        auto size_per_chunk = field_indexing.get_size_per_chunk();
+        auto indexing = field_indexing.get_chunk_indexing(chunk_id);

-        auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size);
+        auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
         auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);

         // convert chunk uid to segment uid
         for (auto& x : sub_qr.mutable_labels()) {
             if (x != -1) {
-                x += chunk_id * chunk_size;
+                x += chunk_id * size_per_chunk;
             }
         }

         final_qr.merge(sub_qr);
     }
-    auto vec_ptr = record.get_entity<FloatVector>(vecfield_offset);
+    auto vec_ptr = record.get_field_data<FloatVector>(vecfield_offset);

     // step 4: brute force search where small indexing is unavailable
-    auto vec_chunk_size = vec_ptr->get_chunk_size();
-    Assert(vec_chunk_size == indexing_entry.get_chunk_size());
-    auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
+    auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
+    Assert(vec_size_per_chunk == field_indexing.get_size_per_chunk());
+    auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);

     for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
         auto& chunk = vec_ptr->get_chunk(chunk_id);

-        auto element_begin = chunk_id * vec_chunk_size;
-        auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
-        auto chunk_size = element_end - element_begin;
+        auto element_begin = chunk_id * vec_size_per_chunk;
+        auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
+        auto size_per_chunk = element_end - element_begin;

-        auto sub_view = BitsetSubView(bitset, element_begin, chunk_size);
-        auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), chunk_size, sub_view);
+        auto sub_view = BitsetSubView(bitset, element_begin, size_per_chunk);
+        auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), size_per_chunk, sub_view);

         // convert chunk uid to segment uid
         for (auto& x : sub_qr.mutable_labels()) {
             if (x != -1) {
-                x += chunk_id * vec_chunk_size;
+                x += chunk_id * vec_size_per_chunk;
             }
         }
         final_qr.merge(sub_qr);
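Note: the chunked search above leans on two small pieces of arithmetic. A sketch of both, assuming upper_div is ceiling division (consistent with every call site in this diff); promote_labels mirrors the "convert chunk uid to segment uid" loops, where -1 marks an empty result slot:

    #include <cstdint>

    // Number of chunks needed to cover x rows (assumed semantics of upper_div).
    int64_t
    upper_div(int64_t x, int64_t align) {
        return (x + align - 1) / align;
    }

    // Convert chunk-local result labels to segment-wide row ids; -1 stays -1.
    void
    promote_labels(int64_t* labels, int64_t n, int64_t chunk_id, int64_t size_per_chunk) {
        for (int64_t i = 0; i < n; ++i) {
            if (labels[i] != -1) {
                labels[i] += chunk_id * size_per_chunk;
            }
        }
    }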
@@ -160,18 +160,18 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
     // step 3: small indexing search
     query::dataset::BinaryQueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};

-    auto vec_ptr = record.get_entity<BinaryVector>(vecfield_offset);
+    auto vec_ptr = record.get_field_data<BinaryVector>(vecfield_offset);

     auto max_indexed_id = 0;
     // step 4: brute force search where small indexing is unavailable

-    auto vec_chunk_size = vec_ptr->get_chunk_size();
-    auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
+    auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
+    auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
     SubQueryResult final_result(num_queries, topK, metric_type);
     for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
         auto& chunk = vec_ptr->get_chunk(chunk_id);
-        auto element_begin = chunk_id * vec_chunk_size;
-        auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
+        auto element_begin = chunk_id * vec_size_per_chunk;
+        auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
         auto nsize = element_end - element_begin;

         auto sub_view = BitsetSubView(bitset, element_begin, nsize);
@@ -180,7 +180,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
         // convert chunk uid to segment uid
         for (auto& x : sub_result.mutable_labels()) {
             if (x != -1) {
-                x += chunk_id * vec_chunk_size;
+                x += chunk_id * vec_size_per_chunk;
             }
         }
         final_result.merge(sub_result);
@@ -62,16 +62,16 @@ SearchOnSealed(const Schema& schema,
     auto dim = field.get_dim();

     Assert(record.is_ready(field_offset));
-    auto indexing_entry = record.get_entry(field_offset);
-    Assert(indexing_entry->metric_type_ == query_info.metric_type_);
+    auto field_indexing = record.get_field_indexing(field_offset);
+    Assert(field_indexing->metric_type_ == query_info.metric_type_);

     auto final = [&] {
         auto ds = knowhere::GenDataset(num_queries, dim, query_data);

         auto conf = query_info.search_params_;
         conf[milvus::knowhere::meta::TOPK] = query_info.topK_;
-        conf[milvus::knowhere::Metric::TYPE] = MetricTypeToName(indexing_entry->metric_type_);
-        return indexing_entry->indexing_->Query(ds, conf, bitset);
+        conf[milvus::knowhere::Metric::TYPE] = MetricTypeToName(field_indexing->metric_type_);
+        return field_indexing->indexing_->Query(ds, conf, bitset);
     }();

     auto ids = final->Get<idx_t*>(knowhere::meta::IDS);
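Note: a side remark on the unchanged "auto final = [&] { ... }();" above: it is an immediately-invoked lambda, used so that `final` can be produced by a multi-statement computation yet initialized in one shot. Minimal form of the idiom:

    #include <cstdio>

    int
    main() {
        const int value = [&] {  // build-then-freeze: the lambda runs exactly once
            int v = 1;
            v += 2;
            return v;
        }();
        std::printf("%d\n", value);  // prints 3
    }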
@@ -120,41 +120,33 @@ template <typename T, typename IndexFunc, typename ElementFunc>
 auto
 ExecExprVisitor::ExecRangeVisitorImpl(RangeExprImpl<T>& expr, IndexFunc index_func, ElementFunc element_func)
     -> RetType {
-    auto data_type = expr.data_type_;
     auto& schema = segment_.get_schema();
     auto field_offset = expr.field_offset_;
     auto& field_meta = schema[field_offset];
-    // auto vec_ptr = records.get_entity<T>(field_offset);
-    // auto& vec = *vec_ptr;
-    // const segcore::ScalarIndexingEntry<T>& entry = indexing_record.get_scalar_entry<T>(field_offset);
+    auto indexing_barrier = segment_.num_chunk_index(field_offset);
+    auto size_per_chunk = segment_.size_per_chunk();
+    auto num_chunk = upper_div(row_count_, size_per_chunk);

-    // RetType results(vec.num_chunk());
-    auto indexing_barrier = segment_.num_chunk_index_safe(field_offset);
-    auto chunk_size = segment_.size_per_chunk();
-    auto num_chunk = upper_div(row_count_, chunk_size);
     RetType results;

     using Index = knowhere::scalar::StructuredIndex<T>;
     for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
-        // auto& result = results[chunk_id];
         const Index& indexing = segment_.chunk_scalar_index<T>(field_offset, chunk_id);
         // NOTE: knowhere is not const-ready
         // This is a dirty workaround
         auto data = index_func(const_cast<Index*>(&indexing));
-        Assert(data->size() == chunk_size);
+        Assert(data->size() == size_per_chunk);
         results.emplace_back(std::move(*data));
     }

     for (auto chunk_id = indexing_barrier; chunk_id < num_chunk; ++chunk_id) {
-        boost::dynamic_bitset<> result(chunk_size);
-        // auto& result = results[chunk_id];
-        result.resize(chunk_size);
+        boost::dynamic_bitset<> result(size_per_chunk);
+        result.resize(size_per_chunk);
         auto chunk = segment_.chunk_data<T>(field_offset, chunk_id);
         const T* data = chunk.data();
-        for (int index = 0; index < chunk_size; ++index) {
+        for (int index = 0; index < size_per_chunk; ++index) {
             result[index] = element_func(data[index]);
         }
-        Assert(result.size() == chunk_size);
+        Assert(result.size() == size_per_chunk);
         results.emplace_back(std::move(result));
     }
     return results;
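Note: the restructured ExecRangeVisitorImpl above has a two-phase shape: chunks below the indexing barrier are answered from a per-chunk scalar index, the rest by scanning raw data. A schematic of that structure, with index_lookup and predicate as hypothetical stand-ins for index_func and element_func:

    #include <cstdint>
    #include <vector>
    #include <boost/dynamic_bitset.hpp>

    template <typename T, typename IndexLookup, typename Predicate>
    std::vector<boost::dynamic_bitset<>>
    range_scan(int64_t num_chunk,
               int64_t indexing_barrier,
               int64_t size_per_chunk,
               const std::vector<std::vector<T>>& chunks,
               IndexLookup index_lookup,
               Predicate predicate) {
        std::vector<boost::dynamic_bitset<>> results;
        // phase 1: chunks with a built index are answered by the index
        for (int64_t chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
            results.emplace_back(index_lookup(chunk_id));
        }
        // phase 2: the remaining chunks fall back to an element-wise scan
        for (int64_t chunk_id = indexing_barrier; chunk_id < num_chunk; ++chunk_id) {
            boost::dynamic_bitset<> result(size_per_chunk);
            const auto& data = chunks[chunk_id];
            for (int64_t i = 0; i < size_per_chunk; ++i) {
                result[i] = predicate(data[i]);
            }
            results.emplace_back(std::move(result));
        }
        return results;
    }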
@@ -282,27 +274,19 @@ template <typename T>
 auto
 ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> RetType {
     auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
-    // auto& records = segment_.get_insert_record();
-    auto data_type = expr.data_type_;
     auto& schema = segment_.get_schema();

     auto field_offset = expr_raw.field_offset_;
     auto& field_meta = schema[field_offset];
-    // auto vec_ptr = records.get_entity<T>(field_offset);
-    // auto& vec = *vec_ptr;
-    auto chunk_size = segment_.size_per_chunk();
-    auto num_chunk = upper_div(row_count_, chunk_size);
+    auto size_per_chunk = segment_.size_per_chunk();
+    auto num_chunk = upper_div(row_count_, size_per_chunk);
     RetType bitsets;

-    // auto N = records.ack_responder_.GetAck();
-    // TODO: enable index for term

     for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
         Span<T> chunk = segment_.chunk_data<T>(field_offset, chunk_id);

-        auto size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * chunk_size : chunk_size;
+        auto size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;

-        boost::dynamic_bitset<> bitset(chunk_size);
+        boost::dynamic_bitset<> bitset(size_per_chunk);
         for (int i = 0; i < size; ++i) {
             auto value = chunk.data()[i];
             bool is_in = std::binary_search(expr.terms_.begin(), expr.terms_.end(), value);
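Note: two details of the loop above, restated as a sketch: std::binary_search only works because expr.terms_ is kept sorted (a precondition this visitor assumes), and only the last chunk can be partially filled, which is exactly what the ternary computes:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Valid rows in a chunk; every chunk is full except possibly the last one.
    int64_t
    rows_in_chunk(int64_t chunk_id, int64_t num_chunk, int64_t row_count, int64_t size_per_chunk) {
        return chunk_id == num_chunk - 1 ? row_count - chunk_id * size_per_chunk : size_per_chunk;
    }

    // Membership test as used above; requires sorted_terms to be sorted.
    bool
    contains(const std::vector<int64_t>& sorted_terms, int64_t value) {
        return std::binary_search(sorted_terms.begin(), sorted_terms.end(), value);
    }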
@@ -6,7 +6,7 @@ set(SEGCORE_FILES
         SegmentGrowing.cpp
         SegmentGrowingImpl.cpp
         SegmentSealedImpl.cpp
-        IndexingEntry.cpp
+        FieldIndexing.cpp
         InsertRecord.cpp
         Reduce.cpp
         plan_c.cpp
@@ -72,7 +72,7 @@ class ThreadSafeVector {

 class VectorBase {
  public:
-    explicit VectorBase(int64_t chunk_size) : chunk_size_(chunk_size) {
+    explicit VectorBase(int64_t size_per_chunk) : size_per_chunk_(size_per_chunk) {
     }
     virtual ~VectorBase() = default;

@@ -86,12 +86,12 @@ class VectorBase {
     get_span_base(int64_t chunk_id) const = 0;

     int64_t
-    get_chunk_size() const {
-        return chunk_size_;
+    get_size_per_chunk() const {
+        return size_per_chunk_;
     }

  protected:
-    const int64_t chunk_size_;
+    const int64_t size_per_chunk_;
 };

 template <typename Type, bool is_scalar = false>
@@ -111,27 +111,28 @@ class ConcurrentVectorImpl : public VectorBase {
         std::conditional_t<is_scalar, Type, std::conditional_t<std::is_same_v<Type, float>, FloatVector, BinaryVector>>;

  public:
-    explicit ConcurrentVectorImpl(ssize_t dim, int64_t chunk_size) : VectorBase(chunk_size), Dim(is_scalar ? 1 : dim) {
+    explicit ConcurrentVectorImpl(ssize_t dim, int64_t size_per_chunk)
+        : VectorBase(size_per_chunk), Dim(is_scalar ? 1 : dim) {
         Assert(is_scalar ? dim == 1 : dim != 1);
     }

     void
     grow_to_at_least(int64_t element_count) override {
-        auto chunk_count = upper_div(element_count, chunk_size_);
-        chunks_.emplace_to_at_least(chunk_count, Dim * chunk_size_);
+        auto chunk_count = upper_div(element_count, size_per_chunk_);
+        chunks_.emplace_to_at_least(chunk_count, Dim * size_per_chunk_);
     }

     Span<TraitType>
     get_span(int64_t chunk_id) const {
         auto& chunk = get_chunk(chunk_id);
         if constexpr (is_scalar) {
-            return Span<TraitType>(chunk.data(), chunk_size_);
+            return Span<TraitType>(chunk.data(), size_per_chunk_);
         } else if constexpr (std::is_same_v<Type, int64_t> || std::is_same_v<Type, int>) {
             // only for testing
             PanicInfo("unimplemented");
         } else {
             static_assert(std::is_same_v<typename TraitType::embedded_type, Type>);
-            return Span<TraitType>(chunk.data(), chunk_size_, Dim);
+            return Span<TraitType>(chunk.data(), size_per_chunk_, Dim);
         }
     }
@@ -151,28 +152,28 @@ class ConcurrentVectorImpl : public VectorBase {
             return;
         }
         this->grow_to_at_least(element_offset + element_count);
-        auto chunk_id = element_offset / chunk_size_;
-        auto chunk_offset = element_offset % chunk_size_;
+        auto chunk_id = element_offset / size_per_chunk_;
+        auto chunk_offset = element_offset % size_per_chunk_;
         ssize_t source_offset = 0;
         // first partition:
-        if (chunk_offset + element_count <= chunk_size_) {
+        if (chunk_offset + element_count <= size_per_chunk_) {
             // only first
             fill_chunk(chunk_id, chunk_offset, element_count, source, source_offset);
             return;
         }

-        auto first_size = chunk_size_ - chunk_offset;
+        auto first_size = size_per_chunk_ - chunk_offset;
         fill_chunk(chunk_id, chunk_offset, first_size, source, source_offset);

-        source_offset += chunk_size_ - chunk_offset;
+        source_offset += size_per_chunk_ - chunk_offset;
         element_count -= first_size;
         ++chunk_id;

         // the middle
-        while (element_count >= chunk_size_) {
-            fill_chunk(chunk_id, 0, chunk_size_, source, source_offset);
-            source_offset += chunk_size_;
-            element_count -= chunk_size_;
+        while (element_count >= size_per_chunk_) {
+            fill_chunk(chunk_id, 0, size_per_chunk_, source, source_offset);
+            source_offset += size_per_chunk_;
+            element_count -= size_per_chunk_;
             ++chunk_id;
         }
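Note: the renamed arithmetic in set_data_raw above implements a three-phase copy: a write that starts mid-chunk fills the tail of the first chunk, then whole middle chunks, then the head of the last chunk. A self-contained sketch, with a hypothetical copy_into_chunk standing in for fill_chunk:

    #include <cstdint>

    // Hypothetical stand-in for fill_chunk: copy `count` elements from
    // src + src_offset into chunk `chunk_id` at `offset` (body elided).
    void
    copy_into_chunk(int64_t chunk_id, int64_t offset, int64_t count, const float* src, int64_t src_offset) {
    }

    void
    set_data(int64_t element_offset, const float* src, int64_t count, int64_t size_per_chunk) {
        int64_t chunk_id = element_offset / size_per_chunk;
        int64_t chunk_offset = element_offset % size_per_chunk;
        int64_t src_offset = 0;
        if (chunk_offset + count <= size_per_chunk) {
            // the write fits entirely inside the first chunk
            copy_into_chunk(chunk_id, chunk_offset, count, src, src_offset);
            return;
        }
        // 1) tail of the first chunk
        int64_t first_size = size_per_chunk - chunk_offset;
        copy_into_chunk(chunk_id, chunk_offset, first_size, src, src_offset);
        src_offset += first_size;
        count -= first_size;
        ++chunk_id;
        // 2) whole middle chunks
        while (count >= size_per_chunk) {
            copy_into_chunk(chunk_id, 0, size_per_chunk, src, src_offset);
            src_offset += size_per_chunk;
            count -= size_per_chunk;
            ++chunk_id;
        }
        // 3) head of the last chunk
        if (count > 0) {
            copy_into_chunk(chunk_id, 0, count, src, src_offset);
        }
    }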
@@ -190,16 +191,16 @@ class ConcurrentVectorImpl : public VectorBase {
     // just for fun, don't use it directly
     const Type*
     get_element(ssize_t element_index) const {
-        auto chunk_id = element_index / chunk_size_;
-        auto chunk_offset = element_index % chunk_size_;
+        auto chunk_id = element_index / size_per_chunk_;
+        auto chunk_offset = element_index % size_per_chunk_;
         return get_chunk(chunk_id).data() + chunk_offset * Dim;
     }

     const Type&
     operator[](ssize_t element_index) const {
         Assert(Dim == 1);
-        auto chunk_id = element_index / chunk_size_;
-        auto chunk_offset = element_index % chunk_size_;
+        auto chunk_id = element_index / size_per_chunk_;
+        auto chunk_offset = element_index % size_per_chunk_;
         return get_chunk(chunk_id)[chunk_offset];
     }
@@ -232,24 +233,24 @@ template <typename Type>
 class ConcurrentVector : public ConcurrentVectorImpl<Type, true> {
  public:
     static_assert(std::is_fundamental_v<Type>);
-    explicit ConcurrentVector(int64_t chunk_size)
-        : ConcurrentVectorImpl<Type, true>::ConcurrentVectorImpl(1, chunk_size) {
+    explicit ConcurrentVector(int64_t size_per_chunk)
+        : ConcurrentVectorImpl<Type, true>::ConcurrentVectorImpl(1, size_per_chunk) {
     }
 };

 template <>
 class ConcurrentVector<FloatVector> : public ConcurrentVectorImpl<float, false> {
  public:
-    ConcurrentVector(int64_t dim, int64_t chunk_size)
-        : ConcurrentVectorImpl<float, false>::ConcurrentVectorImpl(dim, chunk_size) {
+    ConcurrentVector(int64_t dim, int64_t size_per_chunk)
+        : ConcurrentVectorImpl<float, false>::ConcurrentVectorImpl(dim, size_per_chunk) {
     }
 };

 template <>
 class ConcurrentVector<BinaryVector> : public ConcurrentVectorImpl<uint8_t, false> {
  public:
-    explicit ConcurrentVector(int64_t dim, int64_t chunk_size)
-        : binary_dim_(dim), ConcurrentVectorImpl(dim / 8, chunk_size) {
+    explicit ConcurrentVector(int64_t dim, int64_t size_per_chunk)
+        : binary_dim_(dim), ConcurrentVectorImpl(dim / 8, size_per_chunk) {
         Assert(dim % 8 == 0);
     }
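Note: two remarks on the BinaryVector specialization above. First, a dim-bit binary vector is stored as dim / 8 bytes, which is why the constructor divides by 8 and asserts dim % 8 == 0. Second, "binary_dim_(dim), ConcurrentVectorImpl(dim / 8, size_per_chunk)" lists the member before the base class, but C++ always initializes the base first regardless of the order written (compilers typically flag this under -Wreorder). A sketch of the size rule:

    #include <cassert>
    #include <cstdint>

    // Bytes occupied by one dim-bit binary vector; e.g. dim = 128 -> 16 bytes.
    int64_t
    binary_code_size(int64_t dim) {
        assert(dim % 8 == 0);
        return dim / 8;
    }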
@@ -29,9 +29,11 @@ struct DeletedRecord {
         std::shared_ptr<TmpBitmap>
         clone(int64_t capacity);
     };
-    static constexpr int64_t deprecated_chunk_size = 32 * 1024;
+    static constexpr int64_t deprecated_size_per_chunk = 32 * 1024;
     DeletedRecord()
-        : lru_(std::make_shared<TmpBitmap>()), timestamps_(deprecated_chunk_size), uids_(deprecated_chunk_size) {
+        : lru_(std::make_shared<TmpBitmap>()),
+          timestamps_(deprecated_size_per_chunk),
+          uids_(deprecated_size_per_chunk) {
         lru_->bitmap_ptr = std::make_shared<faiss::ConcurrentBitset>(0);
     }
@@ -9,14 +9,14 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License

-#include "segcore/IndexingEntry.h"
+#include "segcore/FieldIndexing.h"
 #include <thread>
 #include <knowhere/index/vector_index/IndexIVF.h>
 #include <knowhere/index/vector_index/adapter/VectorAdapter.h>

 namespace milvus::segcore {
 void
-VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
+VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
     assert(field_meta_.get_data_type() == DataType::VECTOR_FLOAT);
     auto dim = field_meta_.get_dim();

@@ -30,7 +30,7 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
         const auto& chunk = source->get_chunk(chunk_id);
         // build index for chunk
         auto indexing = std::make_unique<knowhere::IVF>();
-        auto dataset = knowhere::GenDataset(source->get_chunk_size(), dim, chunk.data());
+        auto dataset = knowhere::GenDataset(source->get_size_per_chunk(), dim, chunk.data());
         indexing->Train(dataset, conf);
         indexing->AddWithoutIds(dataset, conf);
         data_[chunk_id] = std::move(indexing);
@@ -38,7 +38,7 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
 }

 knowhere::Config
-VecIndexingEntry::get_build_conf() const {
+VectorFieldIndexing::get_build_conf() const {
     return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
                             {knowhere::IndexParams::nlist, 100},
                             {knowhere::IndexParams::nprobe, 4},
@@ -47,7 +47,7 @@ VecIndexingEntry::get_build_conf() const {
 }

 knowhere::Config
-VecIndexingEntry::get_search_conf(int top_K) const {
+VectorFieldIndexing::get_search_conf(int top_K) const {
     return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
                             {knowhere::meta::TOPK, top_K},
                             {knowhere::IndexParams::nlist, 100},
@@ -71,8 +71,8 @@ IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record)
     lck.unlock();

     //     std::thread([this, old_ack, chunk_ack, &record] {
-    for (auto& [field_offset, entry] : entries_) {
-        auto vec_base = record.get_base_entity(field_offset);
+    for (auto& [field_offset, entry] : field_indexings_) {
+        auto vec_base = record.get_field_data_base(field_offset);
         entry->BuildIndexRange(old_ack, chunk_ack, vec_base);
     }
     finished_ack_.AddSegment(old_ack, chunk_ack);
@@ -81,7 +81,7 @@ IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record)

 template <typename T>
 void
-ScalarIndexingEntry<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
+ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
     auto source = dynamic_cast<const ConcurrentVector<T>*>(vec_base);
     Assert(source);
     auto num_chunk = source->num_chunk();
@@ -92,16 +92,16 @@ ScalarIndexingEntry<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const
         // build index for chunk
         // TODO
         auto indexing = std::make_unique<knowhere::scalar::StructuredIndexSort<T>>();
-        indexing->Build(vec_base->get_chunk_size(), chunk.data());
+        indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
         data_[chunk_id] = std::move(indexing);
     }
 }

-std::unique_ptr<IndexingEntry>
-CreateIndex(const FieldMeta& field_meta, int64_t chunk_size) {
+std::unique_ptr<FieldIndexing>
+CreateIndex(const FieldMeta& field_meta, int64_t size_per_chunk) {
     if (field_meta.is_vector()) {
         if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
-            return std::make_unique<VecIndexingEntry>(field_meta, chunk_size);
+            return std::make_unique<VectorFieldIndexing>(field_meta, size_per_chunk);
         } else {
             // TODO
             PanicInfo("unsupported");
@@ -109,19 +109,19 @@ CreateIndex(const FieldMeta& field_meta, int64_t chunk_size) {
     }
     switch (field_meta.get_data_type()) {
         case DataType::BOOL:
-            return std::make_unique<ScalarIndexingEntry<bool>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<bool>>(field_meta, size_per_chunk);
         case DataType::INT8:
-            return std::make_unique<ScalarIndexingEntry<int8_t>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<int8_t>>(field_meta, size_per_chunk);
         case DataType::INT16:
-            return std::make_unique<ScalarIndexingEntry<int16_t>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<int16_t>>(field_meta, size_per_chunk);
         case DataType::INT32:
-            return std::make_unique<ScalarIndexingEntry<int32_t>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<int32_t>>(field_meta, size_per_chunk);
         case DataType::INT64:
-            return std::make_unique<ScalarIndexingEntry<int64_t>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<int64_t>>(field_meta, size_per_chunk);
        case DataType::FLOAT:
-            return std::make_unique<ScalarIndexingEntry<float>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<float>>(field_meta, size_per_chunk);
         case DataType::DOUBLE:
-            return std::make_unique<ScalarIndexingEntry<double>>(field_meta, chunk_size);
+            return std::make_unique<ScalarFieldIndexing<double>>(field_meta, size_per_chunk);
         default:
             PanicInfo("unsupported");
     }
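Note: the per-chunk build performed by VectorFieldIndexing::BuildIndexRange above reduces to a small recipe, sketched here using only the knowhere calls visible in this diff (GenDataset, Train, AddWithoutIds); includes and error handling are omitted, and this is not the literal member function:

    // Build one IVF index over one chunk of `rows` float vectors of dimension `dim`.
    std::unique_ptr<knowhere::IVF>
    build_chunk_index(const float* chunk_data, int64_t rows, int64_t dim, const knowhere::Config& conf) {
        auto indexing = std::make_unique<knowhere::IVF>();
        auto dataset = knowhere::GenDataset(rows, dim, chunk_data);
        indexing->Train(dataset, conf);          // learn the IVF coarse quantizer
        indexing->AddWithoutIds(dataset, conf);  // ids are implicit chunk offsets
        return indexing;
    }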
@@ -24,14 +24,14 @@ namespace milvus::segcore {

 // this should be concurrent
 // All concurrent
-class IndexingEntry {
+class FieldIndexing {
  public:
-    explicit IndexingEntry(const FieldMeta& field_meta, int64_t chunk_size)
-        : field_meta_(field_meta), chunk_size_(chunk_size) {
+    explicit FieldIndexing(const FieldMeta& field_meta, int64_t size_per_chunk)
+        : field_meta_(field_meta), size_per_chunk_(size_per_chunk) {
     }
-    IndexingEntry(const IndexingEntry&) = delete;
-    IndexingEntry&
-    operator=(const IndexingEntry&) = delete;
+    FieldIndexing(const FieldIndexing&) = delete;
+    FieldIndexing&
+    operator=(const FieldIndexing&) = delete;

     // Do this in parallel
     virtual void
@@ -43,29 +43,29 @@ class IndexingEntry {
     }

     int64_t
-    get_chunk_size() const {
-        return chunk_size_;
+    get_size_per_chunk() const {
+        return size_per_chunk_;
     }

     virtual knowhere::Index*
-    get_indexing(int64_t chunk_id) const = 0;
+    get_chunk_indexing(int64_t chunk_id) const = 0;

  protected:
     // additional info
     const FieldMeta& field_meta_;
-    const int64_t chunk_size_;
+    const int64_t size_per_chunk_;
 };
 template <typename T>
-class ScalarIndexingEntry : public IndexingEntry {
+class ScalarFieldIndexing : public FieldIndexing {
  public:
-    using IndexingEntry::IndexingEntry;
+    using FieldIndexing::FieldIndexing;

     void
     BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;

     // concurrent
     knowhere::scalar::StructuredIndex<T>*
-    get_indexing(int64_t chunk_id) const override {
+    get_chunk_indexing(int64_t chunk_id) const override {
         Assert(!field_meta_.is_vector());
         return data_.at(chunk_id).get();
     }
@@ -74,16 +74,16 @@ class ScalarIndexingEntry : public IndexingEntry {
     tbb::concurrent_vector<std::unique_ptr<knowhere::scalar::StructuredIndex<T>>> data_;
 };

-class VecIndexingEntry : public IndexingEntry {
+class VectorFieldIndexing : public FieldIndexing {
  public:
-    using IndexingEntry::IndexingEntry;
+    using FieldIndexing::FieldIndexing;

     void
     BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;

     // concurrent
     knowhere::VecIndex*
-    get_indexing(int64_t chunk_id) const override {
+    get_chunk_indexing(int64_t chunk_id) const override {
         Assert(field_meta_.is_vector());
         return data_.at(chunk_id).get();
     }
@@ -97,12 +97,13 @@ class VecIndexingEntry : public IndexingEntry {
     tbb::concurrent_vector<std::unique_ptr<knowhere::VecIndex>> data_;
 };

-std::unique_ptr<IndexingEntry>
-CreateIndex(const FieldMeta& field_meta, int64_t chunk_size);
+std::unique_ptr<FieldIndexing>
+CreateIndex(const FieldMeta& field_meta, int64_t size_per_chunk);

 class IndexingRecord {
  public:
-    explicit IndexingRecord(const Schema& schema, int64_t chunk_size) : schema_(schema), chunk_size_(chunk_size) {
+    explicit IndexingRecord(const Schema& schema, int64_t size_per_chunk)
+        : schema_(schema), size_per_chunk_(size_per_chunk) {
         Initialize();
     }
@@ -111,7 +112,7 @@ class IndexingRecord {
         int offset = 0;
         for (auto& field : schema_) {
             if (field.get_data_type() != DataType::VECTOR_BINARY) {
-                entries_.try_emplace(FieldOffset(offset), CreateIndex(field, chunk_size_));
+                field_indexings_.try_emplace(FieldOffset(offset), CreateIndex(field, size_per_chunk_));
             }
             ++offset;
         }
@@ -128,24 +129,24 @@ class IndexingRecord {
         return finished_ack_.GetAck();
     }

-    const IndexingEntry&
-    get_entry(FieldOffset field_offset) const {
-        assert(entries_.count(field_offset));
-        return *entries_.at(field_offset);
+    const FieldIndexing&
+    get_field_indexing(FieldOffset field_offset) const {
+        assert(field_indexings_.count(field_offset));
+        return *field_indexings_.at(field_offset);
     }

-    const VecIndexingEntry&
-    get_vec_entry(FieldOffset field_offset) const {
-        auto& entry = get_entry(field_offset);
-        auto ptr = dynamic_cast<const VecIndexingEntry*>(&entry);
+    const VectorFieldIndexing&
+    get_vec_field_indexing(FieldOffset field_offset) const {
+        auto& field_indexing = get_field_indexing(field_offset);
+        auto ptr = dynamic_cast<const VectorFieldIndexing*>(&field_indexing);
         AssertInfo(ptr, "invalid indexing");
         return *ptr;
     }
     template <typename T>
     auto
-    get_scalar_entry(FieldOffset field_offset) const -> const ScalarIndexingEntry<T>& {
-        auto& entry = get_entry(field_offset);
-        auto ptr = dynamic_cast<const ScalarIndexingEntry<T>*>(&entry);
+    get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing<T>& {
+        auto& entry = get_field_indexing(field_offset);
+        auto ptr = dynamic_cast<const ScalarFieldIndexing<T>*>(&entry);
         AssertInfo(ptr, "invalid indexing");
         return *ptr;
     }
@@ -159,11 +160,11 @@ class IndexingRecord {
     //    std::atomic<int64_t> finished_ack_ = 0;
     AckResponder finished_ack_;
     std::mutex mutex_;
-    int64_t chunk_size_;
+    int64_t size_per_chunk_;

  private:
     // field_offset => indexing
-    std::map<FieldOffset, std::unique_ptr<IndexingEntry>> entries_;
+    std::map<FieldOffset, std::unique_ptr<FieldIndexing>> field_indexings_;
 };

 }  // namespace milvus::segcore
@@ -13,14 +13,14 @@

 namespace milvus::segcore {

-InsertRecord::InsertRecord(const Schema& schema, int64_t chunk_size) : uids_(1), timestamps_(1) {
+InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk) : uids_(1), timestamps_(1) {
     for (auto& field : schema) {
         if (field.is_vector()) {
             if (field.get_data_type() == DataType::VECTOR_FLOAT) {
-                this->insert_entity<FloatVector>(field.get_dim(), chunk_size);
+                this->append_field_data<FloatVector>(field.get_dim(), size_per_chunk);
                 continue;
             } else if (field.get_data_type() == DataType::VECTOR_BINARY) {
-                this->insert_entity<BinaryVector>(field.get_dim(), chunk_size);
+                this->append_field_data<BinaryVector>(field.get_dim(), size_per_chunk);
                 continue;
             } else {
                 PanicInfo("unsupported");
@@ -28,34 +28,34 @@ InsertRecord::InsertRecord(const Schema& schema, int64_t chunk_size) : uids_(1),
         }
         switch (field.get_data_type()) {
             case DataType::BOOL: {
-                this->insert_entity<bool>(chunk_size);
+                this->append_field_data<bool>(size_per_chunk);
                 break;
             }
             case DataType::INT8: {
-                this->insert_entity<int8_t>(chunk_size);
+                this->append_field_data<int8_t>(size_per_chunk);
                 break;
             }
             case DataType::INT16: {
-                this->insert_entity<int16_t>(chunk_size);
+                this->append_field_data<int16_t>(size_per_chunk);
                 break;
             }
             case DataType::INT32: {
-                this->insert_entity<int32_t>(chunk_size);
+                this->append_field_data<int32_t>(size_per_chunk);
                 break;
             }

             case DataType::INT64: {
-                this->insert_entity<int64_t>(chunk_size);
+                this->append_field_data<int64_t>(size_per_chunk);
                 break;
             }

             case DataType::FLOAT: {
-                this->insert_entity<float>(chunk_size);
+                this->append_field_data<float>(size_per_chunk);
                 break;
             }

             case DataType::DOUBLE: {
-                this->insert_entity<double>(chunk_size);
+                this->append_field_data<double>(size_per_chunk);
                 break;
             }
             default: {
@@ -24,47 +24,53 @@ struct InsertRecord {
     ConcurrentVector<Timestamp> timestamps_;
     ConcurrentVector<idx_t> uids_;

-    explicit InsertRecord(const Schema& schema, int64_t chunk_size);
+    explicit InsertRecord(const Schema& schema, int64_t size_per_chunk);

+    // get field data without knowing the type
+    // return VectorBase type
     auto
-    get_base_entity(FieldOffset field_offset) const {
-        auto ptr = entity_vec_[field_offset.get()].get();
+    get_field_data_base(FieldOffset field_offset) const {
+        auto ptr = field_datas_[field_offset.get()].get();
         return ptr;
     }

+    // get field data in given type, const version
     template <typename Type>
     auto
-    get_entity(FieldOffset field_offset) const {
-        auto base_ptr = get_base_entity(field_offset);
+    get_field_data(FieldOffset field_offset) const {
+        auto base_ptr = get_field_data_base(field_offset);
         auto ptr = dynamic_cast<const ConcurrentVector<Type>*>(base_ptr);
         Assert(ptr);
         return ptr;
     }

+    // get field data in given type, nonconst version
     template <typename Type>
     auto
-    get_entity(FieldOffset field_offset) {
-        auto base_ptr = get_base_entity(field_offset);
+    get_field_data(FieldOffset field_offset) {
+        auto base_ptr = get_field_data_base(field_offset);
         auto ptr = dynamic_cast<ConcurrentVector<Type>*>(base_ptr);
         Assert(ptr);
         return ptr;
     }

+    // append a column of scalar type
     template <typename Type>
     void
-    insert_entity(int64_t chunk_size) {
+    append_field_data(int64_t size_per_chunk) {
         static_assert(std::is_fundamental_v<Type>);
-        entity_vec_.emplace_back(std::make_unique<ConcurrentVector<Type>>(chunk_size));
+        field_datas_.emplace_back(std::make_unique<ConcurrentVector<Type>>(size_per_chunk));
     }

+    // append a column of vector type
     template <typename VectorType>
     void
-    insert_entity(int64_t dim, int64_t chunk_size) {
+    append_field_data(int64_t dim, int64_t size_per_chunk) {
         static_assert(std::is_base_of_v<VectorTrait, VectorType>);
-        entity_vec_.emplace_back(std::make_unique<ConcurrentVector<VectorType>>(dim, chunk_size));
+        field_datas_.emplace_back(std::make_unique<ConcurrentVector<VectorType>>(dim, size_per_chunk));
     }

  private:
-    std::vector<std::unique_ptr<VectorBase>> entity_vec_;
+    std::vector<std::unique_ptr<VectorBase>> field_datas_;
 };
 }  // namespace milvus::segcore
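Note: hypothetical usage of the renamed InsertRecord API above (the schema, field offset, and column type are invented for illustration); the type-erased accessor feeds generic machinery such as set_data_raw, while the typed accessor is for callers that know the column type:

    // InsertRecord record(schema, /*size_per_chunk=*/32 * 1024);
    //
    // // type-erased access, e.g. for generic writes:
    // VectorBase* base = record.get_field_data_base(FieldOffset(0));
    //
    // // typed access performs a checked dynamic_cast to ConcurrentVector<T>:
    // auto* vectors = record.get_field_data<FloatVector>(FieldOffset(0));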
@@ -31,30 +31,30 @@ using SealedIndexingEntryPtr = std::unique_ptr<SealedIndexingEntry>;

 struct SealedIndexingRecord {
     void
-    add_entry(FieldOffset field_offset, MetricType metric_type, knowhere::VecIndexPtr indexing) {
+    append_field_indexing(FieldOffset field_offset, MetricType metric_type, knowhere::VecIndexPtr indexing) {
         auto ptr = std::make_unique<SealedIndexingEntry>();
         ptr->indexing_ = indexing;
         ptr->metric_type_ = metric_type;
         std::unique_lock lck(mutex_);
-        entries_[field_offset] = std::move(ptr);
+        field_indexings_[field_offset] = std::move(ptr);
     }

     const SealedIndexingEntry*
-    get_entry(FieldOffset field_offset) const {
+    get_field_indexing(FieldOffset field_offset) const {
         std::shared_lock lck(mutex_);
-        AssertInfo(entries_.count(field_offset), "field_offset not found");
-        return entries_.at(field_offset).get();
+        AssertInfo(field_indexings_.count(field_offset), "field_offset not found");
+        return field_indexings_.at(field_offset).get();
     }

     bool
     is_ready(FieldOffset field_offset) const {
         std::shared_lock lck(mutex_);
-        return entries_.count(field_offset);
+        return field_indexings_.count(field_offset);
     }

  private:
     // field_offset -> SealedIndexingEntry
-    std::map<FieldOffset, SealedIndexingEntryPtr> entries_;
+    std::map<FieldOffset, SealedIndexingEntryPtr> field_indexings_;
     mutable std::shared_mutex mutex_;
 };
 }  // namespace milvus::segcore
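Note: the locking discipline above is the classic single-writer/many-reader split: append_field_indexing takes a unique_lock, while get_field_indexing and is_ready take shared_locks so concurrent lookups never serialize against each other. A stripped-down sketch of the same pattern:

    #include <map>
    #include <memory>
    #include <mutex>
    #include <shared_mutex>

    struct Registry {
        void
        put(int key, std::shared_ptr<int> value) {
            std::unique_lock lck(mutex_);  // exclusive: blocks readers and writers
            entries_[key] = std::move(value);
        }

        bool
        has(int key) const {
            std::shared_lock lck(mutex_);  // shared: readers proceed concurrently
            return entries_.count(key) != 0;
        }

     private:
        std::map<int, std::shared_ptr<int>> entries_;
        mutable std::shared_mutex mutex_;  // mutable so const readers can lock
    };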
@@ -20,8 +20,8 @@ TestABI() {
 }

 std::unique_ptr<SegmentGrowing>
-CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size) {
-    auto segment = std::make_unique<SegmentGrowingImpl>(schema, chunk_size);
+CreateGrowingSegment(SchemaPtr schema, int64_t size_per_chunk) {
+    auto segment = std::make_unique<SegmentGrowingImpl>(schema, size_per_chunk);
     return segment;
 }
@@ -80,7 +80,7 @@ class SegmentGrowing : public SegmentInternalInterface {
 using SegmentGrowingPtr = std::unique_ptr<SegmentGrowing>;

 SegmentGrowingPtr
-CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size = 32 * 1024);
+CreateGrowingSegment(SchemaPtr schema, int64_t size_per_chunk = 32 * 1024);

 }  // namespace segcore
 }  // namespace milvus
@ -170,7 +170,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
|
||||||
record_.uids_.set_data(reserved_begin, uids.data(), size);
|
record_.uids_.set_data(reserved_begin, uids.data(), size);
|
||||||
for (int fid = 0; fid < schema_->size(); ++fid) {
|
for (int fid = 0; fid < schema_->size(); ++fid) {
|
||||||
auto field_offset = FieldOffset(fid);
|
auto field_offset = FieldOffset(fid);
|
||||||
record_.get_base_entity(field_offset)->set_data_raw(reserved_begin, entities[fid].data(), size);
|
record_.get_field_data_base(field_offset)->set_data_raw(reserved_begin, entities[fid].data(), size);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < uids.size(); ++i) {
|
for (int i = 0; i < uids.size(); ++i) {
|
||||||
|

@@ -180,7 +180,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
     }

     record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
-    indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / chunk_size_, record_);
+    indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / size_per_chunk_, record_);
     return Status::OK();
 }
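
The division here does real work: GetAck() counts contiguously acknowledged rows, and truncating integer division converts that into the number of completely filled chunks, so index building never runs over a partially filled chunk. A sketch of the arithmetic:

    #include <cstdint>

    // With size_per_chunk = 4: 0..3 acked rows -> 0 full chunks,
    // 4..7 -> 1 full chunk, 8..11 -> 2, and so on.
    int64_t FullChunks(int64_t acked_rows, int64_t size_per_chunk) {
        return acked_rows / size_per_chunk;  // truncation drops the partial chunk
    }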

@@ -231,9 +231,9 @@ SegmentGrowingImpl::Close() {
 int64_t
 SegmentGrowingImpl::GetMemoryUsageInBytes() const {
     int64_t total_bytes = 0;
-    int64_t ins_n = upper_align(record_.reserved, chunk_size_);
+    int64_t ins_n = upper_align(record_.reserved, size_per_chunk_);
     total_bytes += ins_n * (schema_->get_total_sizeof() + 16 + 1);
-    int64_t del_n = upper_align(deleted_record_.reserved, chunk_size_);
+    int64_t del_n = upper_align(deleted_record_.reserved, size_per_chunk_);
     total_bytes += del_n * (16 * 2);
     return total_bytes;
 }
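
upper_align and upper_div are not defined in this diff; the definitions below are assumed from their call sites (round a row count up to whole chunks, so the estimate covers reserved-but-unfilled space):

    #include <cstdint>

    // Assumed helpers, consistent with their use above.
    int64_t upper_div(int64_t value, int64_t align) {
        return (value + align - 1) / align;      // ceiling division
    }
    int64_t upper_align(int64_t value, int64_t align) {
        return upper_div(value, align) * align;  // next multiple of align
    }
    // e.g. upper_align(100001, 32 * 1024) == 131072, i.e. 4 full chunks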

@@ -245,20 +245,20 @@ SegmentGrowingImpl::LoadIndexing(const LoadIndexInfo& info) {
     Assert(info.index_params.count("metric_type"));
     auto metric_type_str = info.index_params.at("metric_type");

-    sealed_indexing_record_.add_entry(field_offset, GetMetricType(metric_type_str), info.index);
+    sealed_indexing_record_.append_field_indexing(field_offset, GetMetricType(metric_type_str), info.index);
     return Status::OK();
 }

 SpanBase
 SegmentGrowingImpl::chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const {
-    auto vec = get_insert_record().get_base_entity(field_offset);
+    auto vec = get_insert_record().get_field_data_base(field_offset);
     return vec->get_span_base(chunk_id);
 }

 int64_t
-SegmentGrowingImpl::num_chunk_data() const {
+SegmentGrowingImpl::num_chunk() const {
     auto size = get_insert_record().ack_responder_.GetAck();
-    return upper_div(size, chunk_size_);
+    return upper_div(size, size_per_chunk_);
 }
 void
 SegmentGrowingImpl::vector_search(int64_t vec_count,

@@ -27,7 +27,7 @@
 #include "utils/Status.h"
 #include "segcore/DeletedRecord.h"
 #include "utils/EasyAssert.h"
-#include "IndexingEntry.h"
+#include "FieldIndexing.h"
 #include "InsertRecord.h"
 #include <utility>
 #include <memory>

@@ -89,18 +89,18 @@ class SegmentGrowingImpl : public SegmentGrowing {

     // return count of index that has index, i.e., [0, num_chunk_index) have built index
     int64_t
-    num_chunk_index_safe(FieldOffset field_offset) const final {
+    num_chunk_index(FieldOffset field_offset) const final {
         return indexing_record_.get_finished_ack();
     }

     const knowhere::Index*
     chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final {
-        return indexing_record_.get_entry(field_offset).get_indexing(chunk_id);
+        return indexing_record_.get_field_indexing(field_offset).get_chunk_indexing(chunk_id);
     }

     int64_t
     size_per_chunk() const final {
-        return chunk_size_;
+        return size_per_chunk_;
     }

  public:
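
The renamed accessor chain — get_field_indexing(field_offset).get_chunk_indexing(chunk_id) — makes the two-level layout explicit: one FieldIndexing per field, holding one small index per finished chunk. A schematic with simplified stand-in types (the real classes live in FieldIndexing.h):

    #include <cstdint>
    #include <map>
    #include <vector>

    struct ChunkIndex {};  // placeholder for a per-chunk knowhere index

    struct FieldIndexingSketch {
        const ChunkIndex* get_chunk_indexing(int64_t chunk_id) const {
            return &chunk_indexes_.at(chunk_id);
        }
        std::vector<ChunkIndex> chunk_indexes_;  // one entry per built chunk
    };

    struct IndexingRecordSketch {
        const FieldIndexingSketch& get_field_indexing(int field_offset) const {
            return field_indexings_.at(field_offset);
        }
        std::map<int, FieldIndexingSketch> field_indexings_;  // one entry per field
    };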

@@ -152,27 +152,27 @@ class SegmentGrowingImpl : public SegmentGrowing {
     void
     bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const override {
         // TODO: support more types
-        auto vec_ptr = record_.get_base_entity(field_offset);
+        auto vec_ptr = record_.get_field_data_base(field_offset);
         auto data_type = schema_->operator[](field_offset).get_data_type();
         Assert(data_type == DataType::INT64);
         bulk_subscript_impl<int64_t>(*vec_ptr, seg_offsets, count, output);
     }

     int64_t
-    num_chunk_data() const override;
+    num_chunk() const override;

     Status
     LoadIndexing(const LoadIndexInfo& info) override;

  public:
     friend std::unique_ptr<SegmentGrowing>
-    CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size);
+    CreateGrowingSegment(SchemaPtr schema, int64_t size_per_chunk);

-    explicit SegmentGrowingImpl(SchemaPtr schema, int64_t chunk_size)
-        : chunk_size_(chunk_size),
+    explicit SegmentGrowingImpl(SchemaPtr schema, int64_t size_per_chunk)
+        : size_per_chunk_(size_per_chunk),
           schema_(std::move(schema)),
-          record_(*schema_, chunk_size),
-          indexing_record_(*schema_, chunk_size) {
+          record_(*schema_, size_per_chunk),
+          indexing_record_(*schema_, size_per_chunk) {
     }

     void
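
One constructor detail worth keeping in mind: C++ initializes members in declaration order, not initializer-list order. Here size_per_chunk_ and schema_ are declared before record_ and indexing_record_ (see the private section in the next hunk), so record_(*schema_, size_per_chunk) dereferences an already-initialized schema_. A minimal illustration of the rule:

    #include <iostream>

    int trace(const char* name, int v) {
        std::cout << name << " initialized\n";
        return v;
    }

    struct Demo {
        // Initializer list writes b_ first, but the output shows a_ first:
        // members always initialize in declaration order.
        Demo() : b_(trace("b_", 2)), a_(trace("a_", 1)) {}
        int a_;
        int b_;
    };

    int main() {
        Demo d;  // prints "a_ initialized" then "b_ initialized"
    }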

@@ -192,7 +192,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
     chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const override;

  private:
-    int64_t chunk_size_;
+    int64_t size_per_chunk_;
     SchemaPtr schema_;
     std::atomic<SegmentState> state_ = SegmentState::Open;

@@ -14,15 +14,18 @@
 #include "common/Schema.h"
 #include "query/Plan.h"
 #include "common/Span.h"
-#include "IndexingEntry.h"
+#include "FieldIndexing.h"
 #include <knowhere/index/vector_index/VecIndex.h>
 #include "common/SystemProperty.h"
 #include "query/PlanNode.h"

 namespace milvus::segcore {

+// common interface of SegmentSealed and SegmentGrowing
+// used by C API
 class SegmentInterface {
  public:
+    // fill results according to target_entries in plan
     void
     FillTargetEntry(const query::Plan* plan, QueryResult& results) const;

@@ -44,14 +44,17 @@ class SegmentInterface {
     virtual ~SegmentInterface() = default;

  protected:
+    // calculate output[i] = Vec[seg_offsets[i]], where Vec binds to system_type
     virtual void
     bulk_subscript(SystemFieldType system_type, const int64_t* seg_offsets, int64_t count, void* output) const = 0;

+    // calculate output[i] = Vec[seg_offsets[i]], where Vec binds to field_offset
     virtual void
     bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const = 0;
 };

 // internal API for DSL calculation
+// only for implementation
 class SegmentInternalInterface : public SegmentInterface {
  public:
     template <typename T>
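
The two new comments pin down bulk_subscript as a gather operation: output[i] = Vec[seg_offsets[i]] for i in [0, count). A minimal sketch of that contract over a plain int64_t column:

    #include <cstdint>

    // Gather: copy the rows named by seg_offsets into a dense output buffer.
    void GatherSketch(const int64_t* column,
                      const int64_t* seg_offsets,
                      int64_t count,
                      int64_t* output) {
        for (int64_t i = 0; i < count; ++i) {
            output[i] = column[seg_offsets[i]];
        }
    }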

@@ -80,21 +86,24 @@ class SegmentInternalInterface : public SegmentInterface {
                   const BitsetView& bitset,
                   QueryResult& output) const = 0;

+    // count of chunk that has index available
     virtual int64_t
-    num_chunk_index_safe(FieldOffset field_offset) const = 0;
+    num_chunk_index(FieldOffset field_offset) const = 0;

+    // count of chunks
     virtual int64_t
-    num_chunk_data() const = 0;
+    num_chunk() const = 0;

-    // return chunk_size for each chunk, renaming against confusion
+    // element size in each chunk
     virtual int64_t
     size_per_chunk() const = 0;

  protected:
-    // blob and row_count
+    // internal API: return chunk_data in span
     virtual SpanBase
     chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;

+    // internal API: return chunk_index in span, support scalar index only
     virtual const knowhere::Index*
     chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
 };
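
Together, num_chunk(), size_per_chunk(), and the templated chunk_data accessor define the traversal contract for both segment kinds; the Span test at the end of this diff exercises exactly this. A sketch of a caller walking one float field — the namespaces and the chunk_data<T>/span.data() shapes are inferred from this diff, so treat them as assumptions; the std::min handles the short final chunk:

    #include <algorithm>
    #include <cstdint>

    using milvus::FieldOffset;                          // assumed namespace
    using milvus::segcore::SegmentInternalInterface;    // assumed namespace

    void WalkFloatField(const SegmentInternalInterface& seg,
                        FieldOffset field,
                        int64_t total_rows) {
        auto num_chunk = seg.num_chunk();
        auto per_chunk = seg.size_per_chunk();
        for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
            auto span = seg.chunk_data<float>(field, chunk_id);
            auto begin = chunk_id * per_chunk;
            auto end = std::min((chunk_id + 1) * per_chunk, total_rows);
            for (int64_t i = 0; i < end - begin; ++i) {
                float v = span.data()[i];  // row (begin + i) of the field
                (void)v;
            }
        }
    }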

@@ -27,6 +27,6 @@ class SegmentSealed : public SegmentInternalInterface {
 using SegmentSealedPtr = std::unique_ptr<SegmentSealed>;

 SegmentSealedPtr
-CreateSealedSegment(SchemaPtr schema, int64_t chunk_size = 32 * 1024);
+CreateSealedSegment(SchemaPtr schema, int64_t size_per_chunk = 32 * 1024);

 }  // namespace milvus::segcore

@@ -29,7 +29,7 @@ SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
         row_count_opt_ = row_count;
     }
     Assert(!vec_indexings_.is_ready(field_offset));
-    vec_indexings_.add_entry(field_offset, GetMetricType(metric_type_str), info.index);
+    vec_indexings_.append_field_indexing(field_offset, GetMetricType(metric_type_str), info.index);
     ++ready_count_;
 }

@@ -77,13 +77,13 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
 }

 int64_t
-SegmentSealedImpl::num_chunk_index_safe(FieldOffset field_offset) const {
+SegmentSealedImpl::num_chunk_index(FieldOffset field_offset) const {
     // TODO: support scalar index
     return 0;
 }

 int64_t
-SegmentSealedImpl::num_chunk_data() const {
+SegmentSealedImpl::num_chunk() const {
     return 1;
 }

@@ -142,7 +142,7 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
 }

 SegmentSealedPtr
-CreateSealedSegment(SchemaPtr schema, int64_t chunk_size) {
+CreateSealedSegment(SchemaPtr schema, int64_t size_per_chunk) {
     return std::make_unique<SegmentSealedImpl>(schema);
 }

@@ -37,12 +37,12 @@ class SegmentSealedImpl : public SegmentSealed {

  public:
     int64_t
-    num_chunk_index_safe(FieldOffset field_offset) const override;
+    num_chunk_index(FieldOffset field_offset) const override;

     int64_t
-    num_chunk_data() const override;
+    num_chunk() const override;

-    // return chunk_size for each chunk, renaming against confusion
+    // return size_per_chunk for each chunk, renaming against confusion
     int64_t
     size_per_chunk() const override;

@@ -277,7 +277,7 @@ TEST(Sealed, LoadFieldData) {
         vec_info.index_params["metric_type"] = milvus::knowhere::Metric::L2;
         segment->LoadIndex(vec_info);
     }
-    ASSERT_EQ(segment->num_chunk_data(), 1);
+    ASSERT_EQ(segment->num_chunk(), 1);
     auto chunk_span1 = segment->chunk_data<int64_t>(FieldOffset(1), 0);
     auto chunk_span2 = segment->chunk_data<double>(FieldOffset(2), 0);
     auto ref1 = dataset.get_col<int64_t>(1);

@@ -19,38 +19,38 @@ TEST(Span, Naive) {
     using namespace milvus::query;
     using namespace milvus::segcore;
     int64_t N = 1000 * 1000;
-    constexpr int64_t chunk_size = 32 * 1024;
+    constexpr int64_t size_per_chunk = 32 * 1024;
     auto schema = std::make_shared<Schema>();
     schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
     schema->AddDebugField("age", DataType::FLOAT);
     schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, MetricType::METRIC_L2);

     auto dataset = DataGen(schema, N);
-    auto segment = CreateGrowingSegment(schema, chunk_size);
+    auto segment = CreateGrowingSegment(schema, size_per_chunk);
     segment->PreInsert(N);
     segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
     auto vec_ptr = dataset.get_col<uint8_t>(0);
     auto age_ptr = dataset.get_col<float>(1);
     auto float_ptr = dataset.get_col<float>(2);
     SegmentInternalInterface& interface = *segment;
-    auto num_chunk = interface.num_chunk_data();
-    ASSERT_EQ(num_chunk, upper_div(N, chunk_size));
+    auto num_chunk = interface.num_chunk();
+    ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk));
     auto row_count = interface.get_row_count();
     ASSERT_EQ(N, row_count);
     for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
         auto vec_span = interface.chunk_data<BinaryVector>(FieldOffset(0), chunk_id);
         auto age_span = interface.chunk_data<float>(FieldOffset(1), chunk_id);
         auto float_span = interface.chunk_data<FloatVector>(FieldOffset(2), chunk_id);
-        auto begin = chunk_id * chunk_size;
-        auto end = std::min((chunk_id + 1) * chunk_size, N);
-        auto chunk_size = end - begin;
-        for (int i = 0; i < chunk_size * 512 / 8; ++i) {
+        auto begin = chunk_id * size_per_chunk;
+        auto end = std::min((chunk_id + 1) * size_per_chunk, N);
+        auto size_per_chunk = end - begin;
+        for (int i = 0; i < size_per_chunk * 512 / 8; ++i) {
             ASSERT_EQ(vec_span.data()[i], vec_ptr[i + begin * 512 / 8]);
         }
-        for (int i = 0; i < chunk_size; ++i) {
+        for (int i = 0; i < size_per_chunk; ++i) {
             ASSERT_EQ(age_span.data()[i], age_ptr[i + begin]);
         }
-        for (int i = 0; i < chunk_size; ++i) {
+        for (int i = 0; i < size_per_chunk; ++i) {
             ASSERT_EQ(float_span.data()[i], float_ptr[i + begin * 32]);
         }
     }