Add sealedSegment (go&c) unittest, fix growingSegment field id check

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
pull/4973/head^2
FluorineDog 2021-01-22 17:40:58 +08:00 committed by yefu.chen
parent 067c30c422
commit bff208d78c
24 changed files with 233 additions and 230 deletions

View File

@ -319,14 +319,14 @@ Parser::ParseItemList(const Json& body) {
std::vector<ExprPtr> results;
if (body.is_object()) {
// only one item;
auto new_entry = ParseAnyNode(body);
results.emplace_back(std::move(new_entry));
auto new_expr = ParseAnyNode(body);
results.emplace_back(std::move(new_expr));
} else {
// item array
Assert(body.is_array());
for (auto& item : body) {
auto new_entry = ParseAnyNode(item);
results.emplace_back(std::move(new_entry));
auto new_expr = ParseAnyNode(item);
results.emplace_back(std::move(new_expr));
}
}
auto old_size = results.size();

View File

@ -24,7 +24,7 @@ SubQueryResult
BinarySearchBruteForceFast(MetricType metric_type,
int64_t dim,
const uint8_t* binary_chunk,
int64_t chunk_size,
int64_t size_per_chunk,
int64_t topk,
int64_t num_queries,
const uint8_t* query_data,
@ -34,7 +34,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
idx_t* result_labels = sub_result.get_labels();
int64_t code_size = dim / 8;
const idx_t block_size = chunk_size;
const idx_t block_size = size_per_chunk;
bool use_heap = true;
if (metric_type == faiss::METRIC_Jaccard || metric_type == faiss::METRIC_Tanimoto) {
@ -50,7 +50,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
result_labels + query_base_index * topk, D + query_base_index * topk};
binary_distence_knn_hc(metric_type, &res, query_data + query_base_index * code_size, binary_chunk,
chunk_size, code_size,
size_per_chunk, code_size,
/* ordered = */ true, bitset);
}
if (metric_type == faiss::METRIC_Tanimoto) {
@ -67,7 +67,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
}
// only matching ids will be chosen, so there is no need to use a heap
binary_distence_knn_mc(metric_type, query_data + s * code_size, binary_chunk, nn, chunk_size, topk,
binary_distence_knn_mc(metric_type, query_data + s * code_size, binary_chunk, nn, size_per_chunk, topk,
code_size, D + s * topk, result_labels + s * topk, bitset);
}
} else if (metric_type == faiss::METRIC_Hamming) {
@ -82,10 +82,10 @@ BinarySearchBruteForceFast(MetricType metric_type,
faiss::int_maxheap_array_t res = {size_t(nn), size_t(topk), result_labels + s * topk,
int_distances.data() + s * topk};
hammings_knn_hc(&res, query_data + s * code_size, binary_chunk, chunk_size, code_size,
hammings_knn_hc(&res, query_data + s * code_size, binary_chunk, size_per_chunk, code_size,
/* ordered = */ true, bitset);
} else {
hammings_knn_mc(query_data + s * code_size, binary_chunk, nn, chunk_size, topk, code_size,
hammings_knn_mc(query_data + s * code_size, binary_chunk, nn, size_per_chunk, topk, code_size,
int_distances.data() + s * topk, result_labels + s * topk, bitset);
}
}
@ -101,7 +101,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
SubQueryResult
FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
const float* chunk_data,
int64_t chunk_size,
int64_t size_per_chunk,
const faiss::BitsetView& bitset) {
auto metric_type = query_dataset.metric_type;
auto num_queries = query_dataset.num_queries;
@ -111,11 +111,11 @@ FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
if (metric_type == MetricType::METRIC_L2) {
faiss::float_maxheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_labels(), sub_qr.get_values()};
faiss::knn_L2sqr(query_dataset.query_data, chunk_data, dim, num_queries, chunk_size, &buf, bitset);
faiss::knn_L2sqr(query_dataset.query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset);
return sub_qr;
} else {
faiss::float_minheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_labels(), sub_qr.get_values()};
faiss::knn_inner_product(query_dataset.query_data, chunk_data, dim, num_queries, chunk_size, &buf, bitset);
faiss::knn_inner_product(query_dataset.query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset);
return sub_qr;
}
}
@ -123,10 +123,10 @@ FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
// Brute-force search over one chunk of binary vector data.
// Thin adapter: unpacks metric_type, dim, topk, num_queries and query_data
// from query_dataset and forwards them, together with the chunk pointer, the
// size argument and the bitset, to BinarySearchBruteForceFast. The result of
// that call is returned unchanged.
SubQueryResult
BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
const uint8_t* binary_chunk,
int64_t chunk_size,
int64_t size_per_chunk,
const faiss::BitsetView& bitset) {
// TODO: refactor the internal function
return BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.dim, binary_chunk, chunk_size,
return BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.dim, binary_chunk, size_per_chunk,
query_dataset.topk, query_dataset.num_queries, query_dataset.query_data, bitset);
}
} // namespace milvus::query

View File

@ -21,13 +21,13 @@ namespace milvus::query {
SubQueryResult
BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
const uint8_t* binary_chunk,
int64_t chunk_size,
int64_t size_per_chunk,
const faiss::BitsetView& bitset);
SubQueryResult
FloatSearchBruteForce(const dataset::FloatQueryDataset& query_dataset,
const float* chunk_data,
int64_t chunk_size,
int64_t size_per_chunk,
const faiss::BitsetView& bitset);
} // namespace milvus::query

View File

@ -72,46 +72,46 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
dataset::FloatQueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};
auto max_indexed_id = indexing_record.get_finished_ack();
const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset);
auto search_conf = indexing_entry.get_search_conf(topK);
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
auto search_conf = field_indexing.get_search_conf(topK);
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
auto chunk_size = indexing_entry.get_chunk_size();
auto indexing = indexing_entry.get_indexing(chunk_id);
auto size_per_chunk = field_indexing.get_size_per_chunk();
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size);
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
// convert chunk uid to segment uid
for (auto& x : sub_qr.mutable_labels()) {
if (x != -1) {
x += chunk_id * chunk_size;
x += chunk_id * size_per_chunk;
}
}
final_qr.merge(sub_qr);
}
auto vec_ptr = record.get_entity<FloatVector>(vecfield_offset);
auto vec_ptr = record.get_field_data<FloatVector>(vecfield_offset);
// step 4: brute force search where small indexing is unavailable
auto vec_chunk_size = vec_ptr->get_chunk_size();
Assert(vec_chunk_size == indexing_entry.get_chunk_size());
auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
Assert(vec_size_per_chunk == field_indexing.get_size_per_chunk());
auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
auto& chunk = vec_ptr->get_chunk(chunk_id);
auto element_begin = chunk_id * vec_chunk_size;
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
auto chunk_size = element_end - element_begin;
auto element_begin = chunk_id * vec_size_per_chunk;
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
auto size_per_chunk = element_end - element_begin;
auto sub_view = BitsetSubView(bitset, element_begin, chunk_size);
auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), chunk_size, sub_view);
auto sub_view = BitsetSubView(bitset, element_begin, size_per_chunk);
auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), size_per_chunk, sub_view);
// convert chunk uid to segment uid
for (auto& x : sub_qr.mutable_labels()) {
if (x != -1) {
x += chunk_id * vec_chunk_size;
x += chunk_id * vec_size_per_chunk;
}
}
final_qr.merge(sub_qr);
@ -160,18 +160,18 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
// step 3: small indexing search
query::dataset::BinaryQueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};
auto vec_ptr = record.get_entity<BinaryVector>(vecfield_offset);
auto vec_ptr = record.get_field_data<BinaryVector>(vecfield_offset);
auto max_indexed_id = 0;
// step 4: brute force search where small indexing is unavailable
auto vec_chunk_size = vec_ptr->get_chunk_size();
auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
SubQueryResult final_result(num_queries, topK, metric_type);
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
auto& chunk = vec_ptr->get_chunk(chunk_id);
auto element_begin = chunk_id * vec_chunk_size;
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
auto element_begin = chunk_id * vec_size_per_chunk;
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
auto nsize = element_end - element_begin;
auto sub_view = BitsetSubView(bitset, element_begin, nsize);
@ -180,7 +180,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
// convert chunk uid to segment uid
for (auto& x : sub_result.mutable_labels()) {
if (x != -1) {
x += chunk_id * vec_chunk_size;
x += chunk_id * vec_size_per_chunk;
}
}
final_result.merge(sub_result);

View File

@ -62,16 +62,16 @@ SearchOnSealed(const Schema& schema,
auto dim = field.get_dim();
Assert(record.is_ready(field_offset));
auto indexing_entry = record.get_entry(field_offset);
Assert(indexing_entry->metric_type_ == query_info.metric_type_);
auto field_indexing = record.get_field_indexing(field_offset);
Assert(field_indexing->metric_type_ == query_info.metric_type_);
auto final = [&] {
auto ds = knowhere::GenDataset(num_queries, dim, query_data);
auto conf = query_info.search_params_;
conf[milvus::knowhere::meta::TOPK] = query_info.topK_;
conf[milvus::knowhere::Metric::TYPE] = MetricTypeToName(indexing_entry->metric_type_);
return indexing_entry->indexing_->Query(ds, conf, bitset);
conf[milvus::knowhere::Metric::TYPE] = MetricTypeToName(field_indexing->metric_type_);
return field_indexing->indexing_->Query(ds, conf, bitset);
}();
auto ids = final->Get<idx_t*>(knowhere::meta::IDS);

View File

@ -120,41 +120,33 @@ template <typename T, typename IndexFunc, typename ElementFunc>
auto
ExecExprVisitor::ExecRangeVisitorImpl(RangeExprImpl<T>& expr, IndexFunc index_func, ElementFunc element_func)
-> RetType {
auto data_type = expr.data_type_;
auto& schema = segment_.get_schema();
auto field_offset = expr.field_offset_;
auto& field_meta = schema[field_offset];
// auto vec_ptr = records.get_entity<T>(field_offset);
// auto& vec = *vec_ptr;
// const segcore::ScalarIndexingEntry<T>& entry = indexing_record.get_scalar_entry<T>(field_offset);
// RetType results(vec.num_chunk());
auto indexing_barrier = segment_.num_chunk_index_safe(field_offset);
auto chunk_size = segment_.size_per_chunk();
auto num_chunk = upper_div(row_count_, chunk_size);
auto indexing_barrier = segment_.num_chunk_index(field_offset);
auto size_per_chunk = segment_.size_per_chunk();
auto num_chunk = upper_div(row_count_, size_per_chunk);
RetType results;
using Index = knowhere::scalar::StructuredIndex<T>;
for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
// auto& result = results[chunk_id];
const Index& indexing = segment_.chunk_scalar_index<T>(field_offset, chunk_id);
// NOTE: knowhere is not const-ready
// This is a dirty workaround
auto data = index_func(const_cast<Index*>(&indexing));
Assert(data->size() == chunk_size);
Assert(data->size() == size_per_chunk);
results.emplace_back(std::move(*data));
}
for (auto chunk_id = indexing_barrier; chunk_id < num_chunk; ++chunk_id) {
boost::dynamic_bitset<> result(chunk_size);
// auto& result = results[chunk_id];
result.resize(chunk_size);
boost::dynamic_bitset<> result(size_per_chunk);
result.resize(size_per_chunk);
auto chunk = segment_.chunk_data<T>(field_offset, chunk_id);
const T* data = chunk.data();
for (int index = 0; index < chunk_size; ++index) {
for (int index = 0; index < size_per_chunk; ++index) {
result[index] = element_func(data[index]);
}
Assert(result.size() == chunk_size);
Assert(result.size() == size_per_chunk);
results.emplace_back(std::move(result));
}
return results;
@ -282,27 +274,19 @@ template <typename T>
auto
ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> RetType {
auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
// auto& records = segment_.get_insert_record();
auto data_type = expr.data_type_;
auto& schema = segment_.get_schema();
auto field_offset = expr_raw.field_offset_;
auto& field_meta = schema[field_offset];
// auto vec_ptr = records.get_entity<T>(field_offset);
// auto& vec = *vec_ptr;
auto chunk_size = segment_.size_per_chunk();
auto num_chunk = upper_div(row_count_, chunk_size);
auto size_per_chunk = segment_.size_per_chunk();
auto num_chunk = upper_div(row_count_, size_per_chunk);
RetType bitsets;
// auto N = records.ack_responder_.GetAck();
// TODO: enable index for term
for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
Span<T> chunk = segment_.chunk_data<T>(field_offset, chunk_id);
auto size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * chunk_size : chunk_size;
auto size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
boost::dynamic_bitset<> bitset(chunk_size);
boost::dynamic_bitset<> bitset(size_per_chunk);
for (int i = 0; i < size; ++i) {
auto value = chunk.data()[i];
bool is_in = std::binary_search(expr.terms_.begin(), expr.terms_.end(), value);

View File

@ -6,7 +6,7 @@ set(SEGCORE_FILES
SegmentGrowing.cpp
SegmentGrowingImpl.cpp
SegmentSealedImpl.cpp
IndexingEntry.cpp
FieldIndexing.cpp
InsertRecord.cpp
Reduce.cpp
plan_c.cpp

View File

@ -72,7 +72,7 @@ class ThreadSafeVector {
class VectorBase {
public:
explicit VectorBase(int64_t chunk_size) : chunk_size_(chunk_size) {
explicit VectorBase(int64_t size_per_chunk) : size_per_chunk_(size_per_chunk) {
}
virtual ~VectorBase() = default;
@ -86,12 +86,12 @@ class VectorBase {
get_span_base(int64_t chunk_id) const = 0;
int64_t
get_chunk_size() const {
return chunk_size_;
get_size_per_chunk() const {
return size_per_chunk_;
}
protected:
const int64_t chunk_size_;
const int64_t size_per_chunk_;
};
template <typename Type, bool is_scalar = false>
@ -111,27 +111,28 @@ class ConcurrentVectorImpl : public VectorBase {
std::conditional_t<is_scalar, Type, std::conditional_t<std::is_same_v<Type, float>, FloatVector, BinaryVector>>;
public:
explicit ConcurrentVectorImpl(ssize_t dim, int64_t chunk_size) : VectorBase(chunk_size), Dim(is_scalar ? 1 : dim) {
explicit ConcurrentVectorImpl(ssize_t dim, int64_t size_per_chunk)
: VectorBase(size_per_chunk), Dim(is_scalar ? 1 : dim) {
Assert(is_scalar ? dim == 1 : dim != 1);
}
// Makes sure the chunk list can hold at least element_count elements.
// The required number of chunks is upper_div(element_count, <per-chunk size>)
// (presumably a rounding-up division - confirm against upper_div), and each
// chunk is requested with Dim * <per-chunk size> values of Type.
// NOTE(review): emplace_to_at_least is assumed to only ever add chunks, never
// shrink - verify against ThreadSafeVector.
void
grow_to_at_least(int64_t element_count) override {
auto chunk_count = upper_div(element_count, chunk_size_);
chunks_.emplace_to_at_least(chunk_count, Dim * chunk_size_);
auto chunk_count = upper_div(element_count, size_per_chunk_);
chunks_.emplace_to_at_least(chunk_count, Dim * size_per_chunk_);
}
// Returns a Span view over the storage of chunk chunk_id.
// The branch is selected at compile time:
//   - scalar columns: Span(data, <per-chunk size>)
//   - non-scalar instantiations with Type int64_t or int: not supported,
//     PanicInfo("unimplemented") is raised
//   - other vector columns: Span(data, <per-chunk size>, Dim)
// The span length passed here is always the configured per-chunk size, not
// the number of elements that have actually been written into the chunk.
Span<TraitType>
get_span(int64_t chunk_id) const {
auto& chunk = get_chunk(chunk_id);
if constexpr (is_scalar) {
return Span<TraitType>(chunk.data(), chunk_size_);
return Span<TraitType>(chunk.data(), size_per_chunk_);
} else if constexpr (std::is_same_v<Type, int64_t> || std::is_same_v<Type, int>) {
// only for testing
PanicInfo("unimplemented");
} else {
// the trait's embedded element type must match the stored element type
static_assert(std::is_same_v<typename TraitType::embedded_type, Type>);
return Span<TraitType>(chunk.data(), chunk_size_, Dim);
return Span<TraitType>(chunk.data(), size_per_chunk_, Dim);
}
}
@ -151,28 +152,28 @@ class ConcurrentVectorImpl : public VectorBase {
return;
}
this->grow_to_at_least(element_offset + element_count);
auto chunk_id = element_offset / chunk_size_;
auto chunk_offset = element_offset % chunk_size_;
auto chunk_id = element_offset / size_per_chunk_;
auto chunk_offset = element_offset % size_per_chunk_;
ssize_t source_offset = 0;
// first partition:
if (chunk_offset + element_count <= chunk_size_) {
if (chunk_offset + element_count <= size_per_chunk_) {
// only first
fill_chunk(chunk_id, chunk_offset, element_count, source, source_offset);
return;
}
auto first_size = chunk_size_ - chunk_offset;
auto first_size = size_per_chunk_ - chunk_offset;
fill_chunk(chunk_id, chunk_offset, first_size, source, source_offset);
source_offset += chunk_size_ - chunk_offset;
source_offset += size_per_chunk_ - chunk_offset;
element_count -= first_size;
++chunk_id;
// the middle
while (element_count >= chunk_size_) {
fill_chunk(chunk_id, 0, chunk_size_, source, source_offset);
source_offset += chunk_size_;
element_count -= chunk_size_;
while (element_count >= size_per_chunk_) {
fill_chunk(chunk_id, 0, size_per_chunk_, source, source_offset);
source_offset += size_per_chunk_;
element_count -= size_per_chunk_;
++chunk_id;
}
@ -190,16 +191,16 @@ class ConcurrentVectorImpl : public VectorBase {
// just for fun, don't use it directly
// Returns a pointer to the first Type value of element element_index.
// The index is split into a chunk id (quotient) and an offset inside that
// chunk (remainder) using the per-chunk size; the offset is then scaled by
// Dim because every element occupies Dim consecutive values of Type.
// No bounds check is performed here on element_index.
const Type*
get_element(ssize_t element_index) const {
auto chunk_id = element_index / chunk_size_;
auto chunk_offset = element_index % chunk_size_;
auto chunk_id = element_index / size_per_chunk_;
auto chunk_offset = element_index % size_per_chunk_;
return get_chunk(chunk_id).data() + chunk_offset * Dim;
}
// Element access for columns with exactly one value per element.
// Asserts Dim == 1, then locates the element by splitting element_index into
// a chunk id (quotient) and an in-chunk offset (remainder) using the
// per-chunk size. Returns a const reference into the chunk's storage.
const Type&
operator[](ssize_t element_index) const {
Assert(Dim == 1);
auto chunk_id = element_index / chunk_size_;
auto chunk_offset = element_index % chunk_size_;
auto chunk_id = element_index / size_per_chunk_;
auto chunk_offset = element_index % size_per_chunk_;
return get_chunk(chunk_id)[chunk_offset];
}
@ -232,24 +233,24 @@ template <typename Type>
// Scalar column: a ConcurrentVectorImpl instantiated with is_scalar = true.
// Type is restricted to fundamental types by the static_assert below, and
// the base class is always constructed with dim = 1.
class ConcurrentVector : public ConcurrentVectorImpl<Type, true> {
public:
static_assert(std::is_fundamental_v<Type>);
// The single argument is the per-chunk size forwarded to the base class.
explicit ConcurrentVector(int64_t chunk_size)
: ConcurrentVectorImpl<Type, true>::ConcurrentVectorImpl(1, chunk_size) {
explicit ConcurrentVector(int64_t size_per_chunk)
: ConcurrentVectorImpl<Type, true>::ConcurrentVectorImpl(1, size_per_chunk) {
}
};
// Float-vector column: stores float values through
// ConcurrentVectorImpl<float, false>, with dim floats per element.
template <>
class ConcurrentVector<FloatVector> : public ConcurrentVectorImpl<float, false> {
public:
// dim and the per-chunk size are forwarded unchanged to the base class.
ConcurrentVector(int64_t dim, int64_t chunk_size)
: ConcurrentVectorImpl<float, false>::ConcurrentVectorImpl(dim, chunk_size) {
ConcurrentVector(int64_t dim, int64_t size_per_chunk)
: ConcurrentVectorImpl<float, false>::ConcurrentVectorImpl(dim, size_per_chunk) {
}
};
template <>
class ConcurrentVector<BinaryVector> : public ConcurrentVectorImpl<uint8_t, false> {
public:
explicit ConcurrentVector(int64_t dim, int64_t chunk_size)
: binary_dim_(dim), ConcurrentVectorImpl(dim / 8, chunk_size) {
explicit ConcurrentVector(int64_t dim, int64_t size_per_chunk)
: binary_dim_(dim), ConcurrentVectorImpl(dim / 8, size_per_chunk) {
Assert(dim % 8 == 0);
}

View File

@ -29,9 +29,11 @@ struct DeletedRecord {
std::shared_ptr<TmpBitmap>
clone(int64_t capacity);
};
static constexpr int64_t deprecated_chunk_size = 32 * 1024;
static constexpr int64_t deprecated_size_per_chunk = 32 * 1024;
// Default constructor.
// Creates the initial TmpBitmap held by lru_, constructs timestamps_ and
// uids_ with the deprecated per-chunk size constant declared just above
// (32 * 1024), and then gives the initial TmpBitmap a ConcurrentBitset
// constructed with 0.
DeletedRecord()
: lru_(std::make_shared<TmpBitmap>()), timestamps_(deprecated_chunk_size), uids_(deprecated_chunk_size) {
: lru_(std::make_shared<TmpBitmap>()),
timestamps_(deprecated_size_per_chunk),
uids_(deprecated_size_per_chunk) {
lru_->bitmap_ptr = std::make_shared<faiss::ConcurrentBitset>(0);
}

View File

@ -9,14 +9,14 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "segcore/IndexingEntry.h"
#include "segcore/FieldIndexing.h"
#include <thread>
#include <knowhere/index/vector_index/IndexIVF.h>
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
namespace milvus::segcore {
void
VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
assert(field_meta_.get_data_type() == DataType::VECTOR_FLOAT);
auto dim = field_meta_.get_dim();
@ -30,7 +30,7 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
const auto& chunk = source->get_chunk(chunk_id);
// build index for chunk
auto indexing = std::make_unique<knowhere::IVF>();
auto dataset = knowhere::GenDataset(source->get_chunk_size(), dim, chunk.data());
auto dataset = knowhere::GenDataset(source->get_size_per_chunk(), dim, chunk.data());
indexing->Train(dataset, conf);
indexing->AddWithoutIds(dataset, conf);
data_[chunk_id] = std::move(indexing);
@ -38,7 +38,7 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
}
knowhere::Config
VecIndexingEntry::get_build_conf() const {
VectorFieldIndexing::get_build_conf() const {
return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
{knowhere::IndexParams::nlist, 100},
{knowhere::IndexParams::nprobe, 4},
@ -47,7 +47,7 @@ VecIndexingEntry::get_build_conf() const {
}
knowhere::Config
VecIndexingEntry::get_search_conf(int top_K) const {
VectorFieldIndexing::get_search_conf(int top_K) const {
return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
{knowhere::meta::TOPK, top_K},
{knowhere::IndexParams::nlist, 100},
@ -71,8 +71,8 @@ IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record)
lck.unlock();
// std::thread([this, old_ack, chunk_ack, &record] {
for (auto& [field_offset, entry] : entries_) {
auto vec_base = record.get_base_entity(field_offset);
for (auto& [field_offset, entry] : field_indexings_) {
auto vec_base = record.get_field_data_base(field_offset);
entry->BuildIndexRange(old_ack, chunk_ack, vec_base);
}
finished_ack_.AddSegment(old_ack, chunk_ack);
@ -81,7 +81,7 @@ IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record)
template <typename T>
void
ScalarIndexingEntry<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
auto source = dynamic_cast<const ConcurrentVector<T>*>(vec_base);
Assert(source);
auto num_chunk = source->num_chunk();
@ -92,16 +92,16 @@ ScalarIndexingEntry<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const
// build index for chunk
// TODO
auto indexing = std::make_unique<knowhere::scalar::StructuredIndexSort<T>>();
indexing->Build(vec_base->get_chunk_size(), chunk.data());
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
data_[chunk_id] = std::move(indexing);
}
}
std::unique_ptr<IndexingEntry>
CreateIndex(const FieldMeta& field_meta, int64_t chunk_size) {
std::unique_ptr<FieldIndexing>
CreateIndex(const FieldMeta& field_meta, int64_t size_per_chunk) {
if (field_meta.is_vector()) {
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
return std::make_unique<VecIndexingEntry>(field_meta, chunk_size);
return std::make_unique<VectorFieldIndexing>(field_meta, size_per_chunk);
} else {
// TODO
PanicInfo("unsupported");
@ -109,19 +109,19 @@ CreateIndex(const FieldMeta& field_meta, int64_t chunk_size) {
}
switch (field_meta.get_data_type()) {
case DataType::BOOL:
return std::make_unique<ScalarIndexingEntry<bool>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<bool>>(field_meta, size_per_chunk);
case DataType::INT8:
return std::make_unique<ScalarIndexingEntry<int8_t>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<int8_t>>(field_meta, size_per_chunk);
case DataType::INT16:
return std::make_unique<ScalarIndexingEntry<int16_t>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<int16_t>>(field_meta, size_per_chunk);
case DataType::INT32:
return std::make_unique<ScalarIndexingEntry<int32_t>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<int32_t>>(field_meta, size_per_chunk);
case DataType::INT64:
return std::make_unique<ScalarIndexingEntry<int64_t>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<int64_t>>(field_meta, size_per_chunk);
case DataType::FLOAT:
return std::make_unique<ScalarIndexingEntry<float>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<float>>(field_meta, size_per_chunk);
case DataType::DOUBLE:
return std::make_unique<ScalarIndexingEntry<double>>(field_meta, chunk_size);
return std::make_unique<ScalarFieldIndexing<double>>(field_meta, size_per_chunk);
default:
PanicInfo("unsupported");
}

View File

@ -24,14 +24,14 @@ namespace milvus::segcore {
// this should be concurrent
// All concurrent
class IndexingEntry {
class FieldIndexing {
public:
explicit IndexingEntry(const FieldMeta& field_meta, int64_t chunk_size)
: field_meta_(field_meta), chunk_size_(chunk_size) {
explicit FieldIndexing(const FieldMeta& field_meta, int64_t size_per_chunk)
: field_meta_(field_meta), size_per_chunk_(size_per_chunk) {
}
IndexingEntry(const IndexingEntry&) = delete;
IndexingEntry&
operator=(const IndexingEntry&) = delete;
FieldIndexing(const FieldIndexing&) = delete;
FieldIndexing&
operator=(const FieldIndexing&) = delete;
// Do this in parallel
virtual void
@ -43,29 +43,29 @@ class IndexingEntry {
}
int64_t
get_chunk_size() const {
return chunk_size_;
get_size_per_chunk() const {
return size_per_chunk_;
}
virtual knowhere::Index*
get_indexing(int64_t chunk_id) const = 0;
get_chunk_indexing(int64_t chunk_id) const = 0;
protected:
// additional info
const FieldMeta& field_meta_;
const int64_t chunk_size_;
const int64_t size_per_chunk_;
};
template <typename T>
class ScalarIndexingEntry : public IndexingEntry {
class ScalarFieldIndexing : public FieldIndexing {
public:
using IndexingEntry::IndexingEntry;
using FieldIndexing::FieldIndexing;
void
BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;
// concurrent
// Returns a non-owning pointer to the scalar index stored for chunk_id;
// ownership stays with the unique_ptr held in data_.
// Asserts that this field is not a vector field.
// NOTE(review): presumably only chunk ids already filled by BuildIndexRange
// are valid here - confirm with callers.
knowhere::scalar::StructuredIndex<T>*
get_indexing(int64_t chunk_id) const override {
get_chunk_indexing(int64_t chunk_id) const override {
Assert(!field_meta_.is_vector());
return data_.at(chunk_id).get();
}
@ -74,16 +74,16 @@ class ScalarIndexingEntry : public IndexingEntry {
tbb::concurrent_vector<std::unique_ptr<knowhere::scalar::StructuredIndex<T>>> data_;
};
class VecIndexingEntry : public IndexingEntry {
class VectorFieldIndexing : public FieldIndexing {
public:
using IndexingEntry::IndexingEntry;
using FieldIndexing::FieldIndexing;
void
BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) override;
// concurrent
// Returns a non-owning pointer to the vector index stored for chunk_id;
// ownership stays with the unique_ptr held in data_.
// Asserts that this field is a vector field.
// NOTE(review): presumably only chunk ids already filled by BuildIndexRange
// are valid here - confirm with callers.
knowhere::VecIndex*
get_indexing(int64_t chunk_id) const override {
get_chunk_indexing(int64_t chunk_id) const override {
Assert(field_meta_.is_vector());
return data_.at(chunk_id).get();
}
@ -97,12 +97,13 @@ class VecIndexingEntry : public IndexingEntry {
tbb::concurrent_vector<std::unique_ptr<knowhere::VecIndex>> data_;
};
std::unique_ptr<IndexingEntry>
CreateIndex(const FieldMeta& field_meta, int64_t chunk_size);
std::unique_ptr<FieldIndexing>
CreateIndex(const FieldMeta& field_meta, int64_t size_per_chunk);
class IndexingRecord {
public:
// Stores the schema and the per-chunk size, then calls Initialize(), which
// creates one indexing object (via CreateIndex) for every schema field whose
// data type is not VECTOR_BINARY.
explicit IndexingRecord(const Schema& schema, int64_t chunk_size) : schema_(schema), chunk_size_(chunk_size) {
explicit IndexingRecord(const Schema& schema, int64_t size_per_chunk)
: schema_(schema), size_per_chunk_(size_per_chunk) {
Initialize();
}
@ -111,7 +112,7 @@ class IndexingRecord {
int offset = 0;
for (auto& field : schema_) {
if (field.get_data_type() != DataType::VECTOR_BINARY) {
entries_.try_emplace(FieldOffset(offset), CreateIndex(field, chunk_size_));
field_indexings_.try_emplace(FieldOffset(offset), CreateIndex(field, size_per_chunk_));
}
++offset;
}
@ -128,24 +129,24 @@ class IndexingRecord {
return finished_ack_.GetAck();
}
// Returns the indexing object registered for field_offset.
// Initialize() registers nothing for VECTOR_BINARY fields, so such an offset
// has no entry: the assert below fires, and if assert is the standard macro
// compiled out under NDEBUG, std::map::at still throws std::out_of_range.
const IndexingEntry&
get_entry(FieldOffset field_offset) const {
assert(entries_.count(field_offset));
return *entries_.at(field_offset);
const FieldIndexing&
get_field_indexing(FieldOffset field_offset) const {
assert(field_indexings_.count(field_offset));
return *field_indexings_.at(field_offset);
}
// Returns the indexing object for field_offset, downcast to the vector
// indexing type. The dynamic_cast yields nullptr when the stored object is
// of a different type; AssertInfo(ptr, "invalid indexing") checks for that
// before the pointer is dereferenced.
const VecIndexingEntry&
get_vec_entry(FieldOffset field_offset) const {
auto& entry = get_entry(field_offset);
auto ptr = dynamic_cast<const VecIndexingEntry*>(&entry);
const VectorFieldIndexing&
get_vec_field_indexing(FieldOffset field_offset) const {
auto& field_indexing = get_field_indexing(field_offset);
auto ptr = dynamic_cast<const VectorFieldIndexing*>(&field_indexing);
AssertInfo(ptr, "invalid indexing");
return *ptr;
}
// Returns the indexing object for field_offset, downcast to the scalar
// indexing type for element type T. The dynamic_cast yields nullptr when the
// stored object is not the scalar indexing type for exactly this T;
// AssertInfo(ptr, "invalid indexing") checks for that before the pointer is
// dereferenced.
template <typename T>
auto
get_scalar_entry(FieldOffset field_offset) const -> const ScalarIndexingEntry<T>& {
auto& entry = get_entry(field_offset);
auto ptr = dynamic_cast<const ScalarIndexingEntry<T>*>(&entry);
get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing<T>& {
auto& entry = get_field_indexing(field_offset);
auto ptr = dynamic_cast<const ScalarFieldIndexing<T>*>(&entry);
AssertInfo(ptr, "invalid indexing");
return *ptr;
}
@ -159,11 +160,11 @@ class IndexingRecord {
// std::atomic<int64_t> finished_ack_ = 0;
AckResponder finished_ack_;
std::mutex mutex_;
int64_t chunk_size_;
int64_t size_per_chunk_;
private:
// field_offset => indexing
std::map<FieldOffset, std::unique_ptr<IndexingEntry>> entries_;
std::map<FieldOffset, std::unique_ptr<FieldIndexing>> field_indexings_;
};
} // namespace milvus::segcore

View File

@ -13,14 +13,14 @@
namespace milvus::segcore {
InsertRecord::InsertRecord(const Schema& schema, int64_t chunk_size) : uids_(1), timestamps_(1) {
InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk) : uids_(1), timestamps_(1) {
for (auto& field : schema) {
if (field.is_vector()) {
if (field.get_data_type() == DataType::VECTOR_FLOAT) {
this->insert_entity<FloatVector>(field.get_dim(), chunk_size);
this->append_field_data<FloatVector>(field.get_dim(), size_per_chunk);
continue;
} else if (field.get_data_type() == DataType::VECTOR_BINARY) {
this->insert_entity<BinaryVector>(field.get_dim(), chunk_size);
this->append_field_data<BinaryVector>(field.get_dim(), size_per_chunk);
continue;
} else {
PanicInfo("unsupported");
@ -28,34 +28,34 @@ InsertRecord::InsertRecord(const Schema& schema, int64_t chunk_size) : uids_(1),
}
switch (field.get_data_type()) {
case DataType::BOOL: {
this->insert_entity<bool>(chunk_size);
this->append_field_data<bool>(size_per_chunk);
break;
}
case DataType::INT8: {
this->insert_entity<int8_t>(chunk_size);
this->append_field_data<int8_t>(size_per_chunk);
break;
}
case DataType::INT16: {
this->insert_entity<int16_t>(chunk_size);
this->append_field_data<int16_t>(size_per_chunk);
break;
}
case DataType::INT32: {
this->insert_entity<int32_t>(chunk_size);
this->append_field_data<int32_t>(size_per_chunk);
break;
}
case DataType::INT64: {
this->insert_entity<int64_t>(chunk_size);
this->append_field_data<int64_t>(size_per_chunk);
break;
}
case DataType::FLOAT: {
this->insert_entity<float>(chunk_size);
this->append_field_data<float>(size_per_chunk);
break;
}
case DataType::DOUBLE: {
this->insert_entity<double>(chunk_size);
this->append_field_data<double>(size_per_chunk);
break;
}
default: {

View File

@ -24,47 +24,53 @@ struct InsertRecord {
ConcurrentVector<Timestamp> timestamps_;
ConcurrentVector<idx_t> uids_;
explicit InsertRecord(const Schema& schema, int64_t chunk_size);
explicit InsertRecord(const Schema& schema, int64_t size_per_chunk);
// get field data without knowing the type
// return VectorBase type
// The column is looked up by position, field_offset.get(), in the vector of
// columns. std::vector::operator[] does no bounds check, so field_offset must
// refer to an existing column. The returned pointer is non-owning.
auto
get_base_entity(FieldOffset field_offset) const {
auto ptr = entity_vec_[field_offset.get()].get();
get_field_data_base(FieldOffset field_offset) const {
auto ptr = field_datas_[field_offset.get()].get();
return ptr;
}
// get field data in given type, const version
// Fetches the type-erased column and downcasts it to
// const ConcurrentVector<Type>*. The dynamic_cast yields nullptr when the
// column was created with a different Type; Assert(ptr) checks for that.
// The returned pointer is non-owning.
template <typename Type>
auto
get_entity(FieldOffset field_offset) const {
auto base_ptr = get_base_entity(field_offset);
get_field_data(FieldOffset field_offset) const {
auto base_ptr = get_field_data_base(field_offset);
auto ptr = dynamic_cast<const ConcurrentVector<Type>*>(base_ptr);
Assert(ptr);
return ptr;
}
// Typed accessor, non-const version: fetches the type-erased column at
// `field_offset` and downcasts it to a mutable `ConcurrentVector<Type>*`.
// (diff listing: each old line is followed by its renamed replacement)
template <typename Type>
auto
get_entity(FieldOffset field_offset) {
auto base_ptr = get_base_entity(field_offset);
get_field_data(FieldOffset field_offset) {
auto base_ptr = get_field_data_base(field_offset);
// dynamic_cast yields nullptr when the column is not a ConcurrentVector<Type>
auto ptr = dynamic_cast<ConcurrentVector<Type>*>(base_ptr);
// guards against requesting a column with the wrong Type
Assert(ptr);
return ptr;
}
// Appends a new scalar column: constructs a ConcurrentVector<Type> from
// `size_per_chunk` and pushes it to the back of the column list.
// Type must be a fundamental type; this is enforced at compile time.
// NOTE(review): the accessors index the list by field offset, so columns are
// presumably appended in schema field order — confirm against the constructor.
// (diff listing: each old line is followed by its renamed replacement)
template <typename Type>
void
insert_entity(int64_t chunk_size) {
append_field_data(int64_t size_per_chunk) {
static_assert(std::is_fundamental_v<Type>);
entity_vec_.emplace_back(std::make_unique<ConcurrentVector<Type>>(chunk_size));
field_datas_.emplace_back(std::make_unique<ConcurrentVector<Type>>(size_per_chunk));
}
// Appends a new vector column: constructs a ConcurrentVector<VectorType> from
// `dim` and `size_per_chunk` and pushes it to the back of the column list.
// VectorType must derive from VectorTrait; this is enforced at compile time.
// NOTE(review): the accessors index the list by field offset, so columns are
// presumably appended in schema field order — confirm against the constructor.
// (diff listing: each old line is followed by its renamed replacement)
template <typename VectorType>
void
insert_entity(int64_t dim, int64_t chunk_size) {
append_field_data(int64_t dim, int64_t size_per_chunk) {
static_assert(std::is_base_of_v<VectorTrait, VectorType>);
entity_vec_.emplace_back(std::make_unique<ConcurrentVector<VectorType>>(dim, chunk_size));
field_datas_.emplace_back(std::make_unique<ConcurrentVector<VectorType>>(dim, size_per_chunk));
}
private:
std::vector<std::unique_ptr<VectorBase>> entity_vec_;
std::vector<std::unique_ptr<VectorBase>> field_datas_;
};
} // namespace milvus::segcore

View File

@ -31,30 +31,30 @@ using SealedIndexingEntryPtr = std::unique_ptr<SealedIndexingEntry>;
// Registry mapping a field offset to its SealedIndexingEntry (index handle plus
// metric type). All access to the map goes through `mutex_`: the writer takes
// a unique_lock, the readers take a shared_lock.
// (diff listing: each old line is followed by its renamed replacement, so
// statements such as the map assignment and the returns appear twice)
struct SealedIndexingRecord {
// Stores `indexing` and `metric_type` as the entry for `field_offset`.
// The entry is built before the lock is taken; only the map write is locked.
// Assignment via operator[] replaces any entry already stored for that offset.
void
add_entry(FieldOffset field_offset, MetricType metric_type, knowhere::VecIndexPtr indexing) {
append_field_indexing(FieldOffset field_offset, MetricType metric_type, knowhere::VecIndexPtr indexing) {
auto ptr = std::make_unique<SealedIndexingEntry>();
ptr->indexing_ = indexing;
ptr->metric_type_ = metric_type;
std::unique_lock lck(mutex_);
entries_[field_offset] = std::move(ptr);
field_indexings_[field_offset] = std::move(ptr);
}
// Returns a non-owning pointer to the entry for `field_offset`; asserts that
// the entry exists. The pointer is used after the shared lock is released.
// NOTE(review): it would dangle if the entry were later replaced — confirm
// callers never re-register the same offset.
const SealedIndexingEntry*
get_entry(FieldOffset field_offset) const {
get_field_indexing(FieldOffset field_offset) const {
std::shared_lock lck(mutex_);
AssertInfo(entries_.count(field_offset), "field_offset not found");
return entries_.at(field_offset).get();
AssertInfo(field_indexings_.count(field_offset), "field_offset not found");
return field_indexings_.at(field_offset).get();
}
// True when an entry has been registered for `field_offset`.
bool
is_ready(FieldOffset field_offset) const {
std::shared_lock lck(mutex_);
return entries_.count(field_offset);
return field_indexings_.count(field_offset);
}
private:
// field_offset -> SealedIndexingEntry
std::map<FieldOffset, SealedIndexingEntryPtr> entries_;
std::map<FieldOffset, SealedIndexingEntryPtr> field_indexings_;
// mutable so the const readers above can take a shared lock
mutable std::shared_mutex mutex_;
};
} // namespace milvus::segcore

View File

@ -20,8 +20,8 @@ TestABI() {
}
// Factory for growing segments: constructs a SegmentGrowingImpl from `schema`
// and `size_per_chunk` and returns it through the SegmentGrowing interface.
// (diff listing: each old line is followed by its renamed replacement)
std::unique_ptr<SegmentGrowing>
CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size) {
auto segment = std::make_unique<SegmentGrowingImpl>(schema, chunk_size);
CreateGrowingSegment(SchemaPtr schema, int64_t size_per_chunk) {
auto segment = std::make_unique<SegmentGrowingImpl>(schema, size_per_chunk);
return segment;
}

View File

@ -80,7 +80,7 @@ class SegmentGrowing : public SegmentInternalInterface {
using SegmentGrowingPtr = std::unique_ptr<SegmentGrowing>;
SegmentGrowingPtr
CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size = 32 * 1024);
CreateGrowingSegment(SchemaPtr schema, int64_t size_per_chunk = 32 * 1024);
} // namespace segcore
} // namespace milvus

View File

@ -170,7 +170,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
record_.uids_.set_data(reserved_begin, uids.data(), size);
for (int fid = 0; fid < schema_->size(); ++fid) {
auto field_offset = FieldOffset(fid);
record_.get_base_entity(field_offset)->set_data_raw(reserved_begin, entities[fid].data(), size);
record_.get_field_data_base(field_offset)->set_data_raw(reserved_begin, entities[fid].data(), size);
}
for (int i = 0; i < uids.size(); ++i) {
@ -180,7 +180,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
}
record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / chunk_size_, record_);
indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / size_per_chunk_, record_);
return Status::OK();
}
@ -231,9 +231,9 @@ SegmentGrowingImpl::Close() {
// Estimates the segment's memory footprint in bytes from the reserved insert
// and delete counts; it does not measure actual allocations.
// (diff listing: each old line is followed by its renamed replacement, which is
// why ins_n and del_n appear to be declared twice)
int64_t
SegmentGrowingImpl::GetMemoryUsageInBytes() const {
int64_t total_bytes = 0;
// reserved insert rows passed through upper_align with the per-chunk size
// NOTE(review): presumably rounds up to a whole number of chunks — confirm
int64_t ins_n = upper_align(record_.reserved, chunk_size_);
int64_t ins_n = upper_align(record_.reserved, size_per_chunk_);
// per-row cost: schema row size plus 16 + 1 extra bytes
// NOTE(review): the meaning of 16 and 1 is not shown here — confirm
total_bytes += ins_n * (schema_->get_total_sizeof() + 16 + 1);
// same alignment for the reserved delete rows
int64_t del_n = upper_align(deleted_record_.reserved, chunk_size_);
int64_t del_n = upper_align(deleted_record_.reserved, size_per_chunk_);
// per-deleted-row cost: 16 * 2 bytes
// NOTE(review): the meaning of this constant is not shown here — confirm
total_bytes += del_n * (16 * 2);
return total_bytes;
}
@ -245,20 +245,20 @@ SegmentGrowingImpl::LoadIndexing(const LoadIndexInfo& info) {
Assert(info.index_params.count("metric_type"));
auto metric_type_str = info.index_params.at("metric_type");
sealed_indexing_record_.add_entry(field_offset, GetMetricType(metric_type_str), info.index);
sealed_indexing_record_.append_field_indexing(field_offset, GetMetricType(metric_type_str), info.index);
return Status::OK();
}
// Returns the type-erased span for chunk `chunk_id` of the column at
// `field_offset`, read from the insert record's column storage.
// (diff listing: each old line is followed by its renamed replacement)
SpanBase
SegmentGrowingImpl::chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const {
auto vec = get_insert_record().get_base_entity(field_offset);
auto vec = get_insert_record().get_field_data_base(field_offset);
return vec->get_span_base(chunk_id);
}
// Number of data chunks, derived from the acknowledged row count (GetAck)
// and the per-chunk element count.
// NOTE(review): upper_div is presumably ceiling division, so a partially
// filled last chunk is counted — confirm against its definition.
// (diff listing: each old line is followed by its renamed replacement)
int64_t
SegmentGrowingImpl::num_chunk_data() const {
SegmentGrowingImpl::num_chunk() const {
auto size = get_insert_record().ack_responder_.GetAck();
return upper_div(size, chunk_size_);
return upper_div(size, size_per_chunk_);
}
void
SegmentGrowingImpl::vector_search(int64_t vec_count,

View File

@ -27,7 +27,7 @@
#include "utils/Status.h"
#include "segcore/DeletedRecord.h"
#include "utils/EasyAssert.h"
#include "IndexingEntry.h"
#include "FieldIndexing.h"
#include "InsertRecord.h"
#include <utility>
#include <memory>
@ -89,18 +89,18 @@ class SegmentGrowingImpl : public SegmentGrowing {
// Returns the number of chunks whose index has been built, i.e. chunks
// [0, num_chunk_index) have an index available.
// `field_offset` is not used: the value comes from
// indexing_record_.get_finished_ack() regardless of the field.
// (diff listing: each old line is followed by its renamed replacement)
int64_t
num_chunk_index_safe(FieldOffset field_offset) const final {
num_chunk_index(FieldOffset field_offset) const final {
return indexing_record_.get_finished_ack();
}
// Returns the index object for chunk `chunk_id` of the field at
// `field_offset`, looked up through the growing-segment indexing record.
// (diff listing: each old line is followed by its renamed replacement)
const knowhere::Index*
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final {
return indexing_record_.get_entry(field_offset).get_indexing(chunk_id);
return indexing_record_.get_field_indexing(field_offset).get_chunk_indexing(chunk_id);
}
// Returns the per-chunk size this segment was constructed with.
// (diff listing: each old line is followed by its renamed replacement)
int64_t
size_per_chunk() const final {
return chunk_size_;
return size_per_chunk_;
}
public:
@ -152,27 +152,27 @@ class SegmentGrowingImpl : public SegmentGrowing {
// Gathers `count` values of the field at `field_offset`, at the segment
// offsets listed in `seg_offsets`, into `output`.
// Only INT64 fields are supported for now; any other data type trips the Assert.
// (diff listing: each old line is followed by its renamed replacement)
void
bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const override {
// TODO: support more types
auto vec_ptr = record_.get_base_entity(field_offset);
auto vec_ptr = record_.get_field_data_base(field_offset);
auto data_type = schema_->operator[](field_offset).get_data_type();
Assert(data_type == DataType::INT64);
bulk_subscript_impl<int64_t>(*vec_ptr, seg_offsets, count, output);
}
int64_t
num_chunk_data() const override;
num_chunk() const override;
Status
LoadIndexing(const LoadIndexInfo& info) override;
public:
friend std::unique_ptr<SegmentGrowing>
CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size);
CreateGrowingSegment(SchemaPtr schema, int64_t size_per_chunk);
// Builds a growing segment: stores the per-chunk size, takes ownership of
// `schema`, then constructs the insert record and the indexing record from
// the schema and the per-chunk size.
// (diff listing: each old line is followed by its renamed replacement, so the
// signature and initializers appear twice)
explicit SegmentGrowingImpl(SchemaPtr schema, int64_t chunk_size)
: chunk_size_(chunk_size),
explicit SegmentGrowingImpl(SchemaPtr schema, int64_t size_per_chunk)
: size_per_chunk_(size_per_chunk),
schema_(std::move(schema)),
// the records below read the member schema_, not the moved-from parameter
// NOTE(review): this relies on schema_ being declared before record_ and
// indexing_record_ — confirm the member declaration order
record_(*schema_, chunk_size),
indexing_record_(*schema_, chunk_size) {
record_(*schema_, size_per_chunk),
indexing_record_(*schema_, size_per_chunk) {
}
void
@ -192,7 +192,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const override;
private:
int64_t chunk_size_;
int64_t size_per_chunk_;
SchemaPtr schema_;
std::atomic<SegmentState> state_ = SegmentState::Open;

View File

@ -14,15 +14,18 @@
#include "common/Schema.h"
#include "query/Plan.h"
#include "common/Span.h"
#include "IndexingEntry.h"
#include "FieldIndexing.h"
#include <knowhere/index/vector_index/VecIndex.h>
#include "common/SystemProperty.h"
#include "query/PlanNode.h"
namespace milvus::segcore {
// common interface of SegmentSealed and SegmentGrowing
// used by C API
class SegmentInterface {
public:
// fill results according to target_entries in plan
void
FillTargetEntry(const query::Plan* plan, QueryResult& results) const;
@ -44,14 +47,17 @@ class SegmentInterface {
virtual ~SegmentInterface() = default;
protected:
// calculate output[i] = Vec[seg_offsets[i]], where Vec binds to system_type
virtual void
bulk_subscript(SystemFieldType system_type, const int64_t* seg_offsets, int64_t count, void* output) const = 0;
// calculate output[i] = Vec[seg_offsets[i]], where Vec binds to field_offset
virtual void
bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const = 0;
};
// internal API for DSL calculation
// only for implementation
class SegmentInternalInterface : public SegmentInterface {
public:
template <typename T>
@ -80,21 +86,24 @@ class SegmentInternalInterface : public SegmentInterface {
const BitsetView& bitset,
QueryResult& output) const = 0;
// number of chunks that have an index available
virtual int64_t
num_chunk_index_safe(FieldOffset field_offset) const = 0;
num_chunk_index(FieldOffset field_offset) const = 0;
// count of chunks
virtual int64_t
num_chunk_data() const = 0;
num_chunk() const = 0;
// return chunk_size for each chunk, renaming against confusion
// number of elements (rows) stored in each chunk
virtual int64_t
size_per_chunk() const = 0;
protected:
// blob and row_count
// internal API: return chunk_data in span
virtual SpanBase
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
// internal API: return the index built for the given chunk; supports scalar index only
virtual const knowhere::Index*
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
};

View File

@ -27,6 +27,6 @@ class SegmentSealed : public SegmentInternalInterface {
using SegmentSealedPtr = std::unique_ptr<SegmentSealed>;
SegmentSealedPtr
CreateSealedSegment(SchemaPtr schema, int64_t chunk_size = 32 * 1024);
CreateSealedSegment(SchemaPtr schema, int64_t size_per_chunk = 32 * 1024);
} // namespace milvus::segcore

View File

@ -29,7 +29,7 @@ SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
row_count_opt_ = row_count;
}
Assert(!vec_indexings_.is_ready(field_offset));
vec_indexings_.add_entry(field_offset, GetMetricType(metric_type_str), info.index);
vec_indexings_.append_field_indexing(field_offset, GetMetricType(metric_type_str), info.index);
++ready_count_;
}
@ -77,13 +77,13 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
}
// Number of chunks with an index available. This always returns 0 for now,
// whatever `field_offset` is (see the TODO below).
// (diff listing: each old line is followed by its renamed replacement)
int64_t
SegmentSealedImpl::num_chunk_index_safe(FieldOffset field_offset) const {
SegmentSealedImpl::num_chunk_index(FieldOffset field_offset) const {
// TODO: support scalar index
return 0;
}
// Number of data chunks: a sealed segment always reports exactly one.
// (diff listing: each old line is followed by its renamed replacement)
int64_t
SegmentSealedImpl::num_chunk_data() const {
SegmentSealedImpl::num_chunk() const {
return 1;
}
@ -142,7 +142,7 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
}
// Factory for sealed segments: constructs a SegmentSealedImpl from `schema`.
// NOTE(review): `size_per_chunk` is accepted but not forwarded to the
// implementation, so it is currently unused here.
// (diff listing: each old line is followed by its renamed replacement)
SegmentSealedPtr
CreateSealedSegment(SchemaPtr schema, int64_t chunk_size) {
CreateSealedSegment(SchemaPtr schema, int64_t size_per_chunk) {
return std::make_unique<SegmentSealedImpl>(schema);
}

View File

@ -37,12 +37,12 @@ class SegmentSealedImpl : public SegmentSealed {
public:
int64_t
num_chunk_index_safe(FieldOffset field_offset) const override;
num_chunk_index(FieldOffset field_offset) const override;
int64_t
num_chunk_data() const override;
num_chunk() const override;
// return chunk_size for each chunk, renaming against confusion
// return size_per_chunk for each chunk, renaming against confusion
int64_t
size_per_chunk() const override;

View File

@ -277,7 +277,7 @@ TEST(Sealed, LoadFieldData) {
vec_info.index_params["metric_type"] = milvus::knowhere::Metric::L2;
segment->LoadIndex(vec_info);
}
ASSERT_EQ(segment->num_chunk_data(), 1);
ASSERT_EQ(segment->num_chunk(), 1);
auto chunk_span1 = segment->chunk_data<int64_t>(FieldOffset(1), 0);
auto chunk_span2 = segment->chunk_data<double>(FieldOffset(2), 0);
auto ref1 = dataset.get_col<int64_t>(1);

View File

@ -19,38 +19,38 @@ TEST(Span, Naive) {
using namespace milvus::query;
using namespace milvus::segcore;
int64_t N = 1000 * 1000;
constexpr int64_t chunk_size = 32 * 1024;
constexpr int64_t size_per_chunk = 32 * 1024;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
schema->AddDebugField("age", DataType::FLOAT);
schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, MetricType::METRIC_L2);
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema, chunk_size);
auto segment = CreateGrowingSegment(schema, size_per_chunk);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
auto vec_ptr = dataset.get_col<uint8_t>(0);
auto age_ptr = dataset.get_col<float>(1);
auto float_ptr = dataset.get_col<float>(2);
SegmentInternalInterface& interface = *segment;
auto num_chunk = interface.num_chunk_data();
ASSERT_EQ(num_chunk, upper_div(N, chunk_size));
auto num_chunk = interface.num_chunk();
ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk));
auto row_count = interface.get_row_count();
ASSERT_EQ(N, row_count);
for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
auto vec_span = interface.chunk_data<BinaryVector>(FieldOffset(0), chunk_id);
auto age_span = interface.chunk_data<float>(FieldOffset(1), chunk_id);
auto float_span = interface.chunk_data<FloatVector>(FieldOffset(2), chunk_id);
auto begin = chunk_id * chunk_size;
auto end = std::min((chunk_id + 1) * chunk_size, N);
auto chunk_size = end - begin;
for (int i = 0; i < chunk_size * 512 / 8; ++i) {
auto begin = chunk_id * size_per_chunk;
auto end = std::min((chunk_id + 1) * size_per_chunk, N);
auto size_per_chunk = end - begin;
for (int i = 0; i < size_per_chunk * 512 / 8; ++i) {
ASSERT_EQ(vec_span.data()[i], vec_ptr[i + begin * 512 / 8]);
}
for (int i = 0; i < chunk_size; ++i) {
for (int i = 0; i < size_per_chunk; ++i) {
ASSERT_EQ(age_span.data()[i], age_ptr[i + begin]);
}
for (int i = 0; i < chunk_size; ++i) {
for (int i = 0; i < size_per_chunk; ++i) {
ASSERT_EQ(float_span.data()[i], float_ptr[i + begin * 32]);
}
}