mirror of https://github.com/milvus-io/milvus.git
Support string type in segcore (#16546)
Signed-off-by: xige-16 <xi.ge@zilliz.com> Co-authored-by: dragondriver <jiquan.long@zilliz.com> Co-authored-by: dragondriver <jiquan.long@zilliz.com>pull/16735/head
parent
9537394971
commit
515d0369de
|
@ -15,6 +15,11 @@
|
|||
#include <string>
|
||||
|
||||
namespace milvus {
|
||||
inline bool
|
||||
IsVectorType(CDataType dtype) {
|
||||
return dtype == CDataType::FloatVector || dtype == CDataType::BinaryVector;
|
||||
}
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<T> || std::is_same_v<T, std::string>>>
|
||||
inline CDataType
|
||||
GetDType() {
|
||||
|
|
|
@ -17,7 +17,11 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "Types.h"
|
||||
|
||||
const int64_t INVALID_ID = -1;
|
||||
const int64_t INVALID_OFFSET = -1;
|
||||
const int64_t INVALID_FIELD_ID = -1;
|
||||
const int64_t INVALID_SEG_OFFSET = -1;
|
||||
const milvus::PkType INVALID_PK; // of std::monostate if not set.
|
||||
// TODO: default field start id, could get from config.yaml
|
||||
const int64_t START_USER_FIELDID = 100;
|
||||
const char MAX_LENGTH_PER_ROW[] = "max_length_per_row";
|
||||
|
|
|
@ -73,6 +73,8 @@ datatype_name(DataType data_type) {
|
|||
return "float";
|
||||
case DataType::DOUBLE:
|
||||
return "double";
|
||||
case DataType::VARCHAR:
|
||||
return "varChar";
|
||||
case DataType::VECTOR_FLOAT:
|
||||
return "vector_float";
|
||||
case DataType::VECTOR_BINARY: {
|
||||
|
@ -90,6 +92,17 @@ datatype_is_vector(DataType datatype) {
|
|||
return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT;
|
||||
}
|
||||
|
||||
inline bool
|
||||
datatype_is_string(DataType datatype) {
|
||||
switch (datatype) {
|
||||
case DataType::VARCHAR:
|
||||
case DataType::STRING:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool
|
||||
datatype_is_integer(DataType datatype) {
|
||||
switch (datatype) {
|
||||
|
@ -128,6 +141,11 @@ class FieldMeta {
|
|||
Assert(!is_vector());
|
||||
}
|
||||
|
||||
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t max_length_per_row)
|
||||
: name_(name), id_(id), type_(type), string_info_(StringInfo{max_length_per_row}) {
|
||||
Assert(is_string());
|
||||
}
|
||||
|
||||
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, std::optional<MetricType> metric_type)
|
||||
: name_(name), id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) {
|
||||
Assert(is_vector());
|
||||
|
@ -139,6 +157,12 @@ class FieldMeta {
|
|||
return type_ == DataType::VECTOR_BINARY || type_ == DataType::VECTOR_FLOAT;
|
||||
}
|
||||
|
||||
bool
|
||||
is_string() const {
|
||||
Assert(type_ != DataType::NONE);
|
||||
return type_ == DataType::VARCHAR || type_ == DataType::STRING;
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_dim() const {
|
||||
Assert(is_vector());
|
||||
|
@ -146,6 +170,13 @@ class FieldMeta {
|
|||
return vector_info_->dim_;
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_max_len() const {
|
||||
Assert(is_string());
|
||||
Assert(string_info_.has_value());
|
||||
return string_info_->max_length_per_row;
|
||||
}
|
||||
|
||||
std::optional<MetricType>
|
||||
get_metric_type() const {
|
||||
Assert(is_vector());
|
||||
|
@ -168,10 +199,12 @@ class FieldMeta {
|
|||
return type_;
|
||||
}
|
||||
|
||||
int
|
||||
int64_t
|
||||
get_sizeof() const {
|
||||
if (is_vector()) {
|
||||
return datatype_sizeof(type_, get_dim());
|
||||
} else if (is_string()) {
|
||||
return string_info_->max_length_per_row;
|
||||
} else {
|
||||
return datatype_sizeof(type_);
|
||||
}
|
||||
|
@ -182,10 +215,14 @@ class FieldMeta {
|
|||
int64_t dim_;
|
||||
std::optional<MetricType> metric_type_;
|
||||
};
|
||||
struct StringInfo {
|
||||
int64_t max_length_per_row;
|
||||
};
|
||||
FieldName name_;
|
||||
FieldId id_;
|
||||
DataType type_ = DataType::NONE;
|
||||
std::optional<VectorInfo> vector_info_;
|
||||
std::optional<StringInfo> string_info_;
|
||||
};
|
||||
|
||||
} // namespace milvus
|
||||
|
|
|
@ -18,25 +18,31 @@
|
|||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "Types.h"
|
||||
|
||||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
#include "common/CDataType.h"
|
||||
#include "knowhere/index/Index.h"
|
||||
|
||||
struct LoadIndexInfo {
|
||||
int64_t field_id;
|
||||
CDataType field_type;
|
||||
std::map<std::string, std::string> index_params;
|
||||
knowhere::VecIndexPtr index;
|
||||
knowhere::IndexPtr index;
|
||||
};
|
||||
|
||||
// NOTE: field_id can be system field
|
||||
// NOTE: Refer to common/SystemProperty.cpp for details
|
||||
// TODO: use arrow to pass field data instead of proto
|
||||
struct LoadFieldDataInfo {
|
||||
int64_t field_id;
|
||||
const void* blob = nullptr;
|
||||
// const void* blob = nullptr;
|
||||
const milvus::DataArray* field_data;
|
||||
int64_t row_count = -1;
|
||||
};
|
||||
|
||||
struct LoadDeletedRecordInfo {
|
||||
const void* timestamps = nullptr;
|
||||
const void* primary_keys = nullptr;
|
||||
const milvus::IdArray* primary_keys = nullptr;
|
||||
int64_t row_count = -1;
|
||||
};
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
@ -32,52 +33,49 @@
|
|||
namespace milvus {
|
||||
struct SearchResult {
|
||||
SearchResult() = default;
|
||||
SearchResult(int64_t num_queries, int64_t topk) : topk_(topk), num_queries_(num_queries) {
|
||||
auto count = get_row_count();
|
||||
distances_.resize(count);
|
||||
ids_.resize(count);
|
||||
|
||||
int64_t
|
||||
get_total_result_count() const {
|
||||
int64_t count = 0;
|
||||
for (auto topk : real_topK_per_nq_) {
|
||||
count += topk;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_row_count() const {
|
||||
return topk_ * num_queries_;
|
||||
}
|
||||
|
||||
// vector type
|
||||
void
|
||||
AddField(const FieldName& name,
|
||||
const FieldId id,
|
||||
DataType data_type,
|
||||
int64_t dim,
|
||||
std::optional<MetricType> metric_type) {
|
||||
this->AddField(FieldMeta(name, id, data_type, dim, metric_type));
|
||||
}
|
||||
|
||||
// scalar type
|
||||
void
|
||||
AddField(const FieldName& name, const FieldId id, DataType data_type) {
|
||||
this->AddField(FieldMeta(name, id, data_type));
|
||||
}
|
||||
|
||||
void
|
||||
AddField(FieldMeta&& field_meta) {
|
||||
output_fields_meta_.emplace_back(std::move(field_meta));
|
||||
get_result_count(int nq_offset) const {
|
||||
AssertInfo(nq_offset <= real_topK_per_nq_.size(), "wrong nq offset when get real search result count");
|
||||
int64_t count = 0;
|
||||
for (auto i = 0; i < nq_offset; i++) {
|
||||
count += real_topK_per_nq_[i];
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
public:
|
||||
int64_t num_queries_;
|
||||
int64_t topk_;
|
||||
std::vector<float> distances_;
|
||||
std::vector<int64_t> ids_; // primary keys
|
||||
|
||||
public:
|
||||
// TODO(gexi): utilize these fields
|
||||
void* segment_;
|
||||
|
||||
// first fill data during search, and then update data after reducing search results
|
||||
std::vector<float> distances_;
|
||||
std::vector<int64_t> seg_offsets_;
|
||||
|
||||
// fist fill data during fillPrimaryKey, and then update data after reducing search results
|
||||
std::vector<PkType> primary_keys_;
|
||||
DataType pk_type_;
|
||||
|
||||
// fill data during reducing search result
|
||||
std::vector<int64_t> result_offsets_;
|
||||
std::vector<int64_t> primary_keys_;
|
||||
aligned_vector<char> ids_data_;
|
||||
std::vector<aligned_vector<char>> output_fields_data_;
|
||||
std::vector<FieldMeta> output_fields_meta_;
|
||||
// after reducing search result done, size(distances_) = size(seg_offsets_) = size(primary_keys_) =
|
||||
// size(primary_keys_)
|
||||
|
||||
// set output fields data when fill target entity
|
||||
std::map<FieldId, std::unique_ptr<milvus::DataArray>> output_fields_data_;
|
||||
|
||||
// used for reduce, filter invalid pk, get real topks count
|
||||
std::vector<int64_t> real_topK_per_nq_;
|
||||
};
|
||||
|
||||
using SearchResultPtr = std::shared_ptr<SearchResult>;
|
||||
|
|
|
@ -43,7 +43,6 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
|||
// NOTE: only two system
|
||||
|
||||
for (const milvus::proto::schema::FieldSchema& child : schema_proto.fields()) {
|
||||
auto field_offset = FieldOffset(schema->size());
|
||||
auto field_id = FieldId(child.fieldid());
|
||||
auto name = FieldName(child.name());
|
||||
|
||||
|
@ -69,25 +68,26 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
|||
auto metric_type = GetMetricType(index_map.at("metric_type"));
|
||||
schema->AddField(name, field_id, data_type, dim, metric_type);
|
||||
}
|
||||
} else if (datatype_is_string(data_type)) {
|
||||
auto type_map = RepeatedKeyValToMap(child.type_params());
|
||||
AssertInfo(type_map.count(MAX_LENGTH_PER_ROW), "max_length_per_row not found");
|
||||
auto max_len = boost::lexical_cast<int64_t>(type_map.at(MAX_LENGTH_PER_ROW));
|
||||
schema->AddField(name, field_id, data_type, max_len);
|
||||
} else {
|
||||
schema->AddField(name, field_id, data_type);
|
||||
}
|
||||
|
||||
if (child.is_primary_key()) {
|
||||
AssertInfo(!schema->get_primary_key_offset().has_value(), "repetitive primary key");
|
||||
Assert(!schema_proto.autoid());
|
||||
schema->set_primary_key(field_offset);
|
||||
AssertInfo(!schema->get_primary_field_id().has_value(), "repetitive primary key");
|
||||
schema->set_primary_field_id(field_id);
|
||||
}
|
||||
}
|
||||
if (schema->get_is_auto_id()) {
|
||||
AssertInfo(!schema->get_primary_key_offset().has_value(), "auto id mode: shouldn't have primary key");
|
||||
} else {
|
||||
AssertInfo(schema->get_primary_key_offset().has_value(), "primary key should be specified when autoId is off");
|
||||
}
|
||||
|
||||
AssertInfo(schema->get_primary_field_id().has_value(), "primary key should be specified");
|
||||
|
||||
return schema;
|
||||
}
|
||||
|
||||
const FieldMeta FieldMeta::RowIdMeta(FieldName("RowID"), FieldId(0), DataType::INT64);
|
||||
const FieldMeta FieldMeta::RowIdMeta(FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
|
||||
} // namespace milvus
|
||||
|
|
|
@ -25,16 +25,18 @@
|
|||
|
||||
#include "FieldMeta.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "Consts.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
static int64_t debug_id = START_USER_FIELDID;
|
||||
|
||||
class Schema {
|
||||
public:
|
||||
FieldId
|
||||
AddDebugField(const std::string& name, DataType data_type) {
|
||||
static int64_t debug_id = 1000;
|
||||
auto field_id = FieldId(debug_id);
|
||||
debug_id += 2;
|
||||
debug_id++;
|
||||
this->AddField(FieldName(name), field_id, data_type);
|
||||
return field_id;
|
||||
}
|
||||
|
@ -42,9 +44,8 @@ class Schema {
|
|||
// auto gen field_id for convenience
|
||||
FieldId
|
||||
AddDebugField(const std::string& name, DataType data_type, int64_t dim, std::optional<MetricType> metric_type) {
|
||||
static int64_t debug_id = 2001;
|
||||
auto field_id = FieldId(debug_id);
|
||||
debug_id += 2;
|
||||
debug_id++;
|
||||
auto field_meta = FieldMeta(FieldName(name), field_id, data_type, dim, metric_type);
|
||||
this->AddField(std::move(field_meta));
|
||||
return field_id;
|
||||
|
@ -57,6 +58,13 @@ class Schema {
|
|||
this->AddField(std::move(field_meta));
|
||||
}
|
||||
|
||||
// string type
|
||||
void
|
||||
AddField(const FieldName& name, const FieldId id, DataType data_type, int64_t max_length_per_row) {
|
||||
auto field_meta = FieldMeta(name, id, data_type, max_length_per_row);
|
||||
this->AddField(std::move(field_meta));
|
||||
}
|
||||
|
||||
// vector type
|
||||
void
|
||||
AddField(const FieldName& name,
|
||||
|
@ -69,19 +77,8 @@ class Schema {
|
|||
}
|
||||
|
||||
void
|
||||
set_auto_id(bool is_auto_id) {
|
||||
is_auto_id_ = is_auto_id;
|
||||
}
|
||||
|
||||
void
|
||||
set_primary_key(FieldOffset field_offset) {
|
||||
is_auto_id_ = false;
|
||||
this->primary_key_offset_opt_ = field_offset;
|
||||
}
|
||||
|
||||
bool
|
||||
get_is_auto_id() const {
|
||||
return is_auto_id_;
|
||||
set_primary_field_id(FieldId field_id) {
|
||||
this->primary_field_id_opt_ = field_id;
|
||||
}
|
||||
|
||||
auto
|
||||
|
@ -100,10 +97,10 @@ class Schema {
|
|||
}
|
||||
|
||||
const FieldMeta&
|
||||
operator[](FieldOffset field_offset) const {
|
||||
Assert(field_offset.get() >= 0);
|
||||
Assert(field_offset.get() < fields_.size());
|
||||
return fields_[field_offset.get()];
|
||||
operator[](FieldId field_id) const {
|
||||
Assert(field_id.get() >= 0);
|
||||
AssertInfo(fields_.count(field_id), "Cannot find field_id");
|
||||
return fields_.at(field_id);
|
||||
}
|
||||
|
||||
auto
|
||||
|
@ -111,39 +108,32 @@ class Schema {
|
|||
return total_sizeof_;
|
||||
}
|
||||
|
||||
const std::vector<int64_t>&
|
||||
get_sizeof_infos() const {
|
||||
return sizeof_infos_;
|
||||
FieldId
|
||||
get_field_id(const FieldName& field_name) const {
|
||||
AssertInfo(name_ids_.count(field_name), "Cannot find field_name");
|
||||
return name_ids_.at(field_name);
|
||||
}
|
||||
|
||||
FieldOffset
|
||||
get_offset(const FieldName& field_name) const {
|
||||
Assert(name_offsets_.count(field_name));
|
||||
return name_offsets_.at(field_name);
|
||||
}
|
||||
|
||||
FieldOffset
|
||||
get_offset(const FieldId& field_id) const {
|
||||
Assert(id_offsets_.count(field_id));
|
||||
return id_offsets_.at(field_id);
|
||||
}
|
||||
|
||||
const std::vector<FieldMeta>&
|
||||
const std::unordered_map<FieldId, FieldMeta>&
|
||||
get_fields() const {
|
||||
return fields_;
|
||||
}
|
||||
|
||||
const FieldMeta&
|
||||
operator[](const FieldName& field_name) const {
|
||||
auto offset_iter = name_offsets_.find(field_name);
|
||||
AssertInfo(offset_iter != name_offsets_.end(), "Cannot find field_name: " + field_name.get());
|
||||
auto offset = offset_iter->second;
|
||||
return (*this)[offset];
|
||||
const std::vector<FieldId>&
|
||||
get_field_ids() const {
|
||||
return field_ids_;
|
||||
}
|
||||
|
||||
std::optional<FieldOffset>
|
||||
get_primary_key_offset() const {
|
||||
return primary_key_offset_opt_;
|
||||
const FieldMeta&
|
||||
operator[](const FieldName& field_name) const {
|
||||
auto id_iter = name_ids_.find(field_name);
|
||||
AssertInfo(id_iter != name_ids_.end(), "Cannot find field_name: " + field_name.get());
|
||||
return fields_.at(id_iter->second);
|
||||
}
|
||||
|
||||
std::optional<FieldId>
|
||||
get_primary_field_id() const {
|
||||
return primary_field_id_opt_;
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -152,29 +142,33 @@ class Schema {
|
|||
|
||||
void
|
||||
AddField(FieldMeta&& field_meta) {
|
||||
auto offset = fields_.size();
|
||||
AssertInfo(!name_offsets_.count(field_meta.get_name()), "duplicated field name");
|
||||
name_offsets_.emplace(field_meta.get_name(), offset);
|
||||
AssertInfo(!id_offsets_.count(field_meta.get_id()), "duplicated field id");
|
||||
id_offsets_.emplace(field_meta.get_id(), offset);
|
||||
auto field_name = field_meta.get_name();
|
||||
auto field_id = field_meta.get_id();
|
||||
AssertInfo(!name_ids_.count(field_name), "duplicated field name");
|
||||
AssertInfo(!id_names_.count(field_id), "duplicated field id");
|
||||
name_ids_.emplace(field_name, field_id);
|
||||
id_names_.emplace(field_id, field_name);
|
||||
|
||||
fields_.emplace(field_id, field_meta);
|
||||
field_ids_.emplace_back(field_id);
|
||||
|
||||
auto field_sizeof = field_meta.get_sizeof();
|
||||
sizeof_infos_.push_back(std::move(field_sizeof));
|
||||
fields_.emplace_back(std::move(field_meta));
|
||||
total_sizeof_ += field_sizeof;
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t debug_id = START_USER_FIELDID;
|
||||
std::vector<FieldId> field_ids_;
|
||||
|
||||
// this is where data holds
|
||||
std::vector<FieldMeta> fields_;
|
||||
std::unordered_map<FieldId, FieldMeta> fields_;
|
||||
|
||||
// a mapping for random access
|
||||
std::unordered_map<FieldName, FieldOffset> name_offsets_; // field_name -> offset
|
||||
std::unordered_map<FieldId, FieldOffset> id_offsets_; // field_id -> offset
|
||||
std::vector<int64_t> sizeof_infos_;
|
||||
int total_sizeof_ = 0;
|
||||
bool is_auto_id_ = true;
|
||||
std::optional<FieldOffset> primary_key_offset_opt_;
|
||||
std::unordered_map<FieldName, FieldId> name_ids_; // field_name -> field_id
|
||||
std::unordered_map<FieldId, FieldName> id_names_; // field_id -> field_name
|
||||
|
||||
int64_t total_sizeof_ = 0;
|
||||
std::optional<FieldId> primary_field_id_opt_;
|
||||
};
|
||||
|
||||
using SchemaPtr = std::shared_ptr<Schema>;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include <cassert>
|
||||
#include <type_traits>
|
||||
#include <string>
|
||||
|
||||
#include "Types.h"
|
||||
#include "VectorTrait.h"
|
||||
|
@ -56,7 +57,7 @@ class Span;
|
|||
|
||||
// TODO: refine Span to support T=FloatVector
|
||||
template <typename T>
|
||||
class Span<T, typename std::enable_if_t<std::is_fundamental_v<T>>> {
|
||||
class Span<T, typename std::enable_if_t<IsScalar<T> || std::is_same_v<T, PkType>>> {
|
||||
public:
|
||||
using embeded_type = T;
|
||||
explicit Span(const T* data, int64_t row_count) : data_(data), row_count_(row_count) {
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include <yaml-cpp/yaml.h>
|
||||
|
||||
#include "SystemProperty.h"
|
||||
#include "Types.h"
|
||||
#include "Consts.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
|
||||
namespace milvus {
|
||||
|
@ -73,10 +73,10 @@ InstanceImpl() {
|
|||
using Type = SystemFieldType;
|
||||
|
||||
impl.name_to_types_.emplace(FieldName("RowID"), Type::RowId);
|
||||
impl.id_to_types_.emplace(FieldId(0), Type::RowId);
|
||||
impl.id_to_types_.emplace(RowFieldID, Type::RowId);
|
||||
|
||||
impl.name_to_types_.emplace(FieldName("Timestamp"), Type::Timestamp);
|
||||
impl.id_to_types_.emplace(FieldId(1), Type::Timestamp);
|
||||
impl.id_to_types_.emplace(TimestampFieldID, Type::Timestamp);
|
||||
|
||||
return impl;
|
||||
}();
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "common/type_c.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "CGoHelper.h"
|
||||
#include "common/Consts.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
|
@ -55,4 +56,9 @@ MetricTypeToName(MetricType metric_type) {
|
|||
return metric_bimap.right.at(metric_type);
|
||||
}
|
||||
|
||||
bool
|
||||
IsPrimaryKeyDataType(DataType data_type) {
|
||||
return data_type == engine::DataType::INT64 || data_type == DataType::VARCHAR;
|
||||
}
|
||||
|
||||
} // namespace milvus
|
||||
|
|
|
@ -22,11 +22,14 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include <boost/align/aligned_allocator.hpp>
|
||||
#include <boost/container/vector.hpp>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <NamedType/named_type.hpp>
|
||||
#include <variant>
|
||||
|
||||
#include "knowhere/common/MetricType.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "pb/segcore.pb.h"
|
||||
#include "utils/Types.h"
|
||||
|
||||
namespace milvus {
|
||||
|
@ -36,12 +39,15 @@ constexpr auto MAX_TIMESTAMP = std::numeric_limits<Timestamp>::max();
|
|||
|
||||
using engine::DataType;
|
||||
using engine::idx_t;
|
||||
constexpr auto MAX_ROW_COUNT = std::numeric_limits<engine::idx_t>::max();
|
||||
|
||||
using ScalarArray = proto::schema::ScalarField;
|
||||
using DataArray = proto::schema::FieldData;
|
||||
using VectorArray = proto::schema::VectorField;
|
||||
using IdArray = proto::schema::IDs;
|
||||
using MetricType = faiss::MetricType;
|
||||
using InsertData = proto::segcore::InsertRecord;
|
||||
using PkType = std::variant<std::monostate, int64_t, std::string>;
|
||||
|
||||
MetricType
|
||||
GetMetricType(const std::string& type);
|
||||
|
@ -49,6 +55,9 @@ GetMetricType(const std::string& type);
|
|||
std::string
|
||||
MetricTypeToName(MetricType metric_type);
|
||||
|
||||
bool
|
||||
IsPrimaryKeyDataType(DataType data_type);
|
||||
|
||||
// NOTE: dependent type
|
||||
// used at meta-template programming
|
||||
template <class...>
|
||||
|
@ -70,11 +79,16 @@ struct SegOffsetTag;
|
|||
|
||||
using FieldId = fluent::NamedType<int64_t, impl::FieldIdTag, fluent::Comparable, fluent::Hashable>;
|
||||
using FieldName = fluent::NamedType<std::string, impl::FieldNameTag, fluent::Comparable, fluent::Hashable>;
|
||||
using FieldOffset = fluent::NamedType<int64_t, impl::FieldOffsetTag, fluent::Comparable, fluent::Hashable>;
|
||||
// using FieldOffset = fluent::NamedType<int64_t, impl::FieldOffsetTag, fluent::Comparable, fluent::Hashable>;
|
||||
using SegOffset = fluent::NamedType<int64_t, impl::SegOffsetTag, fluent::Arithmetic>;
|
||||
|
||||
using BitsetType = boost::dynamic_bitset<>;
|
||||
using BitsetTypePtr = std::shared_ptr<boost::dynamic_bitset<>>;
|
||||
using BitsetTypeOpt = std::optional<BitsetType>;
|
||||
|
||||
template <typename Type>
|
||||
using FixedVector = boost::container::vector<Type>;
|
||||
|
||||
const FieldId RowFieldID = FieldId(0);
|
||||
const FieldId TimestampFieldID = FieldId(1);
|
||||
} // namespace milvus
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace milvus {
|
||||
inline bool
|
||||
PrefixMatch(const std::string& str, const std::string& prefix) {
|
||||
auto ret = strncmp(str.c_str(), prefix.c_str(), prefix.length());
|
||||
if (ret != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool
|
||||
PostfixMatch(const std::string& str, const std::string& postfix) {
|
||||
if (postfix.length() > str.length()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int offset = str.length() - postfix.length();
|
||||
auto ret = strncmp(str.c_str() + offset, postfix.c_str(), postfix.length());
|
||||
if (ret != 0) {
|
||||
return false;
|
||||
}
|
||||
//
|
||||
// int i = postfix.length() - 1;
|
||||
// int j = str.length() - 1;
|
||||
// for (; i >= 0; i--, j--) {
|
||||
// if (postfix[i] != str[j]) {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
return true;
|
||||
}
|
||||
} // namespace milvus
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "Types.h"
|
||||
#include <string>
|
||||
|
||||
namespace milvus {
|
||||
|
||||
|
@ -48,7 +49,7 @@ template <typename T>
|
|||
constexpr bool IsVector = std::is_base_of_v<VectorTrait, T>;
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsScalar = std::is_fundamental_v<T>;
|
||||
constexpr bool IsScalar = std::is_fundamental_v<T> || std::is_same_v<T, std::string>;
|
||||
|
||||
template <typename T, typename Enabled = void>
|
||||
struct EmbeddedTypeImpl;
|
||||
|
@ -66,4 +67,17 @@ struct EmbeddedTypeImpl<T, std::enable_if_t<IsVector<T>>> {
|
|||
template <typename T>
|
||||
using EmbeddedType = typename EmbeddedTypeImpl<T>::type;
|
||||
|
||||
struct FundamentalTag {};
|
||||
struct StringTag {};
|
||||
|
||||
template <class T>
|
||||
struct TagDispatchTrait {
|
||||
using Tag = FundamentalTag;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct TagDispatchTrait<std::string> {
|
||||
using Tag = StringTag;
|
||||
};
|
||||
|
||||
} // namespace milvus
|
||||
|
|
|
@ -70,13 +70,13 @@ typedef struct CProto {
|
|||
|
||||
typedef struct CLoadFieldDataInfo {
|
||||
int64_t field_id;
|
||||
void* blob;
|
||||
const char* blob;
|
||||
int64_t row_count;
|
||||
} CLoadFieldDataInfo;
|
||||
|
||||
typedef struct CLoadDeletedRecordInfo {
|
||||
void* timestamps;
|
||||
void* primary_keys;
|
||||
const char* primary_keys;
|
||||
int64_t row_count;
|
||||
} CLoadDeletedRecordInfo;
|
||||
|
||||
|
|
|
@ -33,6 +33,9 @@ class IndexBase : public Index {
|
|||
|
||||
virtual const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset) = 0;
|
||||
|
||||
virtual size_t
|
||||
Count() = 0;
|
||||
};
|
||||
using IndexBasePtr = std::unique_ptr<IndexBase>;
|
||||
|
||||
|
|
|
@ -37,6 +37,11 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset) override;
|
||||
|
||||
size_t
|
||||
Count() override {
|
||||
return data_.size();
|
||||
}
|
||||
|
||||
void
|
||||
Build(size_t n, const T* values) override;
|
||||
|
||||
|
|
|
@ -35,6 +35,11 @@ class StringIndexMarisa : public StringIndex {
|
|||
void
|
||||
Load(const BinarySet& set) override;
|
||||
|
||||
size_t
|
||||
Count() override {
|
||||
return str_ids_.size();
|
||||
}
|
||||
|
||||
void
|
||||
Build(size_t n, const std::string* values) override;
|
||||
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "index/ScalarIndexSort.h"
|
||||
#include "index/StringIndex.h"
|
||||
#include "common/Utils.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
// TODO: should inherit from StringIndex?
|
||||
class StringIndexSort : public ScalarIndexSort<std::string> {
|
||||
public:
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset) override {
|
||||
auto size = dataset->Get<int64_t>(knowhere::meta::ROWS);
|
||||
auto data = dataset->Get<const void*>(knowhere::meta::TENSOR);
|
||||
proto::schema::StringArray arr;
|
||||
arr.ParseFromArray(data, size);
|
||||
|
||||
{
|
||||
// TODO: optimize here. avoid memory copy.
|
||||
std::vector<std::string> vecs{arr.data().begin(), arr.data().end()};
|
||||
Build(arr.data().size(), vecs.data());
|
||||
}
|
||||
|
||||
{
|
||||
// TODO: test this way.
|
||||
// auto strs = (const std::string*)arr.data().data();
|
||||
// Build(arr.data().size(), strs);
|
||||
}
|
||||
}
|
||||
|
||||
const TargetBitmapPtr
|
||||
Query(const DatasetPtr& dataset) override {
|
||||
auto op = dataset->Get<OperatorType>(OPERATOR_TYPE);
|
||||
if (op == PrefixMatchOp) {
|
||||
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
|
||||
return PrefixMatch(prefix);
|
||||
}
|
||||
return ScalarIndex<std::string>::Query(dataset);
|
||||
}
|
||||
|
||||
const TargetBitmapPtr
|
||||
PrefixMatch(std::string prefix) {
|
||||
auto data = GetData();
|
||||
TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(data.size());
|
||||
for (size_t i = 0; i < data.size(); i++) {
|
||||
if (milvus::PrefixMatch(data[i].a_, prefix)) {
|
||||
bitset->set(data[i].idx_);
|
||||
}
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
};
|
||||
using StringIndexSortPtr = std::unique_ptr<StringIndexSort>;
|
||||
|
||||
inline StringIndexSortPtr
|
||||
CreateStringIndexSort() {
|
||||
return std::make_unique<StringIndexSort>();
|
||||
}
|
||||
} // namespace milvus::scalar
|
|
@ -532,13 +532,14 @@ const char descriptor_table_protodef_plan_2eproto[] PROTOBUF_SECTION_VARIABLE(pr
|
|||
"\030\001 \001(\0132\035.milvus.proto.plan.VectorANNSH\000\022"
|
||||
"-\n\npredicates\030\002 \001(\0132\027.milvus.proto.plan."
|
||||
"ExprH\000\022\030\n\020output_field_ids\030\003 \003(\003B\006\n\004node"
|
||||
"*n\n\006OpType\022\013\n\007Invalid\020\000\022\017\n\013GreaterThan\020\001"
|
||||
"\022\020\n\014GreaterEqual\020\002\022\014\n\010LessThan\020\003\022\r\n\tLess"
|
||||
"Equal\020\004\022\t\n\005Equal\020\005\022\014\n\010NotEqual\020\006*G\n\013Arit"
|
||||
"hOpType\022\013\n\007Unknown\020\000\022\007\n\003Add\020\001\022\007\n\003Sub\020\002\022\007"
|
||||
"\n\003Mul\020\003\022\007\n\003Div\020\004\022\007\n\003Mod\020\005B3Z1github.com/"
|
||||
"milvus-io/milvus/internal/proto/planpbb\006"
|
||||
"proto3"
|
||||
"*\221\001\n\006OpType\022\013\n\007Invalid\020\000\022\017\n\013GreaterThan\020"
|
||||
"\001\022\020\n\014GreaterEqual\020\002\022\014\n\010LessThan\020\003\022\r\n\tLes"
|
||||
"sEqual\020\004\022\t\n\005Equal\020\005\022\014\n\010NotEqual\020\006\022\017\n\013Pre"
|
||||
"fixMatch\020\007\022\020\n\014PostfixMatch\020\010*G\n\013ArithOpT"
|
||||
"ype\022\013\n\007Unknown\020\000\022\007\n\003Add\020\001\022\007\n\003Sub\020\002\022\007\n\003Mu"
|
||||
"l\020\003\022\007\n\003Div\020\004\022\007\n\003Mod\020\005B3Z1github.com/milv"
|
||||
"us-io/milvus/internal/proto/planpbb\006prot"
|
||||
"o3"
|
||||
;
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_plan_2eproto_deps[1] = {
|
||||
&::descriptor_table_schema_2eproto,
|
||||
|
@ -560,7 +561,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_pla
|
|||
static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_plan_2eproto_once;
|
||||
static bool descriptor_table_plan_2eproto_initialized = false;
|
||||
const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_plan_2eproto = {
|
||||
&descriptor_table_plan_2eproto_initialized, descriptor_table_protodef_plan_2eproto, "plan.proto", 2846,
|
||||
&descriptor_table_plan_2eproto_initialized, descriptor_table_protodef_plan_2eproto, "plan.proto", 2882,
|
||||
&descriptor_table_plan_2eproto_once, descriptor_table_plan_2eproto_sccs, descriptor_table_plan_2eproto_deps, 12, 1,
|
||||
schemas, file_default_instances, TableStruct_plan_2eproto::offsets,
|
||||
file_level_metadata_plan_2eproto, 14, file_level_enum_descriptors_plan_2eproto, file_level_service_descriptors_plan_2eproto,
|
||||
|
@ -628,6 +629,8 @@ bool OpType_IsValid(int value) {
|
|||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -183,12 +183,14 @@ enum OpType : int {
|
|||
LessEqual = 4,
|
||||
Equal = 5,
|
||||
NotEqual = 6,
|
||||
PrefixMatch = 7,
|
||||
PostfixMatch = 8,
|
||||
OpType_INT_MIN_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::min(),
|
||||
OpType_INT_MAX_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::max()
|
||||
};
|
||||
bool OpType_IsValid(int value);
|
||||
constexpr OpType OpType_MIN = Invalid;
|
||||
constexpr OpType OpType_MAX = NotEqual;
|
||||
constexpr OpType OpType_MAX = PostfixMatch;
|
||||
constexpr int OpType_ARRAYSIZE = OpType_MAX + 1;
|
||||
|
||||
const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* OpType_descriptor();
|
||||
|
|
|
@ -33,9 +33,28 @@ class LoadSegmentMetaDefaultTypeInternal {
|
|||
public:
|
||||
::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed<LoadSegmentMeta> _instance;
|
||||
} _LoadSegmentMeta_default_instance_;
|
||||
class InsertRecordDefaultTypeInternal {
|
||||
public:
|
||||
::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed<InsertRecord> _instance;
|
||||
} _InsertRecord_default_instance_;
|
||||
} // namespace segcore
|
||||
} // namespace proto
|
||||
} // namespace milvus
|
||||
static void InitDefaultsscc_info_InsertRecord_segcore_2eproto() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::milvus::proto::segcore::_InsertRecord_default_instance_;
|
||||
new (ptr) ::milvus::proto::segcore::InsertRecord();
|
||||
::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::milvus::proto::segcore::InsertRecord::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<1> scc_info_InsertRecord_segcore_2eproto =
|
||||
{{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 1, InitDefaultsscc_info_InsertRecord_segcore_2eproto}, {
|
||||
&scc_info_FieldData_schema_2eproto.base,}};
|
||||
|
||||
static void InitDefaultsscc_info_LoadFieldMeta_segcore_2eproto() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
|
@ -81,7 +100,7 @@ static void InitDefaultsscc_info_RetrieveResults_segcore_2eproto() {
|
|||
&scc_info_IDs_schema_2eproto.base,
|
||||
&scc_info_FieldData_schema_2eproto.base,}};
|
||||
|
||||
static ::PROTOBUF_NAMESPACE_ID::Metadata file_level_metadata_segcore_2eproto[3];
|
||||
static ::PROTOBUF_NAMESPACE_ID::Metadata file_level_metadata_segcore_2eproto[4];
|
||||
static constexpr ::PROTOBUF_NAMESPACE_ID::EnumDescriptor const** file_level_enum_descriptors_segcore_2eproto = nullptr;
|
||||
static constexpr ::PROTOBUF_NAMESPACE_ID::ServiceDescriptor const** file_level_service_descriptors_segcore_2eproto = nullptr;
|
||||
|
||||
|
@ -109,17 +128,26 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_segcore_2eproto::offsets[] PRO
|
|||
~0u, // no _weak_field_map_
|
||||
PROTOBUF_FIELD_OFFSET(::milvus::proto::segcore::LoadSegmentMeta, metas_),
|
||||
PROTOBUF_FIELD_OFFSET(::milvus::proto::segcore::LoadSegmentMeta, total_size_),
|
||||
~0u, // no _has_bits_
|
||||
PROTOBUF_FIELD_OFFSET(::milvus::proto::segcore::InsertRecord, _internal_metadata_),
|
||||
~0u, // no _extensions_
|
||||
~0u, // no _oneof_case_
|
||||
~0u, // no _weak_field_map_
|
||||
PROTOBUF_FIELD_OFFSET(::milvus::proto::segcore::InsertRecord, fields_data_),
|
||||
PROTOBUF_FIELD_OFFSET(::milvus::proto::segcore::InsertRecord, num_rows_),
|
||||
};
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::MigrationSchema schemas[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = {
|
||||
{ 0, -1, sizeof(::milvus::proto::segcore::RetrieveResults)},
|
||||
{ 8, -1, sizeof(::milvus::proto::segcore::LoadFieldMeta)},
|
||||
{ 16, -1, sizeof(::milvus::proto::segcore::LoadSegmentMeta)},
|
||||
{ 23, -1, sizeof(::milvus::proto::segcore::InsertRecord)},
|
||||
};
|
||||
|
||||
static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = {
|
||||
reinterpret_cast<const ::PROTOBUF_NAMESPACE_ID::Message*>(&::milvus::proto::segcore::_RetrieveResults_default_instance_),
|
||||
reinterpret_cast<const ::PROTOBUF_NAMESPACE_ID::Message*>(&::milvus::proto::segcore::_LoadFieldMeta_default_instance_),
|
||||
reinterpret_cast<const ::PROTOBUF_NAMESPACE_ID::Message*>(&::milvus::proto::segcore::_LoadSegmentMeta_default_instance_),
|
||||
reinterpret_cast<const ::PROTOBUF_NAMESPACE_ID::Message*>(&::milvus::proto::segcore::_InsertRecord_default_instance_),
|
||||
};
|
||||
|
||||
const char descriptor_table_protodef_segcore_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) =
|
||||
|
@ -131,14 +159,17 @@ const char descriptor_table_protodef_segcore_2eproto[] PROTOBUF_SECTION_VARIABLE
|
|||
"in_timestamp\030\001 \001(\003\022\025\n\rmax_timestamp\030\002 \001("
|
||||
"\003\022\021\n\trow_count\030\003 \001(\003\"Y\n\017LoadSegmentMeta\022"
|
||||
"2\n\005metas\030\001 \003(\0132#.milvus.proto.segcore.Lo"
|
||||
"adFieldMeta\022\022\n\ntotal_size\030\002 \001(\003B6Z4githu"
|
||||
"b.com/milvus-io/milvus/internal/proto/se"
|
||||
"gcorepbb\006proto3"
|
||||
"adFieldMeta\022\022\n\ntotal_size\030\002 \001(\003\"U\n\014Inser"
|
||||
"tRecord\0223\n\013fields_data\030\001 \003(\0132\036.milvus.pr"
|
||||
"oto.schema.FieldData\022\020\n\010num_rows\030\002 \001(\003B6"
|
||||
"Z4github.com/milvus-io/milvus/internal/p"
|
||||
"roto/segcorepbb\006proto3"
|
||||
;
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_segcore_2eproto_deps[1] = {
|
||||
&::descriptor_table_schema_2eproto,
|
||||
};
|
||||
static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_segcore_2eproto_sccs[3] = {
|
||||
static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_segcore_2eproto_sccs[4] = {
|
||||
&scc_info_InsertRecord_segcore_2eproto.base,
|
||||
&scc_info_LoadFieldMeta_segcore_2eproto.base,
|
||||
&scc_info_LoadSegmentMeta_segcore_2eproto.base,
|
||||
&scc_info_RetrieveResults_segcore_2eproto.base,
|
||||
|
@ -146,10 +177,10 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_seg
|
|||
static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_segcore_2eproto_once;
|
||||
static bool descriptor_table_segcore_2eproto_initialized = false;
|
||||
const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_segcore_2eproto = {
|
||||
&descriptor_table_segcore_2eproto_initialized, descriptor_table_protodef_segcore_2eproto, "segcore.proto", 415,
|
||||
&descriptor_table_segcore_2eproto_once, descriptor_table_segcore_2eproto_sccs, descriptor_table_segcore_2eproto_deps, 3, 1,
|
||||
&descriptor_table_segcore_2eproto_initialized, descriptor_table_protodef_segcore_2eproto, "segcore.proto", 502,
|
||||
&descriptor_table_segcore_2eproto_once, descriptor_table_segcore_2eproto_sccs, descriptor_table_segcore_2eproto_deps, 4, 1,
|
||||
schemas, file_default_instances, TableStruct_segcore_2eproto::offsets,
|
||||
file_level_metadata_segcore_2eproto, 3, file_level_enum_descriptors_segcore_2eproto, file_level_service_descriptors_segcore_2eproto,
|
||||
file_level_metadata_segcore_2eproto, 4, file_level_enum_descriptors_segcore_2eproto, file_level_service_descriptors_segcore_2eproto,
|
||||
};
|
||||
|
||||
// Force running AddDescriptors() at dynamic initialization time.
|
||||
|
@ -1199,6 +1230,314 @@ void LoadSegmentMeta::InternalSwap(LoadSegmentMeta* other) {
|
|||
}
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
void InsertRecord::InitAsDefaultInstance() {
|
||||
}
|
||||
class InsertRecord::_Internal {
|
||||
public:
|
||||
};
|
||||
|
||||
void InsertRecord::clear_fields_data() {
|
||||
fields_data_.Clear();
|
||||
}
|
||||
InsertRecord::InsertRecord()
|
||||
: ::PROTOBUF_NAMESPACE_ID::Message(), _internal_metadata_(nullptr) {
|
||||
SharedCtor();
|
||||
// @@protoc_insertion_point(constructor:milvus.proto.segcore.InsertRecord)
|
||||
}
|
||||
InsertRecord::InsertRecord(const InsertRecord& from)
|
||||
: ::PROTOBUF_NAMESPACE_ID::Message(),
|
||||
_internal_metadata_(nullptr),
|
||||
fields_data_(from.fields_data_) {
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
num_rows_ = from.num_rows_;
|
||||
// @@protoc_insertion_point(copy_constructor:milvus.proto.segcore.InsertRecord)
|
||||
}
|
||||
|
||||
void InsertRecord::SharedCtor() {
|
||||
::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_InsertRecord_segcore_2eproto.base);
|
||||
num_rows_ = PROTOBUF_LONGLONG(0);
|
||||
}
|
||||
|
||||
InsertRecord::~InsertRecord() {
|
||||
// @@protoc_insertion_point(destructor:milvus.proto.segcore.InsertRecord)
|
||||
SharedDtor();
|
||||
}
|
||||
|
||||
void InsertRecord::SharedDtor() {
|
||||
}
|
||||
|
||||
void InsertRecord::SetCachedSize(int size) const {
|
||||
_cached_size_.Set(size);
|
||||
}
|
||||
const InsertRecord& InsertRecord::default_instance() {
|
||||
::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_InsertRecord_segcore_2eproto.base);
|
||||
return *internal_default_instance();
|
||||
}
|
||||
|
||||
|
||||
void InsertRecord::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:milvus.proto.segcore.InsertRecord)
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
fields_data_.Clear();
|
||||
num_rows_ = PROTOBUF_LONGLONG(0);
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
#if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER
|
||||
const char* InsertRecord::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) {
|
||||
#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure
|
||||
while (!ctx->Done(&ptr)) {
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 tag;
|
||||
ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag);
|
||||
CHK_(ptr);
|
||||
switch (tag >> 3) {
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
case 1:
|
||||
if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) {
|
||||
ptr -= 1;
|
||||
do {
|
||||
ptr += 1;
|
||||
ptr = ctx->ParseMessage(add_fields_data(), ptr);
|
||||
CHK_(ptr);
|
||||
if (!ctx->DataAvailable(ptr)) break;
|
||||
} while (::PROTOBUF_NAMESPACE_ID::internal::UnalignedLoad<::PROTOBUF_NAMESPACE_ID::uint8>(ptr) == 10);
|
||||
} else goto handle_unusual;
|
||||
continue;
|
||||
// int64 num_rows = 2;
|
||||
case 2:
|
||||
if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) {
|
||||
num_rows_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr);
|
||||
CHK_(ptr);
|
||||
} else goto handle_unusual;
|
||||
continue;
|
||||
default: {
|
||||
handle_unusual:
|
||||
if ((tag & 7) == 4 || tag == 0) {
|
||||
ctx->SetLastTag(tag);
|
||||
goto success;
|
||||
}
|
||||
ptr = UnknownFieldParse(tag, &_internal_metadata_, ptr, ctx);
|
||||
CHK_(ptr != nullptr);
|
||||
continue;
|
||||
}
|
||||
} // switch
|
||||
} // while
|
||||
success:
|
||||
return ptr;
|
||||
failure:
|
||||
ptr = nullptr;
|
||||
goto success;
|
||||
#undef CHK_
|
||||
}
|
||||
#else // GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER
|
||||
bool InsertRecord::MergePartialFromCodedStream(
|
||||
::PROTOBUF_NAMESPACE_ID::io::CodedInputStream* input) {
|
||||
#define DO_(EXPRESSION) if (!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 tag;
|
||||
// @@protoc_insertion_point(parse_start:milvus.proto.segcore.InsertRecord)
|
||||
for (;;) {
|
||||
::std::pair<::PROTOBUF_NAMESPACE_ID::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
|
||||
tag = p.first;
|
||||
if (!p.second) goto handle_unusual;
|
||||
switch (::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::GetTagFieldNumber(tag)) {
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
case 1: {
|
||||
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (10 & 0xFF)) {
|
||||
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadMessage(
|
||||
input, add_fields_data()));
|
||||
} else {
|
||||
goto handle_unusual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// int64 num_rows = 2;
|
||||
case 2: {
|
||||
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) {
|
||||
|
||||
DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive<
|
||||
::PROTOBUF_NAMESPACE_ID::int64, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT64>(
|
||||
input, &num_rows_)));
|
||||
} else {
|
||||
goto handle_unusual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
handle_unusual:
|
||||
if (tag == 0) {
|
||||
goto success;
|
||||
}
|
||||
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SkipField(
|
||||
input, tag, _internal_metadata_.mutable_unknown_fields()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
success:
|
||||
// @@protoc_insertion_point(parse_success:milvus.proto.segcore.InsertRecord)
|
||||
return true;
|
||||
failure:
|
||||
// @@protoc_insertion_point(parse_failure:milvus.proto.segcore.InsertRecord)
|
||||
return false;
|
||||
#undef DO_
|
||||
}
|
||||
#endif // GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER
|
||||
|
||||
void InsertRecord::SerializeWithCachedSizes(
|
||||
::PROTOBUF_NAMESPACE_ID::io::CodedOutputStream* output) const {
|
||||
// @@protoc_insertion_point(serialize_start:milvus.proto.segcore.InsertRecord)
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
for (unsigned int i = 0,
|
||||
n = static_cast<unsigned int>(this->fields_data_size()); i < n; i++) {
|
||||
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteMessageMaybeToArray(
|
||||
1,
|
||||
this->fields_data(static_cast<int>(i)),
|
||||
output);
|
||||
}
|
||||
|
||||
// int64 num_rows = 2;
|
||||
if (this->num_rows() != 0) {
|
||||
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(2, this->num_rows(), output);
|
||||
}
|
||||
|
||||
if (_internal_metadata_.have_unknown_fields()) {
|
||||
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SerializeUnknownFields(
|
||||
_internal_metadata_.unknown_fields(), output);
|
||||
}
|
||||
// @@protoc_insertion_point(serialize_end:milvus.proto.segcore.InsertRecord)
|
||||
}
|
||||
|
||||
::PROTOBUF_NAMESPACE_ID::uint8* InsertRecord::InternalSerializeWithCachedSizesToArray(
|
||||
::PROTOBUF_NAMESPACE_ID::uint8* target) const {
|
||||
// @@protoc_insertion_point(serialize_to_array_start:milvus.proto.segcore.InsertRecord)
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
for (unsigned int i = 0,
|
||||
n = static_cast<unsigned int>(this->fields_data_size()); i < n; i++) {
|
||||
target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::
|
||||
InternalWriteMessageToArray(
|
||||
1, this->fields_data(static_cast<int>(i)), target);
|
||||
}
|
||||
|
||||
// int64 num_rows = 2;
|
||||
if (this->num_rows() != 0) {
|
||||
target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(2, this->num_rows(), target);
|
||||
}
|
||||
|
||||
if (_internal_metadata_.have_unknown_fields()) {
|
||||
target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SerializeUnknownFieldsToArray(
|
||||
_internal_metadata_.unknown_fields(), target);
|
||||
}
|
||||
// @@protoc_insertion_point(serialize_to_array_end:milvus.proto.segcore.InsertRecord)
|
||||
return target;
|
||||
}
|
||||
|
||||
size_t InsertRecord::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:milvus.proto.segcore.InsertRecord)
|
||||
size_t total_size = 0;
|
||||
|
||||
if (_internal_metadata_.have_unknown_fields()) {
|
||||
total_size +=
|
||||
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::ComputeUnknownFieldsSize(
|
||||
_internal_metadata_.unknown_fields());
|
||||
}
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
{
|
||||
unsigned int count = static_cast<unsigned int>(this->fields_data_size());
|
||||
total_size += 1UL * count;
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
total_size +=
|
||||
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize(
|
||||
this->fields_data(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
// int64 num_rows = 2;
|
||||
if (this->num_rows() != 0) {
|
||||
total_size += 1 +
|
||||
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size(
|
||||
this->num_rows());
|
||||
}
|
||||
|
||||
int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
void InsertRecord::MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) {
|
||||
// @@protoc_insertion_point(generalized_merge_from_start:milvus.proto.segcore.InsertRecord)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
const InsertRecord* source =
|
||||
::PROTOBUF_NAMESPACE_ID::DynamicCastToGenerated<InsertRecord>(
|
||||
&from);
|
||||
if (source == nullptr) {
|
||||
// @@protoc_insertion_point(generalized_merge_from_cast_fail:milvus.proto.segcore.InsertRecord)
|
||||
::PROTOBUF_NAMESPACE_ID::internal::ReflectionOps::Merge(from, this);
|
||||
} else {
|
||||
// @@protoc_insertion_point(generalized_merge_from_cast_success:milvus.proto.segcore.InsertRecord)
|
||||
MergeFrom(*source);
|
||||
}
|
||||
}
|
||||
|
||||
void InsertRecord::MergeFrom(const InsertRecord& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:milvus.proto.segcore.InsertRecord)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
fields_data_.MergeFrom(from.fields_data_);
|
||||
if (from.num_rows() != 0) {
|
||||
set_num_rows(from.num_rows());
|
||||
}
|
||||
}
|
||||
|
||||
void InsertRecord::CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) {
|
||||
// @@protoc_insertion_point(generalized_copy_from_start:milvus.proto.segcore.InsertRecord)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
void InsertRecord::CopyFrom(const InsertRecord& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:milvus.proto.segcore.InsertRecord)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
bool InsertRecord::IsInitialized() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void InsertRecord::InternalSwap(InsertRecord* other) {
|
||||
using std::swap;
|
||||
_internal_metadata_.Swap(&other->_internal_metadata_);
|
||||
CastToBase(&fields_data_)->InternalSwap(CastToBase(&other->fields_data_));
|
||||
swap(num_rows_, other->num_rows_);
|
||||
}
|
||||
|
||||
::PROTOBUF_NAMESPACE_ID::Metadata InsertRecord::GetMetadata() const {
|
||||
return GetMetadataStatic();
|
||||
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace segcore
|
||||
} // namespace proto
|
||||
|
@ -1213,6 +1552,9 @@ template<> PROTOBUF_NOINLINE ::milvus::proto::segcore::LoadFieldMeta* Arena::Cre
|
|||
template<> PROTOBUF_NOINLINE ::milvus::proto::segcore::LoadSegmentMeta* Arena::CreateMaybeMessage< ::milvus::proto::segcore::LoadSegmentMeta >(Arena* arena) {
|
||||
return Arena::CreateInternal< ::milvus::proto::segcore::LoadSegmentMeta >(arena);
|
||||
}
|
||||
template<> PROTOBUF_NOINLINE ::milvus::proto::segcore::InsertRecord* Arena::CreateMaybeMessage< ::milvus::proto::segcore::InsertRecord >(Arena* arena) {
|
||||
return Arena::CreateInternal< ::milvus::proto::segcore::InsertRecord >(arena);
|
||||
}
|
||||
PROTOBUF_NAMESPACE_CLOSE
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
|
|
@ -48,7 +48,7 @@ struct TableStruct_segcore_2eproto {
|
|||
PROTOBUF_SECTION_VARIABLE(protodesc_cold);
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::AuxillaryParseTableField aux[]
|
||||
PROTOBUF_SECTION_VARIABLE(protodesc_cold);
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTable schema[3]
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTable schema[4]
|
||||
PROTOBUF_SECTION_VARIABLE(protodesc_cold);
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::FieldMetadata field_metadata[];
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::SerializationTable serialization_table[];
|
||||
|
@ -58,6 +58,9 @@ extern const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table
|
|||
namespace milvus {
|
||||
namespace proto {
|
||||
namespace segcore {
|
||||
class InsertRecord;
|
||||
class InsertRecordDefaultTypeInternal;
|
||||
extern InsertRecordDefaultTypeInternal _InsertRecord_default_instance_;
|
||||
class LoadFieldMeta;
|
||||
class LoadFieldMetaDefaultTypeInternal;
|
||||
extern LoadFieldMetaDefaultTypeInternal _LoadFieldMeta_default_instance_;
|
||||
|
@ -71,6 +74,7 @@ extern RetrieveResultsDefaultTypeInternal _RetrieveResults_default_instance_;
|
|||
} // namespace proto
|
||||
} // namespace milvus
|
||||
PROTOBUF_NAMESPACE_OPEN
|
||||
template<> ::milvus::proto::segcore::InsertRecord* Arena::CreateMaybeMessage<::milvus::proto::segcore::InsertRecord>(Arena*);
|
||||
template<> ::milvus::proto::segcore::LoadFieldMeta* Arena::CreateMaybeMessage<::milvus::proto::segcore::LoadFieldMeta>(Arena*);
|
||||
template<> ::milvus::proto::segcore::LoadSegmentMeta* Arena::CreateMaybeMessage<::milvus::proto::segcore::LoadSegmentMeta>(Arena*);
|
||||
template<> ::milvus::proto::segcore::RetrieveResults* Arena::CreateMaybeMessage<::milvus::proto::segcore::RetrieveResults>(Arena*);
|
||||
|
@ -529,6 +533,150 @@ class LoadSegmentMeta :
|
|||
mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_;
|
||||
friend struct ::TableStruct_segcore_2eproto;
|
||||
};
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
class InsertRecord :
|
||||
public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:milvus.proto.segcore.InsertRecord) */ {
|
||||
public:
|
||||
InsertRecord();
|
||||
virtual ~InsertRecord();
|
||||
|
||||
InsertRecord(const InsertRecord& from);
|
||||
InsertRecord(InsertRecord&& from) noexcept
|
||||
: InsertRecord() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline InsertRecord& operator=(const InsertRecord& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
inline InsertRecord& operator=(InsertRecord&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() {
|
||||
return GetDescriptor();
|
||||
}
|
||||
static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() {
|
||||
return GetMetadataStatic().descriptor;
|
||||
}
|
||||
static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() {
|
||||
return GetMetadataStatic().reflection;
|
||||
}
|
||||
static const InsertRecord& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const InsertRecord* internal_default_instance() {
|
||||
return reinterpret_cast<const InsertRecord*>(
|
||||
&_InsertRecord_default_instance_);
|
||||
}
|
||||
static constexpr int kIndexInFileMessages =
|
||||
3;
|
||||
|
||||
friend void swap(InsertRecord& a, InsertRecord& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
inline void Swap(InsertRecord* other) {
|
||||
if (other == this) return;
|
||||
InternalSwap(other);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline InsertRecord* New() const final {
|
||||
return CreateMaybeMessage<InsertRecord>(nullptr);
|
||||
}
|
||||
|
||||
InsertRecord* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final {
|
||||
return CreateMaybeMessage<InsertRecord>(arena);
|
||||
}
|
||||
void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final;
|
||||
void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final;
|
||||
void CopyFrom(const InsertRecord& from);
|
||||
void MergeFrom(const InsertRecord& from);
|
||||
PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
|
||||
bool IsInitialized() const final;
|
||||
|
||||
size_t ByteSizeLong() const final;
|
||||
#if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER
|
||||
const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final;
|
||||
#else
|
||||
bool MergePartialFromCodedStream(
|
||||
::PROTOBUF_NAMESPACE_ID::io::CodedInputStream* input) final;
|
||||
#endif // GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER
|
||||
void SerializeWithCachedSizes(
|
||||
::PROTOBUF_NAMESPACE_ID::io::CodedOutputStream* output) const final;
|
||||
::PROTOBUF_NAMESPACE_ID::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
::PROTOBUF_NAMESPACE_ID::uint8* target) const final;
|
||||
int GetCachedSize() const final { return _cached_size_.Get(); }
|
||||
|
||||
private:
|
||||
inline void SharedCtor();
|
||||
inline void SharedDtor();
|
||||
void SetCachedSize(int size) const final;
|
||||
void InternalSwap(InsertRecord* other);
|
||||
friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata;
|
||||
static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
|
||||
return "milvus.proto.segcore.InsertRecord";
|
||||
}
|
||||
private:
|
||||
inline ::PROTOBUF_NAMESPACE_ID::Arena* GetArenaNoVirtual() const {
|
||||
return nullptr;
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return nullptr;
|
||||
}
|
||||
public:
|
||||
|
||||
::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final;
|
||||
private:
|
||||
static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() {
|
||||
::PROTOBUF_NAMESPACE_ID::internal::AssignDescriptors(&::descriptor_table_segcore_2eproto);
|
||||
return ::descriptor_table_segcore_2eproto.file_level_metadata[kIndexInFileMessages];
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
enum : int {
|
||||
kFieldsDataFieldNumber = 1,
|
||||
kNumRowsFieldNumber = 2,
|
||||
};
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
int fields_data_size() const;
|
||||
void clear_fields_data();
|
||||
::milvus::proto::schema::FieldData* mutable_fields_data(int index);
|
||||
::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::FieldData >*
|
||||
mutable_fields_data();
|
||||
const ::milvus::proto::schema::FieldData& fields_data(int index) const;
|
||||
::milvus::proto::schema::FieldData* add_fields_data();
|
||||
const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::FieldData >&
|
||||
fields_data() const;
|
||||
|
||||
// int64 num_rows = 2;
|
||||
void clear_num_rows();
|
||||
::PROTOBUF_NAMESPACE_ID::int64 num_rows() const;
|
||||
void set_num_rows(::PROTOBUF_NAMESPACE_ID::int64 value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:milvus.proto.segcore.InsertRecord)
|
||||
private:
|
||||
class _Internal;
|
||||
|
||||
::PROTOBUF_NAMESPACE_ID::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::FieldData > fields_data_;
|
||||
::PROTOBUF_NAMESPACE_ID::int64 num_rows_;
|
||||
mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_;
|
||||
friend struct ::TableStruct_segcore_2eproto;
|
||||
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
|
@ -736,6 +884,51 @@ inline void LoadSegmentMeta::set_total_size(::PROTOBUF_NAMESPACE_ID::int64 value
|
|||
// @@protoc_insertion_point(field_set:milvus.proto.segcore.LoadSegmentMeta.total_size)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// InsertRecord
|
||||
|
||||
// repeated .milvus.proto.schema.FieldData fields_data = 1;
|
||||
inline int InsertRecord::fields_data_size() const {
|
||||
return fields_data_.size();
|
||||
}
|
||||
inline ::milvus::proto::schema::FieldData* InsertRecord::mutable_fields_data(int index) {
|
||||
// @@protoc_insertion_point(field_mutable:milvus.proto.segcore.InsertRecord.fields_data)
|
||||
return fields_data_.Mutable(index);
|
||||
}
|
||||
inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::FieldData >*
|
||||
InsertRecord::mutable_fields_data() {
|
||||
// @@protoc_insertion_point(field_mutable_list:milvus.proto.segcore.InsertRecord.fields_data)
|
||||
return &fields_data_;
|
||||
}
|
||||
inline const ::milvus::proto::schema::FieldData& InsertRecord::fields_data(int index) const {
|
||||
// @@protoc_insertion_point(field_get:milvus.proto.segcore.InsertRecord.fields_data)
|
||||
return fields_data_.Get(index);
|
||||
}
|
||||
inline ::milvus::proto::schema::FieldData* InsertRecord::add_fields_data() {
|
||||
// @@protoc_insertion_point(field_add:milvus.proto.segcore.InsertRecord.fields_data)
|
||||
return fields_data_.Add();
|
||||
}
|
||||
inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::FieldData >&
|
||||
InsertRecord::fields_data() const {
|
||||
// @@protoc_insertion_point(field_list:milvus.proto.segcore.InsertRecord.fields_data)
|
||||
return fields_data_;
|
||||
}
|
||||
|
||||
// int64 num_rows = 2;
|
||||
inline void InsertRecord::clear_num_rows() {
|
||||
num_rows_ = PROTOBUF_LONGLONG(0);
|
||||
}
|
||||
inline ::PROTOBUF_NAMESPACE_ID::int64 InsertRecord::num_rows() const {
|
||||
// @@protoc_insertion_point(field_get:milvus.proto.segcore.InsertRecord.num_rows)
|
||||
return num_rows_;
|
||||
}
|
||||
inline void InsertRecord::set_num_rows(::PROTOBUF_NAMESPACE_ID::int64 value) {
|
||||
|
||||
num_rows_ = value;
|
||||
// @@protoc_insertion_point(field_set:milvus.proto.segcore.InsertRecord.num_rows)
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
|
@ -743,6 +936,8 @@ inline void LoadSegmentMeta::set_total_size(::PROTOBUF_NAMESPACE_ID::int64 value
|
|||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
|
|
|
@ -86,15 +86,14 @@ struct LogicalBinaryExpr : BinaryExprBase {
|
|||
};
|
||||
|
||||
struct TermExpr : Expr {
|
||||
const FieldOffset field_offset_;
|
||||
const FieldId field_id_;
|
||||
const DataType data_type_;
|
||||
|
||||
protected:
|
||||
// prevent accidential instantiation
|
||||
TermExpr() = delete;
|
||||
|
||||
TermExpr(const FieldOffset field_offset, const DataType data_type)
|
||||
: field_offset_(field_offset), data_type_(data_type) {
|
||||
TermExpr(const FieldId field_id, const DataType data_type) : field_id_(field_id), data_type_(data_type) {
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -110,6 +109,8 @@ enum class OpType {
|
|||
LessEqual = 4,
|
||||
Equal = 5,
|
||||
NotEqual = 6,
|
||||
PrefixMatch = 7,
|
||||
PostfixMatch = 8,
|
||||
};
|
||||
|
||||
enum class ArithOpType {
|
||||
|
@ -134,7 +135,7 @@ static const std::map<ArithOpType, std::string> mapping_arith_op_ = {
|
|||
};
|
||||
|
||||
struct BinaryArithOpEvalRangeExpr : Expr {
|
||||
const FieldOffset field_offset_;
|
||||
const FieldId field_id_;
|
||||
const DataType data_type_;
|
||||
const OpType op_type_;
|
||||
const ArithOpType arith_op_;
|
||||
|
@ -143,11 +144,11 @@ struct BinaryArithOpEvalRangeExpr : Expr {
|
|||
// prevent accidential instantiation
|
||||
BinaryArithOpEvalRangeExpr() = delete;
|
||||
|
||||
BinaryArithOpEvalRangeExpr(const FieldOffset field_offset,
|
||||
BinaryArithOpEvalRangeExpr(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
const OpType op_type,
|
||||
const ArithOpType arith_op)
|
||||
: field_offset_(field_offset), data_type_(data_type), op_type_(op_type), arith_op_(arith_op) {
|
||||
: field_id_(field_id), data_type_(data_type), op_type_(op_type), arith_op_(arith_op) {
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -163,7 +164,7 @@ static const std::map<std::string, OpType> mapping_ = {
|
|||
};
|
||||
|
||||
struct UnaryRangeExpr : Expr {
|
||||
const FieldOffset field_offset_;
|
||||
const FieldId field_id_;
|
||||
const DataType data_type_;
|
||||
const OpType op_type_;
|
||||
|
||||
|
@ -171,8 +172,8 @@ struct UnaryRangeExpr : Expr {
|
|||
// prevent accidential instantiation
|
||||
UnaryRangeExpr() = delete;
|
||||
|
||||
UnaryRangeExpr(const FieldOffset field_offset, const DataType data_type, const OpType op_type)
|
||||
: field_offset_(field_offset), data_type_(data_type), op_type_(op_type) {
|
||||
UnaryRangeExpr(const FieldId field_id, const DataType data_type, const OpType op_type)
|
||||
: field_id_(field_id), data_type_(data_type), op_type_(op_type) {
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -181,7 +182,7 @@ struct UnaryRangeExpr : Expr {
|
|||
};
|
||||
|
||||
struct BinaryRangeExpr : Expr {
|
||||
const FieldOffset field_offset_;
|
||||
const FieldId field_id_;
|
||||
const DataType data_type_;
|
||||
const bool lower_inclusive_;
|
||||
const bool upper_inclusive_;
|
||||
|
@ -190,11 +191,11 @@ struct BinaryRangeExpr : Expr {
|
|||
// prevent accidential instantiation
|
||||
BinaryRangeExpr() = delete;
|
||||
|
||||
BinaryRangeExpr(const FieldOffset field_offset,
|
||||
BinaryRangeExpr(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
const bool lower_inclusive,
|
||||
const bool upper_inclusive)
|
||||
: field_offset_(field_offset),
|
||||
: field_id_(field_id),
|
||||
data_type_(data_type),
|
||||
lower_inclusive_(lower_inclusive),
|
||||
upper_inclusive_(upper_inclusive) {
|
||||
|
@ -206,8 +207,8 @@ struct BinaryRangeExpr : Expr {
|
|||
};
|
||||
|
||||
struct CompareExpr : Expr {
|
||||
FieldOffset left_field_offset_;
|
||||
FieldOffset right_field_offset_;
|
||||
FieldId left_field_id_;
|
||||
FieldId right_field_id_;
|
||||
DataType left_data_type_;
|
||||
DataType right_data_type_;
|
||||
OpType op_type_;
|
||||
|
|
|
@ -28,8 +28,8 @@ template <typename T>
|
|||
struct TermExprImpl : TermExpr {
|
||||
const std::vector<T> terms_;
|
||||
|
||||
TermExprImpl(const FieldOffset field_offset, const DataType data_type, const std::vector<T>& terms)
|
||||
: TermExpr(field_offset, data_type), terms_(terms) {
|
||||
TermExprImpl(const FieldId field_id, const DataType data_type, const std::vector<T>& terms)
|
||||
: TermExpr(field_id, data_type), terms_(terms) {
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -38,13 +38,13 @@ struct BinaryArithOpEvalRangeExprImpl : BinaryArithOpEvalRangeExpr {
|
|||
const T right_operand_;
|
||||
const T value_;
|
||||
|
||||
BinaryArithOpEvalRangeExprImpl(const FieldOffset field_offset,
|
||||
BinaryArithOpEvalRangeExprImpl(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
const ArithOpType arith_op,
|
||||
const T right_operand,
|
||||
const OpType op_type,
|
||||
const T value)
|
||||
: BinaryArithOpEvalRangeExpr(field_offset, data_type, op_type, arith_op),
|
||||
: BinaryArithOpEvalRangeExpr(field_id, data_type, op_type, arith_op),
|
||||
right_operand_(right_operand),
|
||||
value_(value) {
|
||||
}
|
||||
|
@ -54,8 +54,8 @@ template <typename T>
|
|||
struct UnaryRangeExprImpl : UnaryRangeExpr {
|
||||
const T value_;
|
||||
|
||||
UnaryRangeExprImpl(const FieldOffset field_offset, const DataType data_type, const OpType op_type, const T value)
|
||||
: UnaryRangeExpr(field_offset, data_type, op_type), value_(value) {
|
||||
UnaryRangeExprImpl(const FieldId field_id, const DataType data_type, const OpType op_type, const T value)
|
||||
: UnaryRangeExpr(field_id, data_type, op_type), value_(value) {
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -64,13 +64,13 @@ struct BinaryRangeExprImpl : BinaryRangeExpr {
|
|||
const T lower_value_;
|
||||
const T upper_value_;
|
||||
|
||||
BinaryRangeExprImpl(const FieldOffset field_offset,
|
||||
BinaryRangeExprImpl(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
const bool lower_inclusive,
|
||||
const bool upper_inclusive,
|
||||
const T lower_value,
|
||||
const T upper_value)
|
||||
: BinaryRangeExpr(field_offset, data_type, lower_inclusive, upper_inclusive),
|
||||
: BinaryRangeExpr(field_id, data_type, lower_inclusive, upper_inclusive),
|
||||
lower_value_(lower_value),
|
||||
upper_value_(upper_value) {
|
||||
}
|
||||
|
|
|
@ -75,13 +75,13 @@ Parser::ParseCompareNode(const Json& out_body) {
|
|||
Assert(item0.is_string());
|
||||
auto left_field_name = FieldName(item0.get<std::string>());
|
||||
expr->left_data_type_ = schema[left_field_name].get_data_type();
|
||||
expr->left_field_offset_ = schema.get_offset(left_field_name);
|
||||
expr->left_field_id_ = schema.get_field_id(left_field_name);
|
||||
|
||||
auto& item1 = body[1];
|
||||
Assert(item1.is_string());
|
||||
auto right_field_name = FieldName(item1.get<std::string>());
|
||||
expr->right_data_type_ = schema[right_field_name].get_data_type();
|
||||
expr->right_field_offset_ = schema.get_offset(right_field_name);
|
||||
expr->right_field_id_ = schema.get_field_id(right_field_name);
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ Parser::ParseVecNode(const Json& out_body) {
|
|||
AssertInfo(topk > 0, "topk must greater than 0");
|
||||
AssertInfo(topk < 16384, "topk is too large");
|
||||
|
||||
auto field_offset = schema.get_offset(field_name);
|
||||
auto field_id = schema.get_field_id(field_name);
|
||||
|
||||
auto vec_node = [&]() -> std::unique_ptr<VectorPlanNode> {
|
||||
auto& field_meta = schema.operator[](field_name);
|
||||
|
@ -202,12 +202,12 @@ Parser::ParseVecNode(const Json& out_body) {
|
|||
vec_node->search_info_.topk_ = topk;
|
||||
vec_node->search_info_.metric_type_ = GetMetricType(vec_info.at("metric_type"));
|
||||
vec_node->search_info_.search_params_ = vec_info.at("params");
|
||||
vec_node->search_info_.field_offset_ = field_offset;
|
||||
vec_node->search_info_.field_id_ = field_id;
|
||||
vec_node->search_info_.round_decimal_ = vec_info.at("round_decimal");
|
||||
vec_node->placeholder_tag_ = vec_info.at("query");
|
||||
auto tag = vec_node->placeholder_tag_;
|
||||
AssertInfo(!tag2field_.count(tag), "duplicated placeholder tag");
|
||||
tag2field_.emplace(tag, field_offset);
|
||||
tag2field_.emplace(tag, field_id);
|
||||
return vec_node;
|
||||
}
|
||||
|
||||
|
@ -232,7 +232,8 @@ Parser::ParseTermNodeImpl(const FieldName& field_name, const Json& body) {
|
|||
terms[i] = value;
|
||||
}
|
||||
std::sort(terms.begin(), terms.end());
|
||||
return std::make_unique<TermExprImpl<T>>(schema.get_offset(field_name), schema[field_name].get_data_type(), terms);
|
||||
return std::make_unique<TermExprImpl<T>>(schema.get_field_id(field_name), schema[field_name].get_data_type(),
|
||||
terms);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -298,8 +299,8 @@ Parser::ParseRangeNodeImpl(const FieldName& field_name, const Json& body) {
|
|||
}
|
||||
|
||||
return std::make_unique<BinaryArithOpEvalRangeExprImpl<T>>(
|
||||
schema.get_offset(field_name), schema[field_name].get_data_type(), arith_op_mapping_.at(arith_op_name),
|
||||
right_operand, mapping_.at(op_name), value);
|
||||
schema.get_field_id(field_name), schema[field_name].get_data_type(),
|
||||
arith_op_mapping_.at(arith_op_name), right_operand, mapping_.at(op_name), value);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
|
@ -312,7 +313,7 @@ Parser::ParseRangeNodeImpl(const FieldName& field_name, const Json& body) {
|
|||
static_assert(always_false<T>, "unsupported type");
|
||||
}
|
||||
return std::make_unique<UnaryRangeExprImpl<T>>(
|
||||
schema.get_offset(field_name), schema[field_name].get_data_type(), mapping_.at(op_name), item.value());
|
||||
schema.get_field_id(field_name), schema[field_name].get_data_type(), mapping_.at(op_name), item.value());
|
||||
} else if (body.size() == 2) {
|
||||
bool has_lower_value = false;
|
||||
bool has_upper_value = false;
|
||||
|
@ -351,7 +352,7 @@ Parser::ParseRangeNodeImpl(const FieldName& field_name, const Json& body) {
|
|||
}
|
||||
}
|
||||
AssertInfo(has_lower_value && has_upper_value, "illegal binary-range node");
|
||||
return std::make_unique<BinaryRangeExprImpl<T>>(schema.get_offset(field_name),
|
||||
return std::make_unique<BinaryRangeExprImpl<T>>(schema.get_field_id(field_name),
|
||||
schema[field_name].get_data_type(), lower_inclusive,
|
||||
upper_inclusive, lower_value, upper_value);
|
||||
} else {
|
||||
|
|
|
@ -85,7 +85,7 @@ class Parser {
|
|||
|
||||
private:
|
||||
const Schema& schema;
|
||||
std::map<std::string, FieldOffset> tag2field_; // PlaceholderName -> field offset
|
||||
std::map<std::string, FieldId> tag2field_; // PlaceholderName -> field id
|
||||
std::optional<std::unique_ptr<VectorPlanNode>> vector_node_opt_;
|
||||
};
|
||||
|
||||
|
|
|
@ -32,8 +32,8 @@ ParsePlaceholderGroup(const Plan* plan, const std::string& blob) {
|
|||
Placeholder element;
|
||||
element.tag_ = info.tag();
|
||||
Assert(plan->tag2field_.count(element.tag_));
|
||||
auto field_offset = plan->tag2field_.at(element.tag_);
|
||||
auto& field_meta = plan->schema_[field_offset];
|
||||
auto field_id = plan->tag2field_.at(element.tag_);
|
||||
auto& field_meta = plan->schema_[field_id];
|
||||
element.num_of_queries_ = info.values_size();
|
||||
AssertInfo(element.num_of_queries_, "must have queries");
|
||||
Assert(element.num_of_queries_ > 0);
|
||||
|
@ -86,9 +86,9 @@ GetNumOfQueries(const PlaceholderGroup* group) {
|
|||
// std::unique_ptr<RetrievePlan>
|
||||
// CreateRetrievePlan(const Schema& schema, proto::segcore::RetrieveRequest&& request) {
|
||||
// auto plan = std::make_unique<RetrievePlan>();
|
||||
// plan->ids_ = std::unique_ptr<proto::schema::IDs>(request.release_ids());
|
||||
// plan->seg_offsets_ = std::unique_ptr<proto::schema::IDs>(request.release_ids());
|
||||
// for (auto& field_id : request.output_fields_id()) {
|
||||
// plan->field_offsets_.push_back(schema.get_offset(FieldId(field_id)));
|
||||
// plan->field_ids_.push_back(schema.get_offset(FieldId(field_id)));
|
||||
// }
|
||||
// return plan;
|
||||
//}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "exceptions/EasyAssert.h"
|
||||
#include "pb/milvus.pb.h"
|
||||
#include "utils/Json.h"
|
||||
#include "common/Consts.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
|
@ -33,8 +34,10 @@ struct ExtractedPlanInfo {
|
|||
}
|
||||
|
||||
void
|
||||
add_involved_field(FieldOffset field_offset) {
|
||||
involved_fields_.set(field_offset.get());
|
||||
add_involved_field(FieldId field_id) {
|
||||
auto pos = field_id.get() - START_USER_FIELDID;
|
||||
AssertInfo(pos >= 0, "field id is invalid");
|
||||
involved_fields_.set(pos);
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -49,8 +52,8 @@ struct Plan {
|
|||
public:
|
||||
const Schema& schema_;
|
||||
std::unique_ptr<VectorPlanNode> plan_node_;
|
||||
std::map<std::string, FieldOffset> tag2field_; // PlaceholderName -> FieldOffset
|
||||
std::vector<FieldOffset> target_entries_;
|
||||
std::map<std::string, FieldId> tag2field_; // PlaceholderName -> FieldId
|
||||
std::vector<FieldId> target_entries_;
|
||||
void
|
||||
check_identical(Plan& other);
|
||||
|
||||
|
@ -86,7 +89,7 @@ struct RetrievePlan {
|
|||
public:
|
||||
const Schema& schema_;
|
||||
std::unique_ptr<RetrievePlanNode> plan_node_;
|
||||
std::vector<FieldOffset> field_offsets_;
|
||||
std::vector<FieldId> field_ids_;
|
||||
};
|
||||
|
||||
using PlanPtr = std::unique_ptr<Plan>;
|
||||
|
|
|
@ -37,7 +37,7 @@ using PlanNodePtr = std::unique_ptr<PlanNode>;
|
|||
struct SearchInfo {
|
||||
int64_t topk_;
|
||||
int64_t round_decimal_;
|
||||
FieldOffset field_offset_;
|
||||
FieldId field_id_;
|
||||
MetricType metric_type_;
|
||||
nlohmann::json search_params_;
|
||||
};
|
||||
|
|
|
@ -11,18 +11,21 @@
|
|||
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ExprImpl.h"
|
||||
#include "PlanProto.h"
|
||||
#include "generated/ExtractInfoExprVisitor.h"
|
||||
#include "generated/ExtractInfoPlanNodeVisitor.h"
|
||||
#include "common/VectorTrait.h"
|
||||
|
||||
namespace milvus::query {
|
||||
namespace planpb = milvus::proto::plan;
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<TermExprImpl<T>>
|
||||
ExtractTermExprImpl(FieldOffset field_offset, DataType data_type, const planpb::TermExpr& expr_proto) {
|
||||
static_assert(std::is_fundamental_v<T>);
|
||||
ExtractTermExprImpl(FieldId field_id, DataType data_type, const planpb::TermExpr& expr_proto) {
|
||||
static_assert(IsScalar<T>);
|
||||
auto size = expr_proto.values_size();
|
||||
std::vector<T> terms(size);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
|
@ -36,18 +39,21 @@ ExtractTermExprImpl(FieldOffset field_offset, DataType data_type, const planpb::
|
|||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kFloatVal);
|
||||
terms[i] = static_cast<T>(value_proto.float_val());
|
||||
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kStringVal);
|
||||
terms[i] = static_cast<T>(value_proto.string_val());
|
||||
} else {
|
||||
static_assert(always_false<T>);
|
||||
}
|
||||
}
|
||||
std::sort(terms.begin(), terms.end());
|
||||
return std::make_unique<TermExprImpl<T>>(field_offset, data_type, terms);
|
||||
return std::make_unique<TermExprImpl<T>>(field_id, data_type, terms);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<UnaryRangeExprImpl<T>>
|
||||
ExtractUnaryRangeExprImpl(FieldOffset field_offset, DataType data_type, const planpb::UnaryRangeExpr& expr_proto) {
|
||||
static_assert(std::is_fundamental_v<T>);
|
||||
ExtractUnaryRangeExprImpl(FieldId field_id, DataType data_type, const planpb::UnaryRangeExpr& expr_proto) {
|
||||
static_assert(IsScalar<T>);
|
||||
auto getValue = [&](const auto& value_proto) -> T {
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kBoolVal);
|
||||
|
@ -58,18 +64,21 @@ ExtractUnaryRangeExprImpl(FieldOffset field_offset, DataType data_type, const pl
|
|||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kFloatVal);
|
||||
return static_cast<T>(value_proto.float_val());
|
||||
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kStringVal);
|
||||
return static_cast<T>(value_proto.string_val());
|
||||
} else {
|
||||
static_assert(always_false<T>);
|
||||
}
|
||||
};
|
||||
return std::make_unique<UnaryRangeExprImpl<T>>(field_offset, data_type, static_cast<OpType>(expr_proto.op()),
|
||||
return std::make_unique<UnaryRangeExprImpl<T>>(field_id, data_type, static_cast<OpType>(expr_proto.op()),
|
||||
getValue(expr_proto.value()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<BinaryRangeExprImpl<T>>
|
||||
ExtractBinaryRangeExprImpl(FieldOffset field_offset, DataType data_type, const planpb::BinaryRangeExpr& expr_proto) {
|
||||
static_assert(std::is_fundamental_v<T>);
|
||||
ExtractBinaryRangeExprImpl(FieldId field_id, DataType data_type, const planpb::BinaryRangeExpr& expr_proto) {
|
||||
static_assert(IsScalar<T>);
|
||||
auto getValue = [&](const auto& value_proto) -> T {
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kBoolVal);
|
||||
|
@ -80,18 +89,21 @@ ExtractBinaryRangeExprImpl(FieldOffset field_offset, DataType data_type, const p
|
|||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kFloatVal);
|
||||
return static_cast<T>(value_proto.float_val());
|
||||
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||
Assert(value_proto.val_case() == planpb::GenericValue::kStringVal);
|
||||
return static_cast<T>(value_proto.string_val());
|
||||
} else {
|
||||
static_assert(always_false<T>);
|
||||
}
|
||||
};
|
||||
return std::make_unique<BinaryRangeExprImpl<T>>(field_offset, data_type, expr_proto.lower_inclusive(),
|
||||
return std::make_unique<BinaryRangeExprImpl<T>>(field_id, data_type, expr_proto.lower_inclusive(),
|
||||
expr_proto.upper_inclusive(), getValue(expr_proto.lower_value()),
|
||||
getValue(expr_proto.upper_value()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<BinaryArithOpEvalRangeExprImpl<T>>
|
||||
ExtractBinaryArithOpEvalRangeExprImpl(FieldOffset field_offset,
|
||||
ExtractBinaryArithOpEvalRangeExprImpl(FieldId field_id,
|
||||
DataType data_type,
|
||||
const planpb::BinaryArithOpEvalRangeExpr& expr_proto) {
|
||||
static_assert(std::is_fundamental_v<T>);
|
||||
|
@ -110,7 +122,7 @@ ExtractBinaryArithOpEvalRangeExprImpl(FieldOffset field_offset,
|
|||
}
|
||||
};
|
||||
return std::make_unique<BinaryArithOpEvalRangeExprImpl<T>>(
|
||||
field_offset, data_type, static_cast<ArithOpType>(expr_proto.arith_op()), getValue(expr_proto.right_operand()),
|
||||
field_id, data_type, static_cast<ArithOpType>(expr_proto.arith_op()), getValue(expr_proto.right_operand()),
|
||||
static_cast<OpType>(expr_proto.op()), getValue(expr_proto.value()));
|
||||
}
|
||||
|
||||
|
@ -131,8 +143,7 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
|
|||
|
||||
SearchInfo search_info;
|
||||
auto field_id = FieldId(anns_proto.field_id());
|
||||
auto field_offset = schema.get_offset(field_id);
|
||||
search_info.field_offset_ = field_offset;
|
||||
search_info.field_id_ = field_id;
|
||||
|
||||
search_info.metric_type_ = GetMetricType(query_info_proto.metric_type());
|
||||
search_info.topk_ = query_info_proto.topk();
|
||||
|
@ -165,6 +176,7 @@ ProtoParser::RetrievePlanNodeFromProto(const planpb::PlanNode& plan_node_proto)
|
|||
|
||||
std::unique_ptr<Plan>
|
||||
ProtoParser::CreatePlan(const proto::plan::PlanNode& plan_node_proto) {
|
||||
// std::cout << plan_node_proto.DebugString() << std::endl;
|
||||
auto plan = std::make_unique<Plan>(schema);
|
||||
|
||||
auto plan_node = PlanNodeFromProto(plan_node_proto);
|
||||
|
@ -172,14 +184,13 @@ ProtoParser::CreatePlan(const proto::plan::PlanNode& plan_node_proto) {
|
|||
ExtractInfoPlanNodeVisitor extractor(plan_info);
|
||||
plan_node->accept(extractor);
|
||||
|
||||
plan->tag2field_["$0"] = plan_node->search_info_.field_offset_;
|
||||
plan->tag2field_["$0"] = plan_node->search_info_.field_id_;
|
||||
plan->plan_node_ = std::move(plan_node);
|
||||
plan->extra_info_opt_ = std::move(plan_info);
|
||||
|
||||
for (auto field_id_raw : plan_node_proto.output_field_ids()) {
|
||||
auto field_id = FieldId(field_id_raw);
|
||||
auto offset = schema.get_offset(field_id);
|
||||
plan->target_entries_.push_back(offset);
|
||||
plan->target_entries_.push_back(field_id);
|
||||
}
|
||||
|
||||
return plan;
|
||||
|
@ -197,8 +208,7 @@ ProtoParser::CreateRetrievePlan(const proto::plan::PlanNode& plan_node_proto) {
|
|||
retrieve_plan->plan_node_ = std::move(plan_node);
|
||||
for (auto field_id_raw : plan_node_proto.output_field_ids()) {
|
||||
auto field_id = FieldId(field_id_raw);
|
||||
auto offset = schema.get_offset(field_id);
|
||||
retrieve_plan->field_offsets_.push_back(offset);
|
||||
retrieve_plan->field_ids_.push_back(field_id);
|
||||
}
|
||||
return retrieve_plan;
|
||||
}
|
||||
|
@ -207,32 +217,34 @@ ExprPtr
|
|||
ProtoParser::ParseUnaryRangeExpr(const proto::plan::UnaryRangeExpr& expr_pb) {
|
||||
auto& column_info = expr_pb.column_info();
|
||||
auto field_id = FieldId(column_info.field_id());
|
||||
auto field_offset = schema.get_offset(field_id);
|
||||
auto data_type = schema[field_offset].get_data_type();
|
||||
auto data_type = schema[field_id].get_data_type();
|
||||
Assert(data_type == static_cast<DataType>(column_info.data_type()));
|
||||
|
||||
auto result = [&]() -> ExprPtr {
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
return ExtractUnaryRangeExprImpl<bool>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<bool>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
return ExtractUnaryRangeExprImpl<int8_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<int8_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
return ExtractUnaryRangeExprImpl<int16_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<int16_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return ExtractUnaryRangeExprImpl<int32_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<int32_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return ExtractUnaryRangeExprImpl<int64_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<int64_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return ExtractUnaryRangeExprImpl<float>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<float>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return ExtractUnaryRangeExprImpl<double>(field_offset, data_type, expr_pb);
|
||||
return ExtractUnaryRangeExprImpl<double>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
return ExtractUnaryRangeExprImpl<std::string>(field_id, data_type, expr_pb);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
|
@ -246,32 +258,34 @@ ExprPtr
|
|||
ProtoParser::ParseBinaryRangeExpr(const proto::plan::BinaryRangeExpr& expr_pb) {
|
||||
auto& columnInfo = expr_pb.column_info();
|
||||
auto field_id = FieldId(columnInfo.field_id());
|
||||
auto field_offset = schema.get_offset(field_id);
|
||||
auto data_type = schema[field_offset].get_data_type();
|
||||
auto data_type = schema[field_id].get_data_type();
|
||||
Assert(data_type == (DataType)columnInfo.data_type());
|
||||
|
||||
auto result = [&]() -> ExprPtr {
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
return ExtractBinaryRangeExprImpl<bool>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<bool>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
return ExtractBinaryRangeExprImpl<int8_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<int8_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
return ExtractBinaryRangeExprImpl<int16_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<int16_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return ExtractBinaryRangeExprImpl<int32_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<int32_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return ExtractBinaryRangeExprImpl<int64_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<int64_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return ExtractBinaryRangeExprImpl<float>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<float>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return ExtractBinaryRangeExprImpl<double>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryRangeExprImpl<double>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
return ExtractBinaryRangeExprImpl<std::string>(field_id, data_type, expr_pb);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
|
@ -285,21 +299,19 @@ ExprPtr
|
|||
ProtoParser::ParseCompareExpr(const proto::plan::CompareExpr& expr_pb) {
|
||||
auto& left_column_info = expr_pb.left_column_info();
|
||||
auto left_field_id = FieldId(left_column_info.field_id());
|
||||
auto left_field_offset = schema.get_offset(left_field_id);
|
||||
auto left_data_type = schema[left_field_offset].get_data_type();
|
||||
auto left_data_type = schema[left_field_id].get_data_type();
|
||||
Assert(left_data_type == static_cast<DataType>(left_column_info.data_type()));
|
||||
|
||||
auto& right_column_info = expr_pb.right_column_info();
|
||||
auto right_field_id = FieldId(right_column_info.field_id());
|
||||
auto right_field_offset = schema.get_offset(right_field_id);
|
||||
auto right_data_type = schema[right_field_offset].get_data_type();
|
||||
auto right_data_type = schema[right_field_id].get_data_type();
|
||||
Assert(right_data_type == static_cast<DataType>(right_column_info.data_type()));
|
||||
|
||||
return [&]() -> ExprPtr {
|
||||
auto result = std::make_unique<CompareExpr>();
|
||||
result->left_field_offset_ = left_field_offset;
|
||||
result->left_field_id_ = left_field_id;
|
||||
result->left_data_type_ = left_data_type;
|
||||
result->right_field_offset_ = right_field_offset;
|
||||
result->right_field_id_ = right_field_id;
|
||||
result->right_data_type_ = right_data_type;
|
||||
result->op_type_ = static_cast<OpType>(expr_pb.op());
|
||||
return result;
|
||||
|
@ -310,33 +322,35 @@ ExprPtr
|
|||
ProtoParser::ParseTermExpr(const proto::plan::TermExpr& expr_pb) {
|
||||
auto& columnInfo = expr_pb.column_info();
|
||||
auto field_id = FieldId(columnInfo.field_id());
|
||||
auto field_offset = schema.get_offset(field_id);
|
||||
auto data_type = schema[field_offset].get_data_type();
|
||||
auto data_type = schema[field_id].get_data_type();
|
||||
Assert(data_type == (DataType)columnInfo.data_type());
|
||||
|
||||
// auto& field_meta = schema[field_offset];
|
||||
auto result = [&]() -> ExprPtr {
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
return ExtractTermExprImpl<bool>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<bool>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
return ExtractTermExprImpl<int8_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<int8_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
return ExtractTermExprImpl<int16_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<int16_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return ExtractTermExprImpl<int32_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<int32_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return ExtractTermExprImpl<int64_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<int64_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return ExtractTermExprImpl<float>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<float>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return ExtractTermExprImpl<double>(field_offset, data_type, expr_pb);
|
||||
return ExtractTermExprImpl<double>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
return ExtractTermExprImpl<std::string>(field_id, data_type, expr_pb);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
|
@ -366,29 +380,28 @@ ExprPtr
|
|||
ProtoParser::ParseBinaryArithOpEvalRangeExpr(const proto::plan::BinaryArithOpEvalRangeExpr& expr_pb) {
|
||||
auto& column_info = expr_pb.column_info();
|
||||
auto field_id = FieldId(column_info.field_id());
|
||||
auto field_offset = schema.get_offset(field_id);
|
||||
auto data_type = schema[field_offset].get_data_type();
|
||||
auto data_type = schema[field_id].get_data_type();
|
||||
Assert(data_type == static_cast<DataType>(column_info.data_type()));
|
||||
|
||||
auto result = [&]() -> ExprPtr {
|
||||
switch (data_type) {
|
||||
case DataType::INT8: {
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int8_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int8_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int16_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int16_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int32_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int32_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int64_t>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int64_t>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<float>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<float>(field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<double>(field_offset, data_type, expr_pb);
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<double>(field_id, data_type, expr_pb);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/VectorTrait.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "query/Expr.h"
|
||||
#include "common/Utils.h"
|
||||
#include "query/Utils.h"
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
namespace milvus::query {
|
||||
template <typename Op, typename T, typename U>
|
||||
bool
|
||||
RelationalImpl(const T& t, const U& u, FundamentalTag, FundamentalTag) {
|
||||
return Op{}(t, u);
|
||||
}
|
||||
|
||||
template <typename Op, typename T, typename U>
|
||||
bool
|
||||
RelationalImpl(const T& t, const U& u, FundamentalTag, StringTag) {
|
||||
PanicInfo("incompitible data type");
|
||||
}
|
||||
|
||||
template <typename Op, typename T, typename U>
|
||||
bool
|
||||
RelationalImpl(const T& t, const U& u, StringTag, FundamentalTag) {
|
||||
PanicInfo("incompitible data type");
|
||||
}
|
||||
|
||||
template <typename Op, typename T, typename U>
|
||||
bool
|
||||
RelationalImpl(const T& t, const U& u, StringTag, StringTag) {
|
||||
return Op{}(t, u);
|
||||
}
|
||||
|
||||
template <typename Op>
|
||||
struct Relational {
|
||||
template <typename T, typename U>
|
||||
bool
|
||||
operator()(const T& t, const U& u) const {
|
||||
return RelationalImpl<Op, T, U>(t, u, typename TagDispatchTrait<T>::Tag{}, typename TagDispatchTrait<U>::Tag{});
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
bool
|
||||
operator()(const T&...) const {
|
||||
PanicInfo("incompatible operands");
|
||||
}
|
||||
};
|
||||
|
||||
template <OpType op>
|
||||
struct MatchOp {
|
||||
template <typename T, typename U>
|
||||
bool
|
||||
operator()(const T& t, const U& u) {
|
||||
return Match(t, u, op);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace milvus::query
|
|
@ -12,6 +12,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <index/ScalarIndexSort.h>
|
||||
|
||||
#include "common/FieldMeta.h"
|
||||
|
@ -45,6 +46,8 @@ generate_scalar_index(SpanBase data, DataType data_type) {
|
|||
return generate_scalar_index(Span<float>(data));
|
||||
case DataType::DOUBLE:
|
||||
return generate_scalar_index(Span<double>(data));
|
||||
case DataType::VARCHAR:
|
||||
return generate_scalar_index(Span<std::string>(data));
|
||||
default:
|
||||
PanicInfo("unsupported type");
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
|
|||
const BitsetView& bitset) {
|
||||
SubSearchResult sub_result(num_queries, topk, metric_type, round_decimal);
|
||||
float* result_distances = sub_result.get_distances();
|
||||
idx_t* result_ids = sub_result.get_ids();
|
||||
idx_t* result_ids = sub_result.get_seg_offsets();
|
||||
|
||||
int64_t code_size = dim / 8;
|
||||
const idx_t block_size = size_per_chunk;
|
||||
|
@ -101,10 +101,12 @@ FloatSearchBruteForce(const dataset::SearchDataset& dataset,
|
|||
auto query_data = reinterpret_cast<const float*>(dataset.query_data);
|
||||
auto chunk_data = reinterpret_cast<const float*>(chunk_data_raw);
|
||||
if (metric_type == MetricType::METRIC_L2) {
|
||||
faiss::float_maxheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_ids(), sub_qr.get_distances()};
|
||||
faiss::float_maxheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_seg_offsets(),
|
||||
sub_qr.get_distances()};
|
||||
faiss::knn_L2sqr(query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, nullptr, bitset);
|
||||
} else {
|
||||
faiss::float_minheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_ids(), sub_qr.get_distances()};
|
||||
faiss::float_minheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_seg_offsets(),
|
||||
sub_qr.get_distances()};
|
||||
faiss::knn_inner_product(query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset);
|
||||
}
|
||||
sub_qr.round_values();
|
||||
|
|
|
@ -30,8 +30,8 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
|
||||
// step 1.1: get meta
|
||||
// step 1.2: get which vector field to search
|
||||
auto vecfield_offset = info.field_offset_;
|
||||
auto& field = schema[vecfield_offset];
|
||||
auto vecfield_id = info.field_id_;
|
||||
auto& field = schema[vecfield_id];
|
||||
|
||||
AssertInfo(field.get_data_type() == DataType::VECTOR_FLOAT, "[FloatSearch]Field data type isn't VECTOR_FLOAT");
|
||||
auto dim = field.get_dim();
|
||||
|
@ -44,13 +44,13 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
// std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
|
||||
SubSearchResult final_qr(num_queries, topk, metric_type, round_decimal);
|
||||
dataset::SearchDataset search_dataset{metric_type, num_queries, topk, round_decimal, dim, query_data};
|
||||
auto vec_ptr = record.get_field_data<FloatVector>(vecfield_offset);
|
||||
auto vec_ptr = record.get_field_data<FloatVector>(vecfield_id);
|
||||
|
||||
int current_chunk_id = 0;
|
||||
|
||||
if (indexing_record.is_in(vecfield_offset)) {
|
||||
if (indexing_record.is_in(vecfield_id)) {
|
||||
auto max_indexed_id = indexing_record.get_finished_ack();
|
||||
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
|
||||
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_id);
|
||||
auto search_conf = field_indexing.get_search_params(topk);
|
||||
AssertInfo(vec_ptr->get_size_per_chunk() == field_indexing.get_size_per_chunk(),
|
||||
"[FloatSearch]Chunk size of vector not equal to chunk size of field index");
|
||||
|
@ -63,7 +63,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto sub_qr = SearchOnIndex(search_dataset, *indexing, search_conf, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_ids()) {
|
||||
for (auto& x : sub_qr.mutable_seg_offsets()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * size_per_chunk;
|
||||
}
|
||||
|
@ -89,7 +89,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto sub_qr = FloatSearchBruteForce(search_dataset, chunk.data(), size_per_chunk, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_ids()) {
|
||||
for (auto& x : sub_qr.mutable_seg_offsets()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * vec_size_per_chunk;
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
}
|
||||
current_chunk_id = max_chunk;
|
||||
results.distances_ = std::move(final_qr.mutable_distances());
|
||||
results.ids_ = std::move(final_qr.mutable_ids());
|
||||
results.seg_offsets_ = std::move(final_qr.mutable_seg_offsets());
|
||||
results.topk_ = topk;
|
||||
results.num_queries_ = num_queries;
|
||||
|
||||
|
@ -123,8 +123,8 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
|
||||
// step 2.1: get meta
|
||||
// step 2.2: get which vector field to search
|
||||
auto vecfield_offset = info.field_offset_;
|
||||
auto& field = schema[vecfield_offset];
|
||||
auto vecfield_id = info.field_id_;
|
||||
auto& field = schema[vecfield_id];
|
||||
|
||||
AssertInfo(field.get_data_type() == DataType::VECTOR_BINARY, "[BinarySearch]Field data type isn't VECTOR_BINARY");
|
||||
auto dim = field.get_dim();
|
||||
|
@ -134,7 +134,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
// step 3: small indexing search
|
||||
query::dataset::SearchDataset search_dataset{metric_type, num_queries, topk, round_decimal, dim, query_data};
|
||||
|
||||
auto vec_ptr = record.get_field_data<BinaryVector>(vecfield_offset);
|
||||
auto vec_ptr = record.get_field_data<BinaryVector>(vecfield_id);
|
||||
auto max_indexed_id = 0;
|
||||
|
||||
// step 4: brute force search where small indexing is unavailable
|
||||
|
@ -151,7 +151,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto sub_result = BinarySearchBruteForce(search_dataset, chunk.data(), nsize, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_result.mutable_ids()) {
|
||||
for (auto& x : sub_result.mutable_seg_offsets()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * vec_size_per_chunk;
|
||||
}
|
||||
|
@ -161,7 +161,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
|
||||
final_result.round_values();
|
||||
results.distances_ = std::move(final_result.mutable_distances());
|
||||
results.ids_ = std::move(final_result.mutable_ids());
|
||||
results.seg_offsets_ = std::move(final_result.mutable_seg_offsets());
|
||||
results.topk_ = topk;
|
||||
results.num_queries_ = num_queries;
|
||||
|
||||
|
@ -178,7 +178,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||
const BitsetView& bitset,
|
||||
SearchResult& results) {
|
||||
// TODO: add data_type to info
|
||||
auto data_type = segment.get_schema()[info.field_offset_].get_data_type();
|
||||
auto data_type = segment.get_schema()[info.field_id_].get_data_type();
|
||||
AssertInfo(datatype_is_vector(data_type), "[SearchOnGrowing]Data type isn't vector type");
|
||||
if (data_type == DataType::VECTOR_FLOAT) {
|
||||
auto typed_data = reinterpret_cast<const float*>(query_data);
|
||||
|
|
|
@ -35,7 +35,7 @@ SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
|||
|
||||
SubSearchResult sub_qr(num_queries, topK, metric_type, round_decimal);
|
||||
std::copy_n(dis, num_queries * topK, sub_qr.get_distances());
|
||||
std::copy_n(uids, num_queries * topK, sub_qr.get_ids());
|
||||
std::copy_n(uids, num_queries * topK, sub_qr.get_seg_offsets());
|
||||
sub_qr.round_values();
|
||||
return sub_qr;
|
||||
}
|
||||
|
|
|
@ -32,13 +32,13 @@ SearchOnSealed(const Schema& schema,
|
|||
auto topk = search_info.topk_;
|
||||
auto round_decimal = search_info.round_decimal_;
|
||||
|
||||
auto field_offset = search_info.field_offset_;
|
||||
auto& field = schema[field_offset];
|
||||
auto field_id = search_info.field_id_;
|
||||
auto& field = schema[field_id];
|
||||
// Assert(field.get_data_type() == DataType::VECTOR_FLOAT);
|
||||
auto dim = field.get_dim();
|
||||
|
||||
AssertInfo(record.is_ready(field_offset), "[SearchOnSealed]Record isn't ready");
|
||||
auto field_indexing = record.get_field_indexing(field_offset);
|
||||
AssertInfo(record.is_ready(field_id), "[SearchOnSealed]Record isn't ready");
|
||||
auto field_indexing = record.get_field_indexing(field_id);
|
||||
AssertInfo(field_indexing->metric_type_ == search_info.metric_type_,
|
||||
"Metric type of field index isn't the same with search info");
|
||||
|
||||
|
@ -67,12 +67,12 @@ SearchOnSealed(const Schema& schema,
|
|||
distances[i] = round(distances[i] * multiplier) / multiplier;
|
||||
}
|
||||
}
|
||||
result.ids_.resize(total_num);
|
||||
result.seg_offsets_.resize(total_num);
|
||||
result.distances_.resize(total_num);
|
||||
result.num_queries_ = num_queries;
|
||||
result.topk_ = topk;
|
||||
|
||||
std::copy_n(ids, total_num, result.ids_.data());
|
||||
std::copy_n(ids, total_num, result.seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, result.distances_.data());
|
||||
}
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -27,7 +27,7 @@ SubSearchResult::merge_impl(const SubSearchResult& right) {
|
|||
for (int64_t qn = 0; qn < num_queries_; ++qn) {
|
||||
auto offset = qn * topk_;
|
||||
|
||||
int64_t* __restrict__ left_ids = this->get_ids() + offset;
|
||||
int64_t* __restrict__ left_ids = this->get_seg_offsets() + offset;
|
||||
float* __restrict__ left_distances = this->get_distances() + offset;
|
||||
|
||||
auto right_ids = right.get_ids() + offset;
|
||||
|
|
|
@ -23,7 +23,7 @@ class SubSearchResult {
|
|||
: metric_type_(metric_type),
|
||||
num_queries_(num_queries),
|
||||
topk_(topk),
|
||||
ids_(num_queries * topk, -1),
|
||||
seg_offsets_(num_queries * topk, -1),
|
||||
distances_(num_queries * topk, init_value(metric_type)),
|
||||
round_decimal_(round_decimal) {
|
||||
}
|
||||
|
@ -57,12 +57,12 @@ class SubSearchResult {
|
|||
|
||||
const int64_t*
|
||||
get_ids() const {
|
||||
return ids_.data();
|
||||
return seg_offsets_.data();
|
||||
}
|
||||
|
||||
int64_t*
|
||||
get_ids() {
|
||||
return ids_.data();
|
||||
get_seg_offsets() {
|
||||
return seg_offsets_.data();
|
||||
}
|
||||
|
||||
const float*
|
||||
|
@ -76,8 +76,8 @@ class SubSearchResult {
|
|||
}
|
||||
|
||||
auto&
|
||||
mutable_ids() {
|
||||
return ids_;
|
||||
mutable_seg_offsets() {
|
||||
return seg_offsets_;
|
||||
}
|
||||
|
||||
auto&
|
||||
|
@ -104,7 +104,7 @@ class SubSearchResult {
|
|||
int64_t topk_;
|
||||
int64_t round_decimal_;
|
||||
MetricType metric_type_;
|
||||
std::vector<int64_t> ids_;
|
||||
std::vector<int64_t> seg_offsets_;
|
||||
std::vector<float> distances_;
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "query/Expr.h"
|
||||
#include "common/Utils.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
template <typename T, typename U>
|
||||
inline bool
|
||||
Match(const T& x, const U& y, OpType op) {
|
||||
PanicInfo("not supported");
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool
|
||||
Match<std::string>(const std::string& str, const std::string& val, OpType op) {
|
||||
switch (op) {
|
||||
case OpType::PrefixMatch:
|
||||
return PrefixMatch(str, val);
|
||||
case OpType::PostfixMatch:
|
||||
return PostfixMatch(str, val);
|
||||
default:
|
||||
PanicInfo("not supported");
|
||||
}
|
||||
}
|
||||
} // namespace milvus::query
|
|
@ -62,11 +62,11 @@ class ExecExprVisitor : public ExprVisitor {
|
|||
public:
|
||||
template <typename T, typename IndexFunc, typename ElementFunc>
|
||||
auto
|
||||
ExecRangeVisitorImpl(FieldOffset field_offset, IndexFunc func, ElementFunc element_func) -> BitsetType;
|
||||
ExecRangeVisitorImpl(FieldId field_id, IndexFunc func, ElementFunc element_func) -> BitsetType;
|
||||
|
||||
template <typename T, typename ElementFunc>
|
||||
auto
|
||||
ExecDataRangeVisitorImpl(FieldOffset field_offset, ElementFunc element_func) -> BitsetType;
|
||||
ExecDataRangeVisitorImpl(FieldId field_id, ElementFunc element_func) -> BitsetType;
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#include "query/ExprImpl.h"
|
||||
#include "query/generated/ExecExprVisitor.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "query/Utils.h"
|
||||
#include "query/Relational.h"
|
||||
|
||||
namespace milvus::query {
|
||||
// THIS CONTAINS EXTRA BODY FOR VISITOR
|
||||
|
@ -42,7 +44,7 @@ class ExecExprVisitor : ExprVisitor {
|
|||
public:
|
||||
template <typename T, typename IndexFunc, typename ElementFunc>
|
||||
auto
|
||||
ExecRangeVisitorImpl(FieldOffset field_offset, IndexFunc func, ElementFunc element_func) -> BitsetType;
|
||||
ExecRangeVisitorImpl(FieldId field_id, IndexFunc func, ElementFunc element_func) -> BitsetType;
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
|
@ -144,18 +146,17 @@ Assemble(const std::deque<BitsetType>& srcs) -> BitsetType {
|
|||
|
||||
template <typename T, typename IndexFunc, typename ElementFunc>
|
||||
auto
|
||||
ExecExprVisitor::ExecRangeVisitorImpl(FieldOffset field_offset, IndexFunc index_func, ElementFunc element_func)
|
||||
-> BitsetType {
|
||||
ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id, IndexFunc index_func, ElementFunc element_func) -> BitsetType {
|
||||
auto& schema = segment_.get_schema();
|
||||
auto& field_meta = schema[field_offset];
|
||||
auto indexing_barrier = segment_.num_chunk_index(field_offset);
|
||||
auto& field_meta = schema[field_id];
|
||||
auto indexing_barrier = segment_.num_chunk_index(field_id);
|
||||
auto size_per_chunk = segment_.size_per_chunk();
|
||||
auto num_chunk = upper_div(row_count_, size_per_chunk);
|
||||
std::deque<BitsetType> results;
|
||||
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
|
||||
const Index& indexing = segment_.chunk_scalar_index<T>(field_offset, chunk_id);
|
||||
const Index& indexing = segment_.chunk_scalar_index<T>(field_id, chunk_id);
|
||||
// NOTE: knowhere is not const-ready
|
||||
// This is a dirty workaround
|
||||
auto data = index_func(const_cast<Index*>(&indexing));
|
||||
|
@ -165,7 +166,7 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldOffset field_offset, IndexFunc index_
|
|||
for (auto chunk_id = indexing_barrier; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto this_size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
|
||||
BitsetType result(this_size);
|
||||
auto chunk = segment_.chunk_data<T>(field_offset, chunk_id);
|
||||
auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
|
||||
const T* data = chunk.data();
|
||||
for (int index = 0; index < this_size; ++index) {
|
||||
result[index] = element_func(data[index]);
|
||||
|
@ -180,9 +181,9 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldOffset field_offset, IndexFunc index_
|
|||
|
||||
template <typename T, typename ElementFunc>
|
||||
auto
|
||||
ExecExprVisitor::ExecDataRangeVisitorImpl(FieldOffset field_offset, ElementFunc element_func) -> BitsetType {
|
||||
ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, ElementFunc element_func) -> BitsetType {
|
||||
auto& schema = segment_.get_schema();
|
||||
auto& field_meta = schema[field_offset];
|
||||
auto& field_meta = schema[field_id];
|
||||
auto size_per_chunk = segment_.size_per_chunk();
|
||||
auto num_chunk = upper_div(row_count_, size_per_chunk);
|
||||
std::deque<BitsetType> results;
|
||||
|
@ -190,7 +191,7 @@ ExecExprVisitor::ExecDataRangeVisitorImpl(FieldOffset field_offset, ElementFunc
|
|||
for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto this_size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
|
||||
BitsetType result(this_size);
|
||||
auto chunk = segment_.chunk_data<T>(field_offset, chunk_id);
|
||||
auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
|
||||
const T* data = chunk.data();
|
||||
for (int index = 0; index < this_size; ++index) {
|
||||
result[index] = element_func(data[index]);
|
||||
|
@ -217,33 +218,44 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> Bi
|
|||
case OpType::Equal: {
|
||||
auto index_func = [val](Index* index) { return index->In(1, &val); };
|
||||
auto elem_func = [val](T x) { return (x == val); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case OpType::NotEqual: {
|
||||
auto index_func = [val](Index* index) { return index->NotIn(1, &val); };
|
||||
auto elem_func = [val](T x) { return (x != val); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case OpType::GreaterEqual: {
|
||||
auto index_func = [val](Index* index) { return index->Range(val, Operator::GE); };
|
||||
auto elem_func = [val](T x) { return (x >= val); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case OpType::GreaterThan: {
|
||||
auto index_func = [val](Index* index) { return index->Range(val, Operator::GT); };
|
||||
auto elem_func = [val](T x) { return (x > val); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case OpType::LessEqual: {
|
||||
auto index_func = [val](Index* index) { return index->Range(val, Operator::LE); };
|
||||
auto elem_func = [val](T x) { return (x <= val); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case OpType::LessThan: {
|
||||
auto index_func = [val](Index* index) { return index->Range(val, Operator::LT); };
|
||||
auto elem_func = [val](T x) { return (x < val); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case OpType::PrefixMatch: {
|
||||
auto index_func = [val](Index* index) {
|
||||
auto dataset = std::make_unique<knowhere::Dataset>();
|
||||
dataset->Set(scalar::OPERATOR_TYPE, Operator::PrefixMatchOp);
|
||||
dataset->Set(scalar::PREFIX_VALUE, val);
|
||||
return index->Query(std::move(dataset));
|
||||
};
|
||||
auto elem_func = [val, op](T x) { return Match(x, val, op); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
// TODO: PostfixMatch
|
||||
default: {
|
||||
PanicInfo("unsupported range node");
|
||||
}
|
||||
|
@ -268,25 +280,25 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(BinaryArithOpEvalRa
|
|||
switch (arith_op) {
|
||||
case ArithOpType::Add: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x + right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x - right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x * right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x / right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto elem_func = [val, right_operand](T x) {
|
||||
return (static_cast<T>(fmod(x, right_operand)) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
|
@ -297,25 +309,25 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(BinaryArithOpEvalRa
|
|||
switch (arith_op) {
|
||||
case ArithOpType::Add: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x + right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x - right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x * right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto elem_func = [val, right_operand](T x) { return ((x / right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto elem_func = [val, right_operand](T x) {
|
||||
return (static_cast<T>(fmod(x, right_operand)) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_offset_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
|
@ -348,23 +360,23 @@ ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) ->
|
|||
auto index_func = [=](Index* index) { return index->Range(val1, lower_inclusive, val2, upper_inclusive); };
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 <= x && x <= val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 <= x && x < val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 < x && x <= val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
} else {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 < x && x < val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_offset_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
void
|
||||
ExecExprVisitor::visit(UnaryRangeExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_offset_];
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type");
|
||||
BitsetType res;
|
||||
|
@ -397,6 +409,10 @@ ExecExprVisitor::visit(UnaryRangeExpr& expr) {
|
|||
res = ExecUnaryRangeVisitorDispatcher<double>(expr);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
res = ExecUnaryRangeVisitorDispatcher<std::string>(expr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
@ -406,7 +422,7 @@ ExecExprVisitor::visit(UnaryRangeExpr& expr) {
|
|||
|
||||
void
|
||||
ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_offset_];
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type");
|
||||
BitsetType res;
|
||||
|
@ -444,7 +460,7 @@ ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
|||
|
||||
void
|
||||
ExecExprVisitor::visit(BinaryRangeExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_offset_];
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type");
|
||||
BitsetType res;
|
||||
|
@ -477,6 +493,10 @@ ExecExprVisitor::visit(BinaryRangeExpr& expr) {
|
|||
res = ExecBinaryRangeVisitorDispatcher<double>(expr);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
res = ExecBinaryRangeVisitorDispatcher<std::string>(expr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
@ -501,52 +521,56 @@ struct relational {
|
|||
template <typename Op>
|
||||
auto
|
||||
ExecExprVisitor::ExecCompareExprDispatcher(CompareExpr& expr, Op op) -> BitsetType {
|
||||
using number = boost::variant<bool, int8_t, int16_t, int32_t, int64_t, float, double>;
|
||||
using number = boost::variant<bool, int8_t, int16_t, int32_t, int64_t, float, double, std::string>;
|
||||
auto size_per_chunk = segment_.size_per_chunk();
|
||||
auto num_chunk = upper_div(row_count_, size_per_chunk);
|
||||
std::deque<BitsetType> bitsets;
|
||||
for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
|
||||
auto getChunkData = [&, chunk_id](DataType type, FieldOffset offset) -> std::function<const number(int)> {
|
||||
auto getChunkData = [&, chunk_id](DataType type, FieldId field_id) -> std::function<const number(int)> {
|
||||
switch (type) {
|
||||
case DataType::BOOL: {
|
||||
auto chunk_data = segment_.chunk_data<bool>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<bool>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto chunk_data = segment_.chunk_data<int8_t>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<int8_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto chunk_data = segment_.chunk_data<int16_t>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<int16_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::INT32: {
|
||||
auto chunk_data = segment_.chunk_data<int32_t>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<int32_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto chunk_data = segment_.chunk_data<int64_t>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<int64_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto chunk_data = segment_.chunk_data<float>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<float>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto chunk_data = segment_.chunk_data<double>(offset, chunk_id).data();
|
||||
auto chunk_data = segment_.chunk_data<double>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto chunk_data = segment_.chunk_data<std::string>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
};
|
||||
auto left = getChunkData(expr.left_data_type_, expr.left_field_offset_);
|
||||
auto right = getChunkData(expr.right_data_type_, expr.right_field_offset_);
|
||||
auto left = getChunkData(expr.left_data_type_, expr.left_field_id_);
|
||||
auto right = getChunkData(expr.right_data_type_, expr.right_field_id_);
|
||||
|
||||
BitsetType bitset(size);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
bool is_in = boost::apply_visitor(relational<decltype(op)>{}, left(i), right(i));
|
||||
bool is_in = boost::apply_visitor(Relational<decltype(op)>{}, left(i), right(i));
|
||||
bitset[i] = is_in;
|
||||
}
|
||||
bitsets.emplace_back(std::move(bitset));
|
||||
|
@ -559,8 +583,8 @@ ExecExprVisitor::ExecCompareExprDispatcher(CompareExpr& expr, Op op) -> BitsetTy
|
|||
void
|
||||
ExecExprVisitor::visit(CompareExpr& expr) {
|
||||
auto& schema = segment_.get_schema();
|
||||
auto& left_field_meta = schema[expr.left_field_offset_];
|
||||
auto& right_field_meta = schema[expr.right_field_offset_];
|
||||
auto& left_field_meta = schema[expr.left_field_id_];
|
||||
auto& right_field_meta = schema[expr.right_field_id_];
|
||||
AssertInfo(expr.left_data_type_ == left_field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]Left data type not equal to left field mata type");
|
||||
AssertInfo(expr.right_data_type_ == right_field_meta.get_data_type(),
|
||||
|
@ -592,6 +616,12 @@ ExecExprVisitor::visit(CompareExpr& expr) {
|
|||
res = ExecCompareExprDispatcher(expr, std::less<>{});
|
||||
break;
|
||||
}
|
||||
case OpType::PrefixMatch: {
|
||||
res = ExecCompareExprDispatcher(expr, MatchOp<OpType::PrefixMatch>{});
|
||||
break;
|
||||
}
|
||||
// case OpType::PostfixMatch: {
|
||||
// }
|
||||
default: {
|
||||
PanicInfo("unsupported optype");
|
||||
}
|
||||
|
@ -605,21 +635,37 @@ auto
|
|||
ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
|
||||
auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
|
||||
auto& schema = segment_.get_schema();
|
||||
auto primary_offset = schema.get_primary_key_offset();
|
||||
auto field_offset = expr_raw.field_offset_;
|
||||
auto& field_meta = schema[field_offset];
|
||||
auto primary_filed_id = schema.get_primary_field_id();
|
||||
auto field_id = expr_raw.field_id_;
|
||||
auto& field_meta = schema[field_id];
|
||||
|
||||
bool use_pk_index = false;
|
||||
if (primary_offset.has_value()) {
|
||||
use_pk_index = primary_offset.value() == field_offset && field_meta.get_data_type() == engine::DataType::INT64;
|
||||
if (primary_filed_id.has_value()) {
|
||||
use_pk_index = primary_filed_id.value() == field_id && IsPrimaryKeyDataType(field_meta.get_data_type());
|
||||
}
|
||||
|
||||
if (use_pk_index) {
|
||||
auto id_array = std::make_unique<IdArray>();
|
||||
auto dst_ids = id_array->mutable_int_id();
|
||||
for (const auto& id : expr.terms_) {
|
||||
dst_ids->add_data(id);
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::INT64: {
|
||||
auto dst_ids = id_array->mutable_int_id();
|
||||
for (const auto& id : expr.terms_) {
|
||||
dst_ids->add_data((int64_t&)id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto dst_ids = id_array->mutable_str_id();
|
||||
for (const auto& id : expr.terms_) {
|
||||
dst_ids->add_data((std::string&)id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported type");
|
||||
}
|
||||
}
|
||||
|
||||
auto [uids, seg_offsets] = segment_.search_ids(*id_array, timestamp_);
|
||||
BitsetType bitset(row_count_);
|
||||
for (const auto& offset : seg_offsets) {
|
||||
|
@ -636,7 +682,7 @@ ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
|
|||
auto num_chunk = upper_div(row_count_, size_per_chunk);
|
||||
std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());
|
||||
for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
Span<T> chunk = segment_.chunk_data<T>(field_offset, chunk_id);
|
||||
Span<T> chunk = segment_.chunk_data<T>(field_id, chunk_id);
|
||||
auto chunk_data = chunk.data();
|
||||
auto size = (chunk_id == num_chunk - 1) ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
|
||||
BitsetType bitset(size);
|
||||
|
@ -650,9 +696,34 @@ ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
|
|||
return final_result;
|
||||
}
|
||||
|
||||
// TODO: refactor this to use `scalar::ScalarIndex::In`.
|
||||
// made a test to compare the performance.
|
||||
// vector<bool> don't match the template.
|
||||
// boost::container::vector<bool> match.
|
||||
template <>
|
||||
auto
|
||||
ExecExprVisitor::ExecTermVisitorImpl<std::string>(TermExpr& expr_raw) -> BitsetType {
|
||||
using T = std::string;
|
||||
auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
|
||||
using Index = scalar::ScalarIndex<T>;
|
||||
using Operator = scalar::OperatorType;
|
||||
const auto& terms = expr.terms_;
|
||||
auto n = terms.size();
|
||||
std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());
|
||||
|
||||
auto index_func = [&terms, n](Index* index) { return index->In(n, terms.data()); };
|
||||
auto elem_func = [&terms, &term_set](T x) {
|
||||
//// terms has already been sorted.
|
||||
// return std::binary_search(terms.begin(), terms.end(), x);
|
||||
return term_set.find(x) != term_set.end();
|
||||
};
|
||||
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
|
||||
void
|
||||
ExecExprVisitor::visit(TermExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_offset_];
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type ");
|
||||
BitsetType res;
|
||||
|
@ -685,6 +756,10 @@ ExecExprVisitor::visit(TermExpr& expr) {
|
|||
res = ExecTermVisitorImpl<double>(expr);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
res = ExecTermVisitorImpl<std::string>(expr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ empty_search_result(int64_t num_queries, int64_t topk, int64_t round_decimal, Me
|
|||
SubSearchResult result(num_queries, topk, metric_type, round_decimal);
|
||||
final_result.num_queries_ = num_queries;
|
||||
final_result.topk_ = topk;
|
||||
final_result.ids_ = std::move(result.mutable_ids());
|
||||
final_result.seg_offsets_ = std::move(result.mutable_seg_offsets());
|
||||
final_result.distances_ = std::move(result.mutable_distances());
|
||||
return final_result;
|
||||
}
|
||||
|
|
|
@ -40,28 +40,28 @@ ExtractInfoExprVisitor::visit(LogicalBinaryExpr& expr) {
|
|||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(TermExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.field_offset_);
|
||||
plan_info_.add_involved_field(expr.field_id_);
|
||||
}
|
||||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(UnaryRangeExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.field_offset_);
|
||||
plan_info_.add_involved_field(expr.field_id_);
|
||||
}
|
||||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(BinaryRangeExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.field_offset_);
|
||||
plan_info_.add_involved_field(expr.field_id_);
|
||||
}
|
||||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(CompareExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.left_field_offset_);
|
||||
plan_info_.add_involved_field(expr.right_field_offset_);
|
||||
plan_info_.add_involved_field(expr.left_field_id_);
|
||||
plan_info_.add_involved_field(expr.right_field_id_);
|
||||
}
|
||||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.field_offset_);
|
||||
plan_info_.add_involved_field(expr.field_id_);
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -30,7 +30,7 @@ class ExtractInfoPlanNodeVisitor : PlanNodeVisitor {
|
|||
|
||||
void
|
||||
ExtractInfoPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
||||
plan_info_.add_involved_field(node.search_info_.field_offset_);
|
||||
plan_info_.add_involved_field(node.search_info_.field_id_);
|
||||
if (node.predicate_.has_value()) {
|
||||
ExtractInfoExprVisitor expr_visitor(plan_info_);
|
||||
node.predicate_.value()->accept(expr_visitor);
|
||||
|
@ -39,7 +39,7 @@ ExtractInfoPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
|||
|
||||
void
|
||||
ExtractInfoPlanNodeVisitor::visit(BinaryVectorANNS& node) {
|
||||
plan_info_.add_involved_field(node.search_info_.field_offset_);
|
||||
plan_info_.add_involved_field(node.search_info_.field_id_);
|
||||
if (node.predicate_.has_value()) {
|
||||
ExtractInfoExprVisitor expr_visitor(plan_info_);
|
||||
node.predicate_.value()->accept(expr_visitor);
|
||||
|
|
|
@ -132,7 +132,7 @@ ShowExprVisitor::visit(TermExpr& expr) {
|
|||
}();
|
||||
|
||||
Json res{{"expr_type", "Term"},
|
||||
{"field_offset", expr.field_offset_.get()},
|
||||
{"field_id", expr.field_id_.get()},
|
||||
{"data_type", datatype_name(expr.data_type_)},
|
||||
{"terms", std::move(terms)}};
|
||||
|
||||
|
@ -147,7 +147,7 @@ UnaryRangeExtract(const UnaryRangeExpr& expr_raw) {
|
|||
auto expr = dynamic_cast<const UnaryRangeExprImpl<T>*>(&expr_raw);
|
||||
AssertInfo(expr, "[ShowExprVisitor]UnaryRangeExpr cast to UnaryRangeExprImpl failed");
|
||||
Json res{{"expr_type", "UnaryRange"},
|
||||
{"field_offset", expr->field_offset_.get()},
|
||||
{"field_id", expr->field_id_.get()},
|
||||
{"data_type", datatype_name(expr->data_type_)},
|
||||
{"op", OpType_Name(static_cast<OpType>(expr->op_type_))},
|
||||
{"value", expr->value_}};
|
||||
|
@ -193,7 +193,7 @@ BinaryRangeExtract(const BinaryRangeExpr& expr_raw) {
|
|||
auto expr = dynamic_cast<const BinaryRangeExprImpl<T>*>(&expr_raw);
|
||||
AssertInfo(expr, "[ShowExprVisitor]BinaryRangeExpr cast to BinaryRangeExprImpl failed");
|
||||
Json res{{"expr_type", "BinaryRange"},
|
||||
{"field_offset", expr->field_offset_.get()},
|
||||
{"field_id", expr->field_id_.get()},
|
||||
{"data_type", datatype_name(expr->data_type_)},
|
||||
{"lower_inclusive", expr->lower_inclusive_},
|
||||
{"upper_inclusive", expr->upper_inclusive_},
|
||||
|
@ -240,9 +240,9 @@ ShowExprVisitor::visit(CompareExpr& expr) {
|
|||
AssertInfo(!json_opt_.has_value(), "[ShowExprVisitor]Ret json already has value before visit");
|
||||
|
||||
Json res{{"expr_type", "Compare"},
|
||||
{"left_field_offset", expr.left_field_offset_.get()},
|
||||
{"left_field_id", expr.left_field_id_.get()},
|
||||
{"left_data_type", datatype_name(expr.left_data_type_)},
|
||||
{"right_field_offset", expr.right_field_offset_.get()},
|
||||
{"right_field_id", expr.right_field_id_.get()},
|
||||
{"right_data_type", datatype_name(expr.right_data_type_)},
|
||||
{"op", OpType_Name(static_cast<OpType>(expr.op_type_))}};
|
||||
json_opt_ = res;
|
||||
|
@ -260,7 +260,7 @@ BinaryArithOpEvalRangeExtract(const BinaryArithOpEvalRangeExpr& expr_raw) {
|
|||
AssertInfo(expr, "[ShowExprVisitor]BinaryArithOpEvalRangeExpr cast to BinaryArithOpEvalRangeExprImpl failed");
|
||||
|
||||
Json res{{"expr_type", "BinaryArithOpEvalRange"},
|
||||
{"field_offset", expr->field_offset_.get()},
|
||||
{"field_offset", expr->field_id_.get()},
|
||||
{"data_type", datatype_name(expr->data_type_)},
|
||||
{"arith_op", ArithOpType_Name(static_cast<ArithOpType>(expr->arith_op_))},
|
||||
{"right_operand", expr->right_operand_},
|
||||
|
|
|
@ -55,7 +55,7 @@ ShowPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
|||
Json json_body{
|
||||
{"node_type", "FloatVectorANNS"}, //
|
||||
{"metric_type", MetricTypeToName(info.metric_type_)}, //
|
||||
{"field_offset_", info.field_offset_.get()}, //
|
||||
{"field_id_", info.field_id_.get()}, //
|
||||
{"topk", info.topk_}, //
|
||||
{"search_params", info.search_params_}, //
|
||||
{"placeholder_tag", node.placeholder_tag_}, //
|
||||
|
@ -77,7 +77,7 @@ ShowPlanNodeVisitor::visit(BinaryVectorANNS& node) {
|
|||
Json json_body{
|
||||
{"node_type", "BinaryVectorANNS"}, //
|
||||
{"metric_type", MetricTypeToName(info.metric_type_)}, //
|
||||
{"field_offset_", info.field_offset_.get()}, //
|
||||
{"field_id_", info.field_id_.get()}, //
|
||||
{"topk", info.topk_}, //
|
||||
{"search_params", info.search_params_}, //
|
||||
{"placeholder_tag", node.placeholder_tag_}, //
|
||||
|
|
|
@ -32,7 +32,8 @@ set(SEGCORE_FILES
|
|||
segcore_init_c.cpp
|
||||
ScalarIndex.cpp
|
||||
TimestampIndex.cpp
|
||||
)
|
||||
Utils.cpp
|
||||
ConcurrentVector.cpp)
|
||||
add_library(milvus_segcore SHARED
|
||||
${SEGCORE_FILES}
|
||||
)
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
void
|
||||
VectorBase::set_data_raw(ssize_t element_offset,
|
||||
ssize_t element_count,
|
||||
const DataArray* data,
|
||||
const FieldMeta& field_meta) {
|
||||
if (field_meta.is_vector()) {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
return set_data_raw(element_offset, data->vectors().float_vector().data().data(), element_count);
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
return set_data_raw(element_offset, data->vectors().binary_vector().data(), element_count);
|
||||
} else {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
return set_data_raw(element_offset, data->scalars().bool_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto src_data = data->scalars().int_data().data();
|
||||
std::vector<int8_t> data_raw(src_data.size());
|
||||
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
|
||||
return set_data_raw(element_offset, data_raw.data(), element_count);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto src_data = data->scalars().int_data().data();
|
||||
std::vector<int16_t> data_raw(src_data.size());
|
||||
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
|
||||
return set_data_raw(element_offset, data_raw.data(), element_count);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return set_data_raw(element_offset, data->scalars().int_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return set_data_raw(element_offset, data->scalars().long_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return set_data_raw(element_offset, data->scalars().float_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return set_data_raw(element_offset, data->scalars().double_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto begin = data->scalars().string_data().data().begin();
|
||||
auto end = data->scalars().string_data().data().end();
|
||||
std::vector<std::string> data_raw(begin, end);
|
||||
return set_data_raw(element_offset, data_raw.data(), element_count);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
VectorBase::fill_chunk_data(ssize_t element_count, const DataArray* data, const FieldMeta& field_meta) {
|
||||
if (field_meta.is_vector()) {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
return fill_chunk_data(data->vectors().float_vector().data().data(), element_count);
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
return fill_chunk_data(data->vectors().binary_vector().data(), element_count);
|
||||
} else {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
return fill_chunk_data(data->scalars().bool_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto src_data = data->scalars().int_data().data();
|
||||
std::vector<int8_t> data_raw(src_data.size());
|
||||
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
|
||||
return fill_chunk_data(data_raw.data(), element_count);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto src_data = data->scalars().int_data().data();
|
||||
std::vector<int16_t> data_raw(src_data.size());
|
||||
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
|
||||
return fill_chunk_data(data_raw.data(), element_count);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return fill_chunk_data(data->scalars().int_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return fill_chunk_data(data->scalars().long_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return fill_chunk_data(data->scalars().float_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return fill_chunk_data(data->scalars().double_data().data().data(), element_count);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto begin = data->scalars().string_data().data().begin();
|
||||
auto end = data->scalars().string_data().data().end();
|
||||
std::vector<std::string> data_raw(begin, end);
|
||||
return fill_chunk_data(data_raw.data(), element_count);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace milvus::segcore
|
|
@ -15,23 +15,22 @@
|
|||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <shared_mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/vector.hpp>
|
||||
#include <tbb/concurrent_vector.h>
|
||||
|
||||
#include "common/Types.h"
|
||||
#include "common/Span.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "utils/Utils.h"
|
||||
#include "common/FieldMeta.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
template <typename Type>
|
||||
using FixedVector = boost::container::vector<Type>;
|
||||
|
||||
template <typename Type>
|
||||
class ThreadSafeVector {
|
||||
public:
|
||||
|
@ -66,6 +65,13 @@ class ThreadSafeVector {
|
|||
return size_;
|
||||
}
|
||||
|
||||
void
|
||||
clear() {
|
||||
std::lock_guard lck(mutex_);
|
||||
size_ = 0;
|
||||
vec_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::atomic<int64_t> size_ = 0;
|
||||
std::deque<Type> vec_;
|
||||
|
@ -84,6 +90,15 @@ class VectorBase {
|
|||
virtual void
|
||||
set_data_raw(ssize_t element_offset, const void* source, ssize_t element_count) = 0;
|
||||
|
||||
void
|
||||
set_data_raw(ssize_t element_offset, ssize_t element_count, const DataArray* data, const FieldMeta& field_meta);
|
||||
|
||||
virtual void
|
||||
fill_chunk_data(const void* source, ssize_t element_count) = 0;
|
||||
|
||||
void
|
||||
fill_chunk_data(ssize_t element_count, const DataArray* data, const FieldMeta& field_meta);
|
||||
|
||||
virtual SpanBase
|
||||
get_span_base(int64_t chunk_id) const = 0;
|
||||
|
||||
|
@ -92,6 +107,15 @@ class VectorBase {
|
|||
return size_per_chunk_;
|
||||
}
|
||||
|
||||
virtual const void*
|
||||
get_chunk_data(ssize_t chunk_index) const = 0;
|
||||
|
||||
virtual ssize_t
|
||||
num_chunk() const = 0;
|
||||
|
||||
virtual bool
|
||||
empty() = 0;
|
||||
|
||||
protected:
|
||||
const int64_t size_per_chunk_;
|
||||
};
|
||||
|
@ -128,13 +152,13 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
get_span(int64_t chunk_id) const {
|
||||
auto& chunk = get_chunk(chunk_id);
|
||||
if constexpr (is_scalar) {
|
||||
return Span<TraitType>(chunk.data(), size_per_chunk_);
|
||||
return Span<TraitType>(chunk.data(), chunk.size());
|
||||
} else if constexpr (std::is_same_v<Type, int64_t> || std::is_same_v<Type, int>) {
|
||||
// only for testing
|
||||
PanicInfo("unimplemented");
|
||||
} else {
|
||||
static_assert(std::is_same_v<typename TraitType::embedded_type, Type>);
|
||||
return Span<TraitType>(chunk.data(), size_per_chunk_, Dim);
|
||||
return Span<TraitType>(chunk.data(), chunk.size(), Dim);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -143,17 +167,27 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
return get_span(chunk_id);
|
||||
}
|
||||
|
||||
void
|
||||
fill_chunk_data(const void* source, ssize_t element_count) override {
|
||||
if (element_count == 0) {
|
||||
return;
|
||||
}
|
||||
AssertInfo(chunks_.size() == 0, "no empty concurrent vector");
|
||||
chunks_.emplace_to_at_least(1, Dim * element_count);
|
||||
set_data(0, static_cast<const Type*>(source), element_count);
|
||||
}
|
||||
|
||||
void
|
||||
set_data_raw(ssize_t element_offset, const void* source, ssize_t element_count) override {
|
||||
if (element_count == 0) {
|
||||
return;
|
||||
}
|
||||
this->grow_to_at_least(element_offset + element_count);
|
||||
set_data(element_offset, static_cast<const Type*>(source), element_count);
|
||||
}
|
||||
|
||||
void
|
||||
set_data(ssize_t element_offset, const Type* source, ssize_t element_count) {
|
||||
if (element_count == 0) {
|
||||
return;
|
||||
}
|
||||
this->grow_to_at_least(element_offset + element_count);
|
||||
auto chunk_id = element_offset / size_per_chunk_;
|
||||
auto chunk_offset = element_offset % size_per_chunk_;
|
||||
ssize_t source_offset = 0;
|
||||
|
@ -190,6 +224,11 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
return chunks_[chunk_index];
|
||||
}
|
||||
|
||||
const void*
|
||||
get_chunk_data(ssize_t chunk_index) const override {
|
||||
return chunks_[chunk_index].data();
|
||||
}
|
||||
|
||||
// just for fun, don't use it directly
|
||||
const Type*
|
||||
get_element(ssize_t element_index) const {
|
||||
|
@ -207,10 +246,26 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
}
|
||||
|
||||
ssize_t
|
||||
num_chunk() const {
|
||||
num_chunk() const override {
|
||||
return chunks_.size();
|
||||
}
|
||||
|
||||
bool
|
||||
empty() override {
|
||||
for (size_t i = 0; i < chunks_.size(); i++) {
|
||||
if (get_chunk(i).size() > 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
clear() {
|
||||
chunks_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void
|
||||
fill_chunk(
|
||||
|
@ -234,7 +289,7 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
template <typename Type>
|
||||
class ConcurrentVector : public ConcurrentVectorImpl<Type, true> {
|
||||
public:
|
||||
static_assert(std::is_fundamental_v<Type>);
|
||||
static_assert(IsScalar<Type> || std::is_same_v<Type, PkType>);
|
||||
explicit ConcurrentVector(int64_t size_per_chunk)
|
||||
: ConcurrentVectorImpl<Type, true>::ConcurrentVectorImpl(1, size_per_chunk) {
|
||||
}
|
||||
|
|
|
@ -32,9 +32,7 @@ struct DeletedRecord {
|
|||
};
|
||||
static constexpr int64_t deprecated_size_per_chunk = 32 * 1024;
|
||||
DeletedRecord()
|
||||
: lru_(std::make_shared<TmpBitmap>()),
|
||||
timestamps_(deprecated_size_per_chunk),
|
||||
uids_(deprecated_size_per_chunk) {
|
||||
: lru_(std::make_shared<TmpBitmap>()), timestamps_(deprecated_size_per_chunk), pks_(deprecated_size_per_chunk) {
|
||||
lru_->bitmap_ptr = std::make_shared<BitsetType>();
|
||||
}
|
||||
|
||||
|
@ -60,7 +58,7 @@ struct DeletedRecord {
|
|||
std::atomic<int64_t> reserved = 0;
|
||||
AckResponder ack_responder_;
|
||||
ConcurrentVector<Timestamp> timestamps_;
|
||||
ConcurrentVector<idx_t> uids_;
|
||||
ConcurrentVector<PkType> pks_;
|
||||
int64_t record_size_ = 0;
|
||||
|
||||
private:
|
||||
|
|
|
@ -11,7 +11,8 @@
|
|||
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <index/ScalarIndexSort.h>
|
||||
#include "index/ScalarIndexSort.h"
|
||||
#include "index/StringIndexSort.h"
|
||||
|
||||
#include "common/SystemProperty.h"
|
||||
#include "knowhere/index/vector_index/IndexIVF.h"
|
||||
|
@ -111,9 +112,15 @@ ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const
|
|||
const auto& chunk = source->get_chunk(chunk_id);
|
||||
// build index for chunk
|
||||
// TODO
|
||||
auto indexing = scalar::CreateScalarIndexSort<T>();
|
||||
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
|
||||
data_[chunk_id] = std::move(indexing);
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
auto indexing = scalar::CreateStringIndexSort();
|
||||
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
|
||||
data_[chunk_id] = std::move(indexing);
|
||||
} else {
|
||||
auto indexing = scalar::CreateScalarIndexSort<T>();
|
||||
indexing->Build(vec_base->get_size_per_chunk(), chunk.data());
|
||||
data_[chunk_id] = std::move(indexing);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -142,6 +149,8 @@ CreateIndex(const FieldMeta& field_meta, const SegcoreConfig& segcore_config) {
|
|||
return std::make_unique<ScalarFieldIndexing<float>>(field_meta, segcore_config);
|
||||
case DataType::DOUBLE:
|
||||
return std::make_unique<ScalarFieldIndexing<double>>(field_meta, segcore_config);
|
||||
case DataType::VARCHAR:
|
||||
return std::make_unique<ScalarFieldIndexing<std::string>>(field_meta, segcore_config);
|
||||
default:
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
|
|
@ -118,22 +118,21 @@ class IndexingRecord {
|
|||
void
|
||||
Initialize() {
|
||||
int offset_id = 0;
|
||||
for (const FieldMeta& field : schema_) {
|
||||
auto offset = FieldOffset(offset_id);
|
||||
for (auto& [field_id, field_meta] : schema_.get_fields()) {
|
||||
++offset_id;
|
||||
|
||||
if (field.is_vector()) {
|
||||
if (field_meta.is_vector()) {
|
||||
// TODO: skip binary small index now, reenable after config.yaml is ready
|
||||
if (field.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
continue;
|
||||
}
|
||||
// flat should be skipped
|
||||
if (!field.get_metric_type().has_value()) {
|
||||
if (!field_meta.get_metric_type().has_value()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
field_indexings_.try_emplace(offset, CreateIndex(field, segcore_config_));
|
||||
field_indexings_.try_emplace(field_id, CreateIndex(field_meta, segcore_config_));
|
||||
}
|
||||
assert(offset_id == schema_.size());
|
||||
}
|
||||
|
@ -149,28 +148,28 @@ class IndexingRecord {
|
|||
}
|
||||
|
||||
const FieldIndexing&
|
||||
get_field_indexing(FieldOffset field_offset) const {
|
||||
Assert(field_indexings_.count(field_offset));
|
||||
return *field_indexings_.at(field_offset);
|
||||
get_field_indexing(FieldId field_id) const {
|
||||
Assert(field_indexings_.count(field_id));
|
||||
return *field_indexings_.at(field_id);
|
||||
}
|
||||
|
||||
const VectorFieldIndexing&
|
||||
get_vec_field_indexing(FieldOffset field_offset) const {
|
||||
auto& field_indexing = get_field_indexing(field_offset);
|
||||
get_vec_field_indexing(FieldId field_id) const {
|
||||
auto& field_indexing = get_field_indexing(field_id);
|
||||
auto ptr = dynamic_cast<const VectorFieldIndexing*>(&field_indexing);
|
||||
AssertInfo(ptr, "invalid indexing");
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
bool
|
||||
is_in(FieldOffset field_offset) const {
|
||||
return field_indexings_.count(field_offset);
|
||||
is_in(FieldId field_id) const {
|
||||
return field_indexings_.count(field_id);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing<T>& {
|
||||
auto& entry = get_field_indexing(field_offset);
|
||||
get_scalar_field_indexing(FieldId field_id) const -> const ScalarFieldIndexing<T>& {
|
||||
auto& entry = get_field_indexing(field_id);
|
||||
auto ptr = dynamic_cast<const ScalarFieldIndexing<T>*>(&entry);
|
||||
AssertInfo(ptr, "invalid indexing");
|
||||
return *ptr;
|
||||
|
@ -189,7 +188,7 @@ class IndexingRecord {
|
|||
|
||||
private:
|
||||
// field_offset => indexing
|
||||
std::map<FieldOffset, std::unique_ptr<FieldIndexing>> field_indexings_;
|
||||
std::map<FieldId, std::unique_ptr<FieldIndexing>> field_indexings_;
|
||||
};
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -14,46 +14,53 @@
|
|||
namespace milvus::segcore {
|
||||
|
||||
InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk)
|
||||
: uids_(size_per_chunk), timestamps_(size_per_chunk) {
|
||||
: row_ids_(size_per_chunk), timestamps_(size_per_chunk) {
|
||||
for (auto& field : schema) {
|
||||
if (field.is_vector()) {
|
||||
if (field.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
this->append_field_data<FloatVector>(field.get_dim(), size_per_chunk);
|
||||
auto field_id = field.first;
|
||||
auto& field_meta = field.second;
|
||||
|
||||
if (field_meta.is_vector()) {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
this->append_field_data<FloatVector>(field_id, field_meta.get_dim(), size_per_chunk);
|
||||
continue;
|
||||
} else if (field.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
this->append_field_data<BinaryVector>(field.get_dim(), size_per_chunk);
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
this->append_field_data<BinaryVector>(field_id, field_meta.get_dim(), size_per_chunk);
|
||||
continue;
|
||||
} else {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
switch (field.get_data_type()) {
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
this->append_field_data<bool>(size_per_chunk);
|
||||
this->append_field_data<bool>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
this->append_field_data<int8_t>(size_per_chunk);
|
||||
this->append_field_data<int8_t>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
this->append_field_data<int16_t>(size_per_chunk);
|
||||
this->append_field_data<int16_t>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
this->append_field_data<int32_t>(size_per_chunk);
|
||||
this->append_field_data<int32_t>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
this->append_field_data<int64_t>(size_per_chunk);
|
||||
this->append_field_data<int64_t>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
this->append_field_data<float>(size_per_chunk);
|
||||
this->append_field_data<float>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
this->append_field_data<double>(size_per_chunk);
|
||||
this->append_field_data<double>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
this->append_field_data<std::string>(field_id, size_per_chunk);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
|
|
|
@ -13,34 +13,41 @@
|
|||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/Schema.h"
|
||||
#include "segcore/AckResponder.h"
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
#include "segcore/Record.h"
|
||||
#include "TimestampIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
struct InsertRecord {
|
||||
ConcurrentVector<Timestamp> timestamps_;
|
||||
ConcurrentVector<idx_t> row_ids_;
|
||||
|
||||
// used for preInsert of growing segment
|
||||
std::atomic<int64_t> reserved = 0;
|
||||
AckResponder ack_responder_;
|
||||
ConcurrentVector<Timestamp> timestamps_;
|
||||
ConcurrentVector<idx_t> uids_;
|
||||
|
||||
// used for timestamps index of sealed segment
|
||||
TimestampIndex timestamp_index_;
|
||||
|
||||
explicit InsertRecord(const Schema& schema, int64_t size_per_chunk);
|
||||
|
||||
// get field data without knowing the type
|
||||
VectorBase*
|
||||
get_field_data_base(FieldOffset field_offset) const {
|
||||
auto ptr = fields_data_[field_offset.get()].get();
|
||||
get_field_data_base(FieldId field_id) const {
|
||||
auto ptr = fields_data_.at(field_id).get();
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// get field data in given type, const version
|
||||
template <typename Type>
|
||||
const ConcurrentVector<Type>*
|
||||
get_field_data(FieldOffset field_offset) const {
|
||||
auto base_ptr = get_field_data_base(field_offset);
|
||||
get_field_data(FieldId field_id) const {
|
||||
auto base_ptr = get_field_data_base(field_id);
|
||||
auto ptr = dynamic_cast<const ConcurrentVector<Type>*>(base_ptr);
|
||||
Assert(ptr);
|
||||
return ptr;
|
||||
|
@ -49,8 +56,8 @@ struct InsertRecord {
|
|||
// get field data in given type, non-const version
|
||||
template <typename Type>
|
||||
ConcurrentVector<Type>*
|
||||
get_field_data(FieldOffset field_offset) {
|
||||
auto base_ptr = get_field_data_base(field_offset);
|
||||
get_field_data(FieldId field_id) {
|
||||
auto base_ptr = get_field_data_base(field_id);
|
||||
auto ptr = dynamic_cast<ConcurrentVector<Type>*>(base_ptr);
|
||||
Assert(ptr);
|
||||
return ptr;
|
||||
|
@ -59,21 +66,27 @@ struct InsertRecord {
|
|||
// append a column of scalar type
|
||||
template <typename Type>
|
||||
void
|
||||
append_field_data(int64_t size_per_chunk) {
|
||||
static_assert(std::is_fundamental_v<Type>);
|
||||
fields_data_.emplace_back(std::make_unique<ConcurrentVector<Type>>(size_per_chunk));
|
||||
append_field_data(FieldId field_id, int64_t size_per_chunk) {
|
||||
static_assert(IsScalar<Type>);
|
||||
fields_data_.emplace(field_id, std::make_unique<ConcurrentVector<Type>>(size_per_chunk));
|
||||
}
|
||||
|
||||
// append a column of vector type
|
||||
template <typename VectorType>
|
||||
void
|
||||
append_field_data(int64_t dim, int64_t size_per_chunk) {
|
||||
append_field_data(FieldId field_id, int64_t dim, int64_t size_per_chunk) {
|
||||
static_assert(std::is_base_of_v<VectorTrait, VectorType>);
|
||||
fields_data_.emplace_back(std::make_unique<ConcurrentVector<VectorType>>(dim, size_per_chunk));
|
||||
fields_data_.emplace(field_id, std::make_unique<ConcurrentVector<VectorType>>(dim, size_per_chunk));
|
||||
}
|
||||
|
||||
void
|
||||
drop_field_data(FieldId field_id) {
|
||||
fields_data_.erase(field_id);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<VectorBase>> fields_data_;
|
||||
// std::vector<std::unique_ptr<VectorBase>> fields_data_;
|
||||
std::unordered_map<FieldId, std::unique_ptr<VectorBase>> fields_data_;
|
||||
};
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -19,14 +19,15 @@
|
|||
using milvus::SearchResult;
|
||||
|
||||
struct SearchResultPair {
|
||||
int64_t primary_key_;
|
||||
milvus::PkType primary_key_;
|
||||
float distance_;
|
||||
milvus::SearchResult* search_result_;
|
||||
int64_t index_;
|
||||
int64_t offset_;
|
||||
int64_t offset_rb_; // right bound
|
||||
|
||||
SearchResultPair(int64_t primary_key, float distance, SearchResult* result, int64_t index, int64_t lb, int64_t rb)
|
||||
SearchResultPair(
|
||||
milvus::PkType primary_key, float distance, SearchResult* result, int64_t index, int64_t lb, int64_t rb)
|
||||
: primary_key_(primary_key),
|
||||
distance_(distance),
|
||||
search_result_(result),
|
||||
|
@ -37,10 +38,10 @@ struct SearchResultPair {
|
|||
|
||||
bool
|
||||
operator>(const SearchResultPair& other) const {
|
||||
if (this->primary_key_ == INVALID_ID) {
|
||||
if (this->primary_key_ == INVALID_PK) {
|
||||
return false;
|
||||
} else {
|
||||
if (other.primary_key_ == INVALID_ID) {
|
||||
if (other.primary_key_ == INVALID_PK) {
|
||||
return true;
|
||||
} else {
|
||||
return (distance_ > other.distance_);
|
||||
|
@ -50,17 +51,12 @@ struct SearchResultPair {
|
|||
|
||||
void
|
||||
reset() {
|
||||
offset_++;
|
||||
if (offset_ < offset_rb_) {
|
||||
offset_++;
|
||||
if (offset_ < offset_rb_) {
|
||||
primary_key_ = search_result_->primary_keys_.at(offset_);
|
||||
distance_ = search_result_->distances_.at(offset_);
|
||||
} else {
|
||||
primary_key_ = INVALID_ID;
|
||||
distance_ = std::numeric_limits<float>::max();
|
||||
}
|
||||
primary_key_ = search_result_->primary_keys_.at(offset_);
|
||||
distance_ = search_result_->distances_.at(offset_);
|
||||
} else {
|
||||
primary_key_ = INVALID_ID;
|
||||
primary_key_ = INVALID_PK;
|
||||
distance_ = std::numeric_limits<float>::max();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <shared_mutex>
|
||||
#include <utility>
|
||||
|
@ -32,36 +33,36 @@ using SealedIndexingEntryPtr = std::unique_ptr<SealedIndexingEntry>;
|
|||
|
||||
struct SealedIndexingRecord {
|
||||
void
|
||||
append_field_indexing(FieldOffset field_offset, MetricType metric_type, knowhere::VecIndexPtr indexing) {
|
||||
append_field_indexing(FieldId field_id, MetricType metric_type, knowhere::VecIndexPtr indexing) {
|
||||
auto ptr = std::make_unique<SealedIndexingEntry>();
|
||||
ptr->indexing_ = indexing;
|
||||
ptr->metric_type_ = metric_type;
|
||||
std::unique_lock lck(mutex_);
|
||||
field_indexings_[field_offset] = std::move(ptr);
|
||||
field_indexings_[field_id] = std::move(ptr);
|
||||
}
|
||||
|
||||
const SealedIndexingEntry*
|
||||
get_field_indexing(FieldOffset field_offset) const {
|
||||
get_field_indexing(FieldId field_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(field_indexings_.count(field_offset), "field_offset not found");
|
||||
return field_indexings_.at(field_offset).get();
|
||||
AssertInfo(field_indexings_.count(field_id), "field_id not found");
|
||||
return field_indexings_.at(field_id).get();
|
||||
}
|
||||
|
||||
void
|
||||
drop_field_indexing(FieldOffset field_offset) {
|
||||
drop_field_indexing(FieldId field_id) {
|
||||
std::unique_lock lck(mutex_);
|
||||
field_indexings_.erase(field_offset);
|
||||
field_indexings_.erase(field_id);
|
||||
}
|
||||
|
||||
bool
|
||||
is_ready(FieldOffset field_offset) const {
|
||||
is_ready(FieldId field_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
return field_indexings_.count(field_offset);
|
||||
return field_indexings_.count(field_id);
|
||||
}
|
||||
|
||||
private:
|
||||
// field_offset -> SealedIndexingEntry
|
||||
std::map<FieldOffset, SealedIndexingEntryPtr> field_indexings_;
|
||||
std::unordered_map<FieldId, SealedIndexingEntryPtr> field_indexings_;
|
||||
mutable std::shared_mutex mutex_;
|
||||
};
|
||||
|
||||
|
|
|
@ -43,19 +43,12 @@ class SegmentGrowing : public SegmentInternalInterface {
|
|||
virtual int64_t
|
||||
PreInsert(int64_t size) = 0;
|
||||
|
||||
virtual Status
|
||||
Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const Timestamp* timestamps,
|
||||
const RowBasedRawData& values) = 0;
|
||||
|
||||
virtual void
|
||||
Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const Timestamp* timestamps,
|
||||
const ColumnBasedRawData& values) = 0;
|
||||
const InsertData* insert_data) = 0;
|
||||
|
||||
// virtual int64_t
|
||||
// PreDelete(int64_t size) = 0;
|
||||
|
|
|
@ -23,12 +23,13 @@
|
|||
#include "segcore/Reduce.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "utils/Utils.h"
|
||||
#include "segcore/Utils.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
int64_t
|
||||
SegmentGrowingImpl::PreInsert(int64_t size) {
|
||||
auto reserved_begin = record_.reserved.fetch_add(size);
|
||||
auto reserved_begin = insert_record_.reserved.fetch_add(size);
|
||||
return reserved_begin;
|
||||
}
|
||||
|
||||
|
@ -65,12 +66,12 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
|
|||
}
|
||||
for (auto del_index = start; del_index < end; ++del_index) {
|
||||
// get uid in delete logs
|
||||
auto uid = deleted_record_.uids_[del_index];
|
||||
auto uid = deleted_record_.pks_[del_index];
|
||||
|
||||
// map uid to corresponding offsets, select the max one, which should be the target
|
||||
// the max one should be closest to query_timestamp, so the delete log should refer to it
|
||||
int64_t the_offset = -1;
|
||||
auto [iter_b, iter_e] = uid2offset_.equal_range(uid);
|
||||
auto [iter_b, iter_e] = pk2offset_.equal_range(uid);
|
||||
|
||||
for (auto iter = iter_b; iter != iter_e; ++iter) {
|
||||
auto offset = iter->second;
|
||||
|
@ -79,7 +80,7 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
|
|||
if (the_offset == -1) {
|
||||
continue;
|
||||
}
|
||||
if (record_.timestamps_[the_offset] >= query_timestamp) {
|
||||
if (insert_record_.timestamps_[the_offset] >= query_timestamp) {
|
||||
bitmap->reset(the_offset);
|
||||
} else {
|
||||
bitmap->set(the_offset);
|
||||
|
@ -105,121 +106,80 @@ SegmentGrowingImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Ti
|
|||
bitset |= delete_bitset;
|
||||
}
|
||||
|
||||
Status
|
||||
SegmentGrowingImpl::Insert(int64_t reserved_begin,
|
||||
void
|
||||
SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* uids_raw,
|
||||
const int64_t* row_ids,
|
||||
const Timestamp* timestamps_raw,
|
||||
const RowBasedRawData& entities_raw) {
|
||||
AssertInfo(entities_raw.count == size, "Entities_raw count not equal to insert size");
|
||||
// step 1: check schema if valid
|
||||
if (entities_raw.sizeof_per_row != schema_->get_total_sizeof()) {
|
||||
std::string msg = "entity length = " + std::to_string(entities_raw.sizeof_per_row) +
|
||||
", schema length = " + std::to_string(schema_->get_total_sizeof());
|
||||
throw std::runtime_error(msg);
|
||||
const InsertData* insert_data) {
|
||||
AssertInfo(insert_data->num_rows() == size, "Entities_raw count not equal to insert size");
|
||||
// AssertInfo(insert_data->fields_data_size() == schema_->size(),
|
||||
// "num fields of insert data not equal to num of schema fields");
|
||||
// step 1: check insert data if valid
|
||||
std::unordered_map<FieldId, int64_t> field_id_to_offset;
|
||||
int64_t field_offset = 0;
|
||||
for (auto field : insert_data->fields_data()) {
|
||||
auto field_id = FieldId(field.field_id());
|
||||
AssertInfo(!field_id_to_offset.count(field_id), "duplicate field data");
|
||||
field_id_to_offset.emplace(field_id, field_offset++);
|
||||
}
|
||||
|
||||
// step 2: sort timestamp
|
||||
auto raw_data = reinterpret_cast<const char*>(entities_raw.raw_data);
|
||||
auto len_per_row = entities_raw.sizeof_per_row;
|
||||
std::vector<std::tuple<Timestamp, idx_t, int64_t>> ordering;
|
||||
ordering.resize(size);
|
||||
// #pragma omp parallel for
|
||||
// query node already guarantees that the timestamp is ordered, avoid field data copy in c++
|
||||
|
||||
// step 3: fill into Segment.ConcurrentVector
|
||||
insert_record_.timestamps_.set_data_raw(reserved_offset, timestamps_raw, size);
|
||||
insert_record_.row_ids_.set_data_raw(reserved_offset, row_ids, size);
|
||||
for (auto [field_id, field_meta] : schema_->get_fields()) {
|
||||
AssertInfo(field_id_to_offset.count(field_id), "Cannot find field_id");
|
||||
auto data_offset = field_id_to_offset[field_id];
|
||||
insert_record_.get_field_data_base(field_id)->set_data_raw(reserved_offset, size,
|
||||
&insert_data->fields_data(data_offset), field_meta);
|
||||
}
|
||||
|
||||
// step 4: set pks to offset
|
||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(field_id.get() != INVALID_FIELD_ID, "Primary key is -1");
|
||||
std::vector<PkType> pks(size);
|
||||
ParsePksFromFieldData(pks, insert_data->fields_data(field_id_to_offset[field_id]));
|
||||
for (int i = 0; i < size; ++i) {
|
||||
ordering[i] = std::make_tuple(timestamps_raw[i], uids_raw[i], i);
|
||||
}
|
||||
std::sort(ordering.begin(), ordering.end());
|
||||
|
||||
// step 3: and convert row-based data to column-based data accordingly
|
||||
auto sizeof_infos = schema_->get_sizeof_infos();
|
||||
std::vector<int> offset_infos(schema_->size() + 1, 0);
|
||||
std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
|
||||
std::vector<aligned_vector<uint8_t>> entities(schema_->size());
|
||||
|
||||
for (int fid = 0; fid < schema_->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
entities[fid].resize(len * size);
|
||||
pk2offset_.insert(std::make_pair(pks[i], reserved_offset + i));
|
||||
}
|
||||
|
||||
std::vector<idx_t> uids(size);
|
||||
std::vector<Timestamp> timestamps(size);
|
||||
// #pragma omp parallel for
|
||||
for (int index = 0; index < size; ++index) {
|
||||
auto [t, uid, order_index] = ordering[index];
|
||||
timestamps[index] = t;
|
||||
uids[index] = uid;
|
||||
for (int fid = 0; fid < schema_->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
auto offset = offset_infos[fid];
|
||||
auto src = raw_data + order_index * len_per_row + offset;
|
||||
auto dst = entities[fid].data() + index * len;
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
}
|
||||
|
||||
do_insert(reserved_begin, size, uids.data(), timestamps.data(), entities);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::do_insert(int64_t reserved_begin,
|
||||
int64_t size,
|
||||
const idx_t* row_ids,
|
||||
const Timestamp* timestamps,
|
||||
const std::vector<aligned_vector<uint8_t>>& columns_data) {
|
||||
// step 4: fill into Segment.ConcurrentVector
|
||||
record_.timestamps_.set_data(reserved_begin, timestamps, size);
|
||||
record_.uids_.set_data(reserved_begin, row_ids, size);
|
||||
for (int fid = 0; fid < schema_->size(); ++fid) {
|
||||
auto field_offset = FieldOffset(fid);
|
||||
record_.get_field_data_base(field_offset)->set_data_raw(reserved_begin, columns_data[fid].data(), size);
|
||||
}
|
||||
|
||||
if (schema_->get_is_auto_id()) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto row_id = row_ids[i];
|
||||
// NOTE: this must be the last step, cannot be put above
|
||||
uid2offset_.insert(std::make_pair(row_id, reserved_begin + i));
|
||||
}
|
||||
} else {
|
||||
auto offset = schema_->get_primary_key_offset().value_or(FieldOffset(-1));
|
||||
AssertInfo(offset.get() != -1, "Primary key offset is -1");
|
||||
auto& row = columns_data[offset.get()];
|
||||
auto row_ptr = reinterpret_cast<const int64_t*>(row.data());
|
||||
for (int i = 0; i < size; ++i) {
|
||||
uid2offset_.insert(std::make_pair(row_ptr[i], reserved_begin + i));
|
||||
}
|
||||
}
|
||||
|
||||
record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
|
||||
// step 5: update small indexes
|
||||
insert_record_.ack_responder_.AddSegment(reserved_offset, reserved_offset + size);
|
||||
if (enable_small_index_) {
|
||||
int64_t chunk_rows = segcore_config_.get_chunk_rows();
|
||||
indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / chunk_rows, record_);
|
||||
indexing_record_.UpdateResourceAck(insert_record_.ack_responder_.GetAck() / chunk_rows, insert_record_);
|
||||
}
|
||||
}
|
||||
|
||||
Status
|
||||
SegmentGrowingImpl::Delete(int64_t reserved_begin,
|
||||
int64_t size,
|
||||
const int64_t* uids_raw,
|
||||
const Timestamp* timestamps_raw) {
|
||||
std::vector<std::tuple<Timestamp, idx_t>> ordering;
|
||||
ordering.resize(size);
|
||||
// #pragma omp parallel for
|
||||
for (int i = 0; i < size; ++i) {
|
||||
ordering[i] = std::make_tuple(timestamps_raw[i], uids_raw[i]);
|
||||
SegmentGrowingImpl::Delete(int64_t reserved_begin, int64_t size, const IdArray* ids, const Timestamp* timestamps_raw) {
|
||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
std::vector<PkType> pks(size);
|
||||
ParsePksFromIDs(pks, field_meta.get_data_type(), *ids);
|
||||
|
||||
// step 1: sort timestamp
|
||||
std::vector<std::tuple<Timestamp, PkType>> ordering(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
ordering[i] = std::make_tuple(timestamps_raw[i], pks[i]);
|
||||
}
|
||||
std::sort(ordering.begin(), ordering.end());
|
||||
std::vector<idx_t> uids(size);
|
||||
std::vector<Timestamp> timestamps(size);
|
||||
// #pragma omp parallel for
|
||||
for (int index = 0; index < size; ++index) {
|
||||
auto [t, uid] = ordering[index];
|
||||
timestamps[index] = t;
|
||||
uids[index] = uid;
|
||||
std::vector<PkType> sort_pks(size);
|
||||
std::vector<Timestamp> sort_timestamps(size);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
auto [t, pk] = ordering[i];
|
||||
sort_timestamps[i] = t;
|
||||
sort_pks[i] = pk;
|
||||
}
|
||||
deleted_record_.timestamps_.set_data(reserved_begin, timestamps.data(), size);
|
||||
deleted_record_.uids_.set_data(reserved_begin, uids.data(), size);
|
||||
|
||||
// step 2: fill delete record
|
||||
deleted_record_.timestamps_.set_data_raw(reserved_begin, sort_timestamps.data(), size);
|
||||
deleted_record_.pks_.set_data_raw(reserved_begin, sort_pks.data(), size);
|
||||
deleted_record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -228,7 +188,7 @@ int64_t
|
|||
SegmentGrowingImpl::GetMemoryUsageInBytes() const {
|
||||
int64_t total_bytes = 0;
|
||||
auto chunk_rows = segcore_config_.get_chunk_rows();
|
||||
int64_t ins_n = upper_align(record_.reserved, chunk_rows);
|
||||
int64_t ins_n = upper_align(insert_record_.reserved, chunk_rows);
|
||||
total_bytes += ins_n * (schema_->get_total_sizeof() + 16 + 1);
|
||||
int64_t del_n = upper_align(deleted_record_.reserved, chunk_rows);
|
||||
total_bytes += del_n * (16 * 2);
|
||||
|
@ -236,8 +196,8 @@ SegmentGrowingImpl::GetMemoryUsageInBytes() const {
|
|||
}
|
||||
|
||||
SpanBase
|
||||
SegmentGrowingImpl::chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const {
|
||||
auto vec = get_insert_record().get_field_data_base(field_offset);
|
||||
SegmentGrowingImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
auto vec = get_insert_record().get_field_data_base(field_id);
|
||||
return vec->get_span_base(chunk_id);
|
||||
}
|
||||
|
||||
|
@ -256,7 +216,7 @@ SegmentGrowingImpl::vector_search(int64_t vec_count,
|
|||
const BitsetView& bitset,
|
||||
SearchResult& output) const {
|
||||
auto& sealed_indexing = this->get_sealed_indexing_record();
|
||||
if (sealed_indexing.is_ready(search_info.field_offset_)) {
|
||||
if (sealed_indexing.is_ready(search_info.field_id_)) {
|
||||
query::SearchOnSealed(this->get_schema(), sealed_indexing, search_info, query_data, query_count, bitset, output,
|
||||
id_);
|
||||
} else {
|
||||
|
@ -264,54 +224,64 @@ SegmentGrowingImpl::vector_search(int64_t vec_count,
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::bulk_subscript(FieldOffset field_offset,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
void* output) const {
|
||||
std::unique_ptr<DataArray>
|
||||
SegmentGrowingImpl::bulk_subscript(FieldId field_id, const int64_t* seg_offsets, int64_t count) const {
|
||||
// TODO: support more types
|
||||
auto vec_ptr = record_.get_field_data_base(field_offset);
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
auto vec_ptr = insert_record_.get_field_data_base(field_id);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
if (field_meta.is_vector()) {
|
||||
aligned_vector<char> output(field_meta.get_sizeof() * count);
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
bulk_subscript_impl<FloatVector>(field_meta.get_sizeof(), *vec_ptr, seg_offsets, count, output);
|
||||
bulk_subscript_impl<FloatVector>(field_meta.get_sizeof(), *vec_ptr, seg_offsets, count, output.data());
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
bulk_subscript_impl<BinaryVector>(field_meta.get_sizeof(), *vec_ptr, seg_offsets, count, output);
|
||||
bulk_subscript_impl<BinaryVector>(field_meta.get_sizeof(), *vec_ptr, seg_offsets, count, output.data());
|
||||
} else {
|
||||
PanicInfo("logical error");
|
||||
}
|
||||
return;
|
||||
return CreateVectorDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
AssertInfo(!field_meta.is_vector(), "Scalar field meta type is vector type");
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
bulk_subscript_impl<bool>(*vec_ptr, seg_offsets, count, false, output);
|
||||
break;
|
||||
FixedVector<bool> output(count);
|
||||
bulk_subscript_impl<bool>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
bulk_subscript_impl<int8_t>(*vec_ptr, seg_offsets, count, -1, output);
|
||||
break;
|
||||
FixedVector<bool> output(count);
|
||||
bulk_subscript_impl<int8_t>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
bulk_subscript_impl<int16_t>(*vec_ptr, seg_offsets, count, -1, output);
|
||||
break;
|
||||
FixedVector<int16_t> output(count);
|
||||
bulk_subscript_impl<int16_t>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
bulk_subscript_impl<int32_t>(*vec_ptr, seg_offsets, count, -1, output);
|
||||
break;
|
||||
FixedVector<int32_t> output(count);
|
||||
bulk_subscript_impl<int32_t>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
bulk_subscript_impl<int64_t>(*vec_ptr, seg_offsets, count, -1, output);
|
||||
break;
|
||||
FixedVector<int64_t> output(count);
|
||||
bulk_subscript_impl<int64_t>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
bulk_subscript_impl<float>(*vec_ptr, seg_offsets, count, -1.0, output);
|
||||
break;
|
||||
FixedVector<float> output(count);
|
||||
bulk_subscript_impl<float>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
bulk_subscript_impl<double>(*vec_ptr, seg_offsets, count, -1.0, output);
|
||||
break;
|
||||
FixedVector<double> output(count);
|
||||
bulk_subscript_impl<double>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
FixedVector<std::string> output(count);
|
||||
bulk_subscript_impl<std::string>(*vec_ptr, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported type");
|
||||
|
@ -342,8 +312,10 @@ SegmentGrowingImpl::bulk_subscript_impl(int64_t element_sizeof,
|
|||
|
||||
template <typename T>
|
||||
void
|
||||
SegmentGrowingImpl::bulk_subscript_impl(
|
||||
const VectorBase& vec_raw, const int64_t* seg_offsets, int64_t count, T default_value, void* output_raw) const {
|
||||
SegmentGrowingImpl::bulk_subscript_impl(const VectorBase& vec_raw,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
void* output_raw) const {
|
||||
static_assert(IsScalar<T>);
|
||||
auto vec_ptr = dynamic_cast<const ConcurrentVector<T>*>(&vec_raw);
|
||||
AssertInfo(vec_ptr, "Pointer of vec_raw is nullptr");
|
||||
|
@ -351,7 +323,9 @@ SegmentGrowingImpl::bulk_subscript_impl(
|
|||
auto output = reinterpret_cast<T*>(output_raw);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
auto offset = seg_offsets[i];
|
||||
output[i] = (offset == INVALID_SEG_OFFSET ? default_value : vec[offset]);
|
||||
if (offset != INVALID_SEG_OFFSET) {
|
||||
output[i] = vec[offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -364,79 +338,23 @@ SegmentGrowingImpl::bulk_subscript(SystemFieldType system_type,
|
|||
case SystemFieldType::Timestamp:
|
||||
PanicInfo("timestamp unsupported");
|
||||
case SystemFieldType::RowId:
|
||||
bulk_subscript_impl<int64_t>(this->record_.uids_, seg_offsets, count, INVALID_ID, output);
|
||||
bulk_subscript_impl<int64_t>(this->insert_record_.row_ids_, seg_offsets, count, output);
|
||||
break;
|
||||
default:
|
||||
PanicInfo("unknown subscript fields");
|
||||
}
|
||||
}
|
||||
|
||||
// copied from stack overflow
|
||||
template <typename T>
|
||||
std::vector<size_t>
|
||||
sort_indexes(const T* src, int64_t size) {
|
||||
// initialize original index locations
|
||||
std::vector<size_t> idx(size);
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
// using std::stable_sort instead of std::sort
|
||||
// to avoid unnecessary index re-orderings
|
||||
// when v contains elements of equal values
|
||||
std::stable_sort(idx.begin(), idx.end(), [src](size_t i1, size_t i2) { return src[i1] < src[i2]; });
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids_raw,
|
||||
const Timestamp* timestamps_raw,
|
||||
const ColumnBasedRawData& values) {
|
||||
auto indexes = sort_indexes(timestamps_raw, size);
|
||||
std::vector<Timestamp> timestamps(size);
|
||||
std::vector<idx_t> row_ids(size);
|
||||
AssertInfo(values.count == size, "Insert values count not equal to insert size");
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
auto offset = indexes[i];
|
||||
timestamps[i] = timestamps_raw[offset];
|
||||
row_ids[i] = row_ids_raw[i];
|
||||
}
|
||||
std::vector<aligned_vector<uint8_t>> columns_data;
|
||||
|
||||
for (int field_offset = 0; field_offset < schema_->size(); ++field_offset) {
|
||||
auto& field_meta = schema_->operator[](FieldOffset(field_offset));
|
||||
aligned_vector<uint8_t> column;
|
||||
auto element_sizeof = field_meta.get_sizeof();
|
||||
auto& src_vec = values.columns_[field_offset];
|
||||
AssertInfo(src_vec.size() == element_sizeof * size, "Vector size is not aligned");
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
auto offset = indexes[i];
|
||||
auto beg = src_vec.data() + offset * element_sizeof;
|
||||
column.insert(column.end(), beg, beg + element_sizeof);
|
||||
}
|
||||
columns_data.emplace_back(std::move(column));
|
||||
}
|
||||
do_insert(reserved_offset, size, row_ids.data(), timestamps.data(), columns_data);
|
||||
}
|
||||
|
||||
std::vector<SegOffset>
|
||||
SegmentGrowingImpl::search_ids(const BitsetType& bitset, Timestamp timestamp) const {
|
||||
std::vector<SegOffset> res_offsets;
|
||||
|
||||
for (int i = 0; i < bitset.size(); i++) {
|
||||
if (bitset[i]) {
|
||||
SegOffset the_offset(-1);
|
||||
auto offset = SegOffset(i);
|
||||
if (record_.timestamps_[offset.get()] < timestamp) {
|
||||
the_offset = std::max(the_offset, offset);
|
||||
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
|
||||
res_offsets.push_back(offset);
|
||||
}
|
||||
|
||||
if (the_offset == SegOffset(-1)) {
|
||||
continue;
|
||||
}
|
||||
res_offsets.push_back(the_offset);
|
||||
}
|
||||
}
|
||||
return res_offsets;
|
||||
|
@ -448,16 +366,10 @@ SegmentGrowingImpl::search_ids(const BitsetView& bitset, Timestamp timestamp) co
|
|||
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
if (!bitset.test(i)) {
|
||||
SegOffset the_offset(-1);
|
||||
auto offset = SegOffset(i);
|
||||
if (record_.timestamps_[offset.get()] < timestamp) {
|
||||
the_offset = std::max(the_offset, offset);
|
||||
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
|
||||
res_offsets.push_back(offset);
|
||||
}
|
||||
|
||||
if (the_offset == SegOffset(-1)) {
|
||||
continue;
|
||||
}
|
||||
res_offsets.push_back(the_offset);
|
||||
}
|
||||
}
|
||||
return res_offsets;
|
||||
|
@ -466,24 +378,36 @@ SegmentGrowingImpl::search_ids(const BitsetView& bitset, Timestamp timestamp) co
|
|||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
||||
SegmentGrowingImpl::search_ids(const IdArray& id_array, Timestamp timestamp) const {
|
||||
AssertInfo(id_array.has_int_id(), "Id array doesn't have int_id element");
|
||||
auto& src_int_arr = id_array.int_id();
|
||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto ids_size = GetSizeOfIdArray(id_array);
|
||||
std::vector<PkType> pks(ids_size);
|
||||
ParsePksFromIDs(pks, data_type, id_array);
|
||||
|
||||
auto res_id_arr = std::make_unique<IdArray>();
|
||||
auto res_int_id_arr = res_id_arr->mutable_int_id();
|
||||
std::vector<SegOffset> res_offsets;
|
||||
for (auto uid : src_int_arr.data()) {
|
||||
auto [iter_b, iter_e] = uid2offset_.equal_range(uid);
|
||||
SegOffset the_offset(-1);
|
||||
for (auto pk : pks) {
|
||||
auto [iter_b, iter_e] = pk2offset_.equal_range(pk);
|
||||
for (auto iter = iter_b; iter != iter_e; ++iter) {
|
||||
auto offset = SegOffset(iter->second);
|
||||
if (record_.timestamps_[offset.get()] < timestamp) {
|
||||
the_offset = std::max(the_offset, offset);
|
||||
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
|
||||
switch (data_type) {
|
||||
case DataType::INT64: {
|
||||
res_id_arr->mutable_int_id()->add_data(std::get<int64_t>(pk));
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
res_id_arr->mutable_str_id()->add_data(std::get<std::string>(pk));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported type");
|
||||
}
|
||||
}
|
||||
res_offsets.push_back(offset);
|
||||
}
|
||||
// if not found, skip
|
||||
if (the_offset == SegOffset(-1)) {
|
||||
continue;
|
||||
}
|
||||
res_int_id_arr->add_data(uid);
|
||||
res_offsets.push_back(the_offset);
|
||||
}
|
||||
}
|
||||
return {std::move(res_id_arr), std::move(res_offsets)};
|
||||
|
|
|
@ -42,26 +42,19 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
int64_t
|
||||
PreInsert(int64_t size) override;
|
||||
|
||||
Status
|
||||
Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const Timestamp* timestamps,
|
||||
const RowBasedRawData& values) override;
|
||||
|
||||
void
|
||||
Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const Timestamp* timestamps,
|
||||
const ColumnBasedRawData& values) override;
|
||||
const InsertData* insert_data) override;
|
||||
|
||||
int64_t
|
||||
PreDelete(int64_t size) override;
|
||||
|
||||
// TODO: add id into delete log, possibly bitmap
|
||||
Status
|
||||
Delete(int64_t reserverd_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) override;
|
||||
Delete(int64_t reserverd_offset, int64_t size, const IdArray* pks, const Timestamp* timestamps) override;
|
||||
|
||||
int64_t
|
||||
GetMemoryUsageInBytes() const override;
|
||||
|
@ -72,7 +65,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
public:
|
||||
const InsertRecord&
|
||||
get_insert_record() const {
|
||||
return record_;
|
||||
return insert_record_;
|
||||
}
|
||||
|
||||
const IndexingRecord&
|
||||
|
@ -97,14 +90,14 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
|
||||
// return count of index that has index, i.e., [0, num_chunk_index) have built index
|
||||
int64_t
|
||||
num_chunk_index(FieldOffset field_offset) const final {
|
||||
num_chunk_index(FieldId field_id) const final {
|
||||
return indexing_record_.get_finished_ack();
|
||||
}
|
||||
|
||||
// deprecated
|
||||
const knowhere::Index*
|
||||
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final {
|
||||
return indexing_record_.get_field_indexing(field_offset).get_chunk_indexing(chunk_id);
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const final {
|
||||
return indexing_record_.get_field_indexing(field_id).get_chunk_indexing(chunk_id);
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
@ -121,7 +114,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
|
||||
int64_t
|
||||
get_row_count() const override {
|
||||
return record_.ack_responder_.GetAck();
|
||||
return insert_record_.ack_responder_.GetAck();
|
||||
}
|
||||
|
||||
ssize_t
|
||||
|
@ -135,8 +128,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
// for scalar vectors
|
||||
template <typename T>
|
||||
void
|
||||
bulk_subscript_impl(
|
||||
const VectorBase& vec_raw, const int64_t* seg_offsets, int64_t count, T default_value, void* output_raw) const;
|
||||
bulk_subscript_impl(const VectorBase& vec_raw, const int64_t* seg_offsets, int64_t count, void* output_raw) const;
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
|
@ -149,8 +141,8 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
void
|
||||
bulk_subscript(SystemFieldType system_type, const int64_t* seg_offsets, int64_t count, void* output) const override;
|
||||
|
||||
void
|
||||
bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const override;
|
||||
std::unique_ptr<DataArray>
|
||||
bulk_subscript(FieldId field_id, const int64_t* seg_offsets, int64_t count) const override;
|
||||
|
||||
public:
|
||||
friend std::unique_ptr<SegmentGrowing>
|
||||
|
@ -159,7 +151,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
explicit SegmentGrowingImpl(SchemaPtr schema, const SegcoreConfig& segcore_config, int64_t segment_id)
|
||||
: segcore_config_(segcore_config),
|
||||
schema_(std::move(schema)),
|
||||
record_(*schema_, segcore_config.get_chunk_rows()),
|
||||
insert_record_(*schema_, segcore_config.get_chunk_rows()),
|
||||
indexing_record_(*schema_, segcore_config_),
|
||||
id_(segment_id) {
|
||||
}
|
||||
|
@ -189,6 +181,16 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
std::vector<SegOffset>
|
||||
search_ids(const BitsetView& view, Timestamp timestamp) const override;
|
||||
|
||||
bool
|
||||
HasIndex(FieldId field_id) const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
HasFieldData(FieldId field_id) const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::shared_ptr<DeletedRecord::TmpBitmap>
|
||||
get_deleted_bitmap(int64_t del_barrier,
|
||||
|
@ -200,31 +202,29 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
num_chunk() const override;
|
||||
|
||||
SpanBase
|
||||
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const override;
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
void
|
||||
check_search(const query::Plan* plan) const override {
|
||||
Assert(plan);
|
||||
}
|
||||
|
||||
private:
|
||||
void
|
||||
do_insert(int64_t reserved_begin,
|
||||
int64_t size,
|
||||
const idx_t* row_ids,
|
||||
const Timestamp* timestamps,
|
||||
const std::vector<aligned_vector<uint8_t>>& columns_data);
|
||||
|
||||
private:
|
||||
SegcoreConfig segcore_config_;
|
||||
SchemaPtr schema_;
|
||||
|
||||
InsertRecord record_;
|
||||
mutable DeletedRecord deleted_record_;
|
||||
// small indexes for every chunk
|
||||
IndexingRecord indexing_record_;
|
||||
SealedIndexingRecord sealed_indexing_record_;
|
||||
SealedIndexingRecord sealed_indexing_record_; // not used
|
||||
|
||||
tbb::concurrent_unordered_multimap<idx_t, int64_t> uid2offset_;
|
||||
// inserted fields data and row_ids, timestamps
|
||||
InsertRecord insert_record_;
|
||||
|
||||
// deleted pks
|
||||
mutable DeletedRecord deleted_record_;
|
||||
|
||||
// pks to row offset
|
||||
tbb::concurrent_unordered_multimap<PkType, int64_t, std::hash<PkType>> pk2offset_;
|
||||
int64_t id_;
|
||||
|
||||
private:
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "SegmentInterface.h"
|
||||
#include "query/generated/ExecPlanNodeVisitor.h"
|
||||
#include "Utils.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -19,23 +20,21 @@ SegmentInternalInterface::FillPrimaryKeys(const query::Plan* plan, SearchResult&
|
|||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(plan, "empty plan");
|
||||
auto size = results.distances_.size();
|
||||
AssertInfo(results.ids_.size() == size, "Size of result distances is not equal to size of ids");
|
||||
AssertInfo(results.seg_offsets_.size() == size, "Size of result distances is not equal to size of ids");
|
||||
Assert(results.primary_keys_.size() == 0);
|
||||
results.primary_keys_.resize(size);
|
||||
|
||||
auto element_sizeof = sizeof(int64_t);
|
||||
aligned_vector<char> blob(size * element_sizeof);
|
||||
if (plan->schema_.get_is_auto_id()) {
|
||||
bulk_subscript(SystemFieldType::RowId, results.ids_.data(), size, blob.data());
|
||||
} else {
|
||||
auto key_offset_opt = get_schema().get_primary_key_offset();
|
||||
AssertInfo(key_offset_opt.has_value(), "Cannot get primary key offset from schema");
|
||||
auto key_offset = key_offset_opt.value();
|
||||
AssertInfo(get_schema()[key_offset].get_data_type() == DataType::INT64, "Primary key field is not INT64 type");
|
||||
bulk_subscript(key_offset, results.ids_.data(), size, blob.data());
|
||||
}
|
||||
auto pk_field_id_opt = get_schema().get_primary_field_id();
|
||||
AssertInfo(pk_field_id_opt.has_value(), "Cannot get primary key offset from schema");
|
||||
auto pk_field_id = pk_field_id_opt.value();
|
||||
AssertInfo(IsPrimaryKeyDataType(get_schema()[pk_field_id].get_data_type()),
|
||||
"Primary key field is not INT64 or VARCHAR type");
|
||||
auto field_data = bulk_subscript(pk_field_id, results.seg_offsets_.data(), size);
|
||||
results.pk_type_ = engine::DataType(field_data->type());
|
||||
|
||||
memcpy(results.primary_keys_.data(), blob.data(), element_sizeof * size);
|
||||
std::vector<PkType> pks(size);
|
||||
ParsePksFromFieldData(pks, *field_data.get());
|
||||
results.primary_keys_ = std::move(pks);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -43,39 +42,12 @@ SegmentInternalInterface::FillTargetEntry(const query::Plan* plan, SearchResult&
|
|||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(plan, "empty plan");
|
||||
auto size = results.distances_.size();
|
||||
AssertInfo(results.ids_.size() == size, "Size of result distances is not equal to size of ids");
|
||||
|
||||
std::vector<int64_t> element_sizeofs;
|
||||
std::vector<aligned_vector<char>> blobs;
|
||||
|
||||
// fill row_ids
|
||||
{
|
||||
results.ids_data_.resize(size * sizeof(int64_t));
|
||||
if (plan->schema_.get_is_auto_id()) {
|
||||
bulk_subscript(SystemFieldType::RowId, results.ids_.data(), size, results.ids_data_.data());
|
||||
} else {
|
||||
auto key_offset_opt = get_schema().get_primary_key_offset();
|
||||
AssertInfo(key_offset_opt.has_value(), "Cannot get primary key offset from schema");
|
||||
auto key_offset = key_offset_opt.value();
|
||||
AssertInfo(get_schema()[key_offset].get_data_type() == DataType::INT64,
|
||||
"Primary key field is not INT64 type");
|
||||
bulk_subscript(key_offset, results.ids_.data(), size, results.ids_data_.data());
|
||||
}
|
||||
}
|
||||
AssertInfo(results.seg_offsets_.size() == size, "Size of result distances is not equal to size of ids");
|
||||
|
||||
// fill other entries except primary key by result_offset
|
||||
for (auto field_offset : plan->target_entries_) {
|
||||
auto& field_meta = get_schema()[field_offset];
|
||||
auto element_sizeof = field_meta.get_sizeof();
|
||||
aligned_vector<char> blob(size * element_sizeof);
|
||||
bulk_subscript(field_offset, results.ids_.data(), size, blob.data());
|
||||
results.output_fields_data_.emplace_back(std::move(blob));
|
||||
if (field_meta.is_vector()) {
|
||||
results.AddField(field_meta.get_name(), field_meta.get_id(), field_meta.get_data_type(),
|
||||
field_meta.get_dim(), field_meta.get_metric_type());
|
||||
} else {
|
||||
results.AddField(field_meta.get_name(), field_meta.get_id(), field_meta.get_data_type());
|
||||
}
|
||||
for (auto field_id : plan->target_entries_) {
|
||||
auto field_data = bulk_subscript(field_id, results.seg_offsets_.data(), size);
|
||||
results.output_fields_data_[field_id] = std::move(field_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -92,114 +64,6 @@ SegmentInternalInterface::Search(const query::Plan* plan,
|
|||
return results;
|
||||
}
|
||||
|
||||
// Note: this is temporary solution.
|
||||
// modify bulk script implement to make process more clear
|
||||
static std::unique_ptr<ScalarArray>
|
||||
CreateScalarArrayFrom(const void* data_raw, int64_t count, DataType data_type) {
|
||||
auto scalar_array = std::make_unique<ScalarArray>();
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
auto data = reinterpret_cast<const double*>(data_raw);
|
||||
auto obj = scalar_array->mutable_bool_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto data = reinterpret_cast<const int8_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto data = reinterpret_cast<const int16_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
auto data = reinterpret_cast<const int32_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto data = reinterpret_cast<const int64_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_long_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto data = reinterpret_cast<const float*>(data_raw);
|
||||
auto obj = scalar_array->mutable_float_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto data = reinterpret_cast<const double*>(data_raw);
|
||||
auto obj = scalar_array->mutable_double_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
return scalar_array;
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto data_array = std::make_unique<DataArray>();
|
||||
data_array->set_field_id(field_meta.get_id().get());
|
||||
data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));
|
||||
|
||||
if (!datatype_is_vector(data_type)) {
|
||||
auto scalar_array = CreateScalarArrayFrom(data_raw, count, data_type);
|
||||
data_array->set_allocated_scalars(scalar_array.release());
|
||||
} else {
|
||||
auto vector_array = data_array->mutable_vectors();
|
||||
auto dim = field_meta.get_dim();
|
||||
vector_array->set_dim(dim);
|
||||
switch (data_type) {
|
||||
case DataType::VECTOR_FLOAT: {
|
||||
auto length = count * dim;
|
||||
auto data = reinterpret_cast<const float*>(data_raw);
|
||||
auto obj = vector_array->mutable_float_vector();
|
||||
obj->mutable_data()->Add(data, data + length);
|
||||
break;
|
||||
}
|
||||
case DataType::VECTOR_BINARY: {
|
||||
AssertInfo(dim % 8 == 0, "Binary vector field dimension is not a multiple of 8");
|
||||
auto num_bytes = count * dim / 8;
|
||||
auto data = reinterpret_cast<const char*>(data_raw);
|
||||
auto obj = vector_array->mutable_binary_vector();
|
||||
obj->assign(data, num_bytes);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
}
|
||||
return data_array;
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
SegmentInternalInterface::BulkSubScript(FieldOffset field_offset, const SegOffset* seg_offsets, int64_t count) const {
|
||||
if (field_offset.get() >= 0) {
|
||||
auto& field_meta = get_schema()[field_offset];
|
||||
aligned_vector<char> data(field_meta.get_sizeof() * count);
|
||||
bulk_subscript(field_offset, (const int64_t*)seg_offsets, count, data.data());
|
||||
return CreateDataArrayFrom(data.data(), count, field_meta);
|
||||
} else {
|
||||
Assert(field_offset.get() == -1);
|
||||
aligned_vector<char> data(sizeof(int64_t) * count);
|
||||
bulk_subscript(SystemFieldType::RowId, (const int64_t*)seg_offsets, count, data.data());
|
||||
return CreateDataArrayFrom(data.data(), count, FieldMeta::RowIdMeta);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<proto::segcore::RetrieveResults>
|
||||
SegmentInternalInterface::Retrieve(const query::RetrievePlan* plan, Timestamp timestamp) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
|
@ -212,16 +76,33 @@ SegmentInternalInterface::Retrieve(const query::RetrievePlan* plan, Timestamp ti
|
|||
|
||||
auto fields_data = results->mutable_fields_data();
|
||||
auto ids = results->mutable_ids();
|
||||
auto pk_offset = plan->schema_.get_primary_key_offset();
|
||||
for (auto field_offset : plan->field_offsets_) {
|
||||
auto col = BulkSubScript(field_offset, (SegOffset*)retrieve_results.result_offsets_.data(),
|
||||
retrieve_results.result_offsets_.size());
|
||||
auto pk_field_id = plan->schema_.get_primary_field_id();
|
||||
for (auto field_id : plan->field_ids_) {
|
||||
auto& field_mata = plan->schema_[field_id];
|
||||
|
||||
auto col =
|
||||
bulk_subscript(field_id, retrieve_results.result_offsets_.data(), retrieve_results.result_offsets_.size());
|
||||
auto col_data = col.release();
|
||||
fields_data->AddAllocated(col_data);
|
||||
if (pk_offset.has_value() && pk_offset.value() == field_offset) {
|
||||
auto int_ids = ids->mutable_int_id();
|
||||
auto src_data = col_data->scalars().long_data();
|
||||
int_ids->mutable_data()->Add(src_data.data().begin(), src_data.data().end());
|
||||
if (pk_field_id.has_value() && pk_field_id.value() == field_id) {
|
||||
switch (field_mata.get_data_type()) {
|
||||
case DataType::INT64: {
|
||||
auto int_ids = ids->mutable_int_id();
|
||||
auto src_data = col_data->scalars().long_data();
|
||||
int_ids->mutable_data()->Add(src_data.data().begin(), src_data.data().end());
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto str_ids = ids->mutable_str_id();
|
||||
auto src_data = col_data->scalars().string_data();
|
||||
for (auto i = 0; i < src_data.data_size(); ++i)
|
||||
*(str_ids->mutable_data()->Add()) = src_data.data(i);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
|
|
|
@ -51,6 +51,7 @@ class SegmentInterface {
|
|||
virtual std::unique_ptr<proto::segcore::RetrieveResults>
|
||||
Retrieve(const query::RetrievePlan* Plan, Timestamp timestamp) const = 0;
|
||||
|
||||
// TODO: memory use is not correct when load string or load string index
|
||||
virtual int64_t
|
||||
GetMemoryUsageInBytes() const = 0;
|
||||
|
||||
|
@ -64,7 +65,7 @@ class SegmentInterface {
|
|||
PreDelete(int64_t size) = 0;
|
||||
|
||||
virtual Status
|
||||
Delete(int64_t reserved_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) = 0;
|
||||
Delete(int64_t reserved_offset, int64_t size, const IdArray* pks, const Timestamp* timestamps) = 0;
|
||||
};
|
||||
|
||||
// internal API for DSL calculation
|
||||
|
@ -73,16 +74,16 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
public:
|
||||
template <typename T>
|
||||
Span<T>
|
||||
chunk_data(FieldOffset field_offset, int64_t chunk_id) const {
|
||||
return static_cast<Span<T>>(chunk_data_impl(field_offset, chunk_id));
|
||||
chunk_data(FieldId field_id, int64_t chunk_id) const {
|
||||
return static_cast<Span<T>>(chunk_data_impl(field_id, chunk_id));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const scalar::ScalarIndex<T>&
|
||||
chunk_scalar_index(FieldOffset field_offset, int64_t chunk_id) const {
|
||||
chunk_scalar_index(FieldId field_id, int64_t chunk_id) const {
|
||||
static_assert(IsScalar<T>);
|
||||
using IndexType = scalar::ScalarIndex<T>;
|
||||
auto base_ptr = chunk_index_impl(field_offset, chunk_id);
|
||||
auto base_ptr = chunk_index_impl(field_id, chunk_id);
|
||||
auto ptr = dynamic_cast<const IndexType*>(base_ptr);
|
||||
AssertInfo(ptr, "entry mismatch");
|
||||
return *ptr;
|
||||
|
@ -102,6 +103,12 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
std::unique_ptr<proto::segcore::RetrieveResults>
|
||||
Retrieve(const query::RetrievePlan* plan, Timestamp timestamp) const override;
|
||||
|
||||
virtual bool
|
||||
HasIndex(FieldId field_id) const = 0;
|
||||
|
||||
virtual bool
|
||||
HasFieldData(FieldId field_id) const = 0;
|
||||
|
||||
virtual std::string
|
||||
debug() const = 0;
|
||||
|
||||
|
@ -120,7 +127,7 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
|
||||
// count of chunk that has index available
|
||||
virtual int64_t
|
||||
num_chunk_index(FieldOffset field_offset) const = 0;
|
||||
num_chunk_index(FieldId field_id) const = 0;
|
||||
|
||||
virtual void
|
||||
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const = 0;
|
||||
|
@ -148,11 +155,11 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
protected:
|
||||
// internal API: return chunk_data in span
|
||||
virtual SpanBase
|
||||
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||
|
||||
// internal API: return chunk_index in span, support scalar index only
|
||||
virtual const knowhere::Index*
|
||||
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||
|
||||
// TODO remove system fields
|
||||
// calculate output[i] = Vec[seg_offsets[i]}, where Vec binds to system_type
|
||||
|
@ -160,13 +167,8 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
bulk_subscript(SystemFieldType system_type, const int64_t* seg_offsets, int64_t count, void* output) const = 0;
|
||||
|
||||
// calculate output[i] = Vec[seg_offsets[i]}, where Vec binds to field_offset
|
||||
virtual void
|
||||
bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const = 0;
|
||||
|
||||
// TODO: special hack: FieldOffset == -1 -> RowId.
|
||||
// TODO: remove this hack when transfer is done
|
||||
virtual std::unique_ptr<DataArray>
|
||||
BulkSubScript(FieldOffset field_offset, const SegOffset* seg_offsets, int64_t count) const;
|
||||
bulk_subscript(FieldId field_id, const int64_t* seg_offsets, int64_t count) const = 0;
|
||||
|
||||
virtual void
|
||||
check_search(const query::Plan* plan) const = 0;
|
||||
|
@ -175,10 +177,4 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
mutable std::shared_mutex mutex_;
|
||||
};
|
||||
|
||||
static std::unique_ptr<ScalarArray>
|
||||
CreateScalarArrayFrom(const void* data_raw, int64_t count, DataType data_type);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -34,10 +34,6 @@ class SegmentSealed : public SegmentInternalInterface {
|
|||
DropIndex(const FieldId field_id) = 0;
|
||||
virtual void
|
||||
DropFieldData(const FieldId field_id) = 0;
|
||||
virtual bool
|
||||
HasIndex(FieldId field_id) const = 0;
|
||||
virtual bool
|
||||
HasFieldData(FieldId field_id) const = 0;
|
||||
};
|
||||
|
||||
using SegmentSealedPtr = std::unique_ptr<SegmentSealed>;
|
||||
|
|
|
@ -14,17 +14,22 @@
|
|||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "query/ScalarIndex.h"
|
||||
#include "Utils.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
static inline void
|
||||
set_bit(BitsetType& bitset, FieldOffset field_offset, bool flag = true) {
|
||||
bitset[field_offset.get()] = flag;
|
||||
set_bit(BitsetType& bitset, FieldId field_id, bool flag = true) {
|
||||
auto pos = field_id.get() - START_USER_FIELDID;
|
||||
AssertInfo(pos >= 0, "invalid field id");
|
||||
bitset[pos] = flag;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
get_bit(const BitsetType& bitset, FieldOffset field_offset) {
|
||||
return bitset[field_offset.get()];
|
||||
get_bit(const BitsetType& bitset, FieldId field_id) {
|
||||
auto pos = field_id.get() - START_USER_FIELDID;
|
||||
AssertInfo(pos >= 0, "invalid field id");
|
||||
return bitset[pos];
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
@ -33,136 +38,168 @@ SegmentSealedImpl::PreDelete(int64_t size) {
|
|||
return reserved_begin;
|
||||
}
|
||||
|
||||
void
|
||||
print(const std::map<std::string, std::string>& m) {
|
||||
for (const auto& [k, v] : m) {
|
||||
std::cout << k << ": " << v << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
print(const LoadIndexInfo& info) {
|
||||
std::cout << "------------------LoadIndexInfo----------------------" << std::endl;
|
||||
std::cout << "field_id: " << info.field_id << std::endl;
|
||||
std::cout << "field_type: " << info.field_type << std::endl;
|
||||
std::cout << "index_params:" << std::endl;
|
||||
print(info.index_params);
|
||||
std::cout << "------------------LoadIndexInfo----------------------" << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
print(const LoadFieldDataInfo& info) {
|
||||
std::cout << "------------------LoadFieldDataInfo----------------------" << std::endl;
|
||||
std::cout << "field_id: " << info.field_id << std::endl;
|
||||
std::cout << "------------------LoadFieldDataInfo----------------------" << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) {
|
||||
// print(info);
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
auto field_offset = schema_->get_offset(field_id);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
if (field_meta.is_vector()) {
|
||||
LoadVecIndex(info);
|
||||
} else {
|
||||
LoadScalarIndex(info);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
|
||||
auto index = std::dynamic_pointer_cast<knowhere::VecIndex>(info.index);
|
||||
AssertInfo(info.index_params.count("metric_type"), "Can't get metric_type in index_params");
|
||||
auto metric_type_str = info.index_params.at("metric_type");
|
||||
auto row_count = info.index->Count();
|
||||
auto row_count = index->Count();
|
||||
AssertInfo(row_count > 0, "Index count is 0");
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
AssertInfo(!get_bit(vecindex_ready_bitset_, field_offset),
|
||||
"Can't get bitset element at " + std::to_string(field_offset.get()));
|
||||
AssertInfo(!get_bit(vecindex_ready_bitset_, field_id),
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
if (row_count_opt_.has_value()) {
|
||||
AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns");
|
||||
} else {
|
||||
row_count_opt_ = row_count;
|
||||
}
|
||||
AssertInfo(!vecindexs_.is_ready(field_offset), "vec index is not ready");
|
||||
vecindexs_.append_field_indexing(field_offset, GetMetricType(metric_type_str), info.index);
|
||||
AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready");
|
||||
vector_indexings_.append_field_indexing(field_id, GetMetricType(metric_type_str), index);
|
||||
|
||||
set_bit(vecindex_ready_bitset_, field_offset, true);
|
||||
set_bit(vecindex_ready_bitset_, field_id, true);
|
||||
lck.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
|
||||
auto index = std::dynamic_pointer_cast<scalar::IndexBase>(info.index);
|
||||
auto row_count = index->Count();
|
||||
AssertInfo(row_count > 0, "Index count is 0");
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
|
||||
if (row_count_opt_.has_value()) {
|
||||
AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns");
|
||||
} else {
|
||||
row_count_opt_ = row_count;
|
||||
}
|
||||
|
||||
scalar_indexings_[field_id] = std::move(index);
|
||||
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
lck.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
|
||||
// print(info);
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
AssertInfo(info.row_count > 0, "The row count of field data is 0");
|
||||
auto field_id = FieldId(info.field_id);
|
||||
AssertInfo(info.blob, "Field info blob is null");
|
||||
auto create_index = [](const int64_t* data, int64_t size) {
|
||||
AssertInfo(size, "Vector data size is 0 when create index");
|
||||
auto pk_index = std::make_unique<ScalarIndexVector>();
|
||||
pk_index->append_data(data, size, SegOffset(0));
|
||||
pk_index->build();
|
||||
return pk_index;
|
||||
};
|
||||
AssertInfo(info.field_data != nullptr, "Field info blob is null");
|
||||
auto size = info.row_count;
|
||||
|
||||
if (SystemProperty::Instance().IsSystem(field_id)) {
|
||||
auto system_field_type = SystemProperty::Instance().GetSystemFieldType(field_id);
|
||||
if (system_field_type == SystemFieldType::Timestamp) {
|
||||
auto src_ptr = reinterpret_cast<const Timestamp*>(info.blob);
|
||||
aligned_vector<Timestamp> vec_data(info.row_count);
|
||||
std::copy_n(src_ptr, info.row_count, vec_data.data());
|
||||
auto timestamps = reinterpret_cast<const Timestamp*>(info.field_data->scalars().long_data().data().data());
|
||||
|
||||
auto size = info.row_count;
|
||||
|
||||
// TODO: load from outside
|
||||
TimestampIndex index;
|
||||
auto min_slice_length = size < 4096 ? 1 : 4096;
|
||||
auto meta = GenerateFakeSlices(src_ptr, size, min_slice_length);
|
||||
auto meta = GenerateFakeSlices(timestamps, size, min_slice_length);
|
||||
index.set_length_meta(std::move(meta));
|
||||
index.build_with(src_ptr, size);
|
||||
index.build_with(timestamps, size);
|
||||
|
||||
// use special index
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(info.row_count);
|
||||
AssertInfo(timestamps_.empty(), "already exists");
|
||||
timestamps_ = std::move(vec_data);
|
||||
timestamp_index_ = std::move(index);
|
||||
|
||||
AssertInfo(insert_record_.timestamps_.empty(), "already exists");
|
||||
insert_record_.timestamps_.fill_chunk_data(timestamps, size);
|
||||
insert_record_.timestamp_index_ = std::move(index);
|
||||
AssertInfo(insert_record_.timestamps_.num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
} else {
|
||||
AssertInfo(system_field_type == SystemFieldType::RowId, "System field type of id column is not RowId");
|
||||
auto src_ptr = reinterpret_cast<const idx_t*>(info.blob);
|
||||
|
||||
// prepare data
|
||||
aligned_vector<idx_t> vec_data(info.row_count);
|
||||
std::copy_n(src_ptr, info.row_count, vec_data.data());
|
||||
|
||||
std::unique_ptr<ScalarIndexBase> pk_index_;
|
||||
// fix unintentional index update
|
||||
if (schema_->get_is_auto_id()) {
|
||||
pk_index_ = create_index(vec_data.data(), vec_data.size());
|
||||
}
|
||||
|
||||
auto row_ids = reinterpret_cast<const idx_t*>(info.field_data->scalars().long_data().data().data());
|
||||
// write data under lock
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(info.row_count);
|
||||
AssertInfo(row_ids_.empty(), "already exists");
|
||||
row_ids_ = std::move(vec_data);
|
||||
|
||||
if (schema_->get_is_auto_id()) {
|
||||
primary_key_index_ = std::move(pk_index_);
|
||||
}
|
||||
AssertInfo(insert_record_.row_ids_.empty(), "already exists");
|
||||
insert_record_.row_ids_.fill_chunk_data(row_ids, size);
|
||||
AssertInfo(insert_record_.row_ids_.num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
}
|
||||
++system_ready_count_;
|
||||
} else {
|
||||
// prepare data
|
||||
auto field_offset = schema_->get_offset(field_id);
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
// Assert(!field_meta.is_vector());
|
||||
auto element_sizeof = field_meta.get_sizeof();
|
||||
auto span = SpanBase(info.blob, info.row_count, element_sizeof);
|
||||
auto length_in_bytes = element_sizeof * info.row_count;
|
||||
aligned_vector<char> vec_data(length_in_bytes);
|
||||
memcpy(vec_data.data(), info.blob, length_in_bytes);
|
||||
|
||||
// generate scalar index
|
||||
std::unique_ptr<knowhere::Index> index;
|
||||
if (!field_meta.is_vector()) {
|
||||
index = query::generate_scalar_index(span, field_meta.get_data_type());
|
||||
}
|
||||
|
||||
std::unique_ptr<ScalarIndexBase> pk_index_;
|
||||
if (schema_->get_primary_key_offset() == field_offset) {
|
||||
pk_index_ = create_index((const int64_t*)vec_data.data(), info.row_count);
|
||||
}
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
auto data_type = field_meta.get_data_type();
|
||||
AssertInfo(data_type == engine::DataType(info.field_data->type()),
|
||||
"field type of load data is inconsistent with the schema");
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data->empty(), "already exists");
|
||||
|
||||
// write data under lock
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(info.row_count);
|
||||
AssertInfo(fields_data_[field_offset.get()].empty(), "field data already exists");
|
||||
|
||||
// insert data to insertRecord
|
||||
field_data->fill_chunk_data(size, info.field_data, field_meta);
|
||||
AssertInfo(field_data->num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
|
||||
// set pks to offset
|
||||
if (schema_->get_primary_field_id() == field_id) {
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
AssertInfo(pk2offset_.empty(), "already exists");
|
||||
std::vector<PkType> pks(size);
|
||||
ParsePksFromFieldData(pks, *info.field_data);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
pk2offset_.insert(std::make_pair(pks[i], i));
|
||||
}
|
||||
}
|
||||
|
||||
if (field_meta.is_vector()) {
|
||||
AssertInfo(!vecindexs_.is_ready(field_offset), "field data can't be loaded when indexing exists");
|
||||
fields_data_[field_offset.get()] = std::move(vec_data);
|
||||
} else {
|
||||
AssertInfo(!scalar_indexings_[field_offset.get()], "scalar indexing not cleared");
|
||||
fields_data_[field_offset.get()] = std::move(vec_data);
|
||||
scalar_indexings_[field_offset.get()] = std::move(index);
|
||||
AssertInfo(!vector_indexings_.is_ready(field_id), "field data can't be loaded when indexing exists");
|
||||
} else if (!scalar_indexings_.count(field_id)) {
|
||||
// generate scalar index
|
||||
std::unique_ptr<knowhere::Index> index;
|
||||
index = query::generate_scalar_index(field_data->get_span_base(0), data_type);
|
||||
scalar_indexings_[field_id] = std::move(index);
|
||||
}
|
||||
|
||||
if (schema_->get_primary_key_offset() == field_offset) {
|
||||
primary_key_index_ = std::move(pk_index_);
|
||||
}
|
||||
|
||||
set_bit(field_data_ready_bitset_, field_offset, true);
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
}
|
||||
update_row_count(info.row_count);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -170,19 +207,25 @@ SegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
|
|||
AssertInfo(info.row_count > 0, "The row count of deleted record is 0");
|
||||
AssertInfo(info.primary_keys, "Deleted primary keys is null");
|
||||
AssertInfo(info.timestamps, "Deleted timestamps is null");
|
||||
auto primary_keys = reinterpret_cast<const idx_t*>(info.primary_keys);
|
||||
auto timestamps = reinterpret_cast<const Timestamp*>(info.timestamps);
|
||||
// step 1: get pks and timestamps
|
||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
int64_t size = info.row_count;
|
||||
std::vector<PkType> pks(size);
|
||||
ParsePksFromIDs(pks, field_meta.get_data_type(), *info.primary_keys);
|
||||
auto timestamps = reinterpret_cast<const Timestamp*>(info.timestamps);
|
||||
|
||||
deleted_record_.uids_.set_data(0, primary_keys, size);
|
||||
deleted_record_.timestamps_.set_data(0, timestamps, size);
|
||||
// step 2: fill pks and timestamps
|
||||
deleted_record_.pks_.set_data_raw(0, pks.data(), size);
|
||||
deleted_record_.timestamps_.set_data_raw(0, timestamps, size);
|
||||
deleted_record_.ack_responder_.AddSegment(0, size);
|
||||
deleted_record_.reserved.fetch_add(size);
|
||||
deleted_record_.record_size_ = size;
|
||||
}
|
||||
|
||||
int64_t
|
||||
SegmentSealedImpl::num_chunk_index(FieldOffset field_offset) const {
|
||||
SegmentSealedImpl::num_chunk_index(FieldId field_id) const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -197,22 +240,23 @@ SegmentSealedImpl::size_per_chunk() const {
|
|||
}
|
||||
|
||||
SpanBase
|
||||
SegmentSealedImpl::chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const {
|
||||
SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_offset),
|
||||
"Can't get bitset element at " + std::to_string(field_offset.get()));
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
auto element_sizeof = field_meta.get_sizeof();
|
||||
SpanBase base(fields_data_[field_offset.get()].data(), row_count_opt_.value(), element_sizeof);
|
||||
return base;
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data->num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
return field_data->get_span_base(0);
|
||||
}
|
||||
|
||||
const knowhere::Index*
|
||||
SegmentSealedImpl::chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const {
|
||||
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
AssertInfo(chunk_id == 0, "Chunk_id is not equal to 0");
|
||||
// TODO: support scalar index
|
||||
auto ptr = scalar_indexings_[field_offset.get()].get();
|
||||
AssertInfo(ptr, "Scalar index of " + std::to_string(field_offset.get()) + " is null");
|
||||
auto ptr = scalar_indexings_.at(field_id).get();
|
||||
AssertInfo(ptr, "Scalar index of " + std::to_string(field_id.get()) + " is null");
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
@ -245,17 +289,25 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
|
|||
current->del_barrier = del_barrier;
|
||||
auto bitmap = current->bitmap_ptr;
|
||||
// Sealed segment only has one chunk with chunk_id 0
|
||||
auto span = deleted_record_.uids_.get_span_base(0);
|
||||
auto uids_ptr = reinterpret_cast<const idx_t*>(span.data());
|
||||
auto delete_pks_data = deleted_record_.pks_.get_chunk_data(0);
|
||||
auto delete_pks = reinterpret_cast<const PkType*>(delete_pks_data);
|
||||
auto del_size = deleted_record_.reserved.load();
|
||||
std::vector<idx_t> ids(del_size);
|
||||
std::copy_n(uids_ptr, del_size, ids.data());
|
||||
|
||||
auto [uids, seg_offsets] = primary_key_index_->do_search_ids(ids);
|
||||
for (int i = 0; i < uids.size(); ++i) {
|
||||
std::vector<SegOffset> seg_offsets;
|
||||
std::vector<PkType> pks;
|
||||
for (int i = 0; i < del_size; ++i) {
|
||||
auto [iter_b, iter_e] = pk2offset_.equal_range(delete_pks[i]);
|
||||
for (auto iter = iter_b; iter != iter_e; ++iter) {
|
||||
auto [entry_pk, entry_offset] = *iter;
|
||||
pks.emplace_back(entry_pk);
|
||||
seg_offsets.emplace_back(SegOffset(entry_offset));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < pks.size(); ++i) {
|
||||
bitmap->set(seg_offsets[i].get());
|
||||
}
|
||||
if (uids.size() == 0 || seg_offsets.size() == 0) {
|
||||
if (pks.size() == 0 || seg_offsets.size() == 0) {
|
||||
return current;
|
||||
}
|
||||
|
||||
|
@ -305,16 +357,16 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
|
|||
const BitsetView& bitset,
|
||||
SearchResult& output) const {
|
||||
AssertInfo(is_system_field_ready(), "System field is not ready");
|
||||
auto field_offset = search_info.field_offset_;
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
auto field_id = search_info.field_id_;
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
AssertInfo(field_meta.is_vector(), "The meta type of vector field is not vector type");
|
||||
if (get_bit(vecindex_ready_bitset_, field_offset)) {
|
||||
AssertInfo(vecindexs_.is_ready(field_offset),
|
||||
"vector indexes isn't ready for field " + std::to_string(field_offset.get()));
|
||||
query::SearchOnSealed(*schema_, vecindexs_, search_info, query_data, query_count, bitset, output, id_);
|
||||
if (get_bit(vecindex_ready_bitset_, field_id)) {
|
||||
AssertInfo(vector_indexings_.is_ready(field_id),
|
||||
"vector indexes isn't ready for field " + std::to_string(field_id.get()));
|
||||
query::SearchOnSealed(*schema_, vector_indexings_, search_info, query_data, query_count, bitset, output, id_);
|
||||
return;
|
||||
} else if (!get_bit(field_data_ready_bitset_, field_offset)) {
|
||||
} else if (!get_bit(field_data_ready_bitset_, field_id)) {
|
||||
PanicInfo("Field Data is not loaded");
|
||||
}
|
||||
|
||||
|
@ -327,11 +379,13 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
|
|||
dataset.dim = field_meta.get_dim();
|
||||
dataset.round_decimal = search_info.round_decimal_;
|
||||
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_offset),
|
||||
"Can't get bitset element at " + std::to_string(field_offset.get()));
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
AssertInfo(row_count_opt_.has_value(), "Can't get row count value");
|
||||
auto row_count = row_count_opt_.value();
|
||||
auto chunk_data = fields_data_[field_offset.get()].data();
|
||||
auto vec_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(vec_data->num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
auto chunk_data = vec_data->get_chunk_data(0);
|
||||
|
||||
auto sub_qr = [&] {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
|
@ -343,7 +397,7 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
|
|||
|
||||
SearchResult results;
|
||||
results.distances_ = std::move(sub_qr.mutable_distances());
|
||||
results.ids_ = std::move(sub_qr.mutable_ids());
|
||||
results.seg_offsets_ = std::move(sub_qr.mutable_seg_offsets());
|
||||
results.topk_ = dataset.topk;
|
||||
results.num_queries_ = dataset.num_queries;
|
||||
|
||||
|
@ -358,21 +412,17 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) {
|
|||
std::unique_lock lck(mutex_);
|
||||
--system_ready_count_;
|
||||
if (system_field_type == SystemFieldType::RowId) {
|
||||
auto row_ids = std::move(row_ids_);
|
||||
insert_record_.row_ids_.clear();
|
||||
} else if (system_field_type == SystemFieldType::Timestamp) {
|
||||
auto ts = std::move(timestamps_);
|
||||
insert_record_.timestamps_.clear();
|
||||
}
|
||||
lck.unlock();
|
||||
} else {
|
||||
auto field_offset = schema_->get_offset(field_id);
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
std::unique_lock lck(mutex_);
|
||||
set_bit(field_data_ready_bitset_, field_offset, false);
|
||||
auto vec = std::move(fields_data_[field_offset.get()]);
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
insert_record_.drop_field_data(field_id);
|
||||
lck.unlock();
|
||||
|
||||
vec.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -380,14 +430,13 @@ void
|
|||
SegmentSealedImpl::DropIndex(const FieldId field_id) {
|
||||
AssertInfo(!SystemProperty::Instance().IsSystem(field_id),
|
||||
"Field id:" + std::to_string(field_id.get()) + " isn't one of system type when drop index");
|
||||
auto field_offset = schema_->get_offset(field_id);
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
AssertInfo(field_meta.is_vector(),
|
||||
"Field meta of offset:" + std::to_string(field_offset.get()) + " is not vector type");
|
||||
"Field meta of offset:" + std::to_string(field_id.get()) + " is not vector type");
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
vecindexs_.drop_field_indexing(field_offset);
|
||||
set_bit(vecindex_ready_bitset_, field_offset, false);
|
||||
vector_indexings_.drop_field_indexing(field_id);
|
||||
set_bit(vecindex_ready_bitset_, field_id, false);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -406,15 +455,16 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const {
|
|||
auto absent_fields = request_fields - field_ready_bitset;
|
||||
|
||||
if (absent_fields.any()) {
|
||||
auto field_offset = FieldOffset(absent_fields.find_first());
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
auto field_id = FieldId(absent_fields.find_first() + START_USER_FIELDID);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
PanicInfo("User Field(" + field_meta.get_name().get() + ") is not loaded");
|
||||
}
|
||||
}
|
||||
|
||||
SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id)
|
||||
: schema_(schema),
|
||||
fields_data_(schema->size()),
|
||||
// fields_data_(schema->size()),
|
||||
insert_record_(*schema, MAX_ROW_COUNT),
|
||||
field_data_ready_bitset_(schema->size()),
|
||||
vecindex_ready_bitset_(schema->size()),
|
||||
scalar_indexings_(schema->size()),
|
||||
|
@ -428,7 +478,9 @@ SegmentSealedImpl::bulk_subscript(SystemFieldType system_type,
|
|||
void* output) const {
|
||||
AssertInfo(is_system_field_ready(), "System field isn't ready when do bulk_insert");
|
||||
AssertInfo(system_type == SystemFieldType::RowId, "System field type of id column is not RowId");
|
||||
bulk_subscript_impl<int64_t>(row_ids_.data(), seg_offsets, count, output);
|
||||
AssertInfo(insert_record_.row_ids_.num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
auto field_data = insert_record_.row_ids_.get_chunk_data(0);
|
||||
bulk_subscript_impl<int64_t>(field_data, seg_offsets, count, output);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -439,7 +491,9 @@ SegmentSealedImpl::bulk_subscript_impl(const void* src_raw, const int64_t* seg_o
|
|||
auto dst = reinterpret_cast<T*>(dst_raw);
|
||||
for (int64_t i = 0; i < count; ++i) {
|
||||
auto offset = seg_offsets[i];
|
||||
dst[i] = (offset == INVALID_SEG_OFFSET ? INVALID_ID : src[offset]);
|
||||
if (offset != INVALID_SEG_OFFSET) {
|
||||
dst[i] = src[offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -458,51 +512,115 @@ SegmentSealedImpl::bulk_subscript_impl(
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::bulk_subscript(FieldOffset field_offset,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
void* output) const {
|
||||
// Assert(get_bit(field_data_ready_bitset_, field_offset));
|
||||
if (!get_bit(field_data_ready_bitset_, field_offset)) {
|
||||
return;
|
||||
}
|
||||
auto& field_meta = schema_->operator[](field_offset);
|
||||
auto src_vec = fields_data_[field_offset.get()].data();
|
||||
std::unique_ptr<DataArray>
|
||||
SegmentSealedImpl::fill_with_empty(FieldId field_id, int64_t count) const {
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
bulk_subscript_impl<bool>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<bool> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
bulk_subscript_impl<int8_t>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<int8_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
bulk_subscript_impl<int16_t>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<int16_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
bulk_subscript_impl<int32_t>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<int32_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
bulk_subscript_impl<int64_t>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<int64_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
bulk_subscript_impl<float>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<float> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
bulk_subscript_impl<double>(src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
FixedVector<double> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
FixedVector<std::string> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
case DataType::VECTOR_FLOAT:
|
||||
case DataType::VECTOR_BINARY: {
|
||||
bulk_subscript_impl(field_meta.get_sizeof(), src_vec, seg_offsets, count, output);
|
||||
break;
|
||||
aligned_vector<char> output(field_meta.get_sizeof() * count);
|
||||
return CreateVectorDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
SegmentSealedImpl::bulk_subscript(FieldId field_id, const int64_t* seg_offsets, int64_t count) const {
|
||||
if (!HasFieldData(field_id)) {
|
||||
return fill_with_empty(field_id, count);
|
||||
}
|
||||
|
||||
Assert(get_bit(field_data_ready_bitset_, field_id));
|
||||
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data->num_chunk() == 1, std::string("num chunk not equal to 1 for sealed segment, num_chunk: ") +
|
||||
std::to_string(field_data->num_chunk()));
|
||||
auto src_vec = field_data->get_chunk_data(0);
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
FixedVector<bool> output(count);
|
||||
bulk_subscript_impl<bool>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
FixedVector<int8_t> output(count);
|
||||
bulk_subscript_impl<int8_t>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
FixedVector<int16_t> output(count);
|
||||
bulk_subscript_impl<int16_t>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
FixedVector<int32_t> output(count);
|
||||
bulk_subscript_impl<int32_t>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
FixedVector<int64_t> output(count);
|
||||
bulk_subscript_impl<int64_t>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
FixedVector<float> output(count);
|
||||
bulk_subscript_impl<float>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
FixedVector<double> output(count);
|
||||
bulk_subscript_impl<double>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
FixedVector<std::string> output(count);
|
||||
bulk_subscript_impl<std::string>(src_vec, seg_offsets, count, output.data());
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
case DataType::VECTOR_FLOAT:
|
||||
case DataType::VECTOR_BINARY: {
|
||||
aligned_vector<char> output(field_meta.get_sizeof() * count);
|
||||
bulk_subscript_impl(field_meta.get_sizeof(), src_vec, seg_offsets, count, output.data());
|
||||
return CreateVectorDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
default: {
|
||||
|
@ -516,8 +634,7 @@ SegmentSealedImpl::HasIndex(FieldId field_id) const {
|
|||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(!SystemProperty::Instance().IsSystem(field_id),
|
||||
"Field id:" + std::to_string(field_id.get()) + " isn't one of system type when drop index");
|
||||
auto field_offset = schema_->get_offset(field_id);
|
||||
return get_bit(vecindex_ready_bitset_, field_offset);
|
||||
return get_bit(vecindex_ready_bitset_, field_id);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -526,40 +643,73 @@ SegmentSealedImpl::HasFieldData(FieldId field_id) const {
|
|||
if (SystemProperty::Instance().IsSystem(field_id)) {
|
||||
return is_system_field_ready();
|
||||
} else {
|
||||
auto field_offset = schema_->get_offset(field_id);
|
||||
return get_bit(field_data_ready_bitset_, field_offset);
|
||||
return get_bit(field_data_ready_bitset_, field_id);
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
||||
SegmentSealedImpl::search_ids(const IdArray& id_array, Timestamp timestamp) const {
|
||||
AssertInfo(id_array.has_int_id(), "string ids are not implemented");
|
||||
auto arr = id_array.int_id();
|
||||
AssertInfo(primary_key_index_, "Primary key index is null");
|
||||
return primary_key_index_->do_search_ids(id_array);
|
||||
AssertInfo(id_array.has_int_id(), "Id array doesn't have int_id element");
|
||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto ids_size = GetSizeOfIdArray(id_array);
|
||||
std::vector<PkType> pks(ids_size);
|
||||
ParsePksFromIDs(pks, data_type, id_array);
|
||||
|
||||
auto res_id_arr = std::make_unique<IdArray>();
|
||||
std::vector<SegOffset> res_offsets;
|
||||
for (auto pk : pks) {
|
||||
auto [iter_b, iter_e] = pk2offset_.equal_range(pk);
|
||||
for (auto iter = iter_b; iter != iter_e; ++iter) {
|
||||
auto offset = SegOffset(iter->second);
|
||||
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
|
||||
switch (data_type) {
|
||||
case DataType::INT64: {
|
||||
res_id_arr->mutable_int_id()->add_data(std::get<int64_t>(pk));
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
res_id_arr->mutable_str_id()->add_data(std::get<std::string>(pk));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported type");
|
||||
}
|
||||
}
|
||||
res_offsets.push_back(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
return {std::move(res_id_arr), std::move(res_offsets)};
|
||||
}
|
||||
|
||||
Status
|
||||
SegmentSealedImpl::Delete(int64_t reserved_offset,
|
||||
int64_t row_count,
|
||||
const int64_t* uids_raw,
|
||||
const Timestamp* timestamps_raw) {
|
||||
std::vector<std::tuple<Timestamp, idx_t>> ordering(row_count);
|
||||
for (int i = 0; i < row_count; i++) {
|
||||
ordering[i] = std::make_tuple(timestamps_raw[i], uids_raw[i]);
|
||||
SegmentSealedImpl::Delete(int64_t reserved_offset, int64_t size, const IdArray* ids, const Timestamp* timestamps_raw) {
|
||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(field_id.get() != -1, "Primary key is -1");
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
std::vector<PkType> pks(size);
|
||||
ParsePksFromIDs(pks, field_meta.get_data_type(), *ids);
|
||||
|
||||
// step 1: sort timestamp
|
||||
std::vector<std::tuple<Timestamp, PkType>> ordering(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
ordering[i] = std::make_tuple(timestamps_raw[i], pks[i]);
|
||||
}
|
||||
std::sort(ordering.begin(), ordering.end());
|
||||
std::vector<idx_t> src_uids(row_count);
|
||||
std::vector<Timestamp> src_timestamps(row_count);
|
||||
std::vector<PkType> sort_pks(size);
|
||||
std::vector<Timestamp> sort_timestamps(size);
|
||||
|
||||
for (int i = 0; i < row_count; i++) {
|
||||
auto [t, uid] = ordering[i];
|
||||
src_timestamps[i] = t;
|
||||
src_uids[i] = uid;
|
||||
for (int i = 0; i < size; i++) {
|
||||
auto [t, pk] = ordering[i];
|
||||
sort_timestamps[i] = t;
|
||||
sort_pks[i] = pk;
|
||||
}
|
||||
deleted_record_.timestamps_.set_data(reserved_offset, src_timestamps.data(), row_count);
|
||||
deleted_record_.uids_.set_data(reserved_offset, src_uids.data(), row_count);
|
||||
deleted_record_.ack_responder_.AddSegment(reserved_offset, row_count);
|
||||
deleted_record_.timestamps_.set_data_raw(reserved_offset, sort_timestamps.data(), size);
|
||||
deleted_record_.pks_.set_data_raw(reserved_offset, sort_pks.data(), size);
|
||||
deleted_record_.ack_responder_.AddSegment(reserved_offset, size);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -568,7 +718,10 @@ SegmentSealedImpl::search_ids(const BitsetType& bitset, Timestamp timestamp) con
|
|||
std::vector<SegOffset> dst_offset;
|
||||
for (int i = 0; i < bitset.size(); i++) {
|
||||
if (bitset[i]) {
|
||||
dst_offset.emplace_back(SegOffset(i));
|
||||
auto offset = SegOffset(i);
|
||||
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
|
||||
dst_offset.push_back(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
return dst_offset;
|
||||
|
@ -579,7 +732,10 @@ SegmentSealedImpl::search_ids(const BitsetView& bitset, Timestamp timestamp) con
|
|||
std::vector<SegOffset> dst_offset;
|
||||
for (int i = 0; i < bitset.size(); i++) {
|
||||
if (!bitset.test(i)) {
|
||||
dst_offset.emplace_back(SegOffset(i));
|
||||
auto offset = SegOffset(i);
|
||||
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
|
||||
dst_offset.push_back(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
return dst_offset;
|
||||
|
@ -589,7 +745,6 @@ std::string
|
|||
SegmentSealedImpl::debug() const {
|
||||
std::string log_str;
|
||||
log_str += "Sealed\n";
|
||||
log_str += "Index:" + primary_key_index_->debug();
|
||||
log_str += "\n";
|
||||
return log_str;
|
||||
}
|
||||
|
@ -601,7 +756,7 @@ SegmentSealedImpl::LoadSegmentMeta(const proto::segcore::LoadSegmentMeta& segmen
|
|||
for (auto& info : segment_meta.metas()) {
|
||||
slice_lengths.push_back(info.row_count());
|
||||
}
|
||||
timestamp_index_.set_length_meta(std::move(slice_lengths));
|
||||
insert_record_.timestamp_index_.set_length_meta(std::move(slice_lengths));
|
||||
PanicInfo("unimplemented");
|
||||
}
|
||||
|
||||
|
@ -614,13 +769,15 @@ SegmentSealedImpl::get_active_count(Timestamp ts) const {
|
|||
void
|
||||
SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const {
|
||||
// TODO change the
|
||||
AssertInfo(this->timestamps_.size() == get_row_count(), "Timestamp size not equal to row count");
|
||||
auto range = timestamp_index_.get_active_range(timestamp);
|
||||
AssertInfo(insert_record_.timestamps_.num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
auto timestamps_data = insert_record_.timestamps_.get_chunk(0);
|
||||
AssertInfo(timestamps_data.size() == get_row_count(), "Timestamp size not equal to row count");
|
||||
auto range = insert_record_.timestamp_index_.get_active_range(timestamp);
|
||||
|
||||
// range == (size_, size_) and size_ is this->timestamps_.size().
|
||||
// it means these data are all useful, we don't need to update bitset_chunk.
|
||||
// It can be thought of as an AND operation with another bitmask that is all 1s, but it is not necessary to do so.
|
||||
if (range.first == range.second && range.first == this->timestamps_.size()) {
|
||||
if (range.first == range.second && range.first == timestamps_data.size()) {
|
||||
// just skip
|
||||
return;
|
||||
}
|
||||
|
@ -630,7 +787,7 @@ SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, Timestamp time
|
|||
bitset_chunk.reset();
|
||||
return;
|
||||
}
|
||||
auto mask = TimestampIndex::GenerateBitset(timestamp, range, this->timestamps_.data(), this->timestamps_.size());
|
||||
auto mask = TimestampIndex::GenerateBitset(timestamp, range, timestamps_data.data(), timestamps_data.size());
|
||||
bitset_chunk &= mask;
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
@ -62,7 +63,7 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
|
||||
public:
|
||||
int64_t
|
||||
num_chunk_index(FieldOffset field_offset) const override;
|
||||
num_chunk_index(FieldId field_id) const override;
|
||||
|
||||
int64_t
|
||||
num_chunk() const override;
|
||||
|
@ -78,15 +79,15 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
PreDelete(int64_t size) override;
|
||||
|
||||
Status
|
||||
Delete(int64_t reserved_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) override;
|
||||
Delete(int64_t reserved_offset, int64_t size, const IdArray* pks, const Timestamp* timestamps) override;
|
||||
|
||||
protected:
|
||||
// blob and row_count
|
||||
SpanBase
|
||||
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const override;
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
const knowhere::Index*
|
||||
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const override;
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
// Calculate: output[i] = Vec[seg_offset[i]],
|
||||
// where Vec is determined from field_offset
|
||||
|
@ -95,8 +96,8 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
|
||||
// Calculate: output[i] = Vec[seg_offset[i]]
|
||||
// where Vec is determined from field_offset
|
||||
void
|
||||
bulk_subscript(FieldOffset field_offset, const int64_t* seg_offsets, int64_t count, void* output) const override;
|
||||
std::unique_ptr<DataArray>
|
||||
bulk_subscript(FieldId field_id, const int64_t* seg_offsets, int64_t count) const override;
|
||||
|
||||
void
|
||||
check_search(const query::Plan* plan) const override;
|
||||
|
@ -119,6 +120,9 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
bulk_subscript_impl(
|
||||
int64_t element_sizeof, const void* src_raw, const int64_t* seg_offsets, int64_t count, void* dst_raw);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
fill_with_empty(FieldId field_id, int64_t count) const;
|
||||
|
||||
void
|
||||
update_row_count(int64_t row_count) {
|
||||
if (row_count_opt_.has_value()) {
|
||||
|
@ -162,8 +166,11 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
std::vector<SegOffset>
|
||||
search_ids(const BitsetType& view, Timestamp timestamp) const override;
|
||||
|
||||
// virtual void
|
||||
// build_index_if_primary_key(FieldId field_id);
|
||||
void
|
||||
LoadVecIndex(const LoadIndexInfo& info);
|
||||
|
||||
void
|
||||
LoadScalarIndex(const LoadIndexInfo& info);
|
||||
|
||||
private:
|
||||
// segment loading state
|
||||
|
@ -175,18 +182,21 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
// TODO: generate index for scalar
|
||||
std::optional<int64_t> row_count_opt_;
|
||||
|
||||
// TODO: use protobuf format
|
||||
// TODO: remove duplicated indexing
|
||||
std::vector<std::unique_ptr<knowhere::Index>> scalar_indexings_;
|
||||
std::unique_ptr<ScalarIndexBase> primary_key_index_;
|
||||
// scalar field index
|
||||
std::unordered_map<FieldId, knowhere::IndexPtr> scalar_indexings_;
|
||||
// vector field index
|
||||
SealedIndexingRecord vector_indexings_;
|
||||
|
||||
std::vector<aligned_vector<char>> fields_data_;
|
||||
// inserted fields data and row_ids, timestamps
|
||||
InsertRecord insert_record_;
|
||||
|
||||
// deleted pks
|
||||
mutable DeletedRecord deleted_record_;
|
||||
|
||||
SealedIndexingRecord vecindexs_;
|
||||
aligned_vector<idx_t> row_ids_;
|
||||
aligned_vector<Timestamp> timestamps_;
|
||||
TimestampIndex timestamp_index_;
|
||||
// pks to row offset
|
||||
tbb::concurrent_unordered_multimap<PkType, int64_t, std::hash<PkType>> pk2offset_;
|
||||
// std::unique_ptr<ScalarIndexBase> primary_key_index_;
|
||||
|
||||
SchemaPtr schema_;
|
||||
int64_t id_;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,258 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "Utils.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
void
|
||||
ParsePksFromFieldData(std::vector<PkType>& pks, const DataArray& data) {
|
||||
switch (DataType(data.type())) {
|
||||
case DataType::INT64: {
|
||||
auto source_data = reinterpret_cast<const int64_t*>(data.scalars().long_data().data().data());
|
||||
std::copy_n(source_data, pks.size(), pks.data());
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto src_data = data.scalars().string_data().data();
|
||||
std::copy(src_data.begin(), src_data.end(), pks.begin());
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ParsePksFromIDs(std::vector<PkType>& pks, DataType data_type, const IdArray& data) {
|
||||
switch (data_type) {
|
||||
case DataType::INT64: {
|
||||
auto source_data = reinterpret_cast<const int64_t*>(data.int_id().data().data());
|
||||
std::copy_n(source_data, pks.size(), pks.data());
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto source_data = data.str_id().data();
|
||||
std::copy(source_data.begin(), source_data.end(), pks.begin());
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int64_t
|
||||
GetSizeOfIdArray(const IdArray& data) {
|
||||
if (data.has_int_id()) {
|
||||
return data.int_id().data_size();
|
||||
}
|
||||
|
||||
if (data.has_str_id()) {
|
||||
return data.str_id().data_size();
|
||||
}
|
||||
|
||||
PanicInfo("unsupported id type");
|
||||
}
|
||||
|
||||
// Note: this is temporary solution.
|
||||
// modify bulk script implement to make process more clear
|
||||
std::unique_ptr<DataArray>
|
||||
CreateScalarDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto data_array = std::make_unique<DataArray>();
|
||||
data_array->set_field_id(field_meta.get_id().get());
|
||||
data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));
|
||||
|
||||
auto scalar_array = data_array->mutable_scalars();
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
auto data = reinterpret_cast<const double*>(data_raw);
|
||||
auto obj = scalar_array->mutable_bool_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto data = reinterpret_cast<const int8_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto data = reinterpret_cast<const int16_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
auto data = reinterpret_cast<const int32_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto data = reinterpret_cast<const int64_t*>(data_raw);
|
||||
auto obj = scalar_array->mutable_long_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto data = reinterpret_cast<const float*>(data_raw);
|
||||
auto obj = scalar_array->mutable_float_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto data = reinterpret_cast<const double*>(data_raw);
|
||||
auto obj = scalar_array->mutable_double_data();
|
||||
obj->mutable_data()->Add(data, data + count);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto data = reinterpret_cast<const std::string*>(data_raw);
|
||||
auto obj = scalar_array->mutable_string_data();
|
||||
for (auto i = 0; i < count; i++) *(obj->mutable_data()->Add()) = data[i];
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
|
||||
return data_array;
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateVectorDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto data_array = std::make_unique<DataArray>();
|
||||
data_array->set_field_id(field_meta.get_id().get());
|
||||
data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));
|
||||
|
||||
auto vector_array = data_array->mutable_vectors();
|
||||
auto dim = field_meta.get_dim();
|
||||
vector_array->set_dim(dim);
|
||||
switch (data_type) {
|
||||
case DataType::VECTOR_FLOAT: {
|
||||
auto length = count * dim;
|
||||
auto data = reinterpret_cast<const float*>(data_raw);
|
||||
auto obj = vector_array->mutable_float_vector();
|
||||
obj->mutable_data()->Add(data, data + length);
|
||||
break;
|
||||
}
|
||||
case DataType::VECTOR_BINARY: {
|
||||
AssertInfo(dim % 8 == 0, "Binary vector field dimension is not a multiple of 8");
|
||||
auto num_bytes = count * dim / 8;
|
||||
auto data = reinterpret_cast<const char*>(data_raw);
|
||||
auto obj = vector_array->mutable_binary_vector();
|
||||
obj->assign(data, num_bytes);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
return data_array;
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
|
||||
if (!datatype_is_vector(data_type)) {
|
||||
return CreateScalarDataArrayFrom(data_raw, count, field_meta);
|
||||
}
|
||||
|
||||
return CreateVectorDataArrayFrom(data_raw, count, field_meta);
|
||||
}
|
||||
|
||||
// TODO remove merge dataArray, instead fill target entity when get data slice
|
||||
std::unique_ptr<DataArray>
|
||||
MergeDataArray(std::vector<std::pair<milvus::SearchResult*, int64_t>>& result_offsets, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto data_array = std::make_unique<DataArray>();
|
||||
data_array->set_field_id(field_meta.get_id().get());
|
||||
data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));
|
||||
|
||||
for (auto& result_pair : result_offsets) {
|
||||
auto src_field_data = result_pair.first->output_fields_data_[field_meta.get_id()].get();
|
||||
auto src_offset = result_pair.second;
|
||||
AssertInfo(data_type == DataType(src_field_data->type()), "merge field data type not consistent");
|
||||
if (field_meta.is_vector()) {
|
||||
auto vector_array = data_array->mutable_vectors();
|
||||
auto dim = field_meta.get_dim();
|
||||
vector_array->set_dim(dim);
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
auto data = src_field_data->vectors().float_vector().data().data();
|
||||
auto obj = vector_array->mutable_float_vector();
|
||||
obj->mutable_data()->Add(data + src_offset * dim, data + (src_offset + 1) * dim);
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
AssertInfo(dim % 8 == 0, "Binary vector field dimension is not a multiple of 8");
|
||||
auto num_bytes = dim / 8;
|
||||
auto data = src_field_data->vectors().binary_vector().data();
|
||||
auto obj = vector_array->mutable_binary_vector();
|
||||
obj->assign(data + src_offset * num_bytes, num_bytes);
|
||||
} else {
|
||||
PanicInfo("logical error");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
auto scalar_array = data_array->mutable_scalars();
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
auto data = src_field_data->scalars().bool_data().data().data();
|
||||
auto obj = scalar_array->mutable_bool_data();
|
||||
*(obj->mutable_data()->Add()) = data[src_offset];
|
||||
continue;
|
||||
}
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32: {
|
||||
auto data = src_field_data->scalars().int_data().data().data();
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
*(obj->mutable_data()->Add()) = data[src_offset];
|
||||
continue;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto data = src_field_data->scalars().long_data().data().data();
|
||||
auto obj = scalar_array->mutable_long_data();
|
||||
*(obj->mutable_data()->Add()) = data[src_offset];
|
||||
continue;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto data = src_field_data->scalars().float_data().data().data();
|
||||
auto obj = scalar_array->mutable_float_data();
|
||||
*(obj->mutable_data()->Add()) = data[src_offset];
|
||||
continue;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto data = src_field_data->scalars().double_data().data().data();
|
||||
auto obj = scalar_array->mutable_double_data();
|
||||
*(obj->mutable_data()->Add()) = data[src_offset];
|
||||
continue;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto data = src_field_data->scalars().string_data();
|
||||
auto obj = scalar_array->mutable_string_data();
|
||||
*(obj->mutable_data()->Add()) = data.data(src_offset);
|
||||
continue;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data_array;
|
||||
}
|
||||
} // namespace milvus::segcore
|
|
@ -12,8 +12,12 @@
|
|||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
#include <knowhere/common/MetricType.h>
|
||||
#include "common/QueryResult.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -51,4 +55,28 @@ MetricTypeToString(faiss::MetricType metric_type) {
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
ParsePksFromFieldData(std::vector<PkType>& pks, const DataArray& data);
|
||||
|
||||
void
|
||||
ParsePksFromIDs(std::vector<PkType>& pks, DataType data_type, const IdArray& data);
|
||||
|
||||
int64_t
|
||||
GetSizeOfIdArray(const IdArray& data);
|
||||
|
||||
// Note: this is temporary solution.
|
||||
// modify bulk script implement to make process more clear
|
||||
std::unique_ptr<DataArray>
|
||||
CreateScalarDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateVectorDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
// TODO remove merge dataArray, instead fill target entity when get data slice
|
||||
std::unique_ptr<DataArray>
|
||||
MergeDataArray(std::vector<std::pair<milvus::SearchResult*, int64_t>>& result_offsets, const FieldMeta& field_meta);
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#include "knowhere/common/BinarySet.h"
|
||||
#include "knowhere/index/vector_index/VecIndexFactory.h"
|
||||
#include "segcore/load_index_c.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "common/CDataType.h"
|
||||
|
||||
CStatus
|
||||
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info) {
|
||||
|
@ -59,10 +61,11 @@ AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* c_index_key, cons
|
|||
}
|
||||
|
||||
CStatus
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id) {
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataType field_type) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
load_index_info->field_id = field_id;
|
||||
load_index_info->field_type = field_type;
|
||||
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
|
@ -77,7 +80,7 @@ AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id) {
|
|||
}
|
||||
|
||||
CStatus
|
||||
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto binary_set = (knowhere::BinarySet*)c_binary_set;
|
||||
|
@ -107,3 +110,37 @@ AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
|||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
appendScalarIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
try {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto field_type = load_index_info->field_type;
|
||||
auto binary_set = (knowhere::BinarySet*)c_binary_set;
|
||||
auto& index_params = load_index_info->index_params;
|
||||
bool find_index_type = index_params.count("index_type") > 0 ? true : false;
|
||||
AssertInfo(find_index_type == true, "Can't find index type in index_params");
|
||||
load_index_info->index =
|
||||
milvus::scalar::IndexFactory::GetInstance().CreateIndex(field_type, index_params["index_type"]);
|
||||
load_index_info->index->Load(*binary_set);
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
status.error_msg = "";
|
||||
return status;
|
||||
} catch (std::exception& e) {
|
||||
auto status = CStatus();
|
||||
status.error_code = UnexpectedError;
|
||||
status.error_msg = strdup(e.what());
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto field_type = load_index_info->field_type;
|
||||
if (milvus::IsVectorType(field_type)) {
|
||||
return appendVecIndex(c_load_index_info, c_binary_set);
|
||||
}
|
||||
return appendScalarIndex(c_load_index_info, c_binary_set);
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ CStatus
|
|||
AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* index_key, const char* index_value);
|
||||
|
||||
CStatus
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id);
|
||||
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id, enum CDataType field_type);
|
||||
|
||||
CStatus
|
||||
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set);
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "segcore/ReduceStructure.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/reduce_c.h"
|
||||
#include "segcore/Utils.h"
|
||||
|
||||
using SearchResult = milvus::SearchResult;
|
||||
|
||||
|
@ -40,32 +41,36 @@ using SearchResult = milvus::SearchResult;
|
|||
|
||||
void
|
||||
ReduceResultData(std::vector<SearchResult*>& search_results, int64_t nq, int64_t topk) {
|
||||
AssertInfo(topk > 0, "topk must greater than 0");
|
||||
auto num_segments = search_results.size();
|
||||
AssertInfo(num_segments > 0, "num segment must greater than 0");
|
||||
for (int i = 0; i < num_segments; i++) {
|
||||
auto search_result = search_results[i];
|
||||
auto result_count = search_result->get_total_result_count();
|
||||
AssertInfo(search_result != nullptr, "search result must not equal to nullptr");
|
||||
AssertInfo(search_result->primary_keys_.size() == nq * topk, "incorrect search result primary key size");
|
||||
AssertInfo(search_result->distances_.size() == nq * topk, "incorrect search result distance size");
|
||||
AssertInfo(search_result->primary_keys_.size() == result_count, "incorrect search result primary key size");
|
||||
AssertInfo(search_result->distances_.size() == result_count, "incorrect search result distance size");
|
||||
}
|
||||
|
||||
std::vector<std::vector<int64_t>> final_real_topks(num_segments);
|
||||
for (auto& topks : final_real_topks) {
|
||||
topks.resize(nq);
|
||||
}
|
||||
std::vector<std::vector<int64_t>> search_records(num_segments);
|
||||
std::unordered_set<int64_t> pk_set;
|
||||
std::unordered_set<milvus::PkType> pk_set;
|
||||
int64_t skip_dup_cnt = 0;
|
||||
|
||||
// reduce search results
|
||||
int64_t result_offset = 0;
|
||||
for (int64_t qi = 0; qi < nq; qi++) {
|
||||
std::vector<SearchResultPair> result_pairs;
|
||||
int64_t base_offset = qi * topk;
|
||||
for (int i = 0; i < num_segments; i++) {
|
||||
auto search_result = search_results[i];
|
||||
auto base_offset = search_result->get_result_count(qi);
|
||||
auto primary_key = search_result->primary_keys_[base_offset];
|
||||
auto distance = search_result->distances_[base_offset];
|
||||
result_pairs.push_back(
|
||||
SearchResultPair(primary_key, distance, search_result, i, base_offset, base_offset + topk));
|
||||
result_pairs.push_back(SearchResultPair(primary_key, distance, search_result, i, base_offset,
|
||||
base_offset + search_result->real_topK_per_nq_[qi]));
|
||||
}
|
||||
int64_t curr_offset = base_offset;
|
||||
|
||||
#if 0
|
||||
for (int i = 0; i < topk; ++i) {
|
||||
|
@ -78,20 +83,22 @@ ReduceResultData(std::vector<SearchResult*>& search_results, int64_t nq, int64_t
|
|||
}
|
||||
#else
|
||||
pk_set.clear();
|
||||
while (curr_offset - base_offset < topk) {
|
||||
int64_t last_nq_result_offset = result_offset;
|
||||
while (result_offset - last_nq_result_offset < topk) {
|
||||
std::sort(result_pairs.begin(), result_pairs.end(), std::greater<>());
|
||||
auto& pilot = result_pairs[0];
|
||||
auto index = pilot.index_;
|
||||
int64_t curr_pk = pilot.primary_key_;
|
||||
auto curr_pk = pilot.primary_key_;
|
||||
// no valid search result for this nq, break to next
|
||||
if (curr_pk == INVALID_PK) {
|
||||
break;
|
||||
}
|
||||
// remove duplicates
|
||||
if (curr_pk == INVALID_ID || pk_set.count(curr_pk) == 0) {
|
||||
pilot.search_result_->result_offsets_.push_back(curr_offset++);
|
||||
// when inserted data are dirty, it's possible that primary keys are duplicated,
|
||||
// in this case, "offset_" may be greater than "offset_rb_" (#10530)
|
||||
search_records[index].push_back(pilot.offset_ < pilot.offset_rb_ ? pilot.offset_ : INVALID_OFFSET);
|
||||
if (curr_pk != INVALID_ID) {
|
||||
pk_set.insert(curr_pk);
|
||||
}
|
||||
if (pk_set.count(curr_pk) == 0) {
|
||||
pilot.search_result_->result_offsets_.push_back(result_offset++);
|
||||
search_records[index].push_back(pilot.offset_);
|
||||
pk_set.insert(curr_pk);
|
||||
final_real_topks[index][qi]++;
|
||||
} else {
|
||||
// skip entity with same primary key
|
||||
skip_dup_cnt++;
|
||||
|
@ -109,123 +116,167 @@ ReduceResultData(std::vector<SearchResult*>& search_results, int64_t nq, int64_t
|
|||
continue;
|
||||
}
|
||||
|
||||
std::vector<int64_t> primary_keys;
|
||||
std::vector<milvus::PkType> primary_keys;
|
||||
std::vector<float> distances;
|
||||
std::vector<int64_t> ids;
|
||||
for (int j = 0; j < search_records[i].size(); j++) {
|
||||
auto& offset = search_records[i][j];
|
||||
primary_keys.push_back(offset != INVALID_OFFSET ? search_result->primary_keys_[offset] : INVALID_ID);
|
||||
distances.push_back(offset != INVALID_OFFSET ? search_result->distances_[offset]
|
||||
: std::numeric_limits<float>::max());
|
||||
ids.push_back(offset != INVALID_OFFSET ? search_result->ids_[offset] : INVALID_ID);
|
||||
primary_keys.push_back(search_result->primary_keys_[offset]);
|
||||
distances.push_back(search_result->distances_[offset]);
|
||||
ids.push_back(search_result->seg_offsets_[offset]);
|
||||
}
|
||||
|
||||
search_result->primary_keys_ = primary_keys;
|
||||
search_result->distances_ = distances;
|
||||
search_result->ids_ = ids;
|
||||
search_result->primary_keys_ = std::move(primary_keys);
|
||||
search_result->distances_ = std::move(distances);
|
||||
search_result->seg_offsets_ = std::move(ids);
|
||||
search_result->real_topK_per_nq_ = std::move(final_real_topks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ReorganizeSearchResults(std::vector<SearchResult*>& search_results,
|
||||
int32_t nq,
|
||||
int32_t topK,
|
||||
milvus::aligned_vector<int64_t>& result_ids,
|
||||
std::vector<float>& result_distances,
|
||||
std::vector<milvus::aligned_vector<char>>& result_output_fields_data) {
|
||||
auto num_segments = search_results.size();
|
||||
auto results_count = 0;
|
||||
|
||||
for (int i = 0; i < num_segments; i++) {
|
||||
auto search_result = search_results[i];
|
||||
AssertInfo(search_result != nullptr, "null search result when reorganize");
|
||||
AssertInfo(search_result->output_fields_meta_.size() == result_output_fields_data.size(),
|
||||
"illegal fields meta size"
|
||||
", fields_meta_size = " +
|
||||
std::to_string(search_result->output_fields_meta_.size()) +
|
||||
", expected_size = " + std::to_string(result_output_fields_data.size()));
|
||||
auto num_results = search_result->result_offsets_.size();
|
||||
if (num_results == 0) {
|
||||
continue;
|
||||
}
|
||||
#pragma omp parallel for
|
||||
for (int j = 0; j < num_results; j++) {
|
||||
auto loc = search_result->result_offsets_[j];
|
||||
// AssertInfo(loc < nq * topK, "result location of out range, location = " +
|
||||
// std::to_string(loc));
|
||||
// set result ids
|
||||
memcpy(&result_ids[loc], &search_result->ids_data_[j * sizeof(int64_t)], sizeof(int64_t));
|
||||
// set result distances
|
||||
result_distances[loc] = search_result->distances_[j];
|
||||
// set result output fields data
|
||||
for (int k = 0; k < search_result->output_fields_meta_.size(); k++) {
|
||||
auto ele_size = search_result->output_fields_meta_[k].get_sizeof();
|
||||
memcpy(&result_output_fields_data[k][loc * ele_size],
|
||||
&search_result->output_fields_data_[k][j * ele_size], ele_size);
|
||||
}
|
||||
}
|
||||
results_count += num_results;
|
||||
struct Int64PKVisitor {
|
||||
template <typename T>
|
||||
int64_t
|
||||
operator()(T t) const {
|
||||
PanicInfo("invalid int64 pk value");
|
||||
}
|
||||
};
|
||||
|
||||
AssertInfo(results_count == nq * topK,
|
||||
"size of reduce result is less than nq * topK"
|
||||
", result_count = " +
|
||||
std::to_string(results_count) + ", nq * topK = " + std::to_string(nq * topK));
|
||||
template <>
|
||||
int64_t
|
||||
Int64PKVisitor::operator()<int64_t>(int64_t t) const {
|
||||
return t;
|
||||
}
|
||||
|
||||
struct StrPKVisitor {
|
||||
template <typename T>
|
||||
std::string
|
||||
operator()(T t) const {
|
||||
PanicInfo("invalid string pk value");
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
std::string
|
||||
StrPKVisitor::operator()<std::string>(std::string t) const {
|
||||
return t;
|
||||
}
|
||||
|
||||
std::vector<char>
|
||||
GetSearchResultDataSlice(milvus::aligned_vector<int64_t>& result_ids,
|
||||
std::vector<float>& result_distances,
|
||||
std::vector<milvus::aligned_vector<char>>& result_output_fields_data,
|
||||
int32_t nq,
|
||||
int32_t topK,
|
||||
int32_t nq_begin,
|
||||
int32_t nq_end,
|
||||
std::vector<milvus::FieldMeta>& output_fields_meta) {
|
||||
GetSearchResultDataSlice(std::vector<SearchResult*>& search_results,
|
||||
milvus::query::Plan* plan,
|
||||
int64_t nq_offset_begin,
|
||||
int64_t nq_offset_end,
|
||||
int64_t result_offset_begin,
|
||||
int64_t result_offset_end,
|
||||
int64_t nq,
|
||||
int64_t topK) {
|
||||
AssertInfo(nq_offset_begin <= nq_offset_end,
|
||||
"illegal offsets when GetSearchResultDataSlice, nq_offset_begin = " + std::to_string(nq_offset_begin) +
|
||||
", nq_offset_end = " + std::to_string(nq_offset_end));
|
||||
AssertInfo(nq_offset_end <= nq, "illegal nq_offset_end when GetSearchResultDataSlice, nq_offset_end = " +
|
||||
std::to_string(nq_offset_end) + ", nq = " + std::to_string(nq));
|
||||
|
||||
AssertInfo(result_offset_begin <= result_offset_end,
|
||||
"illegal result offsets when GetSearchResultDataSlice, result_offset_begin = " +
|
||||
std::to_string(result_offset_begin) + ", result_offset_end = " + std::to_string(result_offset_end));
|
||||
AssertInfo(result_offset_end <= nq * topK,
|
||||
"illegal result_offset_end when GetSearchResultDataSlice, result_offset_end = " +
|
||||
std::to_string(result_offset_end) + ", nq = " + std::to_string(nq) +
|
||||
", topk = " + std::to_string(topK));
|
||||
|
||||
auto search_result_data = std::make_unique<milvus::proto::schema::SearchResultData>();
|
||||
// set topK and nq
|
||||
search_result_data->set_top_k(topK);
|
||||
search_result_data->set_num_queries(nq);
|
||||
search_result_data->set_num_queries(nq_offset_end - nq_offset_begin);
|
||||
search_result_data->mutable_topks()->Resize(nq_offset_end - nq_offset_begin, 0);
|
||||
|
||||
auto offset_begin = nq_begin * topK;
|
||||
auto offset_end = nq_end * topK;
|
||||
AssertInfo(offset_begin <= offset_end,
|
||||
"illegal offsets when GetSearchResultDataSlice"
|
||||
", offset_begin = " +
|
||||
std::to_string(offset_begin) + ", offset_end = " + std::to_string(offset_end));
|
||||
AssertInfo(offset_end <= topK * nq,
|
||||
"illegal offset_end when GetSearchResultDataSlice"
|
||||
", offset_end = " +
|
||||
std::to_string(offset_end) + ", nq = " + std::to_string(nq) + ", topK = " + std::to_string(topK));
|
||||
auto num_segments = search_results.size();
|
||||
auto total_result_count = result_offset_end - result_offset_begin;
|
||||
|
||||
// set ids
|
||||
auto proto_ids = std::make_unique<milvus::proto::schema::IDs>();
|
||||
auto ids = std::make_unique<milvus::proto::schema::LongArray>();
|
||||
*ids->mutable_data() = {result_ids.begin() + offset_begin, result_ids.begin() + offset_end};
|
||||
proto_ids->set_allocated_int_id(ids.release());
|
||||
search_result_data->set_allocated_ids(proto_ids.release());
|
||||
AssertInfo(search_result_data->ids().int_id().data_size() == offset_end - offset_begin,
|
||||
"wrong ids size"
|
||||
", size = " +
|
||||
std::to_string(search_result_data->ids().int_id().data_size()) +
|
||||
", expected size = " + std::to_string(offset_end - offset_begin));
|
||||
// use for fill field data
|
||||
std::vector<std::pair<SearchResult*, int64_t>> result_offsets(total_result_count);
|
||||
|
||||
// set scores
|
||||
*search_result_data->mutable_scores() = {result_distances.begin() + offset_begin,
|
||||
result_distances.begin() + offset_end};
|
||||
AssertInfo(search_result_data->scores_size() == offset_end - offset_begin,
|
||||
// reverse space for pks
|
||||
auto primary_field_id = plan->schema_.get_primary_field_id().value_or(milvus::FieldId(-1));
|
||||
AssertInfo(primary_field_id.get() != INVALID_FIELD_ID, "Primary key is -1");
|
||||
auto pk_type = plan->schema_[primary_field_id].get_data_type();
|
||||
switch (pk_type) {
|
||||
case milvus::DataType::INT64: {
|
||||
auto ids = std::make_unique<milvus::proto::schema::LongArray>();
|
||||
ids->mutable_data()->Resize(total_result_count, 0);
|
||||
search_result_data->mutable_ids()->set_allocated_int_id(ids.release());
|
||||
break;
|
||||
}
|
||||
case milvus::DataType::VARCHAR: {
|
||||
auto ids = std::make_unique<milvus::proto::schema::StringArray>();
|
||||
std::vector<std::string> string_pks(total_result_count);
|
||||
*ids->mutable_data() = {string_pks.begin(), string_pks.end()};
|
||||
search_result_data->mutable_ids()->set_allocated_str_id(ids.release());
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported primary key type");
|
||||
}
|
||||
}
|
||||
|
||||
// reverse space for distances
|
||||
search_result_data->mutable_scores()->Resize(total_result_count, 0);
|
||||
|
||||
// fill pks and distances
|
||||
for (auto nq_offset = nq_offset_begin; nq_offset < nq_offset_end; nq_offset++) {
|
||||
int64_t result_count = 0;
|
||||
for (int i = 0; i < num_segments; i++) {
|
||||
auto search_result = search_results[i];
|
||||
AssertInfo(search_result != nullptr, "null search result when reorganize");
|
||||
if (search_result->result_offsets_.size() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto seg_result_offset_start = search_result->get_result_count(nq_offset);
|
||||
auto seg_result_offset_end = seg_result_offset_start + search_result->real_topK_per_nq_[nq_offset];
|
||||
for (auto j = seg_result_offset_start; j < seg_result_offset_end; j++) {
|
||||
auto loc = search_result->result_offsets_[j] - result_offset_begin;
|
||||
// set result pks
|
||||
switch (pk_type) {
|
||||
case milvus::DataType::INT64: {
|
||||
search_result_data->mutable_ids()->mutable_int_id()->mutable_data()->Set(
|
||||
loc, std::visit(Int64PKVisitor{}, search_result->primary_keys_[j]));
|
||||
break;
|
||||
}
|
||||
case milvus::DataType::VARCHAR: {
|
||||
*search_result_data->mutable_ids()->mutable_str_id()->mutable_data()->Mutable(loc) =
|
||||
std::visit(StrPKVisitor{}, search_result->primary_keys_[j]);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported primary key type");
|
||||
}
|
||||
}
|
||||
|
||||
// set result distances
|
||||
search_result_data->mutable_scores()->Set(loc, search_result->distances_[j]);
|
||||
// set result offset to fill output fields data
|
||||
result_offsets[loc] = std::make_pair(search_result, j);
|
||||
}
|
||||
|
||||
result_count += search_result->real_topK_per_nq_[nq_offset];
|
||||
}
|
||||
|
||||
// update result topks
|
||||
search_result_data->mutable_topks()->Set(nq_offset - nq_offset_begin, result_count);
|
||||
}
|
||||
|
||||
AssertInfo(search_result_data->scores_size() == total_result_count,
|
||||
"wrong scores size"
|
||||
", size = " +
|
||||
std::to_string(search_result_data->scores_size()) +
|
||||
", expected size = " + std::to_string(offset_end - offset_begin));
|
||||
", expected size = " + std::to_string(total_result_count));
|
||||
|
||||
// set output fields
|
||||
for (int i = 0; i < result_output_fields_data.size(); i++) {
|
||||
auto& field_meta = output_fields_meta[i];
|
||||
auto field_size = field_meta.get_sizeof();
|
||||
auto array = milvus::segcore::CreateDataArrayFrom(
|
||||
result_output_fields_data[i].data() + offset_begin * field_size, offset_end - offset_begin, field_meta);
|
||||
search_result_data->mutable_fields_data()->AddAllocated(array.release());
|
||||
for (auto field_id : plan->target_entries_) {
|
||||
auto& field_meta = plan->schema_[field_id];
|
||||
auto field_data = milvus::segcore::MergeDataArray(result_offsets, field_meta);
|
||||
search_result_data->mutable_fields_data()->AddAllocated(field_data.release());
|
||||
}
|
||||
|
||||
// SearchResultData to blob
|
||||
|
@ -239,6 +290,7 @@ GetSearchResultDataSlice(milvus::aligned_vector<int64_t>& result_ids,
|
|||
CStatus
|
||||
Marshal(CSearchResultDataBlobs* cSearchResultDataBlobs,
|
||||
CSearchResult* c_search_results,
|
||||
CSearchPlan c_plan,
|
||||
int32_t num_segments,
|
||||
int32_t* nq_slice_sizes,
|
||||
int32_t num_slices) {
|
||||
|
@ -249,46 +301,44 @@ Marshal(CSearchResultDataBlobs* cSearchResultDataBlobs,
|
|||
search_results[i] = static_cast<SearchResult*>(c_search_results[i]);
|
||||
}
|
||||
AssertInfo(search_results.size() > 0, "empty search result when Marshal");
|
||||
auto plan = (milvus::query::Plan*)c_plan;
|
||||
auto topK = search_results[0]->topk_;
|
||||
auto nq = search_results[0]->num_queries_;
|
||||
|
||||
// init result ids, distances
|
||||
auto result_ids = milvus::aligned_vector<int64_t>(nq * topK);
|
||||
auto result_distances = std::vector<float>(nq * topK);
|
||||
|
||||
// init result output fields data
|
||||
auto& output_fields_meta = search_results[0]->output_fields_meta_;
|
||||
auto num_output_fields = output_fields_meta.size();
|
||||
auto result_output_fields_data = std::vector<milvus::aligned_vector<char>>(num_output_fields);
|
||||
for (int i = 0; i < num_output_fields; i++) {
|
||||
auto size = output_fields_meta[i].get_sizeof();
|
||||
result_output_fields_data[i].resize(size * nq * topK);
|
||||
std::vector<int64_t> result_count_per_nq(nq);
|
||||
for (auto search_result : search_results) {
|
||||
AssertInfo(search_result->real_topK_per_nq_.size() == nq,
|
||||
"incorrect real_topK_per_nq_ size in search result");
|
||||
for (int j = 0; j < nq; j++) {
|
||||
result_count_per_nq[j] += search_result->real_topK_per_nq_[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Reorganize search results, get result ids, distances and output fields data
|
||||
ReorganizeSearchResults(search_results, nq, topK, result_ids, result_distances, result_output_fields_data);
|
||||
|
||||
// prefix sum, get slices offsets
|
||||
AssertInfo(num_slices > 0, "empty nq_slice_sizes is not allowed");
|
||||
auto slice_offsets_size = num_slices + 1;
|
||||
auto slice_offsets = std::vector<int32_t>(slice_offsets_size);
|
||||
slice_offsets[0] = 0;
|
||||
slice_offsets[1] = nq_slice_sizes[0];
|
||||
for (int i = 2; i < slice_offsets_size; i++) {
|
||||
slice_offsets[i] = slice_offsets[i - 1] + nq_slice_sizes[i - 1];
|
||||
auto nq_slice_offsets = std::vector<int32_t>(slice_offsets_size);
|
||||
auto result_slice_offset = std::vector<int64_t>(slice_offsets_size);
|
||||
|
||||
for (int i = 1; i < slice_offsets_size; i++) {
|
||||
nq_slice_offsets[i] = nq_slice_offsets[i - 1] + nq_slice_sizes[i - 1];
|
||||
result_slice_offset[i] = result_slice_offset[i - 1];
|
||||
for (auto j = nq_slice_offsets[i - 1]; j < nq_slice_offsets[i]; j++) {
|
||||
result_slice_offset[i] += result_count_per_nq[j];
|
||||
}
|
||||
}
|
||||
AssertInfo(slice_offsets[num_slices] == nq,
|
||||
AssertInfo(nq_slice_offsets[num_slices] == nq,
|
||||
"illegal req sizes"
|
||||
", slice_offsets[last] = " +
|
||||
std::to_string(slice_offsets[num_slices]) + ", nq = " + std::to_string(nq));
|
||||
", nq_slice_offsets[last] = " +
|
||||
std::to_string(nq_slice_offsets[num_slices]) + ", nq = " + std::to_string(nq));
|
||||
|
||||
// get search result data blobs by slices
|
||||
auto search_result_data_blobs = std::make_unique<milvus::segcore::SearchResultDataBlobs>();
|
||||
search_result_data_blobs->blobs.resize(num_slices);
|
||||
#pragma omp parallel for
|
||||
//#pragma omp parallel for
|
||||
for (int i = 0; i < num_slices; i++) {
|
||||
auto proto = GetSearchResultDataSlice(result_ids, result_distances, result_output_fields_data, nq, topK,
|
||||
slice_offsets[i], slice_offsets[i + 1], output_fields_meta);
|
||||
auto proto = GetSearchResultDataSlice(search_results, plan, nq_slice_offsets[i], nq_slice_offsets[i + 1],
|
||||
result_slice_offset[i], result_slice_offset[i + 1], nq, topK);
|
||||
search_result_data_blobs->blobs[i] = proto;
|
||||
}
|
||||
|
||||
|
@ -328,6 +378,36 @@ DeleteSearchResultDataBlobs(CSearchResultDataBlobs cSearchResultDataBlobs) {
|
|||
delete search_result_data_blobs;
|
||||
}
|
||||
|
||||
void
|
||||
FilterInvalidSearchResult(SearchResult* search_result) {
|
||||
auto nq = search_result->num_queries_;
|
||||
auto topk = search_result->topk_;
|
||||
AssertInfo(search_result->seg_offsets_.size() == nq * topk,
|
||||
"wrong seg offsets size, size = " + std::to_string(search_result->seg_offsets_.size()) +
|
||||
", expected size = " + std::to_string(nq * topk));
|
||||
AssertInfo(search_result->distances_.size() == nq * topk,
|
||||
"wrong distances size, size = " + std::to_string(search_result->distances_.size()) +
|
||||
", expected size = " + std::to_string(nq * topk));
|
||||
std::vector<int64_t> real_topks(nq);
|
||||
std::vector<float> distances;
|
||||
std::vector<int64_t> seg_offsets;
|
||||
for (auto i = 0; i < nq; i++) {
|
||||
real_topks[i] = 0;
|
||||
for (auto j = 0; j < topk; j++) {
|
||||
auto offset = i * topk + j;
|
||||
if (search_result->seg_offsets_[offset] != INVALID_SEG_OFFSET) {
|
||||
real_topks[i]++;
|
||||
seg_offsets.push_back(search_result->seg_offsets_[offset]);
|
||||
distances.push_back(search_result->distances_[offset]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
search_result->distances_ = std::move(distances);
|
||||
search_result->seg_offsets_ = std::move(seg_offsets);
|
||||
search_result->real_topK_per_nq_ = std::move(real_topks);
|
||||
}
|
||||
|
||||
CStatus
|
||||
ReduceSearchResultsAndFillData(CSearchPlan c_plan, CSearchResult* c_search_results, int64_t num_segments) {
|
||||
try {
|
||||
|
@ -339,13 +419,20 @@ ReduceSearchResultsAndFillData(CSearchPlan c_plan, CSearchResult* c_search_resul
|
|||
auto topk = search_results[0]->topk_;
|
||||
auto num_queries = search_results[0]->num_queries_;
|
||||
|
||||
std::vector<SearchResult*> valid_search_results;
|
||||
// get primary keys for duplicates removal
|
||||
for (auto& search_result : search_results) {
|
||||
for (auto search_result : search_results) {
|
||||
auto segment = (milvus::segcore::SegmentInterface*)(search_result->segment_);
|
||||
FilterInvalidSearchResult(search_result);
|
||||
segment->FillPrimaryKeys(plan, *search_result);
|
||||
if (search_result->get_total_result_count() > 0) {
|
||||
valid_search_results.push_back(search_result);
|
||||
}
|
||||
}
|
||||
|
||||
ReduceResultData(search_results, num_queries, topk);
|
||||
if (valid_search_results.size() > 0) {
|
||||
ReduceResultData(valid_search_results, num_queries, topk);
|
||||
}
|
||||
|
||||
// fill in other entities
|
||||
for (auto& search_result : search_results) {
|
||||
|
|
|
@ -25,6 +25,7 @@ ReduceSearchResultsAndFillData(CSearchPlan c_plan, CSearchResult* search_results
|
|||
CStatus
|
||||
Marshal(CSearchResultDataBlobs* cSearchResultDataBlobs,
|
||||
CSearchResult* c_search_results,
|
||||
CSearchPlan c_plan,
|
||||
int32_t num_segments,
|
||||
int32_t* nq_slice_sizes,
|
||||
int32_t num_slices);
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/SimilarityCorelation.h"
|
||||
#include "segcore/segment_c.h"
|
||||
#include "google/protobuf/text_format.h"
|
||||
|
||||
////////////////////////////// common interfaces //////////////////////////////
|
||||
CSegmentInterface
|
||||
|
@ -133,54 +134,16 @@ Insert(CSegmentInterface c_segment,
|
|||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const uint64_t* timestamps,
|
||||
void* raw_data,
|
||||
int sizeof_per_row,
|
||||
int64_t count) {
|
||||
const char* data_info) {
|
||||
try {
|
||||
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
|
||||
milvus::segcore::RowBasedRawData dataChunk{};
|
||||
auto proto = std::string(data_info);
|
||||
Assert(!proto.empty());
|
||||
auto insert_data = std::make_unique<milvus::InsertData>();
|
||||
auto suc = google::protobuf::TextFormat::ParseFromString(proto, insert_data.get());
|
||||
AssertInfo(suc, "unmarshal field data string failed");
|
||||
|
||||
dataChunk.raw_data = raw_data;
|
||||
dataChunk.sizeof_per_row = sizeof_per_row;
|
||||
dataChunk.count = count;
|
||||
segment->Insert(reserved_offset, size, row_ids, timestamps, dataChunk);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
return milvus::FailureCStatus(UnexpectedError, e.what());
|
||||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
InsertColumnData(CSegmentInterface c_segment,
|
||||
int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const uint64_t* timestamps,
|
||||
void* raw_data,
|
||||
int64_t count) {
|
||||
try {
|
||||
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
|
||||
milvus::segcore::ColumnBasedRawData dataChunk{};
|
||||
|
||||
auto& schema = segment->get_schema();
|
||||
auto sizeof_infos = schema.get_sizeof_infos();
|
||||
dataChunk.columns_ = std::vector<milvus::aligned_vector<uint8_t>>(schema.size());
|
||||
// reverse space for each field
|
||||
for (int fid = 0; fid < schema.size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
dataChunk.columns_[fid].resize(len * size);
|
||||
}
|
||||
auto col_data = reinterpret_cast<const char*>(raw_data);
|
||||
int64_t offset = 0;
|
||||
for (int fid = 0; fid < schema.size(); ++fid) {
|
||||
auto len = sizeof_infos[fid] * size;
|
||||
auto src = col_data + offset;
|
||||
auto dst = dataChunk.columns_[fid].data();
|
||||
memcpy(dst, src, len);
|
||||
offset += len;
|
||||
}
|
||||
dataChunk.count = count;
|
||||
segment->Insert(reserved_offset, size, row_ids, timestamps, dataChunk);
|
||||
segment->Insert(reserved_offset, size, row_ids, timestamps, insert_data.get());
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
return milvus::FailureCStatus(UnexpectedError, e.what());
|
||||
|
@ -199,15 +162,16 @@ PreInsert(CSegmentInterface c_segment, int64_t size, int64_t* offset) {
|
|||
}
|
||||
|
||||
CStatus
|
||||
Delete(CSegmentInterface c_segment,
|
||||
int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const uint64_t* timestamps) {
|
||||
Delete(
|
||||
CSegmentInterface c_segment, int64_t reserved_offset, int64_t size, const char* ids, const uint64_t* timestamps) {
|
||||
auto segment = (milvus::segcore::SegmentInterface*)c_segment;
|
||||
|
||||
auto proto = std::string(ids);
|
||||
Assert(!proto.empty());
|
||||
auto pks = std::make_unique<milvus::proto::schema::IDs>();
|
||||
auto suc = google::protobuf::TextFormat::ParseFromString(proto, pks.get());
|
||||
AssertInfo(suc, "unmarshal field data string failed");
|
||||
try {
|
||||
auto res = segment->Delete(reserved_offset, size, row_ids, timestamps);
|
||||
auto res = segment->Delete(reserved_offset, size, pks.get(), timestamps);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
return milvus::FailureCStatus(UnexpectedError, e.what());
|
||||
|
@ -228,8 +192,13 @@ LoadFieldData(CSegmentInterface c_segment, CLoadFieldDataInfo load_field_data_in
|
|||
auto segment_interface = reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
|
||||
auto segment = dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
|
||||
AssertInfo(segment != nullptr, "segment conversion failed");
|
||||
auto proto = std::string(load_field_data_info.blob);
|
||||
Assert(!proto.empty());
|
||||
auto field_data = std::make_unique<milvus::DataArray>();
|
||||
auto suc = google::protobuf::TextFormat::ParseFromString(proto, field_data.get());
|
||||
AssertInfo(suc, "unmarshal field data string failed");
|
||||
auto load_info =
|
||||
LoadFieldDataInfo{load_field_data_info.field_id, load_field_data_info.blob, load_field_data_info.row_count};
|
||||
LoadFieldDataInfo{load_field_data_info.field_id, field_data.get(), load_field_data_info.row_count};
|
||||
segment->LoadFieldData(load_info);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
|
@ -243,8 +212,13 @@ LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_re
|
|||
auto segment_interface = reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
|
||||
auto segment = dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
|
||||
AssertInfo(segment != nullptr, "segment conversion failed");
|
||||
auto load_info = LoadDeletedRecordInfo{deleted_record_info.timestamps, deleted_record_info.primary_keys,
|
||||
deleted_record_info.row_count};
|
||||
auto proto = std::string(deleted_record_info.primary_keys);
|
||||
Assert(!proto.empty());
|
||||
auto pks = std::make_unique<milvus::proto::schema::IDs>();
|
||||
auto suc = google::protobuf::TextFormat::ParseFromString(proto, pks.get());
|
||||
AssertInfo(suc, "unmarshal field data string failed");
|
||||
auto load_info =
|
||||
LoadDeletedRecordInfo{deleted_record_info.timestamps, pks.get(), deleted_record_info.row_count};
|
||||
segment->LoadDeletedRecord(load_info);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
|
|
|
@ -67,32 +67,11 @@ Insert(CSegmentInterface c_segment,
|
|||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const uint64_t* timestamps,
|
||||
void* raw_data,
|
||||
int sizeof_per_row,
|
||||
int64_t count);
|
||||
|
||||
CStatus
|
||||
InsertColumnData(CSegmentInterface c_segment,
|
||||
int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const uint64_t* timestamps,
|
||||
void* raw_data,
|
||||
int64_t count);
|
||||
const char* data_info);
|
||||
|
||||
CStatus
|
||||
PreInsert(CSegmentInterface c_segment, int64_t size, int64_t* offset);
|
||||
|
||||
CStatus
|
||||
Delete(CSegmentInterface c_segment,
|
||||
int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const int64_t* row_ids,
|
||||
const uint64_t* timestamps);
|
||||
|
||||
int64_t
|
||||
PreDelete(CSegmentInterface c_segment, int64_t size);
|
||||
|
||||
////////////////////////////// interfaces for sealed segment //////////////////////////////
|
||||
CStatus
|
||||
LoadFieldData(CSegmentInterface c_segment, CLoadFieldDataInfo load_field_data_info);
|
||||
|
@ -109,6 +88,12 @@ DropFieldData(CSegmentInterface c_segment, int64_t field_id);
|
|||
CStatus
|
||||
DropSealedSegmentIndex(CSegmentInterface c_segment, int64_t field_id);
|
||||
|
||||
////////////////////////////// interfaces for SegmentInterface //////////////////////////////
|
||||
CStatus
|
||||
Delete(CSegmentInterface c_segment, int64_t reserved_offset, int64_t size, const char* ids, const uint64_t* timestamps);
|
||||
|
||||
int64_t
|
||||
PreDelete(CSegmentInterface c_segment, int64_t size);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -41,6 +41,7 @@ enum class DataType {
|
|||
DOUBLE = 11,
|
||||
|
||||
STRING = 20,
|
||||
VARCHAR = 21,
|
||||
|
||||
VECTOR_BINARY = 100,
|
||||
VECTOR_FLOAT = 101,
|
||||
|
|
|
@ -33,12 +33,14 @@ set(MILVUS_TEST_FILES
|
|||
test_query.cpp
|
||||
test_reduce.cpp
|
||||
test_reduce_c.cpp
|
||||
test_relational.cpp
|
||||
test_retrieve.cpp
|
||||
test_scalar_index.cpp
|
||||
test_sealed.cpp
|
||||
test_segcore.cpp
|
||||
test_similarity_corelation.cpp
|
||||
test_span.cpp
|
||||
test_string_expr.cpp
|
||||
test_timestamp_index.cpp
|
||||
test_utils.cpp
|
||||
)
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "indexbuilder/index_c.h"
|
||||
#include "indexbuilder/utils.h"
|
||||
#include "test_utils/indexbuilder_test_utils.h"
|
||||
#include "common/Consts.h"
|
||||
|
||||
constexpr int64_t NB = 1000000;
|
||||
|
||||
|
@ -59,7 +60,7 @@ IndexBuilder_build(benchmark::State& state) {
|
|||
|
||||
auto is_binary = state.range(2);
|
||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
||||
for (auto _ : state) {
|
||||
|
@ -88,7 +89,7 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
|
|||
|
||||
auto is_binary = state.range(2);
|
||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
||||
for (auto _ : state) {
|
||||
|
|
|
@ -76,10 +76,7 @@ Search_SmallIndex(benchmark::State& state) {
|
|||
segment->disable_small_index();
|
||||
}
|
||||
segment->PreInsert(N);
|
||||
ColumnBasedRawData raw_data;
|
||||
raw_data.columns_ = dataset_.cols_;
|
||||
raw_data.count = N;
|
||||
segment->Insert(0, N, dataset_.row_ids_.data(), dataset_.timestamps_.data(), raw_data);
|
||||
segment->Insert(0, N, dataset_.row_ids_.data(), dataset_.timestamps_.data(), dataset_.raw_);
|
||||
|
||||
Timestamp time = 10000000;
|
||||
|
||||
|
@ -104,8 +101,8 @@ Search_Sealed(benchmark::State& state) {
|
|||
// Brute Force
|
||||
} else if (choice == 1) {
|
||||
// ivf
|
||||
auto vec = (const float*)dataset_.cols_[0].data();
|
||||
auto indexing = GenIndexing(N, dim, vec);
|
||||
auto vec = dataset_.get_col<float>(milvus::FieldId(100));
|
||||
auto indexing = GenIndexing(N, dim, vec.data());
|
||||
LoadIndexInfo info;
|
||||
info.index = indexing;
|
||||
info.field_id = (*schema)[FieldName("fakevec")].get_id().get();
|
||||
|
|
|
@ -18,8 +18,9 @@ using namespace milvus::segcore;
|
|||
TEST(Binary, Insert) {
|
||||
int64_t N = 100000;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto vec_fid = schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
auto offset = segment->PreInsert(N);
|
||||
|
|
|
@ -18,10 +18,10 @@ TEST(Bitmap, Naive) {
|
|||
using namespace milvus::segcore;
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("height", DataType::FLOAT);
|
||||
auto field_id = schema->AddDebugField("height", DataType::FLOAT);
|
||||
int N = 10000;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
auto vec = raw_data.get_col<float>(0);
|
||||
auto vec = raw_data.get_col<float>(field_id);
|
||||
auto sort_index = std::make_shared<scalar::ScalarIndexSort<float>>();
|
||||
sort_index->Build(N, vec.data());
|
||||
{
|
||||
|
|
|
@ -53,6 +53,26 @@ TEST_F(BoolIndexTest, Constructor) {
|
|||
auto index = milvus::scalar::CreateBoolIndex();
|
||||
}
|
||||
|
||||
TEST_F(BoolIndexTest, Count) {
|
||||
{
|
||||
auto index = milvus::scalar::CreateBoolIndex();
|
||||
index->BuildWithDataset(all_true_ds);
|
||||
ASSERT_EQ(n, index->Count());
|
||||
}
|
||||
|
||||
{
|
||||
auto index = milvus::scalar::CreateBoolIndex();
|
||||
index->BuildWithDataset(all_false_ds);
|
||||
ASSERT_EQ(n, index->Count());
|
||||
}
|
||||
|
||||
{
|
||||
auto index = milvus::scalar::CreateBoolIndex();
|
||||
index->BuildWithDataset(half_ds);
|
||||
ASSERT_EQ(n, index->Count());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(BoolIndexTest, In) {
|
||||
auto true_test = std::make_unique<bool>(true);
|
||||
auto false_test = std::make_unique<bool>(false);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -103,7 +103,7 @@ TEST(Expr, Range) {
|
|||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
ShowPlanNodeVisitor shower;
|
||||
Assert(plan->tag2field_.at("$0") == schema->get_offset(FieldName("fakevec")));
|
||||
Assert(plan->tag2field_.at("$0") == schema->get_field_id(FieldName("fakevec")));
|
||||
auto out = shower.call_child(*plan->plan_node_);
|
||||
std::cout << out.dump(4);
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ TEST(Expr, RangeBinary) {
|
|||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
ShowPlanNodeVisitor shower;
|
||||
Assert(plan->tag2field_.at("$0") == schema->get_offset(FieldName("fakevec")));
|
||||
Assert(plan->tag2field_.at("$0") == schema->get_field_id(FieldName("fakevec")));
|
||||
auto out = shower.call_child(*plan->plan_node_);
|
||||
std::cout << out.dump(4);
|
||||
}
|
||||
|
@ -231,14 +231,14 @@ TEST(Expr, ShowExecutor) {
|
|||
using namespace milvus::segcore;
|
||||
auto node = std::make_unique<FloatVectorANNS>();
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto field_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
int64_t num_queries = 100L;
|
||||
auto raw_data = DataGen(schema, num_queries);
|
||||
auto& info = node->search_info_;
|
||||
|
||||
info.metric_type_ = MetricType::METRIC_L2;
|
||||
info.topk_ = 20;
|
||||
info.field_offset_ = FieldOffset(0);
|
||||
info.field_id_ = field_id;
|
||||
node->predicate_ = std::nullopt;
|
||||
ShowPlanNodeVisitor show_visitor;
|
||||
PlanNodePtr base(node.release());
|
||||
|
@ -291,8 +291,9 @@ TEST(Expr, TestRange) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -300,7 +301,7 @@ TEST(Expr, TestRange) {
|
|||
int num_iters = 100;
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_age_col = raw_data.get_col<int>(1);
|
||||
auto new_age_col = raw_data.get_col<int>(i64_fid);
|
||||
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
|
@ -373,8 +374,9 @@ TEST(Expr, TestTerm) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -382,7 +384,7 @@ TEST(Expr, TestTerm) {
|
|||
int num_iters = 100;
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_age_col = raw_data.get_col<int>(1);
|
||||
auto new_age_col = raw_data.get_col<int>(i64_fid);
|
||||
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
|
@ -445,7 +447,7 @@ TEST(Expr, TestSimpleDsl) {
|
|||
{
|
||||
Json dsl;
|
||||
dsl["must"] = Json::array({vec_dsl, get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) == 0b1011; });
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -453,7 +455,7 @@ TEST(Expr, TestSimpleDsl) {
|
|||
Json sub_dsl;
|
||||
sub_dsl["must"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
dsl["must"] = Json::array({sub_dsl, vec_dsl});
|
||||
testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) == 0b1011; });
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -461,7 +463,7 @@ TEST(Expr, TestSimpleDsl) {
|
|||
Json sub_dsl;
|
||||
sub_dsl["should"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
dsl["must"] = Json::array({sub_dsl, vec_dsl});
|
||||
testcases.emplace_back(dsl, [](int x) { return !!((x & 0b1111) ^ 0b0100); });
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return !!((x & 0b1111) ^ 0b0100); });
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -469,19 +471,20 @@ TEST(Expr, TestSimpleDsl) {
|
|||
Json sub_dsl;
|
||||
sub_dsl["must_not"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
dsl["must"] = Json::array({sub_dsl, vec_dsl});
|
||||
testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) != 0b1011; });
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) != 0b1011; });
|
||||
}
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
std::vector<int> age_col;
|
||||
std::vector<int64_t> age_col;
|
||||
int num_iters = 100;
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_age_col = raw_data.get_col<int>(1);
|
||||
auto new_age_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
|
@ -543,9 +546,10 @@ TEST(Expr, TestCompare) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age1", DataType::INT32);
|
||||
schema->AddDebugField("age2", DataType::INT64);
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i32_fid = schema->AddDebugField("age1", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age2", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -554,8 +558,8 @@ TEST(Expr, TestCompare) {
|
|||
int num_iters = 100;
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_age1_col = raw_data.get_col<int>(1);
|
||||
auto new_age2_col = raw_data.get_col<int64_t>(2);
|
||||
auto new_age1_col = raw_data.get_col<int>(i32_fid);
|
||||
auto new_age2_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
age1_col.insert(age1_col.end(), new_age1_col.begin(), new_age1_col.end());
|
||||
age2_col.insert(age2_col.end(), new_age2_col.begin(), new_age2_col.end());
|
||||
seg->PreInsert(N);
|
||||
|
@ -592,80 +596,93 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
"right_operand": 4,
|
||||
"value": 8
|
||||
}
|
||||
})", [](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
|
||||
})",
|
||||
[](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
|
||||
{R"("EQ": {
|
||||
"SUB": {
|
||||
"right_operand": 500,
|
||||
"value": 1500
|
||||
}
|
||||
})", [](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
|
||||
})",
|
||||
[](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
|
||||
{R"("EQ": {
|
||||
"MUL": {
|
||||
"right_operand": 2,
|
||||
"value": 4000
|
||||
}
|
||||
})", [](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
|
||||
})",
|
||||
[](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"DIV": {
|
||||
"right_operand": 2,
|
||||
"value": 1000
|
||||
}
|
||||
})", [](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
|
||||
})",
|
||||
[](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
|
||||
{R"("EQ": {
|
||||
"MOD": {
|
||||
"right_operand": 100,
|
||||
"value": 0
|
||||
}
|
||||
})", [](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
|
||||
})",
|
||||
[](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})", [](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
|
||||
})",
|
||||
[](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})", [](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
|
||||
})",
|
||||
[](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr NE of various data types
|
||||
{R"("NE": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})", [](float v) { return (v + 500) != 2500; }, DataType::FLOAT},
|
||||
})",
|
||||
[](float v) { return (v + 500) != 2500; }, DataType::FLOAT},
|
||||
{R"("NE": {
|
||||
"SUB": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})", [](double v) { return (v - 500) != 2500; }, DataType::DOUBLE},
|
||||
})",
|
||||
[](double v) { return (v - 500) != 2500; }, DataType::DOUBLE},
|
||||
{R"("NE": {
|
||||
"MUL": {
|
||||
"right_operand": 2,
|
||||
"value": 2
|
||||
}
|
||||
})", [](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
|
||||
})",
|
||||
[](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
|
||||
{R"("NE": {
|
||||
"DIV": {
|
||||
"right_operand": 2,
|
||||
"value": 1000
|
||||
}
|
||||
})", [](int16_t v) { return (v / 2) != 1000; }, DataType::INT16},
|
||||
})",
|
||||
[](int16_t v) { return (v / 2) != 1000; }, DataType::INT16},
|
||||
{R"("NE": {
|
||||
"MOD": {
|
||||
"right_operand": 100,
|
||||
"value": 0
|
||||
}
|
||||
})", [](int32_t v) { return (v % 100) != 0; }, DataType::INT32},
|
||||
})",
|
||||
[](int32_t v) { return (v % 100) != 0; }, DataType::INT32},
|
||||
{R"("NE": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})", [](int64_t v) { return (v + 500) != 2500; }, DataType::INT64},
|
||||
})",
|
||||
[](int64_t v) { return (v + 500) != 2500; }, DataType::INT64},
|
||||
};
|
||||
|
||||
std::string dsl_string_tmp = R"({
|
||||
|
@ -713,7 +730,6 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
@@@@
|
||||
})";
|
||||
|
||||
|
||||
std::string dsl_string_float = R"(
|
||||
"age_float": {
|
||||
@@@@
|
||||
|
@ -725,13 +741,14 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age8", DataType::INT8);
|
||||
schema->AddDebugField("age16", DataType::INT16);
|
||||
schema->AddDebugField("age32", DataType::INT32);
|
||||
schema->AddDebugField("age64", DataType::INT64);
|
||||
schema->AddDebugField("age_float", DataType::FLOAT);
|
||||
schema->AddDebugField("age_double", DataType::DOUBLE);
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
|
||||
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
|
||||
auto float_fid = schema->AddDebugField("age_float", DataType::FLOAT);
|
||||
auto double_fid = schema->AddDebugField("age_double", DataType::DOUBLE);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -745,12 +762,12 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
|
||||
auto new_age8_col = raw_data.get_col<int8_t>(1);
|
||||
auto new_age16_col = raw_data.get_col<int16_t>(2);
|
||||
auto new_age32_col = raw_data.get_col<int32_t>(3);
|
||||
auto new_age64_col = raw_data.get_col<int64_t>(4);
|
||||
auto new_age_float_col = raw_data.get_col<float>(5);
|
||||
auto new_age_double_col = raw_data.get_col<double>(6);
|
||||
auto new_age8_col = raw_data.get_col<int8_t>(i8_fid);
|
||||
auto new_age16_col = raw_data.get_col<int16_t>(i16_fid);
|
||||
auto new_age32_col = raw_data.get_col<int32_t>(i32_fid);
|
||||
auto new_age64_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
auto new_age_float_col = raw_data.get_col<float>(float_fid);
|
||||
auto new_age_double_col = raw_data.get_col<double>(double_fid);
|
||||
|
||||
age8_col.insert(age8_col.end(), new_age8_col.begin(), new_age8_col.end());
|
||||
age16_col.insert(age16_col.end(), new_age16_col.begin(), new_age16_col.end());
|
||||
|
@ -832,39 +849,45 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
"right_operand": 500,
|
||||
"value": 2500.00
|
||||
}
|
||||
})", "Assert \"(value.is_number_integer())\"", DataType::INT32},
|
||||
})",
|
||||
"Assert \"(value.is_number_integer())\"", DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500.0,
|
||||
"value": 2500
|
||||
}
|
||||
})", "Assert \"(right_operand.is_number_integer())\"", DataType::INT32},
|
||||
})",
|
||||
"Assert \"(right_operand.is_number_integer())\"", DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500.0,
|
||||
"value": true
|
||||
}
|
||||
})", "Assert \"(value.is_number())\"", DataType::FLOAT},
|
||||
})",
|
||||
"Assert \"(value.is_number())\"", DataType::FLOAT},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": "500",
|
||||
"value": 2500.0
|
||||
}
|
||||
})", "Assert \"(right_operand.is_number())\"", DataType::FLOAT},
|
||||
})",
|
||||
"Assert \"(right_operand.is_number())\"", DataType::FLOAT},
|
||||
// Check unsupported arithmetic operator type
|
||||
{R"("EQ": {
|
||||
"EXP": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})", "arith op(exp) not found", DataType::INT32},
|
||||
})",
|
||||
"arith op(exp) not found", DataType::INT32},
|
||||
// Check unsupported data type
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": true,
|
||||
"value": false
|
||||
}
|
||||
})", "bool type is not supported", DataType::BOOL},
|
||||
})",
|
||||
"bool type is not supported", DataType::BOOL},
|
||||
};
|
||||
|
||||
std::string dsl_string_tmp = R"({
|
||||
|
@ -932,12 +955,10 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
try {
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
FAIL() << "Expected AssertionError: " << assert_info << " not thrown";
|
||||
}
|
||||
catch(const std::exception& err) {
|
||||
} catch (const std::exception& err) {
|
||||
std::string err_msg = err.what();
|
||||
ASSERT_TRUE(err_msg.find(assert_info) != std::string::npos);
|
||||
}
|
||||
catch(...) {
|
||||
} catch (...) {
|
||||
FAIL() << "Expected AssertionError: " << assert_info << " not thrown";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ TEST(FloatVecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
|
||||
CDataType dtype = FloatVector;
|
||||
CIndex index;
|
||||
|
@ -94,7 +94,7 @@ TEST(BinaryVecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(0);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
|
||||
CDataType dtype = BinaryVector;
|
||||
CIndex index;
|
||||
|
|
|
@ -76,14 +76,14 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
|
|||
|
||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
||||
if (!is_binary) {
|
||||
xb_data = dataset.get_col<float>(0);
|
||||
xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
xq_data = dataset.get_col<float>(0);
|
||||
xq_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
xq_dataset = knowhere::GenDataset(NQ, DIM, xq_data.data());
|
||||
} else {
|
||||
xb_bin_data = dataset.get_col<uint8_t>(0);
|
||||
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataset(NB, DIM, xb_bin_data.data());
|
||||
xq_bin_data = dataset.get_col<uint8_t>(0);
|
||||
xq_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xq_dataset = knowhere::GenDataset(NQ, DIM, xq_bin_data.data());
|
||||
}
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ TEST(PQ, Build) {
|
|||
auto conf = generate_conf(index_type, metric_type);
|
||||
auto index = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
ASSERT_NO_THROW(index->Train(xb_dataset, conf));
|
||||
ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
|
||||
|
@ -125,7 +125,7 @@ TEST(IVFFLATNM, Build) {
|
|||
auto conf = generate_conf(index_type, metric_type);
|
||||
auto index = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
ASSERT_NO_THROW(index->Train(xb_dataset, conf));
|
||||
ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
|
||||
|
@ -139,7 +139,7 @@ TEST(IVFFLATNM, Query) {
|
|||
auto conf = generate_conf(index_type, metric_type);
|
||||
auto index = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
ASSERT_NO_THROW(index->Train(xb_dataset, conf));
|
||||
ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
|
||||
|
@ -149,7 +149,7 @@ TEST(IVFFLATNM, Query) {
|
|||
bptr->size = DIM * NB * sizeof(float);
|
||||
bs.Append(RAW_DATA, bptr);
|
||||
index->Load(bs);
|
||||
auto xq_data = dataset.get_col<float>(0);
|
||||
auto xq_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xq_dataset = knowhere::GenDataset(NQ, DIM, xq_data.data());
|
||||
auto result = index->Query(xq_dataset, conf, nullptr);
|
||||
|
||||
|
@ -189,7 +189,7 @@ TEST(BINFLAT, Build) {
|
|||
auto conf = generate_conf(index_type, metric_type);
|
||||
auto index = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(0);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
std::vector<knowhere::IDType> ids(NB, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
@ -222,12 +222,12 @@ TEST(BinIVFFlat, Build_and_Query) {
|
|||
auto dim = 128;
|
||||
auto nq = 10;
|
||||
auto dataset = GenDataset(std::max(nq, nb), metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(0);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
std::vector<knowhere::IDType> ids(nb, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = knowhere::GenDataset(nb, dim, xb_data.data());
|
||||
index->BuildAll(xb_dataset, conf);
|
||||
auto xq_data = dataset.get_col<float>(0);
|
||||
auto xq_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xq_dataset = knowhere::GenDataset(nq, dim, xq_data.data());
|
||||
auto result = index->Query(xq_dataset, conf, nullptr);
|
||||
|
||||
|
@ -258,7 +258,7 @@ TEST(BINIDMAP, Build) {
|
|||
auto conf = generate_conf(index_type, metric_type);
|
||||
auto index = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(0);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
std::vector<knowhere::IDType> ids(NB, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
@ -278,7 +278,7 @@ TEST(PQWrapper, Build) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
|
||||
|
@ -298,7 +298,7 @@ TEST(IVFFLATNMWrapper, Build) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
|
||||
|
@ -319,7 +319,7 @@ TEST(IVFFLATNMWrapper, Codec) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(flat_nb, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataset(flat_nb, DIM, xb_data.data());
|
||||
auto index_wrapper =
|
||||
std::make_unique<milvus::indexbuilder::VecIndexCreator>(type_params_str.c_str(), index_params_str.c_str());
|
||||
|
@ -353,7 +353,7 @@ TEST(BinFlatWrapper, Build) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(0);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
std::vector<knowhere::IDType> ids(NB, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
@ -376,7 +376,7 @@ TEST(BinIdMapWrapper, Build) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(0);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
std::vector<knowhere::IDType> ids(NB, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
|
|
|
@ -268,10 +268,10 @@ TEST(Indexing, BinaryBruteForce) {
|
|||
int64_t dim = 8192;
|
||||
auto result_count = topk * num_queries;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("age", DataType::INT64);
|
||||
auto vec_fid = schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
auto bin_vec = dataset.get_col<uint8_t>(0);
|
||||
auto bin_vec = dataset.get_col<uint8_t>(vec_fid);
|
||||
auto query_data = 1024 * dim / 8 + bin_vec.data();
|
||||
query::dataset::SearchDataset search_dataset{
|
||||
faiss::MetricType::METRIC_Jaccard, //
|
||||
|
@ -287,7 +287,7 @@ TEST(Indexing, BinaryBruteForce) {
|
|||
SearchResult sr;
|
||||
sr.num_queries_ = num_queries;
|
||||
sr.topk_ = topk;
|
||||
sr.ids_ = std::move(sub_result.mutable_ids());
|
||||
sr.seg_offsets_ = std::move(sub_result.mutable_seg_offsets());
|
||||
sr.distances_ = std::move(sub_result.mutable_distances());
|
||||
|
||||
auto json = SearchResultToJson(sr);
|
||||
|
|
|
@ -29,20 +29,18 @@ namespace spb = proto::schema;
|
|||
static SchemaPtr
|
||||
getStandardSchema() {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField(FieldName("FloatVectorField"), FieldId(100 + spb::DataType::FloatVector), DataType::VECTOR_FLOAT,
|
||||
16, MetricType::METRIC_L2);
|
||||
schema->AddField(FieldName("BinaryVectorField"), FieldId(100 + spb::DataType::BinaryVector),
|
||||
DataType::VECTOR_BINARY, 16, MetricType::METRIC_Jaccard);
|
||||
schema->AddField(FieldName("Int64Field"), FieldId(100 + spb::DataType::Int64), DataType::INT64);
|
||||
schema->AddField(FieldName("Int32Field"), FieldId(100 + spb::DataType::Int32), DataType::INT32);
|
||||
schema->AddField(FieldName("Int16Field"), FieldId(100 + spb::DataType::Int16), DataType::INT16);
|
||||
schema->AddField(FieldName("Int8Field"), FieldId(100 + spb::DataType::Int8), DataType::INT8);
|
||||
schema->AddField(FieldName("DoubleField"), FieldId(100 + spb::DataType::Double), DataType::DOUBLE);
|
||||
schema->AddField(FieldName("FloatField"), FieldId(100 + spb::DataType::Float), DataType::FLOAT);
|
||||
schema->AddDebugField("FloatVectorField", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("BinaryVectorField", DataType::VECTOR_BINARY, 16, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("Int64Field", DataType::INT64);
|
||||
schema->AddDebugField("Int32Field", DataType::INT32);
|
||||
schema->AddDebugField("Int16Field", DataType::INT16);
|
||||
schema->AddDebugField("Int8Field", DataType::INT8);
|
||||
schema->AddDebugField("DoubleField", DataType::DOUBLE);
|
||||
schema->AddDebugField("FloatField", DataType::FLOAT);
|
||||
return schema;
|
||||
}
|
||||
|
||||
class PlanProtoTest : public ::testing::TestWithParam<std::tuple<spb::DataType>> {
|
||||
class PlanProtoTest : public ::testing::TestWithParam<std::tuple<std::string>> {
|
||||
public:
|
||||
PlanProtoTest() {
|
||||
schema = getStandardSchema();
|
||||
|
@ -54,40 +52,44 @@ class PlanProtoTest : public ::testing::TestWithParam<std::tuple<spb::DataType>>
|
|||
|
||||
INSTANTIATE_TEST_CASE_P(InstName,
|
||||
PlanProtoTest,
|
||||
::testing::Values( //
|
||||
std::make_tuple(spb::DataType::Double), //
|
||||
std::make_tuple(spb::DataType::Float), //
|
||||
std::make_tuple(spb::DataType::Int64), //
|
||||
std::make_tuple(spb::DataType::Int32), //
|
||||
std::make_tuple(spb::DataType::Int16), //
|
||||
std::make_tuple(spb::DataType::Int8) //
|
||||
::testing::Values( //
|
||||
std::make_tuple("DoubleField"), //
|
||||
std::make_tuple("FloatField"), //
|
||||
std::make_tuple("Int64Field"), //
|
||||
std::make_tuple("Int32Field"), //
|
||||
std::make_tuple("Int16Field"), //
|
||||
std::make_tuple("Int8Field") //
|
||||
));
|
||||
|
||||
TEST_P(PlanProtoTest, Range) {
|
||||
// xxx.query(predicates = "int64field > 3", topk = 10, ...)
|
||||
auto data_type = std::get<0>(GetParam());
|
||||
auto data_type_str = spb::DataType_Name(data_type);
|
||||
auto field_id = 100 + (int)data_type;
|
||||
auto field_name = data_type_str + "Field";
|
||||
FieldName vec_field_name = FieldName("FloatVectorField");
|
||||
FieldId vec_float_field_id = schema->get_field_id(vec_field_name);
|
||||
|
||||
auto field_name = std::get<0>(GetParam());
|
||||
auto field_id = schema->get_field_id(FieldName(field_name));
|
||||
auto data_type = schema->operator[](field_id).get_data_type();
|
||||
auto data_type_str = spb::DataType_Name(int(data_type));
|
||||
|
||||
string value_tag = "bool_val";
|
||||
if (datatype_is_floating((DataType)data_type)) {
|
||||
if (datatype_is_floating(data_type)) {
|
||||
value_tag = "float_val";
|
||||
} else if (datatype_is_integer((DataType)data_type)) {
|
||||
} else if (datatype_is_integer(data_type)) {
|
||||
value_tag = "int64_val";
|
||||
}
|
||||
|
||||
auto fmt1 = boost::format(R"(
|
||||
vector_anns: <
|
||||
field_id: 201
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
unary_range_expr: <
|
||||
column_info: <
|
||||
field_id: %1%
|
||||
data_type: %2%
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
op: GreaterThan
|
||||
value: <
|
||||
%3%: 3
|
||||
%4%: 3
|
||||
>
|
||||
>
|
||||
>
|
||||
|
@ -99,8 +101,8 @@ vector_anns: <
|
|||
>
|
||||
placeholder_tag: "$0"
|
||||
>
|
||||
)") % field_id % data_type_str %
|
||||
value_tag;
|
||||
)") % vec_float_field_id.get() %
|
||||
field_id.get() % data_type_str % value_tag;
|
||||
|
||||
auto proto_text = fmt1.str();
|
||||
planpb::PlanNode node_proto;
|
||||
|
@ -148,34 +150,38 @@ vector_anns: <
|
|||
|
||||
TEST_P(PlanProtoTest, TermExpr) {
|
||||
// xxx.query(predicates = "int64field in [1, 2, 3]", topk = 10, ...)
|
||||
auto data_type = std::get<0>(GetParam());
|
||||
auto data_type_str = spb::DataType_Name(data_type);
|
||||
auto field_id = 100 + (int)data_type;
|
||||
auto field_name = data_type_str + "Field";
|
||||
FieldName vec_field_name = FieldName("FloatVectorField");
|
||||
FieldId vec_float_field_id = schema->get_field_id(vec_field_name);
|
||||
|
||||
auto field_name = std::get<0>(GetParam());
|
||||
auto field_id = schema->get_field_id(FieldName(field_name));
|
||||
auto data_type = schema->operator[](field_id).get_data_type();
|
||||
auto data_type_str = spb::DataType_Name(int(data_type));
|
||||
|
||||
string value_tag = "bool_val";
|
||||
if (datatype_is_floating((DataType)data_type)) {
|
||||
if (datatype_is_floating(data_type)) {
|
||||
value_tag = "float_val";
|
||||
} else if (datatype_is_integer((DataType)data_type)) {
|
||||
} else if (datatype_is_integer(data_type)) {
|
||||
value_tag = "int64_val";
|
||||
}
|
||||
|
||||
auto fmt1 = boost::format(R"(
|
||||
vector_anns: <
|
||||
field_id: 201
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
term_expr: <
|
||||
column_info: <
|
||||
field_id: %1%
|
||||
data_type: %2%
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
values: <
|
||||
%3%: 1
|
||||
%4%: 1
|
||||
>
|
||||
values: <
|
||||
%3%: 2
|
||||
%4%: 2
|
||||
>
|
||||
values: <
|
||||
%3%: 3
|
||||
%4%: 3
|
||||
>
|
||||
>
|
||||
>
|
||||
|
@ -187,8 +193,8 @@ vector_anns: <
|
|||
>
|
||||
placeholder_tag: "$0"
|
||||
>
|
||||
)") % field_id % data_type_str %
|
||||
value_tag;
|
||||
)") % vec_float_field_id.get() %
|
||||
field_id.get() % data_type_str % value_tag;
|
||||
|
||||
auto proto_text = fmt1.str();
|
||||
planpb::PlanNode node_proto;
|
||||
|
@ -237,32 +243,31 @@ vector_anns: <
|
|||
TEST(PlanProtoTest, NotExpr) {
|
||||
auto schema = getStandardSchema();
|
||||
// xxx.query(predicates = "not (int64field > 3)", topk = 10, ...)
|
||||
FieldName vec_field_name = FieldName("FloatVectorField");
|
||||
FieldId vec_float_field_id = schema->get_field_id(vec_field_name);
|
||||
|
||||
FieldName int64_field_name = FieldName("Int64Field");
|
||||
FieldId int64_field_id = schema->get_field_id(int64_field_name);
|
||||
string value_tag = "int64_val";
|
||||
|
||||
auto data_type = spb::DataType::Int64;
|
||||
auto data_type_str = spb::DataType_Name(data_type);
|
||||
auto field_id = 100 + (int)data_type;
|
||||
auto field_name = data_type_str + "Field";
|
||||
string value_tag = "bool_val";
|
||||
if (datatype_is_floating((DataType)data_type)) {
|
||||
value_tag = "float_val";
|
||||
} else if (datatype_is_integer((DataType)data_type)) {
|
||||
value_tag = "int64_val";
|
||||
}
|
||||
auto data_type_str = spb::DataType_Name(int(data_type));
|
||||
|
||||
auto fmt1 = boost::format(R"(
|
||||
vector_anns: <
|
||||
field_id: 201
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
unary_expr: <
|
||||
op: Not
|
||||
child: <
|
||||
unary_range_expr: <
|
||||
column_info: <
|
||||
field_id: %1%
|
||||
data_type: %2%
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
op: GreaterThan
|
||||
value: <
|
||||
%3%: 3
|
||||
%4%: 3
|
||||
>
|
||||
>
|
||||
>
|
||||
|
@ -276,8 +281,8 @@ vector_anns: <
|
|||
>
|
||||
placeholder_tag: "$0"
|
||||
>
|
||||
)") % field_id % data_type_str %
|
||||
value_tag;
|
||||
)") % vec_float_field_id.get() %
|
||||
int64_field_id.get() % data_type_str % value_tag;
|
||||
|
||||
auto proto_text = fmt1.str();
|
||||
planpb::PlanNode node_proto;
|
||||
|
@ -319,7 +324,7 @@ vector_anns: <
|
|||
]
|
||||
}
|
||||
}
|
||||
)") % field_name);
|
||||
)") % int64_field_name.get());
|
||||
|
||||
auto ref_plan = CreatePlan(*schema, dsl_text);
|
||||
auto ref_json = ShowPlanNodeVisitor().call_child(*ref_plan->plan_node_);
|
||||
|
@ -330,32 +335,31 @@ vector_anns: <
|
|||
TEST(PlanProtoTest, AndOrExpr) {
|
||||
auto schema = getStandardSchema();
|
||||
// xxx.query(predicates = "(int64field < 3) && (int64field > 2 || int64field == 1)", topk = 10, ...)
|
||||
FieldName vec_field_name = FieldName("FloatVectorField");
|
||||
FieldId vec_float_field_id = schema->get_field_id(vec_field_name);
|
||||
|
||||
FieldName int64_field_name = FieldName("Int64Field");
|
||||
FieldId int64_field_id = schema->get_field_id(int64_field_name);
|
||||
string value_tag = "int64_val";
|
||||
|
||||
auto data_type = spb::DataType::Int64;
|
||||
auto data_type_str = spb::DataType_Name(data_type);
|
||||
auto field_id = 100 + (int)data_type;
|
||||
auto field_name = data_type_str + "Field";
|
||||
string value_tag = "bool_val";
|
||||
if (datatype_is_floating((DataType)data_type)) {
|
||||
value_tag = "float_val";
|
||||
} else if (datatype_is_integer((DataType)data_type)) {
|
||||
value_tag = "int64_val";
|
||||
}
|
||||
auto data_type_str = spb::DataType_Name(int(data_type));
|
||||
|
||||
auto fmt1 = boost::format(R"(
|
||||
vector_anns: <
|
||||
field_id: 201
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
binary_expr: <
|
||||
op: LogicalAnd
|
||||
left: <
|
||||
unary_range_expr: <
|
||||
column_info: <
|
||||
field_id: 105
|
||||
data_type: Int64
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
op: LessThan
|
||||
value: <
|
||||
int64_val: 3
|
||||
%4%: 3
|
||||
>
|
||||
>
|
||||
>
|
||||
|
@ -365,24 +369,24 @@ vector_anns: <
|
|||
left: <
|
||||
unary_range_expr: <
|
||||
column_info: <
|
||||
field_id: 105
|
||||
data_type: Int64
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
op: GreaterThan
|
||||
value: <
|
||||
int64_val: 2
|
||||
%4%: 2
|
||||
>
|
||||
>
|
||||
>
|
||||
right: <
|
||||
unary_range_expr: <
|
||||
column_info: <
|
||||
field_id: 105
|
||||
data_type: Int64
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
int64_val: 1
|
||||
%4%: 1
|
||||
>
|
||||
>
|
||||
>
|
||||
|
@ -398,7 +402,8 @@ vector_anns: <
|
|||
>
|
||||
placeholder_tag: "$0"
|
||||
>
|
||||
)");
|
||||
)") % vec_float_field_id.get() %
|
||||
int64_field_id.get() % data_type_str % value_tag;
|
||||
|
||||
auto proto_text = fmt1.str();
|
||||
planpb::PlanNode node_proto;
|
||||
|
@ -457,7 +462,7 @@ vector_anns: <
|
|||
]
|
||||
}
|
||||
}
|
||||
)") % field_name);
|
||||
)") % int64_field_name.get());
|
||||
|
||||
auto ref_plan = CreatePlan(*schema, dsl_text);
|
||||
auto ref_json = ShowPlanNodeVisitor().call_child(*ref_plan->plan_node_);
|
||||
|
@ -467,25 +472,29 @@ vector_anns: <
|
|||
|
||||
TEST_P(PlanProtoTest, CompareExpr) {
|
||||
auto schema = getStandardSchema();
|
||||
schema->AddField(FieldName("age1"), FieldId(128), DataType::INT64);
|
||||
auto age_fid = schema->AddDebugField("age1", DataType::INT64);
|
||||
// xxx.query(predicates = "int64field < int64field", topk = 10, ...)
|
||||
auto data_type = std::get<0>(GetParam());
|
||||
auto field_id = 100 + (int)data_type;
|
||||
auto data_type_str = spb::DataType_Name(data_type);
|
||||
auto field_name = data_type_str + "Field";
|
||||
|
||||
FieldName vec_field_name = FieldName("FloatVectorField");
|
||||
FieldId vec_float_field_id = schema->get_field_id(vec_field_name);
|
||||
|
||||
auto field_name = std::get<0>(GetParam());
|
||||
auto field_id = schema->get_field_id(FieldName(field_name));
|
||||
auto data_type = schema->operator[](field_id).get_data_type();
|
||||
auto data_type_str = spb::DataType_Name(int(data_type));
|
||||
|
||||
auto fmt1 = boost::format(R"(
|
||||
vector_anns: <
|
||||
field_id: 201
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
compare_expr: <
|
||||
left_column_info: <
|
||||
field_id: 128
|
||||
field_id: %2%
|
||||
data_type: Int64
|
||||
>
|
||||
right_column_info: <
|
||||
field_id: %1%
|
||||
data_type: %2%
|
||||
field_id: %3%
|
||||
data_type: %4%
|
||||
>
|
||||
op: LessThan
|
||||
>
|
||||
|
@ -498,7 +507,8 @@ vector_anns: <
|
|||
>
|
||||
placeholder_tag: "$0"
|
||||
>
|
||||
)") % field_id % data_type_str;
|
||||
)") % vec_float_field_id.get() %
|
||||
age_fid.get() % field_id.get() % data_type_str;
|
||||
|
||||
auto proto_text = fmt1.str();
|
||||
planpb::PlanNode node_proto;
|
||||
|
@ -547,33 +557,48 @@ vector_anns: <
|
|||
|
||||
TEST_P(PlanProtoTest, BinaryArithOpEvalRange) {
|
||||
// xxx.query(predicates = "int64field > 3", topk = 10, ...)
|
||||
auto data_type = std::get<0>(GetParam());
|
||||
auto data_type_str = spb::DataType_Name(data_type);
|
||||
auto field_id = 100 + (int)data_type;
|
||||
auto field_name = data_type_str + "Field";
|
||||
// auto data_type = std::get<0>(GetParam());
|
||||
// auto data_type_str = spb::DataType_Name(data_type);
|
||||
// auto field_id = 100 + (int)data_type;
|
||||
// auto field_name = data_type_str + "Field";
|
||||
// string value_tag = "bool_val";
|
||||
// if (datatype_is_floating((DataType)data_type)) {
|
||||
// value_tag = "float_val";
|
||||
// } else if (datatype_is_integer((DataType)data_type)) {
|
||||
// value_tag = "int64_val";
|
||||
// }
|
||||
|
||||
FieldName vec_field_name = FieldName("FloatVectorField");
|
||||
FieldId vec_float_field_id = schema->get_field_id(vec_field_name);
|
||||
|
||||
auto field_name = std::get<0>(GetParam());
|
||||
auto field_id = schema->get_field_id(FieldName(field_name));
|
||||
auto data_type = schema->operator[](field_id).get_data_type();
|
||||
auto data_type_str = spb::DataType_Name(int(data_type));
|
||||
|
||||
string value_tag = "bool_val";
|
||||
if (datatype_is_floating((DataType)data_type)) {
|
||||
if (datatype_is_floating(data_type)) {
|
||||
value_tag = "float_val";
|
||||
} else if (datatype_is_integer((DataType)data_type)) {
|
||||
} else if (datatype_is_integer(data_type)) {
|
||||
value_tag = "int64_val";
|
||||
}
|
||||
|
||||
auto fmt1 = boost::format(R"(
|
||||
vector_anns: <
|
||||
field_id: 201
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
binary_arith_op_eval_range_expr: <
|
||||
column_info: <
|
||||
field_id: %1%
|
||||
data_type: %2%
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
arith_op: Add
|
||||
right_operand: <
|
||||
%3%: 1029
|
||||
%4%: 1029
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
%3%: 2016
|
||||
%4%: 2016
|
||||
>
|
||||
>
|
||||
>
|
||||
|
@ -585,8 +610,8 @@ vector_anns: <
|
|||
>
|
||||
placeholder_tag: "$0"
|
||||
>
|
||||
)") % field_id % data_type_str %
|
||||
value_tag;
|
||||
)") % vec_float_field_id.get() %
|
||||
field_id.get() % data_type_str % value_tag;
|
||||
|
||||
auto proto_text = fmt1.str();
|
||||
planpb::PlanNode node_proto;
|
||||
|
|
|
@ -35,13 +35,13 @@ TEST(Query, ShowExecutor) {
|
|||
using namespace milvus;
|
||||
auto node = std::make_unique<FloatVectorANNS>();
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto field_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
int64_t num_queries = 100L;
|
||||
auto raw_data = DataGen(schema, num_queries);
|
||||
auto& info = node->search_info_;
|
||||
info.metric_type_ = MetricType::METRIC_L2;
|
||||
info.topk_ = 20;
|
||||
info.field_offset_ = FieldOffset(1000);
|
||||
info.field_id_ = field_id;
|
||||
node->predicate_ = std::nullopt;
|
||||
ShowPlanNodeVisitor show_visitor;
|
||||
PlanNodePtr base(node.release());
|
||||
|
@ -140,6 +140,8 @@ TEST(Query, ExecWithPredicateLoader) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto counter_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(counter_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -171,10 +173,7 @@ TEST(Query, ExecWithPredicateLoader) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
ColumnBasedRawData raw_data;
|
||||
raw_data.columns_ = dataset.cols_;
|
||||
raw_data.count = N;
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), raw_data);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
|
@ -219,6 +218,8 @@ TEST(Query, ExecWithPredicateSmallN) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 7, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -271,6 +272,8 @@ TEST(Query, ExecWithPredicate) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -347,6 +350,8 @@ TEST(Query, ExecTerm) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -430,7 +435,7 @@ TEST(Query, ExecEmpty) {
|
|||
auto sr = segment->Search(plan.get(), *ph_group, time);
|
||||
std::cout << SearchResultToJson(*sr);
|
||||
|
||||
for (auto i : sr->ids_) {
|
||||
for (auto i : sr->seg_offsets_) {
|
||||
ASSERT_EQ(i, -1);
|
||||
}
|
||||
|
||||
|
@ -445,6 +450,8 @@ TEST(Query, ExecWithoutPredicateFlat) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, std::nullopt);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -489,6 +496,8 @@ TEST(Query, ExecWithoutPredicate) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -576,13 +585,15 @@ TEST(Indexing, InnerProduct) {
|
|||
]
|
||||
}
|
||||
})";
|
||||
schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_INNER_PRODUCT);
|
||||
auto vec_fid = schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_INNER_PRODUCT);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
auto col = dataset.get_col<float>(0);
|
||||
auto col = dataset.get_col<float>(vec_fid);
|
||||
|
||||
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, col.data());
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
@ -637,9 +648,9 @@ TEST(Query, FillSegment) {
|
|||
// dispatch here
|
||||
int N = 100000;
|
||||
auto dataset = DataGen(schema, N);
|
||||
const auto std_vec = dataset.get_col<int64_t>(1); // ids field
|
||||
const auto std_vfloat_vec = dataset.get_col<float>(0); // vector field
|
||||
const auto std_i32_vec = dataset.get_col<int32_t>(2); // scalar field
|
||||
const auto std_vec = dataset.get_col<int64_t>(FieldId(101)); // ids field
|
||||
const auto std_vfloat_vec = dataset.get_col<float>(FieldId(100)); // vector field
|
||||
const auto std_i32_vec = dataset.get_col<int32_t>(FieldId(102)); // scalar field
|
||||
|
||||
std::vector<std::unique_ptr<SegmentInternalInterface>> segments;
|
||||
segments.emplace_back([&] {
|
||||
|
@ -694,27 +705,32 @@ TEST(Query, FillSegment) {
|
|||
|
||||
for (auto& segment : segments) {
|
||||
plan->target_entries_.clear();
|
||||
plan->target_entries_.push_back(schema->get_offset(FieldName("fakevec")));
|
||||
plan->target_entries_.push_back(schema->get_offset(FieldName("the_value")));
|
||||
plan->target_entries_.push_back(schema->get_field_id(FieldName("fakevec")));
|
||||
plan->target_entries_.push_back(schema->get_field_id(FieldName("the_value")));
|
||||
auto result = segment->Search(plan.get(), *ph, ts);
|
||||
// std::cout << SearchResultToJson(result).dump(2);
|
||||
result->result_offsets_.resize(topk * num_queries);
|
||||
segment->FillTargetEntry(plan.get(), *result);
|
||||
segment->FillPrimaryKeys(plan.get(), *result);
|
||||
|
||||
auto fields_data = result->output_fields_data_;
|
||||
auto fields_meta = result->output_fields_meta_;
|
||||
auto& fields_data = result->output_fields_data_;
|
||||
ASSERT_EQ(fields_data.size(), 2);
|
||||
ASSERT_EQ(fields_data.size(), 2);
|
||||
ASSERT_EQ(fields_meta[0].get_sizeof(), sizeof(float) * dim);
|
||||
ASSERT_EQ(fields_meta[1].get_sizeof(), sizeof(int32_t));
|
||||
ASSERT_EQ(fields_data[0].size(), fields_meta[0].get_sizeof() * topk * num_queries);
|
||||
ASSERT_EQ(fields_data[1].size(), fields_meta[1].get_sizeof() * topk * num_queries);
|
||||
for (auto field_id : plan->target_entries_) {
|
||||
ASSERT_EQ(fields_data.count(field_id), true);
|
||||
}
|
||||
|
||||
auto vec_field_id = schema->get_field_id(FieldName("fakevec"));
|
||||
auto output_vec_field_data = fields_data.at(vec_field_id)->vectors().float_vector().data();
|
||||
ASSERT_EQ(output_vec_field_data.size(), topk * num_queries * dim);
|
||||
|
||||
auto i32_field_id = schema->get_field_id(FieldName("the_value"));
|
||||
auto output_i32_field_data = fields_data.at(i32_field_id)->scalars().int_data().data();
|
||||
ASSERT_EQ(output_i32_field_data.size(), topk * num_queries);
|
||||
|
||||
for (int i = 0; i < topk * num_queries; i++) {
|
||||
int64_t val;
|
||||
memcpy(&val, &result->ids_data_[i * sizeof(int64_t)], sizeof(int64_t));
|
||||
int64_t val = std::get<int64_t>(result->primary_keys_[i]);
|
||||
|
||||
auto internal_offset = result->ids_[i];
|
||||
auto internal_offset = result->seg_offsets_[i];
|
||||
auto std_val = std_vec[internal_offset];
|
||||
auto std_i32 = std_i32_vec[internal_offset];
|
||||
std::vector<float> std_vfloat(dim);
|
||||
|
@ -724,12 +740,12 @@ TEST(Query, FillSegment) {
|
|||
if (val != -1) {
|
||||
// check vector field
|
||||
std::vector<float> vfloat(dim);
|
||||
memcpy(vfloat.data(), &fields_data[0][i * sizeof(float) * dim], dim * sizeof(float));
|
||||
memcpy(vfloat.data(), &output_vec_field_data[i * dim], dim * sizeof(float));
|
||||
ASSERT_EQ(vfloat, std_vfloat);
|
||||
|
||||
// check int32 field
|
||||
int i32;
|
||||
memcpy(&i32, &fields_data[1][i * sizeof(int32_t)], sizeof(int32_t));
|
||||
memcpy(&i32, &output_i32_field_data[i], sizeof(int32_t));
|
||||
ASSERT_EQ(i32, std_i32);
|
||||
}
|
||||
}
|
||||
|
@ -740,8 +756,10 @@ TEST(Query, ExecWithPredicateBinary) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -774,7 +792,7 @@ TEST(Query, ExecWithPredicateBinary) {
|
|||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(0);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(vec_fid);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
|
|
|
@ -52,7 +52,7 @@ TEST(Reduce, SubQueryResult) {
|
|||
}
|
||||
SubSearchResult sub_result(num_queries, topk, metric_type, round_decimal);
|
||||
sub_result.mutable_distances() = distances;
|
||||
sub_result.mutable_ids() = ids;
|
||||
sub_result.mutable_seg_offsets() = ids;
|
||||
final_result.merge(sub_result);
|
||||
}
|
||||
|
||||
|
@ -62,7 +62,7 @@ TEST(Reduce, SubQueryResult) {
|
|||
auto ref_x = ref_results[n].top();
|
||||
ref_results[n].pop();
|
||||
auto index = n * topk + topk - 1 - k;
|
||||
auto id = final_result.get_ids()[index];
|
||||
auto id = final_result.get_seg_offsets()[index];
|
||||
auto distance = final_result.get_distances()[index];
|
||||
ASSERT_EQ(id, ref_x);
|
||||
ASSERT_EQ(distance, ref_x);
|
||||
|
@ -104,7 +104,7 @@ TEST(Reduce, SubSearchResultDesc) {
|
|||
}
|
||||
SubSearchResult sub_result(num_queries, topk, metric_type, round_decimal);
|
||||
sub_result.mutable_distances() = distances;
|
||||
sub_result.mutable_ids() = ids;
|
||||
sub_result.mutable_seg_offsets() = ids;
|
||||
final_result.merge(sub_result);
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ TEST(Reduce, SubSearchResultDesc) {
|
|||
auto ref_x = ref_results[n].top();
|
||||
ref_results[n].pop();
|
||||
auto index = n * topk + topk - 1 - k;
|
||||
auto id = final_result.get_ids()[index];
|
||||
auto id = final_result.get_seg_offsets()[index];
|
||||
auto distance = final_result.get_distances()[index];
|
||||
ASSERT_EQ(id, ref_x);
|
||||
ASSERT_EQ(distance, ref_x);
|
||||
|
|
|
@ -19,15 +19,15 @@ TEST(SearchResultPair, Greater) {
|
|||
auto pair2 = SearchResultPair(1, 2.0, nullptr, 1, 0, 10);
|
||||
ASSERT_EQ(pair1 > pair2, false);
|
||||
|
||||
pair1.primary_key_ = INVALID_ID;
|
||||
pair1.primary_key_ = INVALID_PK;
|
||||
pair2.primary_key_ = 1;
|
||||
ASSERT_EQ(pair1 > pair2, false);
|
||||
|
||||
pair1.primary_key_ = 0;
|
||||
pair2.primary_key_ = INVALID_ID;
|
||||
pair2.primary_key_ = INVALID_PK;
|
||||
ASSERT_EQ(pair1 > pair2, true);
|
||||
|
||||
pair1.primary_key_ = INVALID_ID;
|
||||
pair2.primary_key_ = INVALID_ID;
|
||||
pair1.primary_key_ = INVALID_PK;
|
||||
pair2.primary_key_ = INVALID_PK;
|
||||
ASSERT_EQ(pair1 > pair2, false);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "query/Relational.h"
|
||||
#include "common/Utils.h"
|
||||
#include <string>
|
||||
|
||||
TEST(Relational, Basic) {
|
||||
using namespace milvus::query;
|
||||
|
||||
int64_t i64 = 4;
|
||||
int64_t another_i64 = 5;
|
||||
|
||||
std::string s = "str4";
|
||||
std::string another_s = "str5";
|
||||
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, another_i64), i64 == another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, another_i64), i64 != another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, another_i64), i64 >= another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, another_i64), i64 > another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, another_i64), i64 <= another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, another_i64), i64 < another_i64);
|
||||
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(s, another_s), s == another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(s, another_s), s != another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(s, another_s), s >= another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(s, another_s), s > another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(s, another_s), s <= another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(s, another_s), s < another_s);
|
||||
ASSERT_EQ(Relational<decltype(MatchOp<OpType::PrefixMatch>{})>()(s, another_s), milvus::PrefixMatch(s, another_s));
|
||||
ASSERT_EQ(Relational<decltype(MatchOp<OpType::PostfixMatch>{})>()(s, another_s),
|
||||
milvus::PostfixMatch(s, another_s));
|
||||
}
|
||||
|
||||
TEST(Relational, DifferentFundamentalType) {
|
||||
using namespace milvus::query;
|
||||
|
||||
int32_t i32 = 3;
|
||||
int64_t i64 = 4;
|
||||
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, i32), i64 == i32);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, i32), i64 != i32);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, i32), i64 >= i32);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, i32), i64 > i32);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, i32), i64 <= i32);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, i32), i64 < i32);
|
||||
}
|
||||
|
||||
TEST(Relational, DifferentInCompatibleType) {
|
||||
using namespace milvus::query;
|
||||
|
||||
int64_t i64 = 4;
|
||||
std::string s = "str4";
|
||||
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::equal_to<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::not_equal_to<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::greater_equal<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::greater<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less_equal<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<OpType::PrefixMatch>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<OpType::PostfixMatch>{})>()(s, i64));
|
||||
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::equal_to<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::not_equal_to<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::greater_equal<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::greater<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less_equal<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<OpType::PrefixMatch>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<OpType::PostfixMatch>{})>()(i64, s));
|
||||
}
|
|
@ -49,7 +49,7 @@ TEST(Retrieve, AutoID) {
|
|||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
|
||||
schema->set_primary_key(FieldOffset(0));
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
int64_t req_size = 10;
|
||||
|
@ -58,22 +58,21 @@ TEST(Retrieve, AutoID) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(0);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
std::vector<int64_t> values;
|
||||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
|
||||
plan->field_offsets_ = target_offsets;
|
||||
std::vector<FieldId> target_fields_id{fid_64, fid_vec};
|
||||
plan->field_ids_ = target_fields_id;
|
||||
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
Assert(retrieve_results->fields_data_size() == target_offsets.size());
|
||||
FieldOffset field_offset(0);
|
||||
Assert(retrieve_results->fields_data_size() == target_fields_id.size());
|
||||
auto field0 = retrieve_results->fields_data(0);
|
||||
Assert(field0.has_scalars());
|
||||
auto field0_data = field0.scalars().long_data();
|
||||
|
@ -100,7 +99,7 @@ TEST(Retrieve, AutoID2) {
|
|||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
|
||||
schema->set_primary_key(FieldOffset(0));
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
int64_t req_size = 10;
|
||||
|
@ -109,22 +108,21 @@ TEST(Retrieve, AutoID2) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(0);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
std::vector<int64_t> values;
|
||||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
|
||||
plan->field_offsets_ = target_offsets;
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
plan->field_ids_ = target_offsets;
|
||||
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
Assert(retrieve_results->fields_data_size() == target_offsets.size());
|
||||
FieldOffset field_offset(0);
|
||||
auto field0 = retrieve_results->fields_data(0);
|
||||
Assert(field0.has_scalars());
|
||||
auto field0_data = field0.scalars().long_data();
|
||||
|
@ -146,7 +144,7 @@ TEST(Retrieve, NotExist) {
|
|||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
|
||||
schema->set_primary_key(FieldOffset(0));
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
int64_t req_size = 10;
|
||||
|
@ -156,7 +154,7 @@ TEST(Retrieve, NotExist) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(0);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
std::vector<int64_t> values;
|
||||
|
@ -165,15 +163,14 @@ TEST(Retrieve, NotExist) {
|
|||
values.emplace_back(choose2(i));
|
||||
}
|
||||
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
|
||||
plan->field_offsets_ = target_offsets;
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
plan->field_ids_ = target_offsets;
|
||||
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
Assert(retrieve_results->fields_data_size() == target_offsets.size());
|
||||
FieldOffset field_offset(0);
|
||||
auto field0 = retrieve_results->fields_data(0);
|
||||
Assert(field0.has_scalars());
|
||||
auto field0_data = field0.scalars().long_data();
|
||||
|
@ -195,7 +192,7 @@ TEST(Retrieve, Empty) {
|
|||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
|
||||
schema->set_primary_key(FieldOffset(0));
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
int64_t req_size = 10;
|
||||
|
@ -208,11 +205,11 @@ TEST(Retrieve, Empty) {
|
|||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(choose(i));
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
|
||||
plan->field_offsets_ = target_offsets;
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
plan->field_ids_ = target_offsets;
|
||||
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
|
||||
|
@ -230,7 +227,7 @@ TEST(Retrieve, LargeTimestamp) {
|
|||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
|
||||
schema->set_primary_key(FieldOffset(0));
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
int64_t req_size = 10;
|
||||
|
@ -240,32 +237,38 @@ TEST(Retrieve, LargeTimestamp) {
|
|||
auto dataset = DataGen(schema, N, 42, ts_offset + 1);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(0);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
std::vector<int64_t> values;
|
||||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
|
||||
plan->field_offsets_ = target_offsets;
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
plan->field_ids_ = target_offsets;
|
||||
|
||||
std::vector<int> filter_timestamps{-1, 0, 1, 10, 20};
|
||||
filter_timestamps.push_back(N / 2);
|
||||
for (const auto& f_ts : filter_timestamps) {
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), ts_offset + 1 + f_ts);
|
||||
Assert(retrieve_results->fields_data_size() == 2);
|
||||
auto field0 = retrieve_results->fields_data(0);
|
||||
auto field1 = retrieve_results->fields_data(1);
|
||||
|
||||
int target_num = (f_ts + choose_sep) / choose_sep;
|
||||
if (target_num > req_size) {
|
||||
target_num = req_size;
|
||||
}
|
||||
Assert(field0.scalars().long_data().data_size() == target_num);
|
||||
Assert(field1.vectors().float_vector().data_size() == target_num * DIM);
|
||||
|
||||
for (auto field_data : retrieve_results->fields_data()) {
|
||||
if (DataType(field_data.type()) == DataType::INT64) {
|
||||
Assert(field_data.scalars().long_data().data_size() == target_num);
|
||||
}
|
||||
if (DataType(field_data.type()) == DataType::VECTOR_FLOAT) {
|
||||
Assert(field_data.vectors().float_vector().data_size() == target_num * DIM);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -274,7 +277,7 @@ TEST(Retrieve, Delete) {
|
|||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
|
||||
schema->set_primary_key(FieldOffset(0));
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 10;
|
||||
int64_t req_size = 10;
|
||||
|
@ -283,23 +286,22 @@ TEST(Retrieve, Delete) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(0);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
std::vector<int64_t> values;
|
||||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
|
||||
plan->field_offsets_ = target_offsets;
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
plan->field_ids_ = target_offsets;
|
||||
|
||||
{
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
Assert(retrieve_results->fields_data_size() == target_offsets.size());
|
||||
FieldOffset field_offset(0);
|
||||
auto field0 = retrieve_results->fields_data(0);
|
||||
Assert(field0.has_scalars());
|
||||
auto field0_data = field0.scalars().long_data();
|
||||
|
@ -326,25 +328,28 @@ TEST(Retrieve, Delete) {
|
|||
auto load_delete_record = false;
|
||||
if (load_delete_record) {
|
||||
std::vector<idx_t> pks{1, 2, 3, 4, 5};
|
||||
auto ids = std::make_unique<IdArray>();
|
||||
ids->mutable_int_id()->mutable_data()->Add(pks.begin(), pks.end());
|
||||
|
||||
std::vector<Timestamp> timestamps{10, 10, 10, 10, 10};
|
||||
|
||||
LoadDeletedRecordInfo info = {timestamps.data(), pks.data(), row_count};
|
||||
LoadDeletedRecordInfo info = {timestamps.data(), ids.get(), row_count};
|
||||
segment->LoadDeletedRecord(info);
|
||||
row_count = 5;
|
||||
}
|
||||
|
||||
int64_t new_count = 6;
|
||||
std::vector<idx_t> new_pks{0, 1, 2, 3, 4, 5};
|
||||
auto ids = std::make_unique<IdArray>();
|
||||
ids->mutable_int_id()->mutable_data()->Add(new_pks.begin(), new_pks.end());
|
||||
std::vector<idx_t> new_timestamps{10, 10, 10, 10, 10, 10};
|
||||
auto reserved_offset = segment->PreDelete(new_count);
|
||||
ASSERT_EQ(reserved_offset, row_count);
|
||||
segment->Delete(reserved_offset, new_count, reinterpret_cast<const int64_t*>(new_pks.data()),
|
||||
reinterpret_cast<const Timestamp*>(new_timestamps.data()));
|
||||
segment->Delete(reserved_offset, new_count, ids.get(), reinterpret_cast<const Timestamp*>(new_timestamps.data()));
|
||||
|
||||
{
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
Assert(retrieve_results->fields_data_size() == target_offsets.size());
|
||||
FieldOffset field_offset(0);
|
||||
auto field0 = retrieve_results->fields_data(0);
|
||||
Assert(field0.has_scalars());
|
||||
auto field0_data = field0.scalars().long_data();
|
||||
|
|
|
@ -52,6 +52,18 @@ TYPED_TEST_P(TypedScalarIndexTest, Constructor) {
|
|||
}
|
||||
}
|
||||
|
||||
TYPED_TEST_P(TypedScalarIndexTest, Count) {
|
||||
using T = TypeParam;
|
||||
auto dtype = milvus::GetDType<T>();
|
||||
auto index_types = GetIndexTypes<T>();
|
||||
for (const auto& index_type : index_types) {
|
||||
auto index = milvus::scalar::IndexFactory::GetInstance().CreateIndex<T>(index_type);
|
||||
auto arr = GenArr<T>(nb);
|
||||
index->Build(nb, arr.data());
|
||||
ASSERT_EQ(nb, index->Count());
|
||||
}
|
||||
}
|
||||
|
||||
TYPED_TEST_P(TypedScalarIndexTest, In) {
|
||||
using T = TypeParam;
|
||||
auto dtype = milvus::GetDType<T>();
|
||||
|
@ -101,6 +113,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
|
|||
auto copy_index = milvus::scalar::IndexFactory::GetInstance().CreateIndex<T>(index_type);
|
||||
copy_index->Load(binary_set);
|
||||
|
||||
ASSERT_EQ(nb, copy_index->Count());
|
||||
assert_in<T>(copy_index, arr);
|
||||
assert_not_in<T>(copy_index, arr);
|
||||
assert_range<T>(copy_index, arr);
|
||||
|
@ -110,6 +123,6 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
|
|||
// TODO: it's easy to overflow for int8_t. Design more reasonable ut.
|
||||
using ScalarT = ::testing::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
|
||||
|
||||
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, In, NotIn, Range, Codec);
|
||||
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, Count, In, NotIn, Range, Codec);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexTest, ScalarT);
|
||||
|
|
|
@ -31,7 +31,9 @@ TEST(Sealed, without_predicate) {
|
|||
auto topK = 5;
|
||||
auto metric_type = MetricType::METRIC_L2;
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -55,7 +57,7 @@ TEST(Sealed, without_predicate) {
|
|||
auto N = ROW_COUNT;
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<float>(0);
|
||||
auto vec_col = dataset.get_col<float>(fake_id);
|
||||
for (int64_t i = 0; i < 1000 * dim; ++i) {
|
||||
vec_col.push_back(0);
|
||||
}
|
||||
|
@ -99,7 +101,7 @@ TEST(Sealed, without_predicate) {
|
|||
std::vector<int64_t> vec_ids(ids, ids + topK * num_queries);
|
||||
std::vector<float> vec_dis(dis, dis + topK * num_queries);
|
||||
|
||||
sr->ids_ = vec_ids;
|
||||
sr->seg_offsets_ = vec_ids;
|
||||
sr->distances_ = vec_dis;
|
||||
auto ref_result = SearchResultToJson(*sr);
|
||||
|
||||
|
@ -127,7 +129,8 @@ TEST(Sealed, with_predicate) {
|
|||
auto topK = 5;
|
||||
auto metric_type = MetricType::METRIC_L2;
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
schema->AddDebugField("counter", DataType::INT64);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -159,7 +162,7 @@ TEST(Sealed, with_predicate) {
|
|||
auto N = ROW_COUNT;
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<float>(0);
|
||||
auto vec_col = dataset.get_col<float>(fake_id);
|
||||
auto query_ptr = vec_col.data() + 42000 * dim;
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
|
@ -204,7 +207,7 @@ TEST(Sealed, with_predicate) {
|
|||
|
||||
for (int i = 0; i < num_queries; ++i) {
|
||||
auto offset = i * topK;
|
||||
ASSERT_EQ(sr->ids_[offset], 42000 + i);
|
||||
ASSERT_EQ(sr->seg_offsets_[offset], 42000 + i);
|
||||
ASSERT_EQ(sr->distances_[offset], 0.0);
|
||||
}
|
||||
}
|
||||
|
@ -219,10 +222,11 @@ TEST(Sealed, LoadFieldData) {
|
|||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
schema->set_primary_field_id(counter_id);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
|
||||
auto fakevec = dataset.get_col<float>(0);
|
||||
auto fakevec = dataset.get_col<float>(fakevec_id);
|
||||
|
||||
auto indexing = GenIndexing(N, dim, fakevec.data());
|
||||
|
||||
|
@ -277,10 +281,10 @@ TEST(Sealed, LoadFieldData) {
|
|||
segment->LoadIndex(vec_info);
|
||||
|
||||
ASSERT_EQ(segment->num_chunk(), 1);
|
||||
auto chunk_span1 = segment->chunk_data<int64_t>(FieldOffset(1), 0);
|
||||
auto chunk_span2 = segment->chunk_data<double>(FieldOffset(2), 0);
|
||||
auto ref1 = dataset.get_col<int64_t>(1);
|
||||
auto ref2 = dataset.get_col<double>(2);
|
||||
auto chunk_span1 = segment->chunk_data<int64_t>(counter_id, 0);
|
||||
auto chunk_span2 = segment->chunk_data<double>(double_id, 0);
|
||||
auto ref1 = dataset.get_col<int64_t>(counter_id);
|
||||
auto ref2 = dataset.get_col<double>(double_id);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
ASSERT_EQ(chunk_span1[i], ref1[i]);
|
||||
ASSERT_EQ(chunk_span2[i], ref2[i]);
|
||||
|
@ -324,6 +328,96 @@ TEST(Sealed, LoadFieldData) {
|
|||
ASSERT_EQ(std_json.dump(-2), json.dump(-2));
|
||||
}
|
||||
|
||||
TEST(Sealed, LoadScalarIndex) {
|
||||
auto dim = 16;
|
||||
auto N = ROW_COUNT;
|
||||
auto metric_type = MetricType::METRIC_L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
schema->set_primary_field_id(counter_id);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
|
||||
auto fakevec = dataset.get_col<float>(fakevec_id);
|
||||
|
||||
auto indexing = GenIndexing(N, dim, fakevec.data());
|
||||
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"range": {
|
||||
"double": {
|
||||
"GE": -1,
|
||||
"LT": 1
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"vector": {
|
||||
"fakevec": {
|
||||
"metric_type": "L2",
|
||||
"params": {
|
||||
"nprobe": 10
|
||||
},
|
||||
"query": "$0",
|
||||
"topk": 5,
|
||||
"round_decimal": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
})";
|
||||
|
||||
Timestamp time = 1000000;
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
SealedLoader(dataset, *segment);
|
||||
|
||||
LoadIndexInfo vec_info;
|
||||
vec_info.field_id = fakevec_id.get();
|
||||
vec_info.field_type = CDataType::FloatVector;
|
||||
vec_info.index = indexing;
|
||||
vec_info.index_params["metric_type"] = knowhere::Metric::L2;
|
||||
segment->LoadIndex(vec_info);
|
||||
|
||||
LoadIndexInfo counter_index;
|
||||
counter_index.field_id = counter_id.get();
|
||||
counter_index.field_type = CDataType::Int64;
|
||||
counter_index.index_params["index_type"] = "sort";
|
||||
auto counter_data = dataset.get_col<int64_t>(counter_id);
|
||||
counter_index.index = std::move(GenScalarIndexing<int64_t>(N, counter_data.data()));
|
||||
segment->LoadIndex(counter_index);
|
||||
|
||||
LoadIndexInfo double_index;
|
||||
double_index.field_id = double_id.get();
|
||||
double_index.field_type = CDataType::Double;
|
||||
double_index.index_params["index_type"] = "sort";
|
||||
auto double_data = dataset.get_col<double>(double_id);
|
||||
double_index.index = std::move(GenScalarIndexing<double>(N, double_data.data()));
|
||||
segment->LoadIndex(double_index);
|
||||
|
||||
LoadIndexInfo nothing_index;
|
||||
nothing_index.field_id = nothing_id.get();
|
||||
nothing_index.field_type = CDataType::Int32;
|
||||
nothing_index.index_params["index_type"] = "sort";
|
||||
auto nothing_data = dataset.get_col<int32_t>(nothing_id);
|
||||
nothing_index.index = std::move(GenScalarIndexing<int32_t>(N, nothing_data.data()));
|
||||
segment->LoadIndex(nothing_index);
|
||||
|
||||
auto sr = segment->Search(plan.get(), *ph_group, time);
|
||||
auto json = SearchResultToJson(*sr);
|
||||
std::cout << json.dump(1);
|
||||
}
|
||||
|
||||
TEST(Sealed, Delete) {
|
||||
auto dim = 16;
|
||||
auto topK = 5;
|
||||
|
@ -334,10 +428,11 @@ TEST(Sealed, Delete) {
|
|||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
schema->set_primary_field_id(counter_id);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
|
||||
auto fakevec = dataset.get_col<float>(0);
|
||||
auto fakevec = dataset.get_col<float>(fakevec_id);
|
||||
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
std::string dsl = R"({
|
||||
|
@ -380,9 +475,11 @@ TEST(Sealed, Delete) {
|
|||
|
||||
int64_t row_count = 5;
|
||||
std::vector<idx_t> pks{1, 2, 3, 4, 5};
|
||||
auto ids = std::make_unique<IdArray>();
|
||||
ids->mutable_int_id()->mutable_data()->Add(pks.begin(), pks.end());
|
||||
std::vector<Timestamp> timestamps{10, 10, 10, 10, 10};
|
||||
|
||||
LoadDeletedRecordInfo info = {timestamps.data(), pks.data(), row_count};
|
||||
LoadDeletedRecordInfo info = {timestamps.data(), ids.get(), row_count};
|
||||
segment->LoadDeletedRecord(info);
|
||||
|
||||
std::vector<uint8_t> tmp_block{0, 0};
|
||||
|
@ -392,9 +489,11 @@ TEST(Sealed, Delete) {
|
|||
|
||||
int64_t new_count = 3;
|
||||
std::vector<idx_t> new_pks{6, 7, 8};
|
||||
auto new_ids = std::make_unique<IdArray>();
|
||||
new_ids->mutable_int_id()->mutable_data()->Add(new_pks.begin(), new_pks.end());
|
||||
std::vector<idx_t> new_timestamps{10, 10, 10};
|
||||
auto reserved_offset = segment->PreDelete(new_count);
|
||||
ASSERT_EQ(reserved_offset, row_count);
|
||||
segment->Delete(reserved_offset, new_count, reinterpret_cast<const int64_t*>(new_pks.data()),
|
||||
segment->Delete(reserved_offset, new_count, new_ids.get(),
|
||||
reinterpret_cast<const Timestamp*>(new_timestamps.data()));
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <string>
|
||||
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
|
||||
using namespace milvus;
|
||||
|
||||
|
@ -56,46 +57,6 @@ TEST(SegmentCoreTest, NormalDistributionTest) {
|
|||
segment->PreDelete(N);
|
||||
}
|
||||
|
||||
// Test insert row-based data
|
||||
TEST(SegmentCoreTest, MockTest) {
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::engine;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
std::vector<char> raw_data;
|
||||
std::vector<Timestamp> timestamps;
|
||||
std::vector<int64_t> uids;
|
||||
int N = 10000;
|
||||
std::default_random_engine e(67);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
uids.push_back(100000 + i);
|
||||
timestamps.push_back(0);
|
||||
// append vec
|
||||
float vec[16];
|
||||
for (auto& x : vec) {
|
||||
x = e() % 2000 * 0.001 - 1.0;
|
||||
}
|
||||
raw_data.insert(raw_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec));
|
||||
int age = e() % 100;
|
||||
raw_data.insert(raw_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age));
|
||||
}
|
||||
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
|
||||
assert(raw_data.size() == line_sizeof * N);
|
||||
|
||||
// auto index_meta = std::make_shared<IndexMeta>(schema);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
|
||||
RowBasedRawData data_chunk{raw_data.data(), (int)line_sizeof, N};
|
||||
auto offset = segment->PreInsert(N);
|
||||
segment->Insert(offset, N, uids.data(), timestamps.data(), data_chunk);
|
||||
SearchResult search_result;
|
||||
// segment->Query(nullptr, 0, query_result);
|
||||
// segment->BuildIndex();
|
||||
int i = 0;
|
||||
i++;
|
||||
}
|
||||
|
||||
// Test insert column-based data
|
||||
TEST(SegmentCoreTest, MockTest2) {
|
||||
using namespace milvus::segcore;
|
||||
|
@ -104,70 +65,14 @@ TEST(SegmentCoreTest, MockTest2) {
|
|||
// schema
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
// generate random row-based data
|
||||
std::vector<char> row_data;
|
||||
std::vector<Timestamp> timestamps;
|
||||
std::vector<int64_t> uids;
|
||||
int N = 10000; // number of records
|
||||
std::default_random_engine e(67);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
uids.push_back(100000 + i);
|
||||
timestamps.push_back(0);
|
||||
// append vec
|
||||
float vec[16];
|
||||
for (auto& x : vec) {
|
||||
x = e() % 2000 * 0.001 - 1.0;
|
||||
}
|
||||
row_data.insert(row_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec));
|
||||
int age = e() % 100;
|
||||
row_data.insert(row_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age));
|
||||
}
|
||||
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
|
||||
assert(row_data.size() == line_sizeof * N);
|
||||
|
||||
int64_t size = N;
|
||||
const int64_t* uids_raw = uids.data();
|
||||
const Timestamp* timestamps_raw = timestamps.data();
|
||||
std::vector<std::tuple<Timestamp, idx_t, int64_t>> ordering(size); // timestamp, pk, order_index
|
||||
for (int i = 0; i < size; ++i) {
|
||||
ordering[i] = std::make_tuple(timestamps_raw[i], uids_raw[i], i);
|
||||
}
|
||||
std::sort(ordering.begin(), ordering.end()); // sort according to timestamp
|
||||
|
||||
// convert row-based data to column-based data accordingly
|
||||
auto sizeof_infos = schema->get_sizeof_infos();
|
||||
std::vector<int> offset_infos(schema->size() + 1, 0);
|
||||
std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
|
||||
std::vector<aligned_vector<uint8_t>> entities(schema->size());
|
||||
|
||||
for (int fid = 0; fid < schema->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
entities[fid].resize(len * size);
|
||||
}
|
||||
|
||||
auto raw_data = row_data.data();
|
||||
std::vector<idx_t> sorted_uids(size);
|
||||
std::vector<Timestamp> sorted_timestamps(size);
|
||||
for (int index = 0; index < size; ++index) {
|
||||
auto [t, uid, order_index] = ordering[index];
|
||||
sorted_timestamps[index] = t;
|
||||
sorted_uids[index] = uid;
|
||||
for (int fid = 0; fid < schema->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
auto offset = offset_infos[fid];
|
||||
auto src = raw_data + order_index * line_sizeof + offset;
|
||||
auto dst = entities[fid].data() + index * len;
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
}
|
||||
|
||||
// insert column-based data
|
||||
ColumnBasedRawData data_chunk{entities, N};
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
auto reserved_begin = segment->PreInsert(N);
|
||||
segment->Insert(reserved_begin, size, sorted_uids.data(), sorted_timestamps.data(), data_chunk);
|
||||
segment->Insert(reserved_begin, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
}
|
||||
|
||||
TEST(SegmentCoreTest, SmallIndex) {
|
||||
|
|
|
@ -24,27 +24,29 @@ TEST(Span, Naive) {
|
|||
int64_t N = ROW_COUNT;
|
||||
constexpr int64_t size_per_chunk = 32 * 1024;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, MetricType::METRIC_L2);
|
||||
auto bin_vec_fid = schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto float_vec_fid = schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, MetricType::METRIC_L2);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto seg_conf = SegcoreConfig::default_config();
|
||||
auto segment = CreateGrowingSegment(schema, -1, seg_conf);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(0);
|
||||
auto age_ptr = dataset.get_col<float>(1);
|
||||
auto float_ptr = dataset.get_col<float>(2);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(bin_vec_fid);
|
||||
auto age_ptr = dataset.get_col<float>(float_fid);
|
||||
auto float_ptr = dataset.get_col<float>(float_vec_fid);
|
||||
SegmentInternalInterface& interface = *segment;
|
||||
auto num_chunk = interface.num_chunk();
|
||||
ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk));
|
||||
auto row_count = interface.get_row_count();
|
||||
ASSERT_EQ(N, row_count);
|
||||
for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto vec_span = interface.chunk_data<BinaryVector>(FieldOffset(0), chunk_id);
|
||||
auto age_span = interface.chunk_data<float>(FieldOffset(1), chunk_id);
|
||||
auto float_span = interface.chunk_data<FloatVector>(FieldOffset(2), chunk_id);
|
||||
auto vec_span = interface.chunk_data<milvus::BinaryVector>(bin_vec_fid, chunk_id);
|
||||
auto age_span = interface.chunk_data<float>(float_fid, chunk_id);
|
||||
auto float_span = interface.chunk_data<milvus::FloatVector>(float_vec_fid, chunk_id);
|
||||
auto begin = chunk_id * size_per_chunk;
|
||||
auto end = std::min((chunk_id + 1) * size_per_chunk, N);
|
||||
auto size_of_chunk = end - begin;
|
||||
|
|
|
@ -0,0 +1,589 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <memory>
|
||||
#include <boost/format.hpp>
|
||||
#include <regex>
|
||||
|
||||
#include "pb/plan.pb.h"
|
||||
#include "query/Expr.h"
|
||||
#include "query/generated/PlanNodeVisitor.h"
|
||||
#include "query/generated/ExecExprVisitor.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include "query/PlanProto.h"
|
||||
#include "query/Utils.h"
|
||||
#include "query/SearchBruteForce.h"
|
||||
|
||||
using namespace milvus;
|
||||
|
||||
namespace {
|
||||
template <typename T>
|
||||
auto
|
||||
GenGenericValue(T value) {
|
||||
auto generic = new proto::plan::GenericValue();
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
generic->set_bool_val(static_cast<bool>(value));
|
||||
} else if constexpr (std::is_integral_v<T>) {
|
||||
generic->set_int64_val(static_cast<int64_t>(value));
|
||||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
generic->set_float_val(static_cast<float>(value));
|
||||
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||
generic->set_string_val(static_cast<std::string>(value));
|
||||
} else {
|
||||
static_assert(always_false<T>);
|
||||
}
|
||||
return generic;
|
||||
}
|
||||
|
||||
auto
|
||||
GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id, bool is_pk) {
|
||||
auto column_info = new proto::plan::ColumnInfo();
|
||||
column_info->set_field_id(field_id);
|
||||
column_info->set_data_type(field_type);
|
||||
column_info->set_is_autoid(auto_id);
|
||||
column_info->set_is_primary_key(is_pk);
|
||||
return column_info;
|
||||
}
|
||||
|
||||
auto
|
||||
GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, int64_t round_decimal = -1) {
|
||||
auto query_info = new proto::plan::QueryInfo();
|
||||
query_info->set_topk(topk);
|
||||
query_info->set_metric_type(metric_type);
|
||||
query_info->set_search_params(search_params);
|
||||
query_info->set_round_decimal(round_decimal);
|
||||
return query_info;
|
||||
}
|
||||
|
||||
auto
|
||||
GenAnns(proto::plan::Expr* predicate, bool is_binary, int64_t field_id, std::string placeholder_tag = "$0") {
|
||||
auto query_info = GenQueryInfo(10, "L2", "{\"nprobe\": 10}", -1);
|
||||
auto anns = new proto::plan::VectorANNS();
|
||||
anns->set_is_binary(is_binary);
|
||||
anns->set_field_id(field_id);
|
||||
anns->set_allocated_predicates(predicate);
|
||||
anns->set_allocated_query_info(query_info);
|
||||
anns->set_placeholder_tag(placeholder_tag);
|
||||
return anns;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
GenTermExpr(const std::vector<T>& values) {
|
||||
auto term_expr = new proto::plan::TermExpr();
|
||||
for (int i = 0; i < values.size(); i++) {
|
||||
auto add_value = term_expr->add_values();
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
add_value->set_bool_val(static_cast<T>(values[i]));
|
||||
} else if constexpr (std::is_integral_v<T>) {
|
||||
add_value->set_int64_val(static_cast<int64_t>(values[i]));
|
||||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
add_value->set_float_val(static_cast<double>(values[i]));
|
||||
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||
add_value->set_string_val(static_cast<T>(values[i]));
|
||||
} else {
|
||||
static_assert(always_false<T>);
|
||||
}
|
||||
}
|
||||
return term_expr;
|
||||
}
|
||||
|
||||
auto
|
||||
GenCompareExpr(proto::plan::OpType op) {
|
||||
auto compare_expr = new proto::plan::CompareExpr();
|
||||
compare_expr->set_op(op);
|
||||
return compare_expr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
GenUnaryRangeExpr(proto::plan::OpType op, T value) {
|
||||
auto unary_range_expr = new proto::plan::UnaryRangeExpr();
|
||||
unary_range_expr->set_op(op);
|
||||
auto generic = GenGenericValue(value);
|
||||
unary_range_expr->set_allocated_value(generic);
|
||||
return unary_range_expr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
GenBinaryRangeExpr(bool lb_inclusive, bool ub_inclusive, T lb, T ub) {
|
||||
auto binary_range_expr = new proto::plan::BinaryRangeExpr();
|
||||
binary_range_expr->set_lower_inclusive(lb_inclusive);
|
||||
binary_range_expr->set_upper_inclusive(ub_inclusive);
|
||||
auto lb_generic = GenGenericValue(lb);
|
||||
auto ub_generic = GenGenericValue(ub);
|
||||
binary_range_expr->set_allocated_lower_value(lb_generic);
|
||||
binary_range_expr->set_allocated_upper_value(ub_generic);
|
||||
return binary_range_expr;
|
||||
}
|
||||
|
||||
auto
|
||||
GenNotExpr() {
|
||||
auto not_expr = new proto::plan::UnaryExpr();
|
||||
not_expr->set_op(proto::plan::UnaryExpr_UnaryOp_Not);
|
||||
return not_expr;
|
||||
}
|
||||
|
||||
auto
|
||||
GenExpr() {
|
||||
return std::make_unique<proto::plan::Expr>();
|
||||
}
|
||||
|
||||
auto
|
||||
GenPlanNode() {
|
||||
return std::make_unique<proto::plan::PlanNode>();
|
||||
}
|
||||
|
||||
void
|
||||
SetTargetEntry(std::unique_ptr<proto::plan::PlanNode>& plan_node, const std::vector<int64_t>& output_fields) {
|
||||
for (auto id : output_fields) {
|
||||
plan_node->add_output_field_ids(id);
|
||||
}
|
||||
}
|
||||
|
||||
auto
|
||||
GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::vector<std::string>& strs)
|
||||
-> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto term_expr = GenTermExpr<std::string>(strs);
|
||||
term_expr->set_allocated_column_info(column_info);
|
||||
|
||||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_term_expr(term_expr);
|
||||
|
||||
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
|
||||
auto plan_node = GenPlanNode();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
return std::move(plan_node);
|
||||
}
|
||||
|
||||
auto
|
||||
GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto term_expr = GenTermExpr<std::string>({}); // in empty set, always false.
|
||||
term_expr->set_allocated_column_info(column_info);
|
||||
|
||||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_term_expr(term_expr);
|
||||
return expr;
|
||||
}
|
||||
|
||||
auto
|
||||
GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
||||
auto not_expr = GenNotExpr();
|
||||
not_expr->set_allocated_child(always_false_expr);
|
||||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_unary_expr(not_expr);
|
||||
return expr;
|
||||
}
|
||||
|
||||
auto
|
||||
GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
||||
auto anns = GenAnns(always_false_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(), "$0");
|
||||
|
||||
auto plan_node = GenPlanNode();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
return std::move(plan_node);
|
||||
}
|
||||
|
||||
auto
|
||||
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
|
||||
auto anns =
|
||||
GenAnns(always_true_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
|
||||
auto plan_node = GenPlanNode();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
return std::move(plan_node);
|
||||
}
|
||||
|
||||
SchemaPtr
|
||||
GenTestSchema() {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("str", DataType::VARCHAR);
|
||||
schema->AddDebugField("another_str", DataType::VARCHAR);
|
||||
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto pk = schema->AddDebugField("int64", DataType::INT64);
|
||||
schema->set_primary_field_id(pk);
|
||||
return schema;
|
||||
}
|
||||
|
||||
SchemaPtr
|
||||
GenStrPKSchema() {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("str", DataType::VARCHAR);
|
||||
schema->AddDebugField("another_str", DataType::VARCHAR);
|
||||
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
schema->AddDebugField("int64", DataType::INT64);
|
||||
schema->set_primary_field_id(pk);
|
||||
return schema;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(StringExpr, Term) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // A large term set: the 1000 stringified integers "2000".."2999".
    std::vector<std::string> big_term_set;
    for (int v = 2000; v < 3000; v++) {
        big_term_set.push_back(std::to_string(v));
    }

    // Case id -> term set used as the IN-predicate right-hand side.
    std::map<int, std::vector<std::string>> terms = {
        {0, {"2000", "3000"}}, {1, {"2000"}}, {2, {"3000"}}, {3, {}}, {4, {big_term_set}},
    };

    // Insert data in batches, mirroring the varchar column into a plain
    // vector so the expected bitmap can be computed independently.
    const int N = 1000;
    const int num_iters = 100;
    auto seg = CreateGrowingSegment(schema);
    std::vector<std::string> str_col;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto col = raw_data.get_col(str_meta.get_id());
        const auto& data = col->scalars().string_data().data();
        str_col.insert(str_col.end(), data.begin(), data.end());
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [_, term] : terms) {
        auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(final.size(), N * num_iters);

        // Each row is selected iff its string is a member of the term set.
        for (int i = 0; i < N * num_iters; ++i) {
            const auto& val = str_col[i];
            bool expected = std::find(term.begin(), term.end(), val) != term.end();
            ASSERT_EQ(final[i], expected) << "@" << i << "!!" << val;
        }
    }
}
|
||||
|
||||
TEST(StringExpr, Compare) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    // Build a plan whose predicate compares the two varchar columns with `op`.
    auto make_compare_plan = [&](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto left_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto right_info =
            GenColumnInfo(another_str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(left_info);
        compare_expr->set_allocated_right_column_info(right_info);

        auto expr = GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // Operator under test paired with the reference implementation.
    std::vector<std::tuple<proto::plan::OpType, std::function<bool(std::string, std::string)>>> testcases{
        {proto::plan::OpType::GreaterThan, [](std::string v1, std::string v2) { return v1 > v2; }},
        {proto::plan::OpType::GreaterEqual, [](std::string v1, std::string v2) { return v1 >= v2; }},
        {proto::plan::OpType::LessThan, [](std::string v1, std::string v2) { return v1 < v2; }},
        {proto::plan::OpType::LessEqual, [](std::string v1, std::string v2) { return v1 <= v2; }},
        {proto::plan::OpType::Equal, [](std::string v1, std::string v2) { return v1 == v2; }},
        {proto::plan::OpType::NotEqual, [](std::string v1, std::string v2) { return v1 != v2; }},
        {proto::plan::OpType::PrefixMatch, [](std::string v1, std::string v2) { return PrefixMatch(v1, v2); }},
    };

    // Insert data in batches, mirroring both varchar columns into plain
    // vectors so the expected result can be computed independently.
    const int N = 1000;
    const int num_iters = 100;
    auto seg = CreateGrowingSegment(schema);
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        auto append_col = [&](const FieldMeta& field_meta, std::vector<std::string>& dst) {
            auto col = raw_data.get_col(field_meta.get_id());
            const auto& data = col->scalars().string_data().data();
            dst.insert(dst.end(), data.begin(), data.end());
        };
        append_col(str_meta, str_col);
        append_col(another_str_meta, another_str_col);

        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = make_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            const auto& val = str_col[i];
            const auto& another_val = another_str_col[i];
            bool expected = ref_func(val, another_val);
            ASSERT_EQ(final[i], expected) << "@" << op << "@" << i << "!!" << val;
        }
    }
}
|
||||
|
||||
TEST(StringExpr, UnaryRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Build a plan whose predicate is `str <op> value`.
    auto make_unary_range_plan = [&](proto::plan::OpType op,
                                     std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto unary_range_expr = GenUnaryRangeExpr(op, value);
        unary_range_expr->set_allocated_column_info(column_info);

        auto expr = GenExpr().release();
        expr->set_allocated_unary_range_expr(unary_range_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, constant, reference implementation) triples.
    std::vector<std::tuple<proto::plan::OpType, std::string, std::function<bool(std::string)>>> testcases{
        {proto::plan::OpType::GreaterThan, "2000", [](std::string val) { return val > "2000"; }},
        {proto::plan::OpType::GreaterEqual, "2000", [](std::string val) { return val >= "2000"; }},
        {proto::plan::OpType::LessThan, "3000", [](std::string val) { return val < "3000"; }},
        {proto::plan::OpType::LessEqual, "3000", [](std::string val) { return val <= "3000"; }},
        {proto::plan::OpType::PrefixMatch, "a", [](std::string val) { return PrefixMatch(val, "a"); }},
    };

    // Insert data in batches, keeping a plain copy of the varchar column.
    const int N = 1000;
    const int num_iters = 100;
    auto seg = CreateGrowingSegment(schema);
    std::vector<std::string> str_col;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto col = raw_data.get_col(str_meta.get_id());
        const auto& data = col->scalars().string_data().data();
        str_col.insert(str_col.end(), data.begin(), data.end());
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [op, value, ref_func] : testcases) {
        auto plan_proto = make_unary_range_plan(op, value);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            const auto& val = str_col[i];
            bool expected = ref_func(val);
            ASSERT_EQ(final[i], expected) << "@" << op << "@" << value << "@" << i << "!!" << val;
        }
    }
}
|
||||
|
||||
TEST(StringExpr, BinaryRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Build a plan whose predicate is `lb <(=) str <(=) ub`.
    auto make_binary_range_plan = [&](bool lb_inclusive, bool ub_inclusive, std::string lb,
                                      std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto binary_range_expr = GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
        binary_range_expr->set_allocated_column_info(column_info);

        auto expr = GenExpr().release();
        expr->set_allocated_binary_range_expr(binary_range_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (lb_inclusive, ub_inclusive, lb, ub, reference implementation); the
    // last case has lb > ub, so it can never match.
    std::vector<std::tuple<bool, bool, std::string, std::string, std::function<bool(std::string)>>> testcases{
        {false, false, "2000", "3000", [](std::string val) { return val > "2000" && val < "3000"; }},
        {false, true, "2000", "3000", [](std::string val) { return val > "2000" && val <= "3000"; }},
        {true, false, "2000", "3000", [](std::string val) { return val >= "2000" && val < "3000"; }},
        {true, true, "2000", "3000", [](std::string val) { return val >= "2000" && val <= "3000"; }},
        {true, true, "2000", "1000", [](std::string val) { return false; }},
    };

    // Insert data in batches, keeping a plain copy of the varchar column.
    const int N = 1000;
    const int num_iters = 100;
    auto seg = CreateGrowingSegment(schema);
    std::vector<std::string> str_col;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto col = raw_data.get_col(str_meta.get_id());
        const auto& data = col->scalars().string_data().data();
        str_col.insert(str_col.end(), data.begin(), data.end());
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] : testcases) {
        auto plan_proto = make_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            const auto& val = str_col[i];
            bool expected = ref_func(val);
            ASSERT_EQ(final[i], expected) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub
                                          << "@" << i << "!!" << val;
        }
    }
}
|
||||
|
||||
TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 100000;
    auto dim = fvec_meta.get_dim();
    auto round_decimal = -1;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
    auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
    auto query_ptr = vec_col.data();

    // Disable the small index so the segment answers via brute-force search,
    // which we can cross-check against FloatSearchBruteForce below.
    auto segment = CreateGrowingSegment(schema);
    segment->disable_small_index();
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan_proto = GenAlwaysTruePlan(fvec_meta, str_meta);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
    auto num_queries = 5;
    auto topk = 10;
    auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());

    Timestamp time = MAX_TIMESTAMP;
    std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};

    // Reference result computed directly with brute-force float search.
    query::dataset::SearchDataset search_dataset{
        faiss::MetricType::METRIC_L2, num_queries, topk, round_decimal, dim, query_ptr};
    auto sub_result = FloatSearchBruteForce(search_dataset, vec_col.data(), N, nullptr);

    auto search_result = segment->Search(plan.get(), *ph_group, time);
    segment->FillPrimaryKeys(plan.get(), *search_result);
    segment->FillTargetEntry(plan.get(), *search_result);

    // With a varchar PK, the result's PK type and the requested output field
    // must both carry the string column.
    ASSERT_EQ(search_result->pk_type_, DataType::VARCHAR);
    ASSERT_TRUE(search_result->output_fields_data_.find(str_meta.get_id()) !=
                search_result->output_fields_data_.end());
    auto retrieved_str_col = search_result->output_fields_data_[str_meta.get_id()]->scalars().string_data().data();

    // Every (query, rank) slot must agree with the brute-force reference.
    const auto total = num_queries * topk;
    for (int offset = 0; offset < total; ++offset) {
        auto seg_offset = sub_result.get_seg_offsets()[offset];
        ASSERT_EQ(std::get<std::string>(search_result->primary_keys_[offset]), str_col[seg_offset]);
        ASSERT_EQ(retrieved_str_col[offset], str_col[seg_offset]);
    }
}
|
||||
|
||||
TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 100000;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
    auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();

    auto segment = CreateGrowingSegment(schema);
    segment->disable_small_index();  // brute-force search.
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    // Retrieve plan with an always-true predicate requesting the str column.
    auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
    auto plan_proto = GenPlanNode();
    plan_proto->set_allocated_predicates(expr_proto);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreateRetrievePlan(*plan_proto);

    Timestamp time = MAX_TIMESTAMP;
    auto retrieved = segment->Retrieve(plan.get(), time);

    // All N rows come back: string ids (varchar PK), offsets, and exactly
    // the one requested output field with N string values.
    ASSERT_EQ(retrieved->ids().str_id().data().size(), N);
    ASSERT_EQ(retrieved->offset().size(), N);
    ASSERT_EQ(retrieved->fields_data().size(), 1);
    ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(), N);
}
|
|
@ -61,6 +61,12 @@ TEST_F(StringIndexMarisaTest, BuildWithDataset) {
|
|||
index->BuildWithDataset(str_ds);
|
||||
}
|
||||
|
||||
TEST_F(StringIndexMarisaTest, Count) {
    // Count() must report exactly the number of strings the index was built from.
    auto marisa_index = milvus::scalar::CreateStringIndexMarisa();
    marisa_index->BuildWithDataset(str_ds);
    const auto expected_count = strs.size();
    ASSERT_EQ(expected_count, marisa_index->Count());
}
|
||||
|
||||
TEST_F(StringIndexMarisaTest, In) {
|
||||
auto index = milvus::scalar::CreateStringIndexMarisa();
|
||||
index->BuildWithDataset(str_ds);
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
#include <string.h>
|
||||
#include <knowhere/common/MetricType.h>
|
||||
|
||||
#include "common/Utils.h"
|
||||
#include "query/Utils.h"
|
||||
#include "segcore/Utils.h"
|
||||
|
||||
TEST(Util, FaissMetricTypeToString) {
|
||||
|
@ -33,3 +35,22 @@ TEST(Util, FaissMetricTypeToString) {
|
|||
ASSERT_EQ(MetricTypeToString(MetricType::METRIC_BrayCurtis), "METRIC_BrayCurtis");
|
||||
ASSERT_EQ(MetricTypeToString(MetricType::METRIC_JensenShannon), "METRIC_JensenShannon");
|
||||
}
|
||||
|
||||
TEST(Util, StringMatch) {
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
|
||||
ASSERT_ANY_THROW(Match(1, 2, OpType::PrefixMatch));
|
||||
ASSERT_ANY_THROW(Match(std::string("not_match_operation"), std::string("not_match"), OpType::LessEqual));
|
||||
|
||||
ASSERT_TRUE(PrefixMatch("prefix1", "prefix"));
|
||||
ASSERT_TRUE(PostfixMatch("1postfix", "postfix"));
|
||||
ASSERT_TRUE(Match(std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
|
||||
ASSERT_TRUE(Match(std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
|
||||
|
||||
ASSERT_FALSE(PrefixMatch("", "longer"));
|
||||
ASSERT_FALSE(PostfixMatch("", "longer"));
|
||||
|
||||
ASSERT_FALSE(PrefixMatch("dontmatch", "prefix"));
|
||||
ASSERT_FALSE(PostfixMatch("dontmatch", "postfix"));
|
||||
}
|
||||
|
|
|
@ -25,89 +25,127 @@
|
|||
#include "query/SearchOnIndex.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/Utils.h"
|
||||
#include "index/ScalarIndexSort.h"
|
||||
#include "index/StringIndexSort.h"
|
||||
|
||||
using boost::algorithm::starts_with;
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
struct GeneratedData {
|
||||
std::vector<uint8_t> rows_;
|
||||
std::vector<aligned_vector<uint8_t>> cols_;
|
||||
std::vector<idx_t> row_ids_;
|
||||
std::vector<Timestamp> timestamps_;
|
||||
RowBasedRawData raw_;
|
||||
InsertData* raw_;
|
||||
std::vector<FieldId> field_ids;
|
||||
SchemaPtr schema_;
|
||||
template <typename T>
|
||||
auto
|
||||
get_col(int index) const {
|
||||
auto& target = cols_.at(index);
|
||||
std::vector<T> ret(target.size() / sizeof(T));
|
||||
memcpy(ret.data(), target.data(), target.size());
|
||||
return ret;
|
||||
std::vector<T>
|
||||
get_col(FieldId field_id) const {
|
||||
std::vector<T> ret(raw_->num_rows());
|
||||
for (auto target_field_data : raw_->fields_data()) {
|
||||
if (field_id.get() != target_field_data.field_id()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
if (field_meta.is_vector()) {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
int len = raw_->num_rows() * field_meta.get_dim();
|
||||
ret.resize(len);
|
||||
auto src_data =
|
||||
reinterpret_cast<const T*>(target_field_data.vectors().float_vector().data().data());
|
||||
std::copy_n(src_data, len, ret.data());
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
int len = raw_->num_rows() * (field_meta.get_dim() / 8);
|
||||
ret.resize(len);
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.vectors().binary_vector().data());
|
||||
std::copy_n(src_data, len, ret.data());
|
||||
} else {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
||||
return std::move(ret);
|
||||
}
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().bool_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32: {
|
||||
auto src_data =
|
||||
reinterpret_cast<const int32_t*>(target_field_data.scalars().int_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().long_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().float_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().double_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().string_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::move(ret);
|
||||
}
|
||||
template <typename T>
|
||||
auto
|
||||
get_mutable_col(int index) {
|
||||
auto& target = cols_.at(index);
|
||||
assert(target.size() == row_ids_.size() * sizeof(T));
|
||||
auto ptr = reinterpret_cast<T*>(target.data());
|
||||
return ptr;
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
get_col(FieldId field_id) const {
|
||||
for (auto target_field_data : raw_->fields_data()) {
|
||||
if (field_id.get() == target_field_data.field_id()) {
|
||||
return std::make_unique<DataArray>(target_field_data);
|
||||
}
|
||||
}
|
||||
|
||||
PanicInfo("field id not find");
|
||||
}
|
||||
|
||||
private:
|
||||
GeneratedData() = default;
|
||||
friend GeneratedData
|
||||
DataGen(SchemaPtr schema, int64_t N, uint64_t seed, uint64_t ts_offset);
|
||||
void
|
||||
generate_rows(int64_t N, SchemaPtr schema);
|
||||
};
|
||||
|
||||
inline void
|
||||
GeneratedData::generate_rows(int64_t N, SchemaPtr schema) {
|
||||
std::vector<int> offset_infos(schema->size() + 1, 0);
|
||||
auto sizeof_infos = schema->get_sizeof_infos();
|
||||
std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
|
||||
int64_t len_per_row = offset_infos.back();
|
||||
assert(len_per_row == schema->get_total_sizeof());
|
||||
|
||||
// change column-based data to row-based data
|
||||
std::vector<uint8_t> result(len_per_row * N);
|
||||
for (int index = 0; index < N; ++index) {
|
||||
for (int fid = 0; fid < schema->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
auto offset = offset_infos[fid];
|
||||
auto src = cols_[fid].data() + index * len;
|
||||
auto dst = result.data() + index * len_per_row + offset;
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
}
|
||||
rows_ = std::move(result);
|
||||
raw_.raw_data = rows_.data();
|
||||
raw_.sizeof_per_row = schema->get_total_sizeof();
|
||||
raw_.count = N;
|
||||
}
|
||||
|
||||
inline GeneratedData
|
||||
DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0) {
|
||||
using std::vector;
|
||||
std::vector<aligned_vector<uint8_t>> cols;
|
||||
std::default_random_engine er(seed);
|
||||
std::normal_distribution<> distr(0, 1);
|
||||
int offset = 0;
|
||||
|
||||
auto insert_cols = [&cols](auto& data) {
|
||||
using T = std::remove_reference_t<decltype(data)>;
|
||||
auto len = sizeof(typename T::value_type) * data.size();
|
||||
auto ptr = aligned_vector<uint8_t>(len);
|
||||
memcpy(ptr.data(), data.data(), len);
|
||||
cols.emplace_back(std::move(ptr));
|
||||
auto insert_data = std::make_unique<InsertData>();
|
||||
auto insert_cols = [&insert_data](auto& data, int64_t count, auto& field_meta) {
|
||||
auto array = milvus::segcore::CreateDataArrayFrom(data.data(), count, field_meta);
|
||||
insert_data->mutable_fields_data()->AddAllocated(array.release());
|
||||
};
|
||||
|
||||
for (auto& field : schema->get_fields()) {
|
||||
switch (field.get_data_type()) {
|
||||
for (auto field_id : schema->get_field_ids()) {
|
||||
auto field_meta = schema->operator[](field_id);
|
||||
switch (field_meta.get_data_type()) {
|
||||
case engine::DataType::VECTOR_FLOAT: {
|
||||
auto dim = field.get_dim();
|
||||
auto dim = field_meta.get_dim();
|
||||
vector<float> final(dim * N);
|
||||
bool is_ip = starts_with(field.get_name().get(), "normalized");
|
||||
bool is_ip = starts_with(field_meta.get_name().get(), "normalized");
|
||||
#pragma omp parallel for
|
||||
for (int n = 0; n < N; ++n) {
|
||||
vector<float> data(dim);
|
||||
|
@ -128,23 +166,23 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
|
||||
std::copy(data.begin(), data.end(), final.begin() + dim * n);
|
||||
}
|
||||
insert_cols(final);
|
||||
insert_cols(final, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::VECTOR_BINARY: {
|
||||
auto dim = field.get_dim();
|
||||
auto dim = field_meta.get_dim();
|
||||
Assert(dim % 8 == 0);
|
||||
vector<uint8_t> data(dim / 8 * N);
|
||||
for (auto& x : data) {
|
||||
x = er();
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::INT64: {
|
||||
vector<int64_t> data(N);
|
||||
// begin with counter
|
||||
if (starts_with(field.get_name().get(), "counter")) {
|
||||
if (starts_with(field_meta.get_name().get(), "counter")) {
|
||||
int64_t index = 0;
|
||||
for (auto& x : data) {
|
||||
x = index++;
|
||||
|
@ -157,7 +195,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
i++;
|
||||
}
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::INT32: {
|
||||
|
@ -165,7 +203,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
for (auto& x : data) {
|
||||
x = er() % (2 * N);
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::INT16: {
|
||||
|
@ -173,7 +211,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
for (auto& x : data) {
|
||||
x = er() % (2 * N);
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::INT8: {
|
||||
|
@ -181,7 +219,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
for (auto& x : data) {
|
||||
x = er() % (2 * N);
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::FLOAT: {
|
||||
|
@ -189,7 +227,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
for (auto& x : data) {
|
||||
x = distr(er);
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::DOUBLE: {
|
||||
|
@ -197,7 +235,15 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
for (auto& x : data) {
|
||||
x = distr(er);
|
||||
}
|
||||
insert_cols(data);
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
case engine::DataType::VARCHAR: {
|
||||
vector<std::string> data(N);
|
||||
for (auto& x : data) {
|
||||
x = std::to_string(er());
|
||||
}
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
|
@ -206,14 +252,16 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
|
|||
}
|
||||
++offset;
|
||||
}
|
||||
|
||||
GeneratedData res;
|
||||
res.cols_ = std::move(cols);
|
||||
res.schema_ = schema;
|
||||
res.raw_ = insert_data.release();
|
||||
res.raw_->set_num_rows(N);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
res.row_ids_.push_back(i);
|
||||
res.timestamps_.push_back(i + ts_offset);
|
||||
}
|
||||
// std::shuffle(res.row_ids_.begin(), res.row_ids_.end(), er);
|
||||
res.generate_rows(N, schema);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -306,7 +354,7 @@ SearchResultToJson(const SearchResult& sr) {
|
|||
std::vector<std::string> result;
|
||||
for (int k = 0; k < topk; ++k) {
|
||||
int index = q * topk + k;
|
||||
result.emplace_back(std::to_string(sr.ids_[index]) + "->" + std::to_string(sr.distances_[index]));
|
||||
result.emplace_back(std::to_string(sr.seg_offsets_[index]) + "->" + std::to_string(sr.distances_[index]));
|
||||
}
|
||||
results.emplace_back(std::move(result));
|
||||
}
|
||||
|
@ -319,26 +367,28 @@ SealedLoader(const GeneratedData& dataset, SegmentSealed& seg) {
|
|||
auto row_count = dataset.row_ids_.size();
|
||||
{
|
||||
LoadFieldDataInfo info;
|
||||
info.blob = dataset.row_ids_.data();
|
||||
FieldMeta field_meta(FieldName("RowID"), RowFieldID, engine::DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), row_count, field_meta);
|
||||
info.field_data = array.release();
|
||||
info.row_count = dataset.row_ids_.size();
|
||||
info.field_id = 0; // field id for RowId
|
||||
info.field_id = RowFieldID.get(); // field id for RowId
|
||||
seg.LoadFieldData(info);
|
||||
}
|
||||
{
|
||||
LoadFieldDataInfo info;
|
||||
info.blob = dataset.timestamps_.data();
|
||||
FieldMeta field_meta(FieldName("Timestamp"), TimestampFieldID, engine::DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), row_count, field_meta);
|
||||
info.field_data = array.release();
|
||||
info.row_count = dataset.timestamps_.size();
|
||||
info.field_id = 1;
|
||||
info.field_id = TimestampFieldID.get();
|
||||
seg.LoadFieldData(info);
|
||||
}
|
||||
int field_offset = 0;
|
||||
for (auto& meta : seg.get_schema().get_fields()) {
|
||||
for (auto field_data : dataset.raw_->fields_data()) {
|
||||
LoadFieldDataInfo info;
|
||||
info.field_id = meta.get_id().get();
|
||||
info.field_id = field_data.field_id();
|
||||
info.row_count = row_count;
|
||||
info.blob = dataset.cols_[field_offset].data();
|
||||
info.field_data = &field_data;
|
||||
seg.LoadFieldData(info);
|
||||
++field_offset;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -364,4 +414,18 @@ GenIndexing(int64_t N, int64_t dim, const float* vec) {
|
|||
return indexing;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline scalar::IndexBasePtr
|
||||
GenScalarIndexing(int64_t N, const T* data) {
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
auto indexing = scalar::CreateStringIndexSort();
|
||||
indexing->Build(N, data);
|
||||
return indexing;
|
||||
} else {
|
||||
auto indexing = scalar::CreateScalarIndexSort<T>();
|
||||
indexing->Build(N, data);
|
||||
return indexing;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -497,7 +497,6 @@ func (it *IndexBuildTask) saveIndex(ctx context.Context, blobs []*storage.Blob)
|
|||
it.savePaths = make([]string, blobCnt)
|
||||
saveIndexFile := func(idx int) error {
|
||||
blob := blobs[idx]
|
||||
|
||||
savePath := getSavePathByKey(blob.Key)
|
||||
saveIndexFileFn := func() error {
|
||||
v, err := it.etcdKV.Load(it.req.MetaPath)
|
||||
|
|
|
@ -12,6 +12,8 @@ enum OpType {
|
|||
LessEqual = 4;
|
||||
Equal = 5;
|
||||
NotEqual = 6;
|
||||
PrefixMatch = 7; // startsWith
|
||||
PostfixMatch = 8; // endsWith
|
||||
};
|
||||
|
||||
enum ArithOpType {
|
||||
|
@ -46,12 +48,16 @@ message ColumnInfo {
|
|||
bool is_autoID = 4;
|
||||
}
|
||||
|
||||
// For example: a startsWith "prefix", a >= "str", b < 2 and etc,
|
||||
// where both a and b are field in schema.
|
||||
message UnaryRangeExpr {
|
||||
ColumnInfo column_info = 1;
|
||||
OpType op = 2;
|
||||
GenericValue value = 3;
|
||||
}
|
||||
|
||||
// For example: "str1" < a <= "str9", 1 <= b < 9 and etc,
|
||||
// where both a and b are field in schema.
|
||||
message BinaryRangeExpr {
|
||||
ColumnInfo column_info = 1;
|
||||
bool lower_inclusive = 2;
|
||||
|
@ -60,17 +66,22 @@ message BinaryRangeExpr {
|
|||
GenericValue upper_value = 5;
|
||||
}
|
||||
|
||||
// For example: a startsWith b, a >= b, a < b, a == b and etc,
|
||||
// where both a and b are field in schema.
|
||||
message CompareExpr {
|
||||
ColumnInfo left_column_info = 1;
|
||||
ColumnInfo right_column_info = 2;
|
||||
OpType op = 3;
|
||||
}
|
||||
|
||||
// For example: a in ["term0", "term1"], b in [1, 2, 3, 4] and etc,
|
||||
// where both a and b are field in schema.
|
||||
message TermExpr {
|
||||
ColumnInfo column_info = 1;
|
||||
repeated GenericValue values = 2;
|
||||
}
|
||||
|
||||
// !(expr).
|
||||
message UnaryExpr {
|
||||
enum UnaryOp {
|
||||
Invalid = 0;
|
||||
|
@ -80,6 +91,7 @@ message UnaryExpr {
|
|||
Expr child = 2;
|
||||
}
|
||||
|
||||
// (expr) op (expr), where op is of (LogicalAnd, LogicalOr).
|
||||
message BinaryExpr {
|
||||
enum BinaryOp {
|
||||
Invalid = 0;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue