From 967a97b9bd4a48855a19b94e3ed740de2ecd7278 Mon Sep 17 00:00:00 2001 From: Enwei Jiao Date: Thu, 20 Apr 2023 11:32:31 +0800 Subject: [PATCH] Support json & array types (#23408) Signed-off-by: yah01 Co-authored-by: yah01 --- go.mod | 2 +- go.sum | 6 +- internal/core/src/common/Column.h | 37 +- internal/core/src/common/FieldMeta.h | 56 +- internal/core/src/common/Types.h | 2 + internal/core/src/common/Utils.h | 30 +- internal/core/src/pb/common.pb.cc | 0 internal/core/src/pb/common.pb.h | 0 internal/core/src/pb/index_cgo_msg.pb.cc | 0 internal/core/src/pb/index_cgo_msg.pb.h | 0 internal/core/src/pb/plan.pb.cc | 0 internal/core/src/pb/plan.pb.h | 0 internal/core/src/pb/schema.pb.cc | 754 ++++++++++++++-- internal/core/src/pb/schema.pb.h | 721 ++++++++++++++- internal/core/src/pb/segcore.pb.cc | 0 internal/core/src/pb/segcore.pb.h | 0 .../core/src/segcore/ConcurrentVector.cpp | 9 +- internal/core/src/segcore/FieldIndexing.h | 5 + internal/core/src/segcore/InsertRecord.h | 6 + .../core/src/segcore/SegmentSealedImpl.cpp | 22 +- internal/core/src/storage/PayloadWriter.cpp | 8 + internal/core/src/storage/PayloadWriter.h | 3 + internal/core/src/storage/Util.cpp | 27 + internal/core/src/storage/Util.h | 4 + internal/core/src/storage/parquet_c.cpp | 32 +- internal/core/src/storage/parquet_c.h | 4 + .../datanode/flow_graph_insert_buffer_node.go | 5 +- internal/proxy/impl.go | 2 - internal/querynodev2/pipeline/mock_data.go | 8 +- internal/querynodev2/segments/mock_data.go | 264 +++--- internal/querynodev2/segments/plan_test.go | 1 - internal/querynodev2/segments/segment.go | 1 + .../segments/segment_loader_test.go | 10 +- internal/querynodev2/services_test.go | 2 + internal/storage/data_codec.go | 145 ++- internal/storage/data_codec_test.go | 93 ++ internal/storage/data_sorter.go | 6 + internal/storage/payload.go | 67 ++ internal/storage/payload_benchmark_test.go | 221 ----- internal/storage/payload_cgo_test.go | 827 ------------------ internal/storage/payload_reader.go | 40 +- internal/storage/payload_reader_cgo.go | 333 ------- internal/storage/payload_test.go | 116 +++ internal/storage/utils.go | 86 ++ internal/storage/utils_test.go | 130 ++- internal/util/typeutil/result_helper_test.go | 2 + pkg/go.mod | 6 +- pkg/go.sum | 4 +- pkg/util/funcutil/func.go | 4 + pkg/util/typeutil/data_format.go | 258 ------ pkg/util/typeutil/data_format_test.go | 259 ------ pkg/util/typeutil/gen_empty_field_data.go | 30 + pkg/util/typeutil/schema.go | 80 +- pkg/util/typeutil/schema_test.go | 226 ++++- scripts/run_go_codecov.sh | 8 +- 55 files changed, 2735 insertions(+), 2227 deletions(-) mode change 100644 => 100755 internal/core/src/pb/common.pb.cc mode change 100644 => 100755 internal/core/src/pb/common.pb.h mode change 100644 => 100755 internal/core/src/pb/index_cgo_msg.pb.cc mode change 100644 => 100755 internal/core/src/pb/index_cgo_msg.pb.h mode change 100644 => 100755 internal/core/src/pb/plan.pb.cc mode change 100644 => 100755 internal/core/src/pb/plan.pb.h mode change 100644 => 100755 internal/core/src/pb/schema.pb.cc mode change 100644 => 100755 internal/core/src/pb/schema.pb.h mode change 100644 => 100755 internal/core/src/pb/segcore.pb.cc mode change 100644 => 100755 internal/core/src/pb/segcore.pb.h delete mode 100644 internal/storage/payload_benchmark_test.go delete mode 100644 internal/storage/payload_cgo_test.go delete mode 100644 internal/storage/payload_reader_cgo.go delete mode 100644 pkg/util/typeutil/data_format.go delete mode 100644 pkg/util/typeutil/data_format_test.go diff --git a/go.mod b/go.mod index a4db7a3dc9..9e79792ed2 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/golang/protobuf v1.5.3 github.com/klauspost/compress v1.14.4 github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d - github.com/milvus-io/milvus-proto/go-api v0.0.0-20230411174625-2c86533465fb + github.com/milvus-io/milvus-proto/go-api v0.0.0-20230416064425-aec3e83865b2 github.com/milvus-io/milvus/pkg v0.0.0-00010101000000-000000000000 github.com/minio/minio-go/v7 v7.0.17 github.com/panjf2000/ants/v2 v2.7.2 diff --git a/go.sum b/go.sum index b19947332f..7289a158b4 100644 --- a/go.sum +++ b/go.sum @@ -572,10 +572,8 @@ github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/le github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8= github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4= -github.com/milvus-io/milvus-proto/go-api v0.0.0-20230322065753-aa8a66130217 h1:58lCM3+oh3ZuCemnOE3V2VdaPnIL+LS7eoEyrFfrxOM= -github.com/milvus-io/milvus-proto/go-api v0.0.0-20230322065753-aa8a66130217/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk= -github.com/milvus-io/milvus-proto/go-api v0.0.0-20230411174625-2c86533465fb h1:gMSlJbBbfI6IZ6vktimD94/ASaLYFNXiX2xhXqVeFxA= -github.com/milvus-io/milvus-proto/go-api v0.0.0-20230411174625-2c86533465fb/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk= +github.com/milvus-io/milvus-proto/go-api v0.0.0-20230416064425-aec3e83865b2 h1:G5uN68X/7eoCfHUkNvkbNueFhHuohCZG94te+ApLAOY= +github.com/milvus-io/milvus-proto/go-api v0.0.0-20230416064425-aec3e83865b2/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk= github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A= github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= diff --git a/internal/core/src/common/Column.h b/internal/core/src/common/Column.h index 6a5fd23a10..f5a3660e83 100644 --- a/internal/core/src/common/Column.h +++ b/internal/core/src/common/Column.h @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -26,9 +27,13 @@ #include "common/Utils.h" #include "exceptions/EasyAssert.h" #include "fmt/core.h" +#include "log/Log.h" +#include "nlohmann/json.hpp" namespace milvus::segcore { +#define FIELD_DATA(info, field) (info->scalars().field##_data().data()) + struct Entry { char* data; uint32_t length; @@ -103,11 +108,17 @@ class VariableColumn : public ColumnBase { using ViewType = std::conditional_t, std::string_view, T>; + template VariableColumn(int64_t segment_id, const FieldMeta& field_meta, - const LoadFieldDataInfo& info) { + const LoadFieldDataInfo& info, + Ctor&& ctor) { auto begin = info.field_data->scalars().string_data().data().begin(); auto end = info.field_data->scalars().string_data().data().end(); + if constexpr (std::is_same_v) { + begin = info.field_data->scalars().json_data().data().begin(); + end = info.field_data->scalars().json_data().data().end(); + } indices_.reserve(info.row_count); while (begin != end) { @@ -117,7 +128,7 @@ class VariableColumn : public ColumnBase { } data_ = static_cast(CreateMap(segment_id, field_meta, info)); - construct_views(); + construct_views(std::forward(ctor)); } VariableColumn(VariableColumn&& field) noexcept @@ -145,14 +156,28 @@ class VariableColumn : public ColumnBase { } protected: + template void - construct_views() { + construct_views(Ctor ctor) { views_.reserve(indices_.size()); for (size_t i = 0; i < indices_.size() - 1; i++) { - views_.emplace_back(data_ + indices_[i], - indices_[i + 1] - indices_[i]); + views_.emplace_back( + ctor(data_ + indices_[i], indices_[i + 1] - indices_[i])); + } + views_.emplace_back( + ctor(data_ + indices_.back(), size_ - indices_.back())); + + // as we stores the json objects entirely in memory, + // the raw data is not needed anymore + if constexpr (std::is_same_v) { + if (munmap(data_, size_)) { + AssertInfo( + true, + fmt::format( + "failed to unmap json field after deserialized, err={}", + strerror(errno))); + } } - views_.emplace_back(data_ + indices_.back(), size_ - indices_.back()); } private: diff --git a/internal/core/src/common/FieldMeta.h b/internal/core/src/common/FieldMeta.h index 3fff5726c5..35098dcf3a 100644 --- a/internal/core/src/common/FieldMeta.h +++ b/internal/core/src/common/FieldMeta.h @@ -75,6 +75,10 @@ datatype_name(DataType data_type) { return "double"; case DataType::VARCHAR: return "varChar"; + case DataType::ARRAY: + return "array"; + case DataType::JSON: + return "json"; case DataType::VECTOR_FLOAT: return "vector_float"; case DataType::VECTOR_BINARY: { @@ -105,11 +109,24 @@ datatype_is_string(DataType datatype) { } } +inline bool +datatype_is_binary(DataType datatype) { + switch (datatype) { + case DataType::ARRAY: + case DataType::JSON: + return true; + default: + return false; + } +} + inline bool datatype_is_variable(DataType datatype) { switch (datatype) { case DataType::VARCHAR: case DataType::STRING: + case DataType::ARRAY: + case DataType::JSON: return true; default: return false; @@ -152,7 +169,7 @@ class FieldMeta { FieldMeta(const FieldName& name, FieldId id, DataType type) : name_(name), id_(id), type_(type) { - Assert(!is_vector()); + Assert(!datatype_is_vector(type_)); } FieldMeta(const FieldName& name, @@ -163,7 +180,7 @@ class FieldMeta { id_(id), type_(type), string_info_(StringInfo{max_length}) { - Assert(is_string()); + Assert(datatype_is_string(type_)); } FieldMeta(const FieldName& name, @@ -175,39 +192,26 @@ class FieldMeta { id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) { - Assert(is_vector()); - } - - bool - is_vector() const { - Assert(type_ != DataType::NONE); - return type_ == DataType::VECTOR_BINARY || - type_ == DataType::VECTOR_FLOAT; - } - - bool - is_string() const { - Assert(type_ != DataType::NONE); - return type_ == DataType::VARCHAR || type_ == DataType::STRING; + Assert(datatype_is_vector(type_)); } int64_t get_dim() const { - Assert(is_vector()); + Assert(datatype_is_vector(type_)); Assert(vector_info_.has_value()); return vector_info_->dim_; } int64_t get_max_len() const { - Assert(is_string()); + Assert(datatype_is_string(type_)); Assert(string_info_.has_value()); return string_info_->max_length; } std::optional get_metric_type() const { - Assert(is_vector()); + Assert(datatype_is_vector(type_)); Assert(vector_info_.has_value()); return vector_info_->metric_type_; } @@ -227,12 +231,26 @@ class FieldMeta { return type_; } + bool + is_vector() const { + return datatype_is_vector(type_); + } + + bool + is_string() const { + return datatype_is_string(type_); + } + size_t get_sizeof() const { + static const size_t ARRAY_SIZE = 128; + static const size_t JSON_SIZE = 512; if (is_vector()) { return datatype_sizeof(type_, get_dim()); } else if (is_string()) { return string_info_->max_length; + } else if (datatype_is_variable(type_)) { + return type_ == DataType::ARRAY ? ARRAY_SIZE : JSON_SIZE; } else { return datatype_sizeof(type_); } diff --git a/internal/core/src/common/Types.h b/internal/core/src/common/Types.h index 806a5fa4f4..c359baf20c 100644 --- a/internal/core/src/common/Types.h +++ b/internal/core/src/common/Types.h @@ -59,6 +59,8 @@ enum class DataType { STRING = 20, VARCHAR = 21, + ARRAY = 22, + JSON = 23, VECTOR_BINARY = 100, VECTOR_FLOAT = 101, diff --git a/internal/core/src/common/Utils.h b/internal/core/src/common/Utils.h index 8e8103c004..6353af0d10 100644 --- a/internal/core/src/common/Utils.h +++ b/internal/core/src/common/Utils.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include "common/Consts.h" #include "common/FieldMeta.h" #include "common/LoadInfo.h" +#include "common/Types.h" #include "config/ConfigChunkManager.h" #include "exceptions/EasyAssert.h" #include "knowhere/dataset.h" @@ -192,17 +194,19 @@ GetDataSize(const FieldMeta& field, size_t row_count, const DataArray* data) { switch (data_type) { case DataType::VARCHAR: case DataType::STRING: { - auto begin = data->scalars().string_data().data().begin(); - auto end = data->scalars().string_data().data().end(); - - ssize_t size{0}; - while (begin != end) { - size += begin->size(); - begin++; + ssize_t size{}; + for (auto& data : data->scalars().string_data().data()) { + size += data.size(); + } + return size; + } + case DataType::JSON: { + ssize_t size{}; + for (auto& data : data->scalars().json_data().data()) { + size += data.size(); } return size; } - default: PanicInfo(fmt::format("not supported data type {}", datatype_name(data_type))); @@ -260,6 +264,16 @@ FillField(DataType data_type, } return dst; } + + case DataType::JSON: { + char* dest = reinterpret_cast(dst); + for (auto& data : data->scalars().json_data().data()) { + memcpy(dest, data.data(), data.size()); + dest += data.size(); + } + return dst; + } + case DataType::VECTOR_FLOAT: return memcpy( dst, data->vectors().float_vector().data().data(), size); diff --git a/internal/core/src/pb/common.pb.cc b/internal/core/src/pb/common.pb.cc old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/common.pb.h b/internal/core/src/pb/common.pb.h old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/index_cgo_msg.pb.cc b/internal/core/src/pb/index_cgo_msg.pb.cc old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/index_cgo_msg.pb.h b/internal/core/src/pb/index_cgo_msg.pb.h old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/plan.pb.cc b/internal/core/src/pb/plan.pb.cc old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/plan.pb.h b/internal/core/src/pb/plan.pb.h old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/schema.pb.cc b/internal/core/src/pb/schema.pb.cc old mode 100644 new mode 100755 index 7b308871b6..c44635849a --- a/internal/core/src/pb/schema.pb.cc +++ b/internal/core/src/pb/schema.pb.cc @@ -34,6 +34,7 @@ PROTOBUF_CONSTEXPR FieldSchema::FieldSchema( , /*decltype(_impl_.is_primary_key_)*/false , /*decltype(_impl_.autoid_)*/false , /*decltype(_impl_.state_)*/0 + , /*decltype(_impl_.element_type_)*/0 , /*decltype(_impl_._cached_size_)*/{}} {} struct FieldSchemaDefaultTypeInternal { PROTOBUF_CONSTEXPR FieldSchemaDefaultTypeInternal() @@ -153,6 +154,33 @@ struct StringArrayDefaultTypeInternal { }; }; PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 StringArrayDefaultTypeInternal _StringArray_default_instance_; +PROTOBUF_CONSTEXPR ArrayArray::ArrayArray( + ::_pbi::ConstantInitialized): _impl_{ + /*decltype(_impl_.data_)*/{} + , /*decltype(_impl_.element_type_)*/0 + , /*decltype(_impl_._cached_size_)*/{}} {} +struct ArrayArrayDefaultTypeInternal { + PROTOBUF_CONSTEXPR ArrayArrayDefaultTypeInternal() + : _instance(::_pbi::ConstantInitialized{}) {} + ~ArrayArrayDefaultTypeInternal() {} + union { + ArrayArray _instance; + }; +}; +PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 ArrayArrayDefaultTypeInternal _ArrayArray_default_instance_; +PROTOBUF_CONSTEXPR JSONArray::JSONArray( + ::_pbi::ConstantInitialized): _impl_{ + /*decltype(_impl_.data_)*/{} + , /*decltype(_impl_._cached_size_)*/{}} {} +struct JSONArrayDefaultTypeInternal { + PROTOBUF_CONSTEXPR JSONArrayDefaultTypeInternal() + : _instance(::_pbi::ConstantInitialized{}) {} + ~JSONArrayDefaultTypeInternal() {} + union { + JSONArray _instance; + }; +}; +PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 JSONArrayDefaultTypeInternal _JSONArray_default_instance_; PROTOBUF_CONSTEXPR ScalarField::ScalarField( ::_pbi::ConstantInitialized): _impl_{ /*decltype(_impl_.data_)*/{} @@ -235,7 +263,7 @@ PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT PROTOBUF_ATTRIBUTE_INIT_PRIORIT } // namespace schema } // namespace proto } // namespace milvus -static ::_pb::Metadata file_level_metadata_schema_2eproto[14]; +static ::_pb::Metadata file_level_metadata_schema_2eproto[16]; static const ::_pb::EnumDescriptor* file_level_enum_descriptors_schema_2eproto[2]; static constexpr ::_pb::ServiceDescriptor const** file_level_service_descriptors_schema_2eproto = nullptr; @@ -255,6 +283,7 @@ const uint32_t TableStruct_schema_2eproto::offsets[] PROTOBUF_SECTION_VARIABLE(p PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::FieldSchema, _impl_.index_params_), PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::FieldSchema, _impl_.autoid_), PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::FieldSchema, _impl_.state_), + PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::FieldSchema, _impl_.element_type_), ~0u, // no _has_bits_ PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::CollectionSchema, _internal_metadata_), ~0u, // no _extensions_ @@ -315,6 +344,21 @@ const uint32_t TableStruct_schema_2eproto::offsets[] PROTOBUF_SECTION_VARIABLE(p ~0u, // no _inlined_string_donated_ PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::StringArray, _impl_.data_), ~0u, // no _has_bits_ + PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::ArrayArray, _internal_metadata_), + ~0u, // no _extensions_ + ~0u, // no _oneof_case_ + ~0u, // no _weak_field_map_ + ~0u, // no _inlined_string_donated_ + PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::ArrayArray, _impl_.data_), + PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::ArrayArray, _impl_.element_type_), + ~0u, // no _has_bits_ + PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::JSONArray, _internal_metadata_), + ~0u, // no _extensions_ + ~0u, // no _oneof_case_ + ~0u, // no _weak_field_map_ + ~0u, // no _inlined_string_donated_ + PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::JSONArray, _impl_.data_), + ~0u, // no _has_bits_ PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::ScalarField, _internal_metadata_), ~0u, // no _extensions_ PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::ScalarField, _impl_._oneof_case_[0]), @@ -327,6 +371,8 @@ const uint32_t TableStruct_schema_2eproto::offsets[] PROTOBUF_SECTION_VARIABLE(p ::_pbi::kInvalidFieldOffsetTag, ::_pbi::kInvalidFieldOffsetTag, ::_pbi::kInvalidFieldOffsetTag, + ::_pbi::kInvalidFieldOffsetTag, + ::_pbi::kInvalidFieldOffsetTag, PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::ScalarField, _impl_.data_), ~0u, // no _has_bits_ PROTOBUF_FIELD_OFFSET(::milvus::proto::schema::VectorField, _internal_metadata_), @@ -374,19 +420,21 @@ const uint32_t TableStruct_schema_2eproto::offsets[] PROTOBUF_SECTION_VARIABLE(p }; static const ::_pbi::MigrationSchema schemas[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = { { 0, -1, -1, sizeof(::milvus::proto::schema::FieldSchema)}, - { 15, -1, -1, sizeof(::milvus::proto::schema::CollectionSchema)}, - { 25, -1, -1, sizeof(::milvus::proto::schema::BoolArray)}, - { 32, -1, -1, sizeof(::milvus::proto::schema::IntArray)}, - { 39, -1, -1, sizeof(::milvus::proto::schema::LongArray)}, - { 46, -1, -1, sizeof(::milvus::proto::schema::FloatArray)}, - { 53, -1, -1, sizeof(::milvus::proto::schema::DoubleArray)}, - { 60, -1, -1, sizeof(::milvus::proto::schema::BytesArray)}, - { 67, -1, -1, sizeof(::milvus::proto::schema::StringArray)}, - { 74, -1, -1, sizeof(::milvus::proto::schema::ScalarField)}, - { 88, -1, -1, sizeof(::milvus::proto::schema::VectorField)}, - { 98, -1, -1, sizeof(::milvus::proto::schema::FieldData)}, - { 110, -1, -1, sizeof(::milvus::proto::schema::IDs)}, - { 119, -1, -1, sizeof(::milvus::proto::schema::SearchResultData)}, + { 16, -1, -1, sizeof(::milvus::proto::schema::CollectionSchema)}, + { 26, -1, -1, sizeof(::milvus::proto::schema::BoolArray)}, + { 33, -1, -1, sizeof(::milvus::proto::schema::IntArray)}, + { 40, -1, -1, sizeof(::milvus::proto::schema::LongArray)}, + { 47, -1, -1, sizeof(::milvus::proto::schema::FloatArray)}, + { 54, -1, -1, sizeof(::milvus::proto::schema::DoubleArray)}, + { 61, -1, -1, sizeof(::milvus::proto::schema::BytesArray)}, + { 68, -1, -1, sizeof(::milvus::proto::schema::StringArray)}, + { 75, -1, -1, sizeof(::milvus::proto::schema::ArrayArray)}, + { 83, -1, -1, sizeof(::milvus::proto::schema::JSONArray)}, + { 90, -1, -1, sizeof(::milvus::proto::schema::ScalarField)}, + { 106, -1, -1, sizeof(::milvus::proto::schema::VectorField)}, + { 116, -1, -1, sizeof(::milvus::proto::schema::FieldData)}, + { 128, -1, -1, sizeof(::milvus::proto::schema::IDs)}, + { 137, -1, -1, sizeof(::milvus::proto::schema::SearchResultData)}, }; static const ::_pb::Message* const file_default_instances[] = { @@ -399,6 +447,8 @@ static const ::_pb::Message* const file_default_instances[] = { &::milvus::proto::schema::_DoubleArray_default_instance_._instance, &::milvus::proto::schema::_BytesArray_default_instance_._instance, &::milvus::proto::schema::_StringArray_default_instance_._instance, + &::milvus::proto::schema::_ArrayArray_default_instance_._instance, + &::milvus::proto::schema::_JSONArray_default_instance_._instance, &::milvus::proto::schema::_ScalarField_default_instance_._instance, &::milvus::proto::schema::_VectorField_default_instance_._instance, &::milvus::proto::schema::_FieldData_default_instance_._instance, @@ -408,7 +458,7 @@ static const ::_pb::Message* const file_default_instances[] = { const char descriptor_table_protodef_schema_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = "\n\014schema.proto\022\023milvus.proto.schema\032\014com" - "mon.proto\"\274\002\n\013FieldSchema\022\017\n\007fieldID\030\001 \001" + "mon.proto\"\361\002\n\013FieldSchema\022\017\n\007fieldID\030\001 \001" "(\003\022\014\n\004name\030\002 \001(\t\022\026\n\016is_primary_key\030\003 \001(\010" "\022\023\n\013description\030\004 \001(\t\0220\n\tdata_type\030\005 \001(\016" "2\035.milvus.proto.schema.DataType\0226\n\013type_" @@ -416,58 +466,67 @@ const char descriptor_table_protodef_schema_2eproto[] PROTOBUF_SECTION_VARIABLE( "luePair\0227\n\014index_params\030\007 \003(\0132!.milvus.p" "roto.common.KeyValuePair\022\016\n\006autoID\030\010 \001(\010" "\022.\n\005state\030\t \001(\0162\037.milvus.proto.schema.Fi" - "eldState\"w\n\020CollectionSchema\022\014\n\004name\030\001 \001" - "(\t\022\023\n\013description\030\002 \001(\t\022\016\n\006autoID\030\003 \001(\010\022" - "0\n\006fields\030\004 \003(\0132 .milvus.proto.schema.Fi" - "eldSchema\"\031\n\tBoolArray\022\014\n\004data\030\001 \003(\010\"\030\n\010" - "IntArray\022\014\n\004data\030\001 \003(\005\"\031\n\tLongArray\022\014\n\004d" - "ata\030\001 \003(\003\"\032\n\nFloatArray\022\014\n\004data\030\001 \003(\002\"\033\n" - "\013DoubleArray\022\014\n\004data\030\001 \003(\001\"\032\n\nBytesArray" - "\022\014\n\004data\030\001 \003(\014\"\033\n\013StringArray\022\014\n\004data\030\001 " - "\003(\t\"\222\003\n\013ScalarField\0223\n\tbool_data\030\001 \001(\0132\036" - ".milvus.proto.schema.BoolArrayH\000\0221\n\010int_" - "data\030\002 \001(\0132\035.milvus.proto.schema.IntArra" - "yH\000\0223\n\tlong_data\030\003 \001(\0132\036.milvus.proto.sc" - "hema.LongArrayH\000\0225\n\nfloat_data\030\004 \001(\0132\037.m" - "ilvus.proto.schema.FloatArrayH\000\0227\n\013doubl" - "e_data\030\005 \001(\0132 .milvus.proto.schema.Doubl" - "eArrayH\000\0227\n\013string_data\030\006 \001(\0132 .milvus.p" - "roto.schema.StringArrayH\000\0225\n\nbytes_data\030" - "\007 \001(\0132\037.milvus.proto.schema.BytesArrayH\000" - "B\006\n\004data\"t\n\013VectorField\022\013\n\003dim\030\001 \001(\003\0227\n\014" - "float_vector\030\002 \001(\0132\037.milvus.proto.schema" - ".FloatArrayH\000\022\027\n\rbinary_vector\030\003 \001(\014H\000B\006" - "\n\004data\"\321\001\n\tFieldData\022+\n\004type\030\001 \001(\0162\035.mil" - "vus.proto.schema.DataType\022\022\n\nfield_name\030" - "\002 \001(\t\0223\n\007scalars\030\003 \001(\0132 .milvus.proto.sc" - "hema.ScalarFieldH\000\0223\n\007vectors\030\004 \001(\0132 .mi" - "lvus.proto.schema.VectorFieldH\000\022\020\n\010field" - "_id\030\005 \001(\003B\007\n\005field\"w\n\003IDs\0220\n\006int_id\030\001 \001(" - "\0132\036.milvus.proto.schema.LongArrayH\000\0222\n\006s" - "tr_id\030\002 \001(\0132 .milvus.proto.schema.String" - "ArrayH\000B\n\n\010id_field\"\261\001\n\020SearchResultData" - "\022\023\n\013num_queries\030\001 \001(\003\022\r\n\005top_k\030\002 \001(\003\0223\n\013" - "fields_data\030\003 \003(\0132\036.milvus.proto.schema." - "FieldData\022\016\n\006scores\030\004 \003(\002\022%\n\003ids\030\005 \001(\0132\030" - ".milvus.proto.schema.IDs\022\r\n\005topks\030\006 \003(\003*" - "\234\001\n\010DataType\022\010\n\004None\020\000\022\010\n\004Bool\020\001\022\010\n\004Int8" - "\020\002\022\t\n\005Int16\020\003\022\t\n\005Int32\020\004\022\t\n\005Int64\020\005\022\t\n\005F" - "loat\020\n\022\n\n\006Double\020\013\022\n\n\006String\020\024\022\013\n\007VarCha" - "r\020\025\022\020\n\014BinaryVector\020d\022\017\n\013FloatVector\020e*V" - "\n\nFieldState\022\020\n\014FieldCreated\020\000\022\021\n\rFieldC" - "reating\020\001\022\021\n\rFieldDropping\020\002\022\020\n\014FieldDro" - "pped\020\003Bf\n\016io.milvus.grpcB\013SchemaProtoP\001Z" - "1github.com/milvus-io/milvus-proto/go-ap" - "i/schemapb\240\001\001\252\002\016IO.Milvus.Grpcb\006proto3" + "eldState\0223\n\014element_type\030\n \001(\0162\035.milvus." + "proto.schema.DataType\"w\n\020CollectionSchem" + "a\022\014\n\004name\030\001 \001(\t\022\023\n\013description\030\002 \001(\t\022\016\n\006" + "autoID\030\003 \001(\010\0220\n\006fields\030\004 \003(\0132 .milvus.pr" + "oto.schema.FieldSchema\"\031\n\tBoolArray\022\014\n\004d" + "ata\030\001 \003(\010\"\030\n\010IntArray\022\014\n\004data\030\001 \003(\005\"\031\n\tL" + "ongArray\022\014\n\004data\030\001 \003(\003\"\032\n\nFloatArray\022\014\n\004" + "data\030\001 \003(\002\"\033\n\013DoubleArray\022\014\n\004data\030\001 \003(\001\"" + "\032\n\nBytesArray\022\014\n\004data\030\001 \003(\014\"\033\n\013StringArr" + "ay\022\014\n\004data\030\001 \003(\t\"q\n\nArrayArray\022.\n\004data\030\001" + " \003(\0132 .milvus.proto.schema.ScalarField\0223" + "\n\014element_type\030\002 \001(\0162\035.milvus.proto.sche" + "ma.DataType\"\031\n\tJSONArray\022\014\n\004data\030\001 \003(\014\"\376" + "\003\n\013ScalarField\0223\n\tbool_data\030\001 \001(\0132\036.milv" + "us.proto.schema.BoolArrayH\000\0221\n\010int_data\030" + "\002 \001(\0132\035.milvus.proto.schema.IntArrayH\000\0223" + "\n\tlong_data\030\003 \001(\0132\036.milvus.proto.schema." + "LongArrayH\000\0225\n\nfloat_data\030\004 \001(\0132\037.milvus" + ".proto.schema.FloatArrayH\000\0227\n\013double_dat" + "a\030\005 \001(\0132 .milvus.proto.schema.DoubleArra" + "yH\000\0227\n\013string_data\030\006 \001(\0132 .milvus.proto." + "schema.StringArrayH\000\0225\n\nbytes_data\030\007 \001(\013" + "2\037.milvus.proto.schema.BytesArrayH\000\0225\n\na" + "rray_data\030\010 \001(\0132\037.milvus.proto.schema.Ar" + "rayArrayH\000\0223\n\tjson_data\030\t \001(\0132\036.milvus.p" + "roto.schema.JSONArrayH\000B\006\n\004data\"t\n\013Vecto" + "rField\022\013\n\003dim\030\001 \001(\003\0227\n\014float_vector\030\002 \001(" + "\0132\037.milvus.proto.schema.FloatArrayH\000\022\027\n\r" + "binary_vector\030\003 \001(\014H\000B\006\n\004data\"\321\001\n\tFieldD" + "ata\022+\n\004type\030\001 \001(\0162\035.milvus.proto.schema." + "DataType\022\022\n\nfield_name\030\002 \001(\t\0223\n\007scalars\030" + "\003 \001(\0132 .milvus.proto.schema.ScalarFieldH" + "\000\0223\n\007vectors\030\004 \001(\0132 .milvus.proto.schema" + ".VectorFieldH\000\022\020\n\010field_id\030\005 \001(\003B\007\n\005fiel" + "d\"w\n\003IDs\0220\n\006int_id\030\001 \001(\0132\036.milvus.proto." + "schema.LongArrayH\000\0222\n\006str_id\030\002 \001(\0132 .mil" + "vus.proto.schema.StringArrayH\000B\n\n\010id_fie" + "ld\"\261\001\n\020SearchResultData\022\023\n\013num_queries\030\001" + " \001(\003\022\r\n\005top_k\030\002 \001(\003\0223\n\013fields_data\030\003 \003(\013" + "2\036.milvus.proto.schema.FieldData\022\016\n\006scor" + "es\030\004 \003(\002\022%\n\003ids\030\005 \001(\0132\030.milvus.proto.sch" + "ema.IDs\022\r\n\005topks\030\006 \003(\003*\261\001\n\010DataType\022\010\n\004N" + "one\020\000\022\010\n\004Bool\020\001\022\010\n\004Int8\020\002\022\t\n\005Int16\020\003\022\t\n\005" + "Int32\020\004\022\t\n\005Int64\020\005\022\t\n\005Float\020\n\022\n\n\006Double\020" + "\013\022\n\n\006String\020\024\022\013\n\007VarChar\020\025\022\t\n\005Array\020\026\022\010\n" + "\004JSON\020\027\022\020\n\014BinaryVector\020d\022\017\n\013FloatVector" + "\020e*V\n\nFieldState\022\020\n\014FieldCreated\020\000\022\021\n\rFi" + "eldCreating\020\001\022\021\n\rFieldDropping\020\002\022\020\n\014Fiel" + "dDropped\020\003Bf\n\016io.milvus.grpcB\013SchemaProt" + "oP\001Z1github.com/milvus-io/milvus-proto/g" + "o-api/schemapb\240\001\001\252\002\016IO.Milvus.Grpcb\006prot" + "o3" ; static const ::_pbi::DescriptorTable* const descriptor_table_schema_2eproto_deps[1] = { &::descriptor_table_common_2eproto, }; static ::_pbi::once_flag descriptor_table_schema_2eproto_once; const ::_pbi::DescriptorTable descriptor_table_schema_2eproto = { - false, false, 2078, descriptor_table_protodef_schema_2eproto, + false, false, 2402, descriptor_table_protodef_schema_2eproto, "schema.proto", - &descriptor_table_schema_2eproto_once, descriptor_table_schema_2eproto_deps, 1, 14, + &descriptor_table_schema_2eproto_once, descriptor_table_schema_2eproto_deps, 1, 16, schemas, file_default_instances, TableStruct_schema_2eproto::offsets, file_level_metadata_schema_2eproto, file_level_enum_descriptors_schema_2eproto, file_level_service_descriptors_schema_2eproto, @@ -497,6 +556,8 @@ bool DataType_IsValid(int value) { case 11: case 20: case 21: + case 22: + case 23: case 100: case 101: return true; @@ -553,6 +614,7 @@ FieldSchema::FieldSchema(const FieldSchema& from) , decltype(_impl_.is_primary_key_){} , decltype(_impl_.autoid_){} , decltype(_impl_.state_){} + , decltype(_impl_.element_type_){} , /*decltype(_impl_._cached_size_)*/{}}; _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -573,8 +635,8 @@ FieldSchema::FieldSchema(const FieldSchema& from) _this->GetArenaForAllocation()); } ::memcpy(&_impl_.fieldid_, &from._impl_.fieldid_, - static_cast(reinterpret_cast(&_impl_.state_) - - reinterpret_cast(&_impl_.fieldid_)) + sizeof(_impl_.state_)); + static_cast(reinterpret_cast(&_impl_.element_type_) - + reinterpret_cast(&_impl_.fieldid_)) + sizeof(_impl_.element_type_)); // @@protoc_insertion_point(copy_constructor:milvus.proto.schema.FieldSchema) } @@ -592,6 +654,7 @@ inline void FieldSchema::SharedCtor( , decltype(_impl_.is_primary_key_){false} , decltype(_impl_.autoid_){false} , decltype(_impl_.state_){0} + , decltype(_impl_.element_type_){0} , /*decltype(_impl_._cached_size_)*/{} }; _impl_.name_.InitDefault(); @@ -636,8 +699,8 @@ void FieldSchema::Clear() { _impl_.name_.ClearToEmpty(); _impl_.description_.ClearToEmpty(); ::memset(&_impl_.fieldid_, 0, static_cast( - reinterpret_cast(&_impl_.state_) - - reinterpret_cast(&_impl_.fieldid_)) + sizeof(_impl_.state_)); + reinterpret_cast(&_impl_.element_type_) - + reinterpret_cast(&_impl_.fieldid_)) + sizeof(_impl_.element_type_)); _internal_metadata_.Clear<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); } @@ -735,6 +798,15 @@ const char* FieldSchema::_InternalParse(const char* ptr, ::_pbi::ParseContext* c } else goto handle_unusual; continue; + // .milvus.proto.schema.DataType element_type = 10; + case 10: + if (PROTOBUF_PREDICT_TRUE(static_cast(tag) == 80)) { + uint64_t val = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + _internal_set_element_type(static_cast<::milvus::proto::schema::DataType>(val)); + } else + goto handle_unusual; + continue; default: goto handle_unusual; } // switch @@ -832,6 +904,13 @@ uint8_t* FieldSchema::_InternalSerialize( 9, this->_internal_state(), target); } + // .milvus.proto.schema.DataType element_type = 10; + if (this->_internal_element_type() != 0) { + target = stream->EnsureSpace(target); + target = ::_pbi::WireFormatLite::WriteEnumToArray( + 10, this->_internal_element_type(), target); + } + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { target = ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray( _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance), target, stream); @@ -903,6 +982,12 @@ size_t FieldSchema::ByteSizeLong() const { ::_pbi::WireFormatLite::EnumSize(this->_internal_state()); } + // .milvus.proto.schema.DataType element_type = 10; + if (this->_internal_element_type() != 0) { + total_size += 1 + + ::_pbi::WireFormatLite::EnumSize(this->_internal_element_type()); + } + return MaybeComputeUnknownFieldsSize(total_size, &_impl_._cached_size_); } @@ -944,6 +1029,9 @@ void FieldSchema::MergeImpl(::PROTOBUF_NAMESPACE_ID::Message& to_msg, const ::PR if (from._internal_state() != 0) { _this->_internal_set_state(from._internal_state()); } + if (from._internal_element_type() != 0) { + _this->_internal_set_element_type(from._internal_element_type()); + } _this->_internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); } @@ -974,8 +1062,8 @@ void FieldSchema::InternalSwap(FieldSchema* other) { &other->_impl_.description_, rhs_arena ); ::PROTOBUF_NAMESPACE_ID::internal::memswap< - PROTOBUF_FIELD_OFFSET(FieldSchema, _impl_.state_) - + sizeof(FieldSchema::_impl_.state_) + PROTOBUF_FIELD_OFFSET(FieldSchema, _impl_.element_type_) + + sizeof(FieldSchema::_impl_.element_type_) - PROTOBUF_FIELD_OFFSET(FieldSchema, _impl_.fieldid_)>( reinterpret_cast(&_impl_.fieldid_), reinterpret_cast(&other->_impl_.fieldid_)); @@ -2616,6 +2704,406 @@ void StringArray::InternalSwap(StringArray* other) { // =================================================================== +class ArrayArray::_Internal { + public: +}; + +ArrayArray::ArrayArray(::PROTOBUF_NAMESPACE_ID::Arena* arena, + bool is_message_owned) + : ::PROTOBUF_NAMESPACE_ID::Message(arena, is_message_owned) { + SharedCtor(arena, is_message_owned); + // @@protoc_insertion_point(arena_constructor:milvus.proto.schema.ArrayArray) +} +ArrayArray::ArrayArray(const ArrayArray& from) + : ::PROTOBUF_NAMESPACE_ID::Message() { + ArrayArray* const _this = this; (void)_this; + new (&_impl_) Impl_{ + decltype(_impl_.data_){from._impl_.data_} + , decltype(_impl_.element_type_){} + , /*decltype(_impl_._cached_size_)*/{}}; + + _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); + _this->_impl_.element_type_ = from._impl_.element_type_; + // @@protoc_insertion_point(copy_constructor:milvus.proto.schema.ArrayArray) +} + +inline void ArrayArray::SharedCtor( + ::_pb::Arena* arena, bool is_message_owned) { + (void)arena; + (void)is_message_owned; + new (&_impl_) Impl_{ + decltype(_impl_.data_){arena} + , decltype(_impl_.element_type_){0} + , /*decltype(_impl_._cached_size_)*/{} + }; +} + +ArrayArray::~ArrayArray() { + // @@protoc_insertion_point(destructor:milvus.proto.schema.ArrayArray) + if (auto *arena = _internal_metadata_.DeleteReturnArena<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>()) { + (void)arena; + return; + } + SharedDtor(); +} + +inline void ArrayArray::SharedDtor() { + GOOGLE_DCHECK(GetArenaForAllocation() == nullptr); + _impl_.data_.~RepeatedPtrField(); +} + +void ArrayArray::SetCachedSize(int size) const { + _impl_._cached_size_.Set(size); +} + +void ArrayArray::Clear() { +// @@protoc_insertion_point(message_clear_start:milvus.proto.schema.ArrayArray) + uint32_t cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + _impl_.data_.Clear(); + _impl_.element_type_ = 0; + _internal_metadata_.Clear<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); +} + +const char* ArrayArray::_InternalParse(const char* ptr, ::_pbi::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + while (!ctx->Done(&ptr)) { + uint32_t tag; + ptr = ::_pbi::ReadTag(ptr, &tag); + switch (tag >> 3) { + // repeated .milvus.proto.schema.ScalarField data = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast(tag) == 10)) { + ptr -= 1; + do { + ptr += 1; + ptr = ctx->ParseMessage(_internal_add_data(), ptr); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<10>(ptr)); + } else + goto handle_unusual; + continue; + // .milvus.proto.schema.DataType element_type = 2; + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast(tag) == 16)) { + uint64_t val = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + _internal_set_element_type(static_cast<::milvus::proto::schema::DataType>(val)); + } else + goto handle_unusual; + continue; + default: + goto handle_unusual; + } // switch + handle_unusual: + if ((tag == 0) || ((tag & 7) == 4)) { + CHK_(ptr); + ctx->SetLastTag(tag); + goto message_done; + } + ptr = UnknownFieldParse( + tag, + _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(), + ptr, ctx); + CHK_(ptr != nullptr); + } // while +message_done: + return ptr; +failure: + ptr = nullptr; + goto message_done; +#undef CHK_ +} + +uint8_t* ArrayArray::_InternalSerialize( + uint8_t* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:milvus.proto.schema.ArrayArray) + uint32_t cached_has_bits = 0; + (void) cached_has_bits; + + // repeated .milvus.proto.schema.ScalarField data = 1; + for (unsigned i = 0, + n = static_cast(this->_internal_data_size()); i < n; i++) { + const auto& repfield = this->_internal_data(i); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(1, repfield, repfield.GetCachedSize(), target, stream); + } + + // .milvus.proto.schema.DataType element_type = 2; + if (this->_internal_element_type() != 0) { + target = stream->EnsureSpace(target); + target = ::_pbi::WireFormatLite::WriteEnumToArray( + 2, this->_internal_element_type(), target); + } + + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray( + _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance), target, stream); + } + // @@protoc_insertion_point(serialize_to_array_end:milvus.proto.schema.ArrayArray) + return target; +} + +size_t ArrayArray::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:milvus.proto.schema.ArrayArray) + size_t total_size = 0; + + uint32_t cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + // repeated .milvus.proto.schema.ScalarField data = 1; + total_size += 1UL * this->_internal_data_size(); + for (const auto& msg : this->_impl_.data_) { + total_size += + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize(msg); + } + + // .milvus.proto.schema.DataType element_type = 2; + if (this->_internal_element_type() != 0) { + total_size += 1 + + ::_pbi::WireFormatLite::EnumSize(this->_internal_element_type()); + } + + return MaybeComputeUnknownFieldsSize(total_size, &_impl_._cached_size_); +} + +const ::PROTOBUF_NAMESPACE_ID::Message::ClassData ArrayArray::_class_data_ = { + ::PROTOBUF_NAMESPACE_ID::Message::CopyWithSourceCheck, + ArrayArray::MergeImpl +}; +const ::PROTOBUF_NAMESPACE_ID::Message::ClassData*ArrayArray::GetClassData() const { return &_class_data_; } + + +void ArrayArray::MergeImpl(::PROTOBUF_NAMESPACE_ID::Message& to_msg, const ::PROTOBUF_NAMESPACE_ID::Message& from_msg) { + auto* const _this = static_cast(&to_msg); + auto& from = static_cast(from_msg); + // @@protoc_insertion_point(class_specific_merge_from_start:milvus.proto.schema.ArrayArray) + GOOGLE_DCHECK_NE(&from, _this); + uint32_t cached_has_bits = 0; + (void) cached_has_bits; + + _this->_impl_.data_.MergeFrom(from._impl_.data_); + if (from._internal_element_type() != 0) { + _this->_internal_set_element_type(from._internal_element_type()); + } + _this->_internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); +} + +void ArrayArray::CopyFrom(const ArrayArray& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:milvus.proto.schema.ArrayArray) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool ArrayArray::IsInitialized() const { + return true; +} + +void ArrayArray::InternalSwap(ArrayArray* other) { + using std::swap; + _internal_metadata_.InternalSwap(&other->_internal_metadata_); + _impl_.data_.InternalSwap(&other->_impl_.data_); + swap(_impl_.element_type_, other->_impl_.element_type_); +} + +::PROTOBUF_NAMESPACE_ID::Metadata ArrayArray::GetMetadata() const { + return ::_pbi::AssignDescriptors( + &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, + file_level_metadata_schema_2eproto[9]); +} + +// =================================================================== + +class JSONArray::_Internal { + public: +}; + +JSONArray::JSONArray(::PROTOBUF_NAMESPACE_ID::Arena* arena, + bool is_message_owned) + : ::PROTOBUF_NAMESPACE_ID::Message(arena, is_message_owned) { + SharedCtor(arena, is_message_owned); + // @@protoc_insertion_point(arena_constructor:milvus.proto.schema.JSONArray) +} +JSONArray::JSONArray(const JSONArray& from) + : ::PROTOBUF_NAMESPACE_ID::Message() { + JSONArray* const _this = this; (void)_this; + new (&_impl_) Impl_{ + decltype(_impl_.data_){from._impl_.data_} + , /*decltype(_impl_._cached_size_)*/{}}; + + _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); + // @@protoc_insertion_point(copy_constructor:milvus.proto.schema.JSONArray) +} + +inline void JSONArray::SharedCtor( + ::_pb::Arena* arena, bool is_message_owned) { + (void)arena; + (void)is_message_owned; + new (&_impl_) Impl_{ + decltype(_impl_.data_){arena} + , /*decltype(_impl_._cached_size_)*/{} + }; +} + +JSONArray::~JSONArray() { + // @@protoc_insertion_point(destructor:milvus.proto.schema.JSONArray) + if (auto *arena = _internal_metadata_.DeleteReturnArena<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>()) { + (void)arena; + return; + } + SharedDtor(); +} + +inline void JSONArray::SharedDtor() { + GOOGLE_DCHECK(GetArenaForAllocation() == nullptr); + _impl_.data_.~RepeatedPtrField(); +} + +void JSONArray::SetCachedSize(int size) const { + _impl_._cached_size_.Set(size); +} + +void JSONArray::Clear() { +// @@protoc_insertion_point(message_clear_start:milvus.proto.schema.JSONArray) + uint32_t cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + _impl_.data_.Clear(); + _internal_metadata_.Clear<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); +} + +const char* JSONArray::_InternalParse(const char* ptr, ::_pbi::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + while (!ctx->Done(&ptr)) { + uint32_t tag; + ptr = ::_pbi::ReadTag(ptr, &tag); + switch (tag >> 3) { + // repeated bytes data = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast(tag) == 10)) { + ptr -= 1; + do { + ptr += 1; + auto str = _internal_add_data(); + ptr = ::_pbi::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<10>(ptr)); + } else + goto handle_unusual; + continue; + default: + goto handle_unusual; + } // switch + handle_unusual: + if ((tag == 0) || ((tag & 7) == 4)) { + CHK_(ptr); + ctx->SetLastTag(tag); + goto message_done; + } + ptr = UnknownFieldParse( + tag, + _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(), + ptr, ctx); + CHK_(ptr != nullptr); + } // while +message_done: + return ptr; +failure: + ptr = nullptr; + goto message_done; +#undef CHK_ +} + +uint8_t* JSONArray::_InternalSerialize( + uint8_t* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:milvus.proto.schema.JSONArray) + uint32_t cached_has_bits = 0; + (void) cached_has_bits; + + // repeated bytes data = 1; + for (int i = 0, n = this->_internal_data_size(); i < n; i++) { + const auto& s = this->_internal_data(i); + target = stream->WriteBytes(1, s, target); + } + + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray( + _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance), target, stream); + } + // @@protoc_insertion_point(serialize_to_array_end:milvus.proto.schema.JSONArray) + return target; +} + +size_t JSONArray::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:milvus.proto.schema.JSONArray) + size_t total_size = 0; + + uint32_t cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; + + // repeated bytes data = 1; + total_size += 1 * + ::PROTOBUF_NAMESPACE_ID::internal::FromIntSize(_impl_.data_.size()); + for (int i = 0, n = _impl_.data_.size(); i < n; i++) { + total_size += ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::BytesSize( + _impl_.data_.Get(i)); + } + + return MaybeComputeUnknownFieldsSize(total_size, &_impl_._cached_size_); +} + +const ::PROTOBUF_NAMESPACE_ID::Message::ClassData JSONArray::_class_data_ = { + ::PROTOBUF_NAMESPACE_ID::Message::CopyWithSourceCheck, + JSONArray::MergeImpl +}; +const ::PROTOBUF_NAMESPACE_ID::Message::ClassData*JSONArray::GetClassData() const { return &_class_data_; } + + +void JSONArray::MergeImpl(::PROTOBUF_NAMESPACE_ID::Message& to_msg, const ::PROTOBUF_NAMESPACE_ID::Message& from_msg) { + auto* const _this = static_cast(&to_msg); + auto& from = static_cast(from_msg); + // @@protoc_insertion_point(class_specific_merge_from_start:milvus.proto.schema.JSONArray) + GOOGLE_DCHECK_NE(&from, _this); + uint32_t cached_has_bits = 0; + (void) cached_has_bits; + + _this->_impl_.data_.MergeFrom(from._impl_.data_); + _this->_internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); +} + +void JSONArray::CopyFrom(const JSONArray& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:milvus.proto.schema.JSONArray) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool JSONArray::IsInitialized() const { + return true; +} + +void JSONArray::InternalSwap(JSONArray* other) { + using std::swap; + _internal_metadata_.InternalSwap(&other->_internal_metadata_); + _impl_.data_.InternalSwap(&other->_impl_.data_); +} + +::PROTOBUF_NAMESPACE_ID::Metadata JSONArray::GetMetadata() const { + return ::_pbi::AssignDescriptors( + &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, + file_level_metadata_schema_2eproto[10]); +} + +// =================================================================== + class ScalarField::_Internal { public: static const ::milvus::proto::schema::BoolArray& bool_data(const ScalarField* msg); @@ -2625,6 +3113,8 @@ class ScalarField::_Internal { static const ::milvus::proto::schema::DoubleArray& double_data(const ScalarField* msg); static const ::milvus::proto::schema::StringArray& string_data(const ScalarField* msg); static const ::milvus::proto::schema::BytesArray& bytes_data(const ScalarField* msg); + static const ::milvus::proto::schema::ArrayArray& array_data(const ScalarField* msg); + static const ::milvus::proto::schema::JSONArray& json_data(const ScalarField* msg); }; const ::milvus::proto::schema::BoolArray& @@ -2655,6 +3145,14 @@ const ::milvus::proto::schema::BytesArray& ScalarField::_Internal::bytes_data(const ScalarField* msg) { return *msg->_impl_.data_.bytes_data_; } +const ::milvus::proto::schema::ArrayArray& +ScalarField::_Internal::array_data(const ScalarField* msg) { + return *msg->_impl_.data_.array_data_; +} +const ::milvus::proto::schema::JSONArray& +ScalarField::_Internal::json_data(const ScalarField* msg) { + return *msg->_impl_.data_.json_data_; +} void ScalarField::set_allocated_bool_data(::milvus::proto::schema::BoolArray* bool_data) { ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArenaForAllocation(); clear_data(); @@ -2760,6 +3258,36 @@ void ScalarField::set_allocated_bytes_data(::milvus::proto::schema::BytesArray* } // @@protoc_insertion_point(field_set_allocated:milvus.proto.schema.ScalarField.bytes_data) } +void ScalarField::set_allocated_array_data(::milvus::proto::schema::ArrayArray* array_data) { + ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArenaForAllocation(); + clear_data(); + if (array_data) { + ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = + ::PROTOBUF_NAMESPACE_ID::Arena::InternalGetOwningArena(array_data); + if (message_arena != submessage_arena) { + array_data = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( + message_arena, array_data, submessage_arena); + } + set_has_array_data(); + _impl_.data_.array_data_ = array_data; + } + // @@protoc_insertion_point(field_set_allocated:milvus.proto.schema.ScalarField.array_data) +} +void ScalarField::set_allocated_json_data(::milvus::proto::schema::JSONArray* json_data) { + ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArenaForAllocation(); + clear_data(); + if (json_data) { + ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = + ::PROTOBUF_NAMESPACE_ID::Arena::InternalGetOwningArena(json_data); + if (message_arena != submessage_arena) { + json_data = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( + message_arena, json_data, submessage_arena); + } + set_has_json_data(); + _impl_.data_.json_data_ = json_data; + } + // @@protoc_insertion_point(field_set_allocated:milvus.proto.schema.ScalarField.json_data) +} ScalarField::ScalarField(::PROTOBUF_NAMESPACE_ID::Arena* arena, bool is_message_owned) : ::PROTOBUF_NAMESPACE_ID::Message(arena, is_message_owned) { @@ -2812,6 +3340,16 @@ ScalarField::ScalarField(const ScalarField& from) from._internal_bytes_data()); break; } + case kArrayData: { + _this->_internal_mutable_array_data()->::milvus::proto::schema::ArrayArray::MergeFrom( + from._internal_array_data()); + break; + } + case kJsonData: { + _this->_internal_mutable_json_data()->::milvus::proto::schema::JSONArray::MergeFrom( + from._internal_json_data()); + break; + } case DATA_NOT_SET: { break; } @@ -2896,6 +3434,18 @@ void ScalarField::clear_data() { } break; } + case kArrayData: { + if (GetArenaForAllocation() == nullptr) { + delete _impl_.data_.array_data_; + } + break; + } + case kJsonData: { + if (GetArenaForAllocation() == nullptr) { + delete _impl_.data_.json_data_; + } + break; + } case DATA_NOT_SET: { break; } @@ -2976,6 +3526,22 @@ const char* ScalarField::_InternalParse(const char* ptr, ::_pbi::ParseContext* c } else goto handle_unusual; continue; + // .milvus.proto.schema.ArrayArray array_data = 8; + case 8: + if (PROTOBUF_PREDICT_TRUE(static_cast(tag) == 66)) { + ptr = ctx->ParseMessage(_internal_mutable_array_data(), ptr); + CHK_(ptr); + } else + goto handle_unusual; + continue; + // .milvus.proto.schema.JSONArray json_data = 9; + case 9: + if (PROTOBUF_PREDICT_TRUE(static_cast(tag) == 74)) { + ptr = ctx->ParseMessage(_internal_mutable_json_data(), ptr); + CHK_(ptr); + } else + goto handle_unusual; + continue; default: goto handle_unusual; } // switch @@ -3054,6 +3620,20 @@ uint8_t* ScalarField::_InternalSerialize( _Internal::bytes_data(this).GetCachedSize(), target, stream); } + // .milvus.proto.schema.ArrayArray array_data = 8; + if (_internal_has_array_data()) { + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(8, _Internal::array_data(this), + _Internal::array_data(this).GetCachedSize(), target, stream); + } + + // .milvus.proto.schema.JSONArray json_data = 9; + if (_internal_has_json_data()) { + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(9, _Internal::json_data(this), + _Internal::json_data(this).GetCachedSize(), target, stream); + } + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { target = ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray( _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance), target, stream); @@ -3120,6 +3700,20 @@ size_t ScalarField::ByteSizeLong() const { *_impl_.data_.bytes_data_); break; } + // .milvus.proto.schema.ArrayArray array_data = 8; + case kArrayData: { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( + *_impl_.data_.array_data_); + break; + } + // .milvus.proto.schema.JSONArray json_data = 9; + case kJsonData: { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( + *_impl_.data_.json_data_); + break; + } case DATA_NOT_SET: { break; } @@ -3178,6 +3772,16 @@ void ScalarField::MergeImpl(::PROTOBUF_NAMESPACE_ID::Message& to_msg, const ::PR from._internal_bytes_data()); break; } + case kArrayData: { + _this->_internal_mutable_array_data()->::milvus::proto::schema::ArrayArray::MergeFrom( + from._internal_array_data()); + break; + } + case kJsonData: { + _this->_internal_mutable_json_data()->::milvus::proto::schema::JSONArray::MergeFrom( + from._internal_json_data()); + break; + } case DATA_NOT_SET: { break; } @@ -3206,7 +3810,7 @@ void ScalarField::InternalSwap(ScalarField* other) { ::PROTOBUF_NAMESPACE_ID::Metadata ScalarField::GetMetadata() const { return ::_pbi::AssignDescriptors( &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, - file_level_metadata_schema_2eproto[9]); + file_level_metadata_schema_2eproto[11]); } // =================================================================== @@ -3514,7 +4118,7 @@ void VectorField::InternalSwap(VectorField* other) { ::PROTOBUF_NAMESPACE_ID::Metadata VectorField::GetMetadata() const { return ::_pbi::AssignDescriptors( &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, - file_level_metadata_schema_2eproto[10]); + file_level_metadata_schema_2eproto[12]); } // =================================================================== @@ -3934,7 +4538,7 @@ void FieldData::InternalSwap(FieldData* other) { ::PROTOBUF_NAMESPACE_ID::Metadata FieldData::GetMetadata() const { return ::_pbi::AssignDescriptors( &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, - file_level_metadata_schema_2eproto[11]); + file_level_metadata_schema_2eproto[13]); } // =================================================================== @@ -4239,7 +4843,7 @@ void IDs::InternalSwap(IDs* other) { ::PROTOBUF_NAMESPACE_ID::Metadata IDs::GetMetadata() const { return ::_pbi::AssignDescriptors( &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, - file_level_metadata_schema_2eproto[12]); + file_level_metadata_schema_2eproto[14]); } // =================================================================== @@ -4600,7 +5204,7 @@ void SearchResultData::InternalSwap(SearchResultData* other) { ::PROTOBUF_NAMESPACE_ID::Metadata SearchResultData::GetMetadata() const { return ::_pbi::AssignDescriptors( &descriptor_table_schema_2eproto_getter, &descriptor_table_schema_2eproto_once, - file_level_metadata_schema_2eproto[13]); + file_level_metadata_schema_2eproto[15]); } // @@protoc_insertion_point(namespace_scope) @@ -4644,6 +5248,14 @@ template<> PROTOBUF_NOINLINE ::milvus::proto::schema::StringArray* Arena::CreateMaybeMessage< ::milvus::proto::schema::StringArray >(Arena* arena) { return Arena::CreateMessageInternal< ::milvus::proto::schema::StringArray >(arena); } +template<> PROTOBUF_NOINLINE ::milvus::proto::schema::ArrayArray* +Arena::CreateMaybeMessage< ::milvus::proto::schema::ArrayArray >(Arena* arena) { + return Arena::CreateMessageInternal< ::milvus::proto::schema::ArrayArray >(arena); +} +template<> PROTOBUF_NOINLINE ::milvus::proto::schema::JSONArray* +Arena::CreateMaybeMessage< ::milvus::proto::schema::JSONArray >(Arena* arena) { + return Arena::CreateMessageInternal< ::milvus::proto::schema::JSONArray >(arena); +} template<> PROTOBUF_NOINLINE ::milvus::proto::schema::ScalarField* Arena::CreateMaybeMessage< ::milvus::proto::schema::ScalarField >(Arena* arena) { return Arena::CreateMessageInternal< ::milvus::proto::schema::ScalarField >(arena); diff --git a/internal/core/src/pb/schema.pb.h b/internal/core/src/pb/schema.pb.h old mode 100644 new mode 100755 index bead2dfee5..1af1abdeff --- a/internal/core/src/pb/schema.pb.h +++ b/internal/core/src/pb/schema.pb.h @@ -49,6 +49,9 @@ extern const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table namespace milvus { namespace proto { namespace schema { +class ArrayArray; +struct ArrayArrayDefaultTypeInternal; +extern ArrayArrayDefaultTypeInternal _ArrayArray_default_instance_; class BoolArray; struct BoolArrayDefaultTypeInternal; extern BoolArrayDefaultTypeInternal _BoolArray_default_instance_; @@ -76,6 +79,9 @@ extern IDsDefaultTypeInternal _IDs_default_instance_; class IntArray; struct IntArrayDefaultTypeInternal; extern IntArrayDefaultTypeInternal _IntArray_default_instance_; +class JSONArray; +struct JSONArrayDefaultTypeInternal; +extern JSONArrayDefaultTypeInternal _JSONArray_default_instance_; class LongArray; struct LongArrayDefaultTypeInternal; extern LongArrayDefaultTypeInternal _LongArray_default_instance_; @@ -95,6 +101,7 @@ extern VectorFieldDefaultTypeInternal _VectorField_default_instance_; } // namespace proto } // namespace milvus PROTOBUF_NAMESPACE_OPEN +template<> ::milvus::proto::schema::ArrayArray* Arena::CreateMaybeMessage<::milvus::proto::schema::ArrayArray>(Arena*); template<> ::milvus::proto::schema::BoolArray* Arena::CreateMaybeMessage<::milvus::proto::schema::BoolArray>(Arena*); template<> ::milvus::proto::schema::BytesArray* Arena::CreateMaybeMessage<::milvus::proto::schema::BytesArray>(Arena*); template<> ::milvus::proto::schema::CollectionSchema* Arena::CreateMaybeMessage<::milvus::proto::schema::CollectionSchema>(Arena*); @@ -104,6 +111,7 @@ template<> ::milvus::proto::schema::FieldSchema* Arena::CreateMaybeMessage<::mil template<> ::milvus::proto::schema::FloatArray* Arena::CreateMaybeMessage<::milvus::proto::schema::FloatArray>(Arena*); template<> ::milvus::proto::schema::IDs* Arena::CreateMaybeMessage<::milvus::proto::schema::IDs>(Arena*); template<> ::milvus::proto::schema::IntArray* Arena::CreateMaybeMessage<::milvus::proto::schema::IntArray>(Arena*); +template<> ::milvus::proto::schema::JSONArray* Arena::CreateMaybeMessage<::milvus::proto::schema::JSONArray>(Arena*); template<> ::milvus::proto::schema::LongArray* Arena::CreateMaybeMessage<::milvus::proto::schema::LongArray>(Arena*); template<> ::milvus::proto::schema::ScalarField* Arena::CreateMaybeMessage<::milvus::proto::schema::ScalarField>(Arena*); template<> ::milvus::proto::schema::SearchResultData* Arena::CreateMaybeMessage<::milvus::proto::schema::SearchResultData>(Arena*); @@ -125,6 +133,8 @@ enum DataType : int { Double = 11, String = 20, VarChar = 21, + Array = 22, + JSON = 23, BinaryVector = 100, FloatVector = 101, DataType_INT_MIN_SENTINEL_DO_NOT_USE_ = std::numeric_limits::min(), @@ -308,6 +318,7 @@ class FieldSchema final : kIsPrimaryKeyFieldNumber = 3, kAutoIDFieldNumber = 8, kStateFieldNumber = 9, + kElementTypeFieldNumber = 10, }; // repeated .milvus.proto.common.KeyValuePair type_params = 6; int type_params_size() const; @@ -418,6 +429,15 @@ class FieldSchema final : void _internal_set_state(::milvus::proto::schema::FieldState value); public: + // .milvus.proto.schema.DataType element_type = 10; + void clear_element_type(); + ::milvus::proto::schema::DataType element_type() const; + void set_element_type(::milvus::proto::schema::DataType value); + private: + ::milvus::proto::schema::DataType _internal_element_type() const; + void _internal_set_element_type(::milvus::proto::schema::DataType value); + public: + // @@protoc_insertion_point(class_scope:milvus.proto.schema.FieldSchema) private: class _Internal; @@ -435,6 +455,7 @@ class FieldSchema final : bool is_primary_key_; bool autoid_; int state_; + int element_type_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; }; union { Impl_ _impl_; }; @@ -1775,6 +1796,337 @@ class StringArray final : }; // ------------------------------------------------------------------- +class ArrayArray final : + public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:milvus.proto.schema.ArrayArray) */ { + public: + inline ArrayArray() : ArrayArray(nullptr) {} + ~ArrayArray() override; + explicit PROTOBUF_CONSTEXPR ArrayArray(::PROTOBUF_NAMESPACE_ID::internal::ConstantInitialized); + + ArrayArray(const ArrayArray& from); + ArrayArray(ArrayArray&& from) noexcept + : ArrayArray() { + *this = ::std::move(from); + } + + inline ArrayArray& operator=(const ArrayArray& from) { + CopyFrom(from); + return *this; + } + inline ArrayArray& operator=(ArrayArray&& from) noexcept { + if (this == &from) return *this; + if (GetOwningArena() == from.GetOwningArena() + #ifdef PROTOBUF_FORCE_COPY_IN_MOVE + && GetOwningArena() != nullptr + #endif // !PROTOBUF_FORCE_COPY_IN_MOVE + ) { + InternalSwap(&from); + } else { + CopyFrom(from); + } + return *this; + } + + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { + return GetDescriptor(); + } + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { + return default_instance().GetMetadata().descriptor; + } + static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { + return default_instance().GetMetadata().reflection; + } + static const ArrayArray& default_instance() { + return *internal_default_instance(); + } + static inline const ArrayArray* internal_default_instance() { + return reinterpret_cast( + &_ArrayArray_default_instance_); + } + static constexpr int kIndexInFileMessages = + 9; + + friend void swap(ArrayArray& a, ArrayArray& b) { + a.Swap(&b); + } + inline void Swap(ArrayArray* other) { + if (other == this) return; + #ifdef PROTOBUF_FORCE_COPY_IN_SWAP + if (GetOwningArena() != nullptr && + GetOwningArena() == other->GetOwningArena()) { + #else // PROTOBUF_FORCE_COPY_IN_SWAP + if (GetOwningArena() == other->GetOwningArena()) { + #endif // !PROTOBUF_FORCE_COPY_IN_SWAP + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(ArrayArray* other) { + if (other == this) return; + GOOGLE_DCHECK(GetOwningArena() == other->GetOwningArena()); + InternalSwap(other); + } + + // implements Message ---------------------------------------------- + + ArrayArray* New(::PROTOBUF_NAMESPACE_ID::Arena* arena = nullptr) const final { + return CreateMaybeMessage(arena); + } + using ::PROTOBUF_NAMESPACE_ID::Message::CopyFrom; + void CopyFrom(const ArrayArray& from); + using ::PROTOBUF_NAMESPACE_ID::Message::MergeFrom; + void MergeFrom( const ArrayArray& from) { + ArrayArray::MergeImpl(*this, from); + } + private: + static void MergeImpl(::PROTOBUF_NAMESPACE_ID::Message& to_msg, const ::PROTOBUF_NAMESPACE_ID::Message& from_msg); + public: + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; + bool IsInitialized() const final; + + size_t ByteSizeLong() const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + uint8_t* _InternalSerialize( + uint8_t* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; + int GetCachedSize() const final { return _impl_._cached_size_.Get(); } + + private: + void SharedCtor(::PROTOBUF_NAMESPACE_ID::Arena* arena, bool is_message_owned); + void SharedDtor(); + void SetCachedSize(int size) const final; + void InternalSwap(ArrayArray* other); + + private: + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "milvus.proto.schema.ArrayArray"; + } + protected: + explicit ArrayArray(::PROTOBUF_NAMESPACE_ID::Arena* arena, + bool is_message_owned = false); + public: + + static const ClassData _class_data_; + const ::PROTOBUF_NAMESPACE_ID::Message::ClassData*GetClassData() const final; + + ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + enum : int { + kDataFieldNumber = 1, + kElementTypeFieldNumber = 2, + }; + // repeated .milvus.proto.schema.ScalarField data = 1; + int data_size() const; + private: + int _internal_data_size() const; + public: + void clear_data(); + ::milvus::proto::schema::ScalarField* mutable_data(int index); + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::ScalarField >* + mutable_data(); + private: + const ::milvus::proto::schema::ScalarField& _internal_data(int index) const; + ::milvus::proto::schema::ScalarField* _internal_add_data(); + public: + const ::milvus::proto::schema::ScalarField& data(int index) const; + ::milvus::proto::schema::ScalarField* add_data(); + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::ScalarField >& + data() const; + + // .milvus.proto.schema.DataType element_type = 2; + void clear_element_type(); + ::milvus::proto::schema::DataType element_type() const; + void set_element_type(::milvus::proto::schema::DataType value); + private: + ::milvus::proto::schema::DataType _internal_element_type() const; + void _internal_set_element_type(::milvus::proto::schema::DataType value); + public: + + // @@protoc_insertion_point(class_scope:milvus.proto.schema.ArrayArray) + private: + class _Internal; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + struct Impl_ { + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::ScalarField > data_; + int element_type_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + }; + union { Impl_ _impl_; }; + friend struct ::TableStruct_schema_2eproto; +}; +// ------------------------------------------------------------------- + +class JSONArray final : + public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:milvus.proto.schema.JSONArray) */ { + public: + inline JSONArray() : JSONArray(nullptr) {} + ~JSONArray() override; + explicit PROTOBUF_CONSTEXPR JSONArray(::PROTOBUF_NAMESPACE_ID::internal::ConstantInitialized); + + JSONArray(const JSONArray& from); + JSONArray(JSONArray&& from) noexcept + : JSONArray() { + *this = ::std::move(from); + } + + inline JSONArray& operator=(const JSONArray& from) { + CopyFrom(from); + return *this; + } + inline JSONArray& operator=(JSONArray&& from) noexcept { + if (this == &from) return *this; + if (GetOwningArena() == from.GetOwningArena() + #ifdef PROTOBUF_FORCE_COPY_IN_MOVE + && GetOwningArena() != nullptr + #endif // !PROTOBUF_FORCE_COPY_IN_MOVE + ) { + InternalSwap(&from); + } else { + CopyFrom(from); + } + return *this; + } + + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { + return GetDescriptor(); + } + static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { + return default_instance().GetMetadata().descriptor; + } + static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { + return default_instance().GetMetadata().reflection; + } + static const JSONArray& default_instance() { + return *internal_default_instance(); + } + static inline const JSONArray* internal_default_instance() { + return reinterpret_cast( + &_JSONArray_default_instance_); + } + static constexpr int kIndexInFileMessages = + 10; + + friend void swap(JSONArray& a, JSONArray& b) { + a.Swap(&b); + } + inline void Swap(JSONArray* other) { + if (other == this) return; + #ifdef PROTOBUF_FORCE_COPY_IN_SWAP + if (GetOwningArena() != nullptr && + GetOwningArena() == other->GetOwningArena()) { + #else // PROTOBUF_FORCE_COPY_IN_SWAP + if (GetOwningArena() == other->GetOwningArena()) { + #endif // !PROTOBUF_FORCE_COPY_IN_SWAP + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(JSONArray* other) { + if (other == this) return; + GOOGLE_DCHECK(GetOwningArena() == other->GetOwningArena()); + InternalSwap(other); + } + + // implements Message ---------------------------------------------- + + JSONArray* New(::PROTOBUF_NAMESPACE_ID::Arena* arena = nullptr) const final { + return CreateMaybeMessage(arena); + } + using ::PROTOBUF_NAMESPACE_ID::Message::CopyFrom; + void CopyFrom(const JSONArray& from); + using ::PROTOBUF_NAMESPACE_ID::Message::MergeFrom; + void MergeFrom( const JSONArray& from) { + JSONArray::MergeImpl(*this, from); + } + private: + static void MergeImpl(::PROTOBUF_NAMESPACE_ID::Message& to_msg, const ::PROTOBUF_NAMESPACE_ID::Message& from_msg); + public: + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; + bool IsInitialized() const final; + + size_t ByteSizeLong() const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + uint8_t* _InternalSerialize( + uint8_t* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; + int GetCachedSize() const final { return _impl_._cached_size_.Get(); } + + private: + void SharedCtor(::PROTOBUF_NAMESPACE_ID::Arena* arena, bool is_message_owned); + void SharedDtor(); + void SetCachedSize(int size) const final; + void InternalSwap(JSONArray* other); + + private: + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "milvus.proto.schema.JSONArray"; + } + protected: + explicit JSONArray(::PROTOBUF_NAMESPACE_ID::Arena* arena, + bool is_message_owned = false); + public: + + static const ClassData _class_data_; + const ::PROTOBUF_NAMESPACE_ID::Message::ClassData*GetClassData() const final; + + ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + enum : int { + kDataFieldNumber = 1, + }; + // repeated bytes data = 1; + int data_size() const; + private: + int _internal_data_size() const; + public: + void clear_data(); + const std::string& data(int index) const; + std::string* mutable_data(int index); + void set_data(int index, const std::string& value); + void set_data(int index, std::string&& value); + void set_data(int index, const char* value); + void set_data(int index, const void* value, size_t size); + std::string* add_data(); + void add_data(const std::string& value); + void add_data(std::string&& value); + void add_data(const char* value); + void add_data(const void* value, size_t size); + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& data() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* mutable_data(); + private: + const std::string& _internal_data(int index) const; + std::string* _internal_add_data(); + public: + + // @@protoc_insertion_point(class_scope:milvus.proto.schema.JSONArray) + private: + class _Internal; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + struct Impl_ { + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField data_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + }; + union { Impl_ _impl_; }; + friend struct ::TableStruct_schema_2eproto; +}; +// ------------------------------------------------------------------- + class ScalarField final : public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:milvus.proto.schema.ScalarField) */ { public: @@ -1826,6 +2178,8 @@ class ScalarField final : kDoubleData = 5, kStringData = 6, kBytesData = 7, + kArrayData = 8, + kJsonData = 9, DATA_NOT_SET = 0, }; @@ -1834,7 +2188,7 @@ class ScalarField final : &_ScalarField_default_instance_); } static constexpr int kIndexInFileMessages = - 9; + 11; friend void swap(ScalarField& a, ScalarField& b) { a.Swap(&b); @@ -1914,6 +2268,8 @@ class ScalarField final : kDoubleDataFieldNumber = 5, kStringDataFieldNumber = 6, kBytesDataFieldNumber = 7, + kArrayDataFieldNumber = 8, + kJsonDataFieldNumber = 9, }; // .milvus.proto.schema.BoolArray bool_data = 1; bool has_bool_data() const; @@ -2041,6 +2397,42 @@ class ScalarField final : ::milvus::proto::schema::BytesArray* bytes_data); ::milvus::proto::schema::BytesArray* unsafe_arena_release_bytes_data(); + // .milvus.proto.schema.ArrayArray array_data = 8; + bool has_array_data() const; + private: + bool _internal_has_array_data() const; + public: + void clear_array_data(); + const ::milvus::proto::schema::ArrayArray& array_data() const; + PROTOBUF_NODISCARD ::milvus::proto::schema::ArrayArray* release_array_data(); + ::milvus::proto::schema::ArrayArray* mutable_array_data(); + void set_allocated_array_data(::milvus::proto::schema::ArrayArray* array_data); + private: + const ::milvus::proto::schema::ArrayArray& _internal_array_data() const; + ::milvus::proto::schema::ArrayArray* _internal_mutable_array_data(); + public: + void unsafe_arena_set_allocated_array_data( + ::milvus::proto::schema::ArrayArray* array_data); + ::milvus::proto::schema::ArrayArray* unsafe_arena_release_array_data(); + + // .milvus.proto.schema.JSONArray json_data = 9; + bool has_json_data() const; + private: + bool _internal_has_json_data() const; + public: + void clear_json_data(); + const ::milvus::proto::schema::JSONArray& json_data() const; + PROTOBUF_NODISCARD ::milvus::proto::schema::JSONArray* release_json_data(); + ::milvus::proto::schema::JSONArray* mutable_json_data(); + void set_allocated_json_data(::milvus::proto::schema::JSONArray* json_data); + private: + const ::milvus::proto::schema::JSONArray& _internal_json_data() const; + ::milvus::proto::schema::JSONArray* _internal_mutable_json_data(); + public: + void unsafe_arena_set_allocated_json_data( + ::milvus::proto::schema::JSONArray* json_data); + ::milvus::proto::schema::JSONArray* unsafe_arena_release_json_data(); + void clear_data(); DataCase data_case() const; // @@protoc_insertion_point(class_scope:milvus.proto.schema.ScalarField) @@ -2053,6 +2445,8 @@ class ScalarField final : void set_has_double_data(); void set_has_string_data(); void set_has_bytes_data(); + void set_has_array_data(); + void set_has_json_data(); inline bool has_data() const; inline void clear_has_data(); @@ -2071,6 +2465,8 @@ class ScalarField final : ::milvus::proto::schema::DoubleArray* double_data_; ::milvus::proto::schema::StringArray* string_data_; ::milvus::proto::schema::BytesArray* bytes_data_; + ::milvus::proto::schema::ArrayArray* array_data_; + ::milvus::proto::schema::JSONArray* json_data_; } data_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; uint32_t _oneof_case_[1]; @@ -2135,7 +2531,7 @@ class VectorField final : &_VectorField_default_instance_); } static constexpr int kIndexInFileMessages = - 10; + 12; friend void swap(VectorField& a, VectorField& b) { a.Swap(&b); @@ -2342,7 +2738,7 @@ class FieldData final : &_FieldData_default_instance_); } static constexpr int kIndexInFileMessages = - 11; + 13; friend void swap(FieldData& a, FieldData& b) { a.Swap(&b); @@ -2576,7 +2972,7 @@ class IDs final : &_IDs_default_instance_); } static constexpr int kIndexInFileMessages = - 12; + 14; friend void swap(IDs& a, IDs& b) { a.Swap(&b); @@ -2766,7 +3162,7 @@ class SearchResultData final : &_SearchResultData_default_instance_); } static constexpr int kIndexInFileMessages = - 13; + 15; friend void swap(SearchResultData& a, SearchResultData& b) { a.Swap(&b); @@ -3249,6 +3645,26 @@ inline void FieldSchema::set_state(::milvus::proto::schema::FieldState value) { // @@protoc_insertion_point(field_set:milvus.proto.schema.FieldSchema.state) } +// .milvus.proto.schema.DataType element_type = 10; +inline void FieldSchema::clear_element_type() { + _impl_.element_type_ = 0; +} +inline ::milvus::proto::schema::DataType FieldSchema::_internal_element_type() const { + return static_cast< ::milvus::proto::schema::DataType >(_impl_.element_type_); +} +inline ::milvus::proto::schema::DataType FieldSchema::element_type() const { + // @@protoc_insertion_point(field_get:milvus.proto.schema.FieldSchema.element_type) + return _internal_element_type(); +} +inline void FieldSchema::_internal_set_element_type(::milvus::proto::schema::DataType value) { + + _impl_.element_type_ = value; +} +inline void FieldSchema::set_element_type(::milvus::proto::schema::DataType value) { + _internal_set_element_type(value); + // @@protoc_insertion_point(field_set:milvus.proto.schema.FieldSchema.element_type) +} + // ------------------------------------------------------------------- // CollectionSchema @@ -3828,6 +4244,149 @@ StringArray::mutable_data() { // ------------------------------------------------------------------- +// ArrayArray + +// repeated .milvus.proto.schema.ScalarField data = 1; +inline int ArrayArray::_internal_data_size() const { + return _impl_.data_.size(); +} +inline int ArrayArray::data_size() const { + return _internal_data_size(); +} +inline void ArrayArray::clear_data() { + _impl_.data_.Clear(); +} +inline ::milvus::proto::schema::ScalarField* ArrayArray::mutable_data(int index) { + // @@protoc_insertion_point(field_mutable:milvus.proto.schema.ArrayArray.data) + return _impl_.data_.Mutable(index); +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::ScalarField >* +ArrayArray::mutable_data() { + // @@protoc_insertion_point(field_mutable_list:milvus.proto.schema.ArrayArray.data) + return &_impl_.data_; +} +inline const ::milvus::proto::schema::ScalarField& ArrayArray::_internal_data(int index) const { + return _impl_.data_.Get(index); +} +inline const ::milvus::proto::schema::ScalarField& ArrayArray::data(int index) const { + // @@protoc_insertion_point(field_get:milvus.proto.schema.ArrayArray.data) + return _internal_data(index); +} +inline ::milvus::proto::schema::ScalarField* ArrayArray::_internal_add_data() { + return _impl_.data_.Add(); +} +inline ::milvus::proto::schema::ScalarField* ArrayArray::add_data() { + ::milvus::proto::schema::ScalarField* _add = _internal_add_data(); + // @@protoc_insertion_point(field_add:milvus.proto.schema.ArrayArray.data) + return _add; +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::milvus::proto::schema::ScalarField >& +ArrayArray::data() const { + // @@protoc_insertion_point(field_list:milvus.proto.schema.ArrayArray.data) + return _impl_.data_; +} + +// .milvus.proto.schema.DataType element_type = 2; +inline void ArrayArray::clear_element_type() { + _impl_.element_type_ = 0; +} +inline ::milvus::proto::schema::DataType ArrayArray::_internal_element_type() const { + return static_cast< ::milvus::proto::schema::DataType >(_impl_.element_type_); +} +inline ::milvus::proto::schema::DataType ArrayArray::element_type() const { + // @@protoc_insertion_point(field_get:milvus.proto.schema.ArrayArray.element_type) + return _internal_element_type(); +} +inline void ArrayArray::_internal_set_element_type(::milvus::proto::schema::DataType value) { + + _impl_.element_type_ = value; +} +inline void ArrayArray::set_element_type(::milvus::proto::schema::DataType value) { + _internal_set_element_type(value); + // @@protoc_insertion_point(field_set:milvus.proto.schema.ArrayArray.element_type) +} + +// ------------------------------------------------------------------- + +// JSONArray + +// repeated bytes data = 1; +inline int JSONArray::_internal_data_size() const { + return _impl_.data_.size(); +} +inline int JSONArray::data_size() const { + return _internal_data_size(); +} +inline void JSONArray::clear_data() { + _impl_.data_.Clear(); +} +inline std::string* JSONArray::add_data() { + std::string* _s = _internal_add_data(); + // @@protoc_insertion_point(field_add_mutable:milvus.proto.schema.JSONArray.data) + return _s; +} +inline const std::string& JSONArray::_internal_data(int index) const { + return _impl_.data_.Get(index); +} +inline const std::string& JSONArray::data(int index) const { + // @@protoc_insertion_point(field_get:milvus.proto.schema.JSONArray.data) + return _internal_data(index); +} +inline std::string* JSONArray::mutable_data(int index) { + // @@protoc_insertion_point(field_mutable:milvus.proto.schema.JSONArray.data) + return _impl_.data_.Mutable(index); +} +inline void JSONArray::set_data(int index, const std::string& value) { + _impl_.data_.Mutable(index)->assign(value); + // @@protoc_insertion_point(field_set:milvus.proto.schema.JSONArray.data) +} +inline void JSONArray::set_data(int index, std::string&& value) { + _impl_.data_.Mutable(index)->assign(std::move(value)); + // @@protoc_insertion_point(field_set:milvus.proto.schema.JSONArray.data) +} +inline void JSONArray::set_data(int index, const char* value) { + GOOGLE_DCHECK(value != nullptr); + _impl_.data_.Mutable(index)->assign(value); + // @@protoc_insertion_point(field_set_char:milvus.proto.schema.JSONArray.data) +} +inline void JSONArray::set_data(int index, const void* value, size_t size) { + _impl_.data_.Mutable(index)->assign( + reinterpret_cast(value), size); + // @@protoc_insertion_point(field_set_pointer:milvus.proto.schema.JSONArray.data) +} +inline std::string* JSONArray::_internal_add_data() { + return _impl_.data_.Add(); +} +inline void JSONArray::add_data(const std::string& value) { + _impl_.data_.Add()->assign(value); + // @@protoc_insertion_point(field_add:milvus.proto.schema.JSONArray.data) +} +inline void JSONArray::add_data(std::string&& value) { + _impl_.data_.Add(std::move(value)); + // @@protoc_insertion_point(field_add:milvus.proto.schema.JSONArray.data) +} +inline void JSONArray::add_data(const char* value) { + GOOGLE_DCHECK(value != nullptr); + _impl_.data_.Add()->assign(value); + // @@protoc_insertion_point(field_add_char:milvus.proto.schema.JSONArray.data) +} +inline void JSONArray::add_data(const void* value, size_t size) { + _impl_.data_.Add()->assign(reinterpret_cast(value), size); + // @@protoc_insertion_point(field_add_pointer:milvus.proto.schema.JSONArray.data) +} +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& +JSONArray::data() const { + // @@protoc_insertion_point(field_list:milvus.proto.schema.JSONArray.data) + return _impl_.data_; +} +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* +JSONArray::mutable_data() { + // @@protoc_insertion_point(field_mutable_list:milvus.proto.schema.JSONArray.data) + return &_impl_.data_; +} + +// ------------------------------------------------------------------- + // ScalarField // .milvus.proto.schema.BoolArray bool_data = 1; @@ -4348,6 +4907,154 @@ inline ::milvus::proto::schema::BytesArray* ScalarField::mutable_bytes_data() { return _msg; } +// .milvus.proto.schema.ArrayArray array_data = 8; +inline bool ScalarField::_internal_has_array_data() const { + return data_case() == kArrayData; +} +inline bool ScalarField::has_array_data() const { + return _internal_has_array_data(); +} +inline void ScalarField::set_has_array_data() { + _impl_._oneof_case_[0] = kArrayData; +} +inline void ScalarField::clear_array_data() { + if (_internal_has_array_data()) { + if (GetArenaForAllocation() == nullptr) { + delete _impl_.data_.array_data_; + } + clear_has_data(); + } +} +inline ::milvus::proto::schema::ArrayArray* ScalarField::release_array_data() { + // @@protoc_insertion_point(field_release:milvus.proto.schema.ScalarField.array_data) + if (_internal_has_array_data()) { + clear_has_data(); + ::milvus::proto::schema::ArrayArray* temp = _impl_.data_.array_data_; + if (GetArenaForAllocation() != nullptr) { + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + } + _impl_.data_.array_data_ = nullptr; + return temp; + } else { + return nullptr; + } +} +inline const ::milvus::proto::schema::ArrayArray& ScalarField::_internal_array_data() const { + return _internal_has_array_data() + ? *_impl_.data_.array_data_ + : reinterpret_cast< ::milvus::proto::schema::ArrayArray&>(::milvus::proto::schema::_ArrayArray_default_instance_); +} +inline const ::milvus::proto::schema::ArrayArray& ScalarField::array_data() const { + // @@protoc_insertion_point(field_get:milvus.proto.schema.ScalarField.array_data) + return _internal_array_data(); +} +inline ::milvus::proto::schema::ArrayArray* ScalarField::unsafe_arena_release_array_data() { + // @@protoc_insertion_point(field_unsafe_arena_release:milvus.proto.schema.ScalarField.array_data) + if (_internal_has_array_data()) { + clear_has_data(); + ::milvus::proto::schema::ArrayArray* temp = _impl_.data_.array_data_; + _impl_.data_.array_data_ = nullptr; + return temp; + } else { + return nullptr; + } +} +inline void ScalarField::unsafe_arena_set_allocated_array_data(::milvus::proto::schema::ArrayArray* array_data) { + clear_data(); + if (array_data) { + set_has_array_data(); + _impl_.data_.array_data_ = array_data; + } + // @@protoc_insertion_point(field_unsafe_arena_set_allocated:milvus.proto.schema.ScalarField.array_data) +} +inline ::milvus::proto::schema::ArrayArray* ScalarField::_internal_mutable_array_data() { + if (!_internal_has_array_data()) { + clear_data(); + set_has_array_data(); + _impl_.data_.array_data_ = CreateMaybeMessage< ::milvus::proto::schema::ArrayArray >(GetArenaForAllocation()); + } + return _impl_.data_.array_data_; +} +inline ::milvus::proto::schema::ArrayArray* ScalarField::mutable_array_data() { + ::milvus::proto::schema::ArrayArray* _msg = _internal_mutable_array_data(); + // @@protoc_insertion_point(field_mutable:milvus.proto.schema.ScalarField.array_data) + return _msg; +} + +// .milvus.proto.schema.JSONArray json_data = 9; +inline bool ScalarField::_internal_has_json_data() const { + return data_case() == kJsonData; +} +inline bool ScalarField::has_json_data() const { + return _internal_has_json_data(); +} +inline void ScalarField::set_has_json_data() { + _impl_._oneof_case_[0] = kJsonData; +} +inline void ScalarField::clear_json_data() { + if (_internal_has_json_data()) { + if (GetArenaForAllocation() == nullptr) { + delete _impl_.data_.json_data_; + } + clear_has_data(); + } +} +inline ::milvus::proto::schema::JSONArray* ScalarField::release_json_data() { + // @@protoc_insertion_point(field_release:milvus.proto.schema.ScalarField.json_data) + if (_internal_has_json_data()) { + clear_has_data(); + ::milvus::proto::schema::JSONArray* temp = _impl_.data_.json_data_; + if (GetArenaForAllocation() != nullptr) { + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + } + _impl_.data_.json_data_ = nullptr; + return temp; + } else { + return nullptr; + } +} +inline const ::milvus::proto::schema::JSONArray& ScalarField::_internal_json_data() const { + return _internal_has_json_data() + ? *_impl_.data_.json_data_ + : reinterpret_cast< ::milvus::proto::schema::JSONArray&>(::milvus::proto::schema::_JSONArray_default_instance_); +} +inline const ::milvus::proto::schema::JSONArray& ScalarField::json_data() const { + // @@protoc_insertion_point(field_get:milvus.proto.schema.ScalarField.json_data) + return _internal_json_data(); +} +inline ::milvus::proto::schema::JSONArray* ScalarField::unsafe_arena_release_json_data() { + // @@protoc_insertion_point(field_unsafe_arena_release:milvus.proto.schema.ScalarField.json_data) + if (_internal_has_json_data()) { + clear_has_data(); + ::milvus::proto::schema::JSONArray* temp = _impl_.data_.json_data_; + _impl_.data_.json_data_ = nullptr; + return temp; + } else { + return nullptr; + } +} +inline void ScalarField::unsafe_arena_set_allocated_json_data(::milvus::proto::schema::JSONArray* json_data) { + clear_data(); + if (json_data) { + set_has_json_data(); + _impl_.data_.json_data_ = json_data; + } + // @@protoc_insertion_point(field_unsafe_arena_set_allocated:milvus.proto.schema.ScalarField.json_data) +} +inline ::milvus::proto::schema::JSONArray* ScalarField::_internal_mutable_json_data() { + if (!_internal_has_json_data()) { + clear_data(); + set_has_json_data(); + _impl_.data_.json_data_ = CreateMaybeMessage< ::milvus::proto::schema::JSONArray >(GetArenaForAllocation()); + } + return _impl_.data_.json_data_; +} +inline ::milvus::proto::schema::JSONArray* ScalarField::mutable_json_data() { + ::milvus::proto::schema::JSONArray* _msg = _internal_mutable_json_data(); + // @@protoc_insertion_point(field_mutable:milvus.proto.schema.ScalarField.json_data) + return _msg; +} + inline bool ScalarField::has_data() const { return data_case() != DATA_NOT_SET; } @@ -5250,6 +5957,10 @@ SearchResultData::mutable_topks() { // ------------------------------------------------------------------- +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + // @@protoc_insertion_point(namespace_scope) diff --git a/internal/core/src/pb/segcore.pb.cc b/internal/core/src/pb/segcore.pb.cc old mode 100644 new mode 100755 diff --git a/internal/core/src/pb/segcore.pb.h b/internal/core/src/pb/segcore.pb.h old mode 100644 new mode 100755 diff --git a/internal/core/src/segcore/ConcurrentVector.cpp b/internal/core/src/segcore/ConcurrentVector.cpp index 88ef77be05..18921955d6 100644 --- a/internal/core/src/segcore/ConcurrentVector.cpp +++ b/internal/core/src/segcore/ConcurrentVector.cpp @@ -76,8 +76,15 @@ VectorBase::set_data_raw(ssize_t element_offset, std::vector data_raw(begin, end); return set_data_raw(element_offset, data_raw.data(), element_count); } + case DataType::JSON: { + auto begin = data->scalars().json_data().data().begin(); + auto end = data->scalars().json_data().data().end(); + std::vector data_raw(begin, end); + return set_data_raw(element_offset, data_raw.data(), element_count); + } default: { - PanicInfo("unsupported"); + PanicInfo(fmt::format("unsupported datatype {}", + field_meta.get_data_type())); } } } diff --git a/internal/core/src/segcore/FieldIndexing.h b/internal/core/src/segcore/FieldIndexing.h index 140a73ceff..83de44d566 100644 --- a/internal/core/src/segcore/FieldIndexing.h +++ b/internal/core/src/segcore/FieldIndexing.h @@ -139,6 +139,11 @@ class IndexingRecord { continue; } } + if (field_meta.get_data_type() == DataType::ARRAY || + field_meta.get_data_type() == DataType::JSON) { + // not supported yet + continue; + } field_indexings_.try_emplace( field_id, CreateIndex(field_meta, segcore_config_)); diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 1715ae154b..9f32a21430 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -217,6 +217,12 @@ struct InsertRecord { size_per_chunk); break; } + case DataType::JSON: + case DataType::ARRAY: { + this->append_field_data(field_id, + size_per_chunk); + break; + } default: { PanicInfo("unsupported"); } diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index dc4eb72eb9..d4f6425221 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -24,6 +24,7 @@ #include "common/Consts.h" #include "common/FieldMeta.h" #include "common/Types.h" +#include "nlohmann/json.hpp" #include "query/ScalarIndex.h" #include "query/SearchBruteForce.h" #include "query/SearchOnSealed.h" @@ -221,7 +222,7 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) { ++system_ready_count_; } else { // prepare data - auto& field_meta = schema_->operator[](field_id); + auto& field_meta = (*schema_)[field_id]; auto data_type = field_meta.get_data_type(); AssertInfo(data_type == DataType(info.field_data->type()), "field type of load data is inconsistent with the schema"); @@ -237,9 +238,26 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) { case milvus::DataType::STRING: case milvus::DataType::VARCHAR: { column = std::make_unique>( - get_segment_id(), field_meta, info); + get_segment_id(), + field_meta, + info, + [](const char* data, size_t len) { + return std::string_view(data, len); + }); break; } + case milvus::DataType::JSON: { + column = std::make_unique>( + get_segment_id(), + field_meta, + info, + [](const char* data, size_t len) { + if (len > 0) { + return nlohmann::json::parse(data, data + len); + } + return nlohmann::json{}; + }); + } default: { } } diff --git a/internal/core/src/storage/PayloadWriter.cpp b/internal/core/src/storage/PayloadWriter.cpp index af22edbf03..7d9def154a 100644 --- a/internal/core/src/storage/PayloadWriter.cpp +++ b/internal/core/src/storage/PayloadWriter.cpp @@ -55,6 +55,14 @@ PayloadWriter::add_one_string_payload(const char* str, int str_size) { rows_.fetch_add(1); } +void +PayloadWriter::add_one_binary_payload(const uint8_t* data, int length) { + AssertInfo(output_ == nullptr, "payload writer has been finished"); + AssertInfo(milvus::datatype_is_binary(column_type_), "mismatch data type"); + AddOneBinaryToArrowBuilder(builder_, data, length); + rows_.fetch_add(1); +} + void PayloadWriter::add_payload(const Payload& raw_data) { AssertInfo(output_ == nullptr, "payload writer has been finished"); diff --git a/internal/core/src/storage/PayloadWriter.h b/internal/core/src/storage/PayloadWriter.h index ce2e22f653..1bd2d652be 100644 --- a/internal/core/src/storage/PayloadWriter.h +++ b/internal/core/src/storage/PayloadWriter.h @@ -35,6 +35,9 @@ class PayloadWriter { void add_one_string_payload(const char* str, int str_size); + void + add_one_binary_payload(const uint8_t* data, int length); + void finish(); diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 564896d588..b92cc0d93e 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -15,9 +15,12 @@ // limitations under the License. #include "storage/Util.h" +#include "arrow/array/builder_binary.h" +#include "arrow/type_fwd.h" #include "exceptions/EasyAssert.h" #include "common/Consts.h" #include "config/ConfigChunkManager.h" +#include "storage/parquet_c.h" #ifdef BUILD_DISK_ANN #include "storage/DiskFileManagerImpl.h" @@ -140,6 +143,22 @@ AddOneStringToArrowBuilder(std::shared_ptr builder, AssertInfo(ast.ok(), "append value to arrow builder failed"); } +void +AddOneBinaryToArrowBuilder(std::shared_ptr builder, + const uint8_t* data, + int length) { + AssertInfo(builder != nullptr, "empty arrow builder"); + auto binary_builder = + std::dynamic_pointer_cast(builder); + arrow::Status ast; + if (data == nullptr || length < 0) { + ast = binary_builder->AppendNull(); + } else { + ast = binary_builder->Append(data, length); + } + AssertInfo(ast.ok(), "append value to arrow builder failed"); +} + std::shared_ptr CreateArrowBuilder(DataType data_type) { switch (static_cast(data_type)) { @@ -168,6 +187,10 @@ CreateArrowBuilder(DataType data_type) { case DataType::STRING: { return std::make_shared(); } + case DataType::ARRAY: + case DataType::JSON: { + return std::make_shared(); + } default: { PanicInfo("unsupported numeric data type"); } @@ -221,6 +244,10 @@ CreateArrowSchema(DataType data_type) { case DataType::STRING: { return arrow::schema({arrow::field("val", arrow::utf8())}); } + case DataType::ARRAY: + case DataType::JSON: { + return arrow::schema({arrow::field("val", arrow::binary())}); + } default: { PanicInfo("unsupported numeric data type"); } diff --git a/internal/core/src/storage/Util.h b/internal/core/src/storage/Util.h index 9d57913589..1c0cf16d47 100644 --- a/internal/core/src/storage/Util.h +++ b/internal/core/src/storage/Util.h @@ -38,6 +38,10 @@ void AddOneStringToArrowBuilder(std::shared_ptr builder, const char* str, int str_size); +void +AddOneBinaryToArrowBuilder(std::shared_ptr builder, + const uint8_t* data, + int length); std::shared_ptr CreateArrowBuilder(DataType data_type); diff --git a/internal/core/src/storage/parquet_c.cpp b/internal/core/src/storage/parquet_c.cpp index 262681152c..176f7995f9 100644 --- a/internal/core/src/storage/parquet_c.cpp +++ b/internal/core/src/storage/parquet_c.cpp @@ -39,16 +39,6 @@ ReleaseArrowUnused() { } } -static const char* -ErrorMsg(const std::string_view msg) { - if (msg.empty()) - return nullptr; - auto ret = (char*)malloc(msg.size() + 1); - std::memcpy(ret, msg.data(), msg.size()); - ret[msg.size()] = '\0'; - return ret; -} - extern "C" CPayloadWriter NewPayloadWriter(int columnType) { auto data_type = static_cast(columnType); @@ -143,6 +133,28 @@ AddOneStringToPayload(CPayloadWriter payloadWriter, char* cstr, int str_size) { } } +extern "C" CStatus +AddOneArrayToPayload(CPayloadWriter payloadWriter, uint8_t* data, int length) { + try { + auto p = reinterpret_cast(payloadWriter); + p->add_one_binary_payload(data, length); + return milvus::SuccessCStatus(); + } catch (std::exception& e) { + return milvus::FailureCStatus(UnexpectedError, e.what()); + } +} + +extern "C" CStatus +AddOneJSONToPayload(CPayloadWriter payloadWriter, uint8_t* data, int length) { + try { + auto p = reinterpret_cast(payloadWriter); + p->add_one_binary_payload(data, length); + return milvus::SuccessCStatus(); + } catch (std::exception& e) { + return milvus::FailureCStatus(UnexpectedError, e.what()); + } +} + extern "C" CStatus AddBinaryVectorToPayload(CPayloadWriter payloadWriter, uint8_t* values, diff --git a/internal/core/src/storage/parquet_c.h b/internal/core/src/storage/parquet_c.h index d117669856..db54eb7c63 100644 --- a/internal/core/src/storage/parquet_c.h +++ b/internal/core/src/storage/parquet_c.h @@ -53,6 +53,10 @@ AddDoubleToPayload(CPayloadWriter payloadWriter, double* values, int length); CStatus AddOneStringToPayload(CPayloadWriter payloadWriter, char* cstr, int str_size); CStatus +AddOneArrayToPayload(CPayloadWriter payloadWriter, uint8_t* cdata, int length); +CStatus +AddOneJSONToPayload(CPayloadWriter payloadWriter, uint8_t* cdata, int length); +CStatus AddBinaryVectorToPayload(CPayloadWriter payloadWriter, uint8_t* values, int dimension, diff --git a/internal/datanode/flow_graph_insert_buffer_node.go b/internal/datanode/flow_graph_insert_buffer_node.go index 72e92678ee..774447a8c9 100644 --- a/internal/datanode/flow_graph_insert_buffer_node.go +++ b/internal/datanode/flow_graph_insert_buffer_node.go @@ -23,6 +23,7 @@ import ( "reflect" "sync" + "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" "go.opentelemetry.io/otel/trace" "go.uber.org/atomic" @@ -183,7 +184,7 @@ func (ibNode *insertBufferNode) Operate(in []Msg) []Msg { seg2Upload, err := ibNode.addSegmentAndUpdateRowNum(fgMsg.insertMessages, startPositions[0], endPositions[0]) if err != nil { // Occurs only if the collectionID is mismatch, should not happen - err = fmt.Errorf("update segment states in channel meta wrong, err = %s", err) + err = errors.Wrap(err, "update segment states in channel meta wrong") log.Error(err.Error()) panic(err) } @@ -193,7 +194,7 @@ func (ibNode *insertBufferNode) Operate(in []Msg) []Msg { err := ibNode.bufferInsertMsg(msg, startPositions[0], endPositions[0]) if err != nil { // error occurs when missing schema info or data is misaligned, should not happen - err = fmt.Errorf("insertBufferNode msg to buffer failed, err = %s", err) + err = errors.Wrap(err, "insertBufferNode msg to buffer failed") log.Error(err.Error()) panic(err) } diff --git a/internal/proxy/impl.go b/internal/proxy/impl.go index c36413b05a..d3e1788fca 100644 --- a/internal/proxy/impl.go +++ b/internal/proxy/impl.go @@ -2042,7 +2042,6 @@ func (node *Proxy) Insert(ctx context.Context, request *milvuspb.InsertRequest) it := &insertTask{ ctx: ctx, Condition: NewTaskCondition(ctx), - // req: request, insertMsg: &msgstream.InsertMsg{ BaseMsg: msgstream.BaseMsg{ HashValues: request.HashKeys, @@ -2058,7 +2057,6 @@ func (node *Proxy) Insert(ctx context.Context, request *milvuspb.InsertRequest) FieldsData: request.FieldsData, NumRows: uint64(request.NumRows), Version: msgpb.InsertDataVersion_ColumnBased, - // RowData: transfer column based request to this }, }, idAllocator: node.rowIDAllocator, diff --git a/internal/querynodev2/pipeline/mock_data.go b/internal/querynodev2/pipeline/mock_data.go index 69567813ea..0fb6961955 100644 --- a/internal/querynodev2/pipeline/mock_data.go +++ b/internal/querynodev2/pipeline/mock_data.go @@ -89,7 +89,7 @@ func emptyInsertMsg(collectionID int64, partitionID int64, segmentID int64, chan return insertMsg } -//gen IDs with random pks for DeleteMsg +// gen IDs with random pks for DeleteMsg func genDefaultDeletePK(rowSum int) *schemapb.IDs { pkDatas := []int64{} @@ -106,7 +106,7 @@ func genDefaultDeletePK(rowSum int) *schemapb.IDs { } } -//gen IDs with specified pk +// gen IDs with specified pk func genDeletePK(pks ...int64) *schemapb.IDs { pkDatas := make([]int64, 0, len(pks)) pkDatas = append(pkDatas, pks...) @@ -164,9 +164,9 @@ func genFiledDataWithSchema(schema *schemapb.CollectionSchema, numRows int) []*s fieldsData := make([]*schemapb.FieldData, 0) for _, field := range schema.Fields { if field.DataType < 100 { - fieldsData = append(fieldsData, segments.GenTestScalarFieldData(field.DataType, field.DataType.String(), numRows)) + fieldsData = append(fieldsData, segments.GenTestScalarFieldData(field.DataType, field.DataType.String(), field.GetFieldID(), numRows)) } else { - fieldsData = append(fieldsData, segments.GenTestVectorFiledData(field.DataType, field.DataType.String(), numRows, defaultDim)) + fieldsData = append(fieldsData, segments.GenTestVectorFiledData(field.DataType, field.DataType.String(), field.GetFieldID(), numRows, defaultDim)) } } return fieldsData diff --git a/internal/querynodev2/segments/mock_data.go b/internal/querynodev2/segments/mock_data.go index 7e251ccd23..192a8304e6 100644 --- a/internal/querynodev2/segments/mock_data.go +++ b/internal/querynodev2/segments/mock_data.go @@ -159,8 +159,20 @@ var simpleDoubleField = constFieldParam{ fieldName: "doubleField", } -var simpleVarCharField = constFieldParam{ +var simpleJSONField = constFieldParam{ id: 109, + dataType: schemapb.DataType_JSON, + fieldName: "jsonField", +} + +var simpleArrayField = constFieldParam{ + id: 110, + dataType: schemapb.DataType_Array, + fieldName: "arrayField", +} + +var simpleVarCharField = constFieldParam{ + id: 111, dataType: schemapb.DataType_VarChar, fieldName: "varCharField", } @@ -183,6 +195,7 @@ func genConstantFieldSchema(param constFieldParam) *schemapb.FieldSchema { Name: param.fieldName, IsPrimaryKey: false, DataType: param.dataType, + ElementType: schemapb.DataType_Int32, } return field } @@ -232,6 +245,8 @@ func GenTestCollectionSchema(collectionName string, pkType schemapb.DataType) *s fieldInt32 := genConstantFieldSchema(simpleInt32Field) fieldFloat := genConstantFieldSchema(simpleFloatField) fieldDouble := genConstantFieldSchema(simpleDoubleField) + // fieldArray := genConstantFieldSchema(simpleArrayField) + fieldJSON := genConstantFieldSchema(simpleJSONField) floatVecFieldSchema := genVectorFieldSchema(simpleFloatVecField) binVecFieldSchema := genVectorFieldSchema(simpleBinVecField) var pkFieldSchema *schemapb.FieldSchema @@ -241,7 +256,6 @@ func GenTestCollectionSchema(collectionName string, pkType schemapb.DataType) *s pkFieldSchema = genPKFieldSchema(simpleInt64Field) case schemapb.DataType_VarChar: pkFieldSchema = genPKFieldSchema(simpleVarCharField) - pkFieldSchema.FieldID = 106 } schema := schemapb.CollectionSchema{ // schema for segCore @@ -254,11 +268,17 @@ func GenTestCollectionSchema(collectionName string, pkType schemapb.DataType) *s fieldInt32, fieldFloat, fieldDouble, + // fieldArray, + fieldJSON, floatVecFieldSchema, binVecFieldSchema, pkFieldSchema, }, } + + for i, field := range schema.GetFields() { + field.FieldID = 100 + int64(i) + } return &schema } @@ -319,6 +339,26 @@ func generateStringArray(numRows int) []string { } return ret } +func generateArrayArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: generateInt32Array(10), + }, + }, + }) + } + return ret +} +func generateJSONArray(numRows int) [][]byte { + ret := make([][]byte, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, []byte(fmt.Sprintf(`{"key":%d}`, i+1))) + } + return ret +} func generateFloat64Array(numRows int) []float64 { ret := make([]float64, 0, numRows) @@ -347,7 +387,7 @@ func generateBinaryVectors(numRows, dim int) []byte { return ret } -func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData { +func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData { ret := &schemapb.FieldData{ Type: dType, FieldName: fieldName, @@ -356,7 +396,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i switch dType { case schemapb.DataType_Bool: - ret.FieldId = simpleBoolField.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_BoolData{ @@ -367,7 +407,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_Int8: - ret.FieldId = simpleInt8Field.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -378,7 +418,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_Int16: - ret.FieldId = simpleInt16Field.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -389,7 +429,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_Int32: - ret.FieldId = simpleInt32Field.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -400,7 +440,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_Int64: - ret.FieldId = simpleInt64Field.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_LongData{ @@ -411,7 +451,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_Float: - ret.FieldId = simpleFloatField.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_FloatData{ @@ -422,7 +462,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_Double: - ret.FieldId = simpleDoubleField.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_DoubleData{ @@ -433,7 +473,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_VarChar: - ret.FieldId = simpleVarCharField.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_StringData{ @@ -443,6 +483,29 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i }, }, } + + case schemapb.DataType_Array: + ret.FieldId = fieldID + ret.Field = &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: generateArrayArray(numRows), + }, + }, + }, + } + + case schemapb.DataType_JSON: + ret.FieldId = fieldID + ret.Field = &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: generateJSONArray(numRows), + }}, + }} + default: panic("data type not supported") } @@ -450,7 +513,7 @@ func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, numRows i return ret } -func GenTestVectorFiledData(dType schemapb.DataType, fieldName string, numRows int, dim int) *schemapb.FieldData { +func GenTestVectorFiledData(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData { ret := &schemapb.FieldData{ Type: dType, FieldName: fieldName, @@ -458,7 +521,7 @@ func GenTestVectorFiledData(dType schemapb.DataType, fieldName string, numRows i } switch dType { case schemapb.DataType_BinaryVector: - ret.FieldId = simpleBinVecField.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Dim: int64(dim), @@ -468,7 +531,7 @@ func GenTestVectorFiledData(dType schemapb.DataType, fieldName string, numRows i }, } case schemapb.DataType_FloatVector: - ret.FieldId = simpleFloatVecField.id + ret.FieldId = fieldID ret.Field = &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Dim: int64(dim), @@ -629,6 +692,14 @@ func genInsertData(msgLength int, schema *schemapb.CollectionSchema) (*storage.I insertData.Data[f.FieldID] = &storage.StringFieldData{ Data: generateStringArray(msgLength), } + case schemapb.DataType_Array: + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: generateArrayArray(msgLength), + } + case schemapb.DataType_JSON: + insertData.Data[f.FieldID] = &storage.JSONFieldData{ + Data: generateJSONArray(msgLength), + } case schemapb.DataType_FloatVector: dim := simpleFloatVecField.dim // if no dim specified, use simpleFloatVecField's dim insertData.Data[f.FieldID] = &storage.FloatVectorFieldData{ @@ -709,7 +780,7 @@ func SaveDeltaLog(collectionID int64, return fieldBinlog, cm.MultiWrite(context.Background(), kvs) } -func GenAndSaveIndex(collectionID, partitionID, segmentID int64, msgLength int, indexType, metricType string, cm storage.ChunkManager) (*querypb.FieldIndexInfo, error) { +func GenAndSaveIndex(collectionID, partitionID, segmentID, fieldID int64, msgLength int, indexType, metricType string, cm storage.ChunkManager) (*querypb.FieldIndexInfo, error) { typeParams, indexParams := genIndexParams(indexType, metricType) index, err := indexcgowrapper.NewCgoIndex(schemapb.DataType_FloatVector, typeParams, indexParams, genStorageConfig()) @@ -758,7 +829,7 @@ func GenAndSaveIndex(collectionID, partitionID, segmentID int64, msgLength int, } return &querypb.FieldIndexInfo{ - FieldID: simpleFloatVecField.id, + FieldID: fieldID, EnableIndex: true, IndexName: "querynode-test", IndexParams: funcutil.Map2KeyValuePair(indexParams), @@ -1038,27 +1109,31 @@ func genInsertMsg(collection *Collection, partitionID, segment int64, numRows in for _, f := range collection.Schema().Fields { switch f.DataType { case schemapb.DataType_Bool: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleBoolField.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleBoolField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int8: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleInt8Field.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt8Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int16: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleInt16Field.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt16Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int32: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleInt32Field.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt32Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int64: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleInt64Field.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt64Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Float: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleFloatField.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleFloatField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Double: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleDoubleField.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleDoubleField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_VarChar: - fieldsData = append(fieldsData, newScalarFieldData(f.DataType, simpleVarCharField.fieldName, numRows)) + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleVarCharField.fieldName, f.GetFieldID(), numRows)) + case schemapb.DataType_Array: + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleArrayField.fieldName, f.GetFieldID(), numRows)) + case schemapb.DataType_JSON: + fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleJSONField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_FloatVector: dim := simpleFloatVecField.dim // if no dim specified, use simpleFloatVecField's dim - fieldsData = append(fieldsData, newFloatVectorFieldData(simpleFloatVecField.fieldName, numRows, dim)) + fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim)) case schemapb.DataType_BinaryVector: dim := simpleBinVecField.dim // if no dim specified, use simpleFloatVecField's dim - fieldsData = append(fieldsData, newBinaryVectorFieldData(simpleBinVecField.fieldName, numRows, dim)) + fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim)) default: err := errors.New("data type not supported") return nil, err @@ -1117,143 +1192,6 @@ func genSimpleRowIDField(numRows int) []int64 { return ids } -func newScalarFieldData(dType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData { - ret := &schemapb.FieldData{ - Type: dType, - FieldName: fieldName, - Field: nil, - } - - switch dType { - case schemapb.DataType_Bool: - ret.FieldId = simpleBoolField.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: generateBoolArray(numRows), - }, - }, - }, - } - case schemapb.DataType_Int8: - ret.FieldId = simpleInt8Field.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int16: - ret.FieldId = simpleInt16Field.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int32: - ret.FieldId = simpleInt32Field.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int64: - ret.FieldId = simpleInt64Field.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: generateInt64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Float: - ret.FieldId = simpleFloatField.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: generateFloat32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Double: - ret.FieldId = simpleDoubleField.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: &schemapb.DoubleArray{ - Data: generateFloat64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_VarChar: - ret.FieldId = simpleVarCharField.id - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: generateStringArray(numRows), - }, - }, - }, - } - default: - panic("data type not supported") - } - - return ret -} - -func newFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - FieldId: simpleFloatVecField.id, - Type: schemapb.DataType_FloatVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(numRows, dim), - }, - }, - }, - }, - } -} - -func newBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - FieldId: simpleBinVecField.id, - Type: schemapb.DataType_BinaryVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(numRows, dim), - }, - }, - }, - } -} - func genSimpleRetrievePlan(collection *Collection) (*RetrievePlan, error) { timestamp := storage.Timestamp(1000) planBytes, err := genSimpleRetrievePlanExpr(collection.schema) diff --git a/internal/querynodev2/segments/plan_test.go b/internal/querynodev2/segments/plan_test.go index 45f8712c85..982d2dc55d 100644 --- a/internal/querynodev2/segments/plan_test.go +++ b/internal/querynodev2/segments/plan_test.go @@ -142,7 +142,6 @@ func (suite *PlanSuite) TestPlanNewSearchRequest() { searchReq, err := NewSearchRequest(suite.collection, req, req.Req.GetPlaceholderGroup()) suite.NoError(err) - suite.Equal(simpleFloatVecField.id, searchReq.searchFieldID) suite.EqualValues(nq, searchReq.getNumOfQuery()) searchReq.Delete() diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 9914b5da5e..f4a14485db 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -594,6 +594,7 @@ func (s *LocalSegment) Delete(primaryKeys []storage.PrimaryKey, timestamps []typ const long* primary_keys, const unsigned long* timestamps); */ + s.mut.RLock() defer s.mut.RUnlock() diff --git a/internal/querynodev2/segments/segment_loader_test.go b/internal/querynodev2/segments/segment_loader_test.go index 75a16ae2f8..59429ac4d3 100644 --- a/internal/querynodev2/segments/segment_loader_test.go +++ b/internal/querynodev2/segments/segment_loader_test.go @@ -27,6 +27,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/schemapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -79,7 +80,7 @@ func (suite *SegmentLoaderSuite) TestLoad() { suite.collectionID, suite.partitionID, suite.segmentID, - 100, + 4, suite.schema, suite.chunkManager, ) @@ -99,7 +100,7 @@ func (suite *SegmentLoaderSuite) TestLoad() { suite.collectionID, suite.partitionID, suite.segmentID+1, - 100, + 4, suite.schema, suite.chunkManager, ) @@ -204,10 +205,12 @@ func (suite *SegmentLoaderSuite) TestLoadWithIndex() { ) suite.NoError(err) + vecFields := funcutil.GetVecFieldIDs(suite.schema) indexInfo, err := GenAndSaveIndex( suite.collectionID, suite.partitionID, segmentID, + vecFields[0], 100, IndexFaissIVFFlat, L2, @@ -227,8 +230,9 @@ func (suite *SegmentLoaderSuite) TestLoadWithIndex() { segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, loadInfos...) suite.NoError(err) + vecFields := funcutil.GetVecFieldIDs(suite.schema) for _, segment := range segments { - suite.True(segment.ExistIndex(simpleFloatVecField.id)) + suite.True(segment.ExistIndex(vecFields[0])) } } diff --git a/internal/querynodev2/services_test.go b/internal/querynodev2/services_test.go index 1d2a322459..0c90670577 100644 --- a/internal/querynodev2/services_test.go +++ b/internal/querynodev2/services_test.go @@ -435,10 +435,12 @@ func (suite *ServiceSuite) genSegmentLoadInfos(schema *schemapb.CollectionSchema ) suite.Require().NoError(err) + vecFieldIDs := funcutil.GetVecFieldIDs(schema) indexes, err := segments.GenAndSaveIndex( suite.collectionID, suite.partitionIDs[i%partNum], suite.validSegmentIDs[i], + vecFieldIDs[0], 100, segments.IndexFaissIVFFlat, segments.L2, diff --git a/internal/storage/data_codec.go b/internal/storage/data_codec.go index 1248bf9702..ae94a95ef4 100644 --- a/internal/storage/data_codec.go +++ b/internal/storage/data_codec.go @@ -133,6 +133,13 @@ type DoubleFieldData struct { type StringFieldData struct { Data []string } +type ArrayFieldData struct { + ElementType schemapb.DataType + Data []*schemapb.ScalarField +} +type JSONFieldData struct { + Data [][]byte +} type BinaryVectorFieldData struct { Data []byte Dim int @@ -153,20 +160,24 @@ func (data *DoubleFieldData) RowNum() int { return len(data.Data) } func (data *StringFieldData) RowNum() int { return len(data.Data) } func (data *BinaryVectorFieldData) RowNum() int { return len(data.Data) * 8 / data.Dim } func (data *FloatVectorFieldData) RowNum() int { return len(data.Data) / data.Dim } +func (data *ArrayFieldData) RowNum() int { return len(data.Data) } +func (data *JSONFieldData) RowNum() int { return len(data.Data) } // GetRow implements FieldData.GetRow -func (data *BoolFieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *Int8FieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *Int16FieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *Int32FieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *Int64FieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *FloatFieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *DoubleFieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *StringFieldData) GetRow(i int) interface{} { return data.Data[i] } -func (data *BinaryVectorFieldData) GetRow(i int) interface{} { +func (data *BoolFieldData) GetRow(i int) any { return data.Data[i] } +func (data *Int8FieldData) GetRow(i int) any { return data.Data[i] } +func (data *Int16FieldData) GetRow(i int) any { return data.Data[i] } +func (data *Int32FieldData) GetRow(i int) any { return data.Data[i] } +func (data *Int64FieldData) GetRow(i int) any { return data.Data[i] } +func (data *FloatFieldData) GetRow(i int) any { return data.Data[i] } +func (data *DoubleFieldData) GetRow(i int) any { return data.Data[i] } +func (data *StringFieldData) GetRow(i int) any { return data.Data[i] } +func (data *ArrayFieldData) GetRow(i int) any { return data.Data[i] } +func (data *JSONFieldData) GetRow(i int) any { return data.Data[i] } +func (data *BinaryVectorFieldData) GetRow(i int) any { return data.Data[i*data.Dim/8 : (i+1)*data.Dim/8] } -func (data *FloatVectorFieldData) GetRow(i int) interface{} { +func (data *FloatVectorFieldData) GetRow(i int) any { return data.Data[i*data.Dim : (i+1)*data.Dim] } @@ -216,6 +227,37 @@ func (data *StringFieldData) GetMemorySize() int { return size } +func (data *ArrayFieldData) GetMemorySize() int { + var size int + for _, val := range data.Data { + switch data.ElementType { + case schemapb.DataType_Bool: + size += binary.Size(val.GetBoolData().GetData()) + case schemapb.DataType_Int8: + size += binary.Size(val.GetIntData().GetData()) / 4 + case schemapb.DataType_Int16: + size += binary.Size(val.GetIntData().GetData()) / 2 + case schemapb.DataType_Int32: + size += binary.Size(val.GetIntData().GetData()) + case schemapb.DataType_Float: + size += binary.Size(val.GetFloatData().GetData()) + case schemapb.DataType_Double: + size += binary.Size(val.GetDoubleData().GetData()) + case schemapb.DataType_String, schemapb.DataType_VarChar: + size += (&StringFieldData{Data: val.GetStringData().GetData()}).GetMemorySize() + } + } + return size +} + +func (data *JSONFieldData) GetMemorySize() int { + var size int + for _, val := range data.Data { + size += len(val) + 16 + } + return size +} + func (data *BinaryVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 } @@ -382,6 +424,26 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique } } writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*StringFieldData).GetMemorySize())) + case schemapb.DataType_Array: + for _, singleArray := range singleData.(*ArrayFieldData).Data { + err = eventWriter.AddOneArrayToPayload(singleArray) + if err != nil { + eventWriter.Close() + writer.Close() + return nil, nil, err + } + } + writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*ArrayFieldData).GetMemorySize())) + case schemapb.DataType_JSON: + for _, singleJSON := range singleData.(*JSONFieldData).Data { + err = eventWriter.AddOneJSONToPayload(singleJSON) + if err != nil { + eventWriter.Close() + writer.Close() + return nil, nil, err + } + } + writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*JSONFieldData).GetMemorySize())) case schemapb.DataType_BinaryVector: err = eventWriter.AddBinaryVectorToPayload(singleData.(*BinaryVectorFieldData).Data, singleData.(*BinaryVectorFieldData).Dim) if err != nil { @@ -652,6 +714,44 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int totalLength += len(stringPayload) insertData.Data[fieldID] = stringFieldData + case schemapb.DataType_Array: + arrayPayload, err := eventReader.GetArrayFromPayload() + if err != nil { + eventReader.Close() + binlogReader.Close() + return InvalidUniqueID, InvalidUniqueID, InvalidUniqueID, err + } + + if insertData.Data[fieldID] == nil { + insertData.Data[fieldID] = &ArrayFieldData{ + Data: make([]*schemapb.ScalarField, 0, rowNum), + } + } + arrayFieldData := insertData.Data[fieldID].(*ArrayFieldData) + + arrayFieldData.Data = append(arrayFieldData.Data, arrayPayload...) + totalLength += len(arrayPayload) + insertData.Data[fieldID] = arrayFieldData + + case schemapb.DataType_JSON: + jsonPayload, err := eventReader.GetJSONFromPayload() + if err != nil { + eventReader.Close() + binlogReader.Close() + return InvalidUniqueID, InvalidUniqueID, InvalidUniqueID, err + } + + if insertData.Data[fieldID] == nil { + insertData.Data[fieldID] = &JSONFieldData{ + Data: make([][]byte, 0, rowNum), + } + } + jsonFieldData := insertData.Data[fieldID].(*JSONFieldData) + + jsonFieldData.Data = append(jsonFieldData.Data, jsonPayload...) + totalLength += len(jsonPayload) + insertData.Data[fieldID] = jsonFieldData + case schemapb.DataType_BinaryVector: var singleData []byte singleData, dim, err = eventReader.GetBinaryVectorFromPayload() @@ -730,6 +830,31 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int return collectionID, partitionID, segmentID, nil } +// func deserializeEntity[T any, U any]( +// eventReader *EventReader, +// binlogReader *BinlogReader, +// insertData *InsertData, +// getPayloadFunc func() (U, error), +// fillDataFunc func() FieldData, +// ) error { +// fieldID := binlogReader.FieldID +// stringPayload, err := getPayloadFunc() +// if err != nil { +// eventReader.Close() +// binlogReader.Close() +// return err +// } +// +// if insertData.Data[fieldID] == nil { +// insertData.Data[fieldID] = fillDataFunc() +// } +// stringFieldData := insertData.Data[fieldID].(*T) +// +// stringFieldData.Data = append(stringFieldData.Data, stringPayload...) +// totalLength += len(stringPayload) +// insertData.Data[fieldID] = stringFieldData +// } + // Deserialize transfer blob back to insert data. // From schema, it get all fields. // For each field, it will create a binlog reader, and read all event to the buffer. diff --git a/internal/storage/data_codec_test.go b/internal/storage/data_codec_test.go index 123387127f..d8b3996956 100644 --- a/internal/storage/data_codec_test.go +++ b/internal/storage/data_codec_test.go @@ -45,6 +45,8 @@ const ( StringField = 107 BinaryVectorField = 108 FloatVectorField = 109 + ArrayField = 110 + JSONField = 111 ) func TestInsertCodec(t *testing.T) { @@ -128,6 +130,19 @@ func TestInsertCodec(t *testing.T) { Description: "string", DataType: schemapb.DataType_String, }, + { + FieldID: ArrayField, + Name: "field_int32_array", + Description: "int32 array", + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int32, + }, + { + FieldID: JSONField, + Name: "field_json", + Description: "json", + DataType: schemapb.DataType_JSON, + }, { FieldID: BinaryVectorField, Name: "field_binary_vector", @@ -186,6 +201,27 @@ func TestInsertCodec(t *testing.T) { Data: []float32{4, 5, 6, 7, 4, 5, 6, 7}, Dim: 4, }, + ArrayField: &ArrayFieldData{ + ElementType: schemapb.DataType_Int32, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, + }, + }, + }, + }, + JSONField: &JSONFieldData{ + Data: [][]byte{ + []byte(`{"batch":2}`), + []byte(`{"key":"world"}`), + }, + }, }, } @@ -229,6 +265,27 @@ func TestInsertCodec(t *testing.T) { Data: []float32{0, 1, 2, 3, 0, 1, 2, 3}, Dim: 4, }, + ArrayField: &ArrayFieldData{ + ElementType: schemapb.DataType_Int32, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}}, + }, + }, + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{4, 5, 6}}, + }, + }, + }, + }, + JSONField: &JSONFieldData{ + Data: [][]byte{ + []byte(`{"batch":1}`), + []byte(`{"key":"hello"}`), + }, + }, }, } @@ -246,6 +303,8 @@ func TestInsertCodec(t *testing.T) { StringField: &StringFieldData{[]string{}}, BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8}, FloatVectorField: &FloatVectorFieldData{[]float32{}, 4}, + ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}}, + JSONField: &JSONFieldData{[][]byte{}}, }, } b, s, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty) @@ -283,6 +342,23 @@ func TestInsertCodec(t *testing.T) { assert.Equal(t, []string{"1", "2", "3", "4"}, resultData.Data[StringField].(*StringFieldData).Data) assert.Equal(t, []byte{0, 255, 0, 255}, resultData.Data[BinaryVectorField].(*BinaryVectorFieldData).Data) assert.Equal(t, []float32{0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7}, resultData.Data[FloatVectorField].(*FloatVectorFieldData).Data) + + int32ArrayList := [][]int32{{1, 2, 3}, {4, 5, 6}, {3, 2, 1}, {6, 5, 4}} + resultArrayList := [][]int32{} + for _, v := range resultData.Data[ArrayField].(*ArrayFieldData).Data { + resultArrayList = append(resultArrayList, v.GetIntData().GetData()) + } + assert.EqualValues(t, int32ArrayList, resultArrayList) + + assert.Equal(t, + [][]byte{ + []byte(`{"batch":1}`), + []byte(`{"key":"hello"}`), + []byte(`{"batch":2}`), + []byte(`{"key":"world"}`), + }, + resultData.Data[JSONField].(*JSONFieldData).Data) + log.Debug("Data", zap.Any("Data", resultData.Data)) log.Debug("Infos", zap.Any("Infos", resultData.Infos)) @@ -465,6 +541,21 @@ func TestMemorySize(t *testing.T) { Data: []float32{4, 5, 6, 7}, Dim: 4, }, + ArrayField: &ArrayFieldData{ + ElementType: schemapb.DataType_Int32, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}}, + }, + }, + }, + }, + JSONField: &JSONFieldData{ + Data: [][]byte{ + []byte(`{"batch":1}`), + }, + }, }, } assert.Equal(t, insertData1.Data[RowIDField].GetMemorySize(), 8) @@ -479,6 +570,8 @@ func TestMemorySize(t *testing.T) { assert.Equal(t, insertData1.Data[StringField].GetMemorySize(), 17) assert.Equal(t, insertData1.Data[BinaryVectorField].GetMemorySize(), 5) assert.Equal(t, insertData1.Data[FloatField].GetMemorySize(), 4) + assert.Equal(t, insertData1.Data[ArrayField].GetMemorySize(), 3*4) + assert.Equal(t, insertData1.Data[JSONField].GetMemorySize(), len([]byte(`{"batch":1}`))+16) insertData2 := &InsertData{ Data: map[int64]FieldData{ diff --git a/internal/storage/data_sorter.go b/internal/storage/data_sorter.go index e435bd4fb7..c18661e192 100644 --- a/internal/storage/data_sorter.go +++ b/internal/storage/data_sorter.go @@ -94,6 +94,12 @@ func (ds *DataSorter) Swap(i, j int) { for idx := 0; idx < dim; idx++ { data[i*dim+idx], data[j*dim+idx] = data[j*dim+idx], data[i*dim+idx] } + case schemapb.DataType_Array: + data := singleData.(*ArrayFieldData).Data + data[i], data[j] = data[j], data[i] + case schemapb.DataType_JSON: + data := singleData.(*JSONFieldData).Data + data[i], data[j] = data[j], data[i] default: errMsg := "undefined data type " + string(field.DataType) panic(errMsg) diff --git a/internal/storage/payload.go b/internal/storage/payload.go index cfea63b60d..37f5fe079b 100644 --- a/internal/storage/payload.go +++ b/internal/storage/payload.go @@ -29,8 +29,12 @@ import ( "unsafe" "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" + "go.uber.org/zap" + "github.com/milvus-io/milvus-proto/go-api/commonpb" "github.com/milvus-io/milvus-proto/go-api/schemapb" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -46,6 +50,8 @@ type PayloadWriterInterface interface { AddFloatToPayload(msgs []float32) error AddDoubleToPayload(msgs []float64) error AddOneStringToPayload(msgs string) error + AddOneArrayToPayload(msg *schemapb.ScalarField) error + AddOneJSONToPayload(msg []byte) error AddBinaryVectorToPayload(binVec []byte, dim int) error AddFloatVectorToPayload(binVec []float32, dim int) error FinishPayloadWriter() error @@ -67,6 +73,8 @@ type PayloadReaderInterface interface { GetFloatFromPayload() ([]float32, error) GetDoubleFromPayload() ([]float64, error) GetStringFromPayload() ([]string, error) + GetArrayFromPayload() ([]*schemapb.ScalarField, error) + GetJSONFromPayload() ([][]byte, error) GetBinaryVectorFromPayload() ([]byte, int, error) GetFloatVectorFromPayload() ([]float32, int, error) GetPayloadLengthFromReader() (int, error) @@ -150,6 +158,18 @@ func (w *PayloadWriter) AddDataToPayload(msgs interface{}, dim ...int) error { return errors.New("incorrect data type") } return w.AddOneStringToPayload(val) + case schemapb.DataType_Array: + val, ok := msgs.(*schemapb.ScalarField) + if !ok { + return errors.New("incorrect data type") + } + return w.AddOneArrayToPayload(val) + case schemapb.DataType_JSON: + val, ok := msgs.([]byte) + if !ok { + return errors.New("incorrect data type") + } + return w.AddOneJSONToPayload(val) default: return errors.New("incorrect datatype") } @@ -290,6 +310,34 @@ func (w *PayloadWriter) AddOneStringToPayload(msg string) error { return HandleCStatus(&status, "AddOneStringToPayload failed") } +func (w *PayloadWriter) AddOneArrayToPayload(msg *schemapb.ScalarField) error { + bytes, err := proto.Marshal(msg) + if err != nil { + return errors.New("Marshal ListValue failed") + } + + length := len(bytes) + cmsg := (*C.uint8_t)(unsafe.Pointer(&bytes[0])) + clength := C.int(length) + // defer C.free(unsafe.Pointer(cmsg)) + + status := C.AddOneArrayToPayload(w.payloadWriterPtr, cmsg, clength) + return HandleCStatus(&status, "AddOneArrayToPayload failed") +} + +func (w *PayloadWriter) AddOneJSONToPayload(msg []byte) error { + bytes := msg + length := len(bytes) + cmsg := (*C.uint8_t)(unsafe.Pointer(&bytes[0])) + clength := C.int(length) + // defer C.free(unsafe.Pointer(cmsg)) + + log.Debug("yah01", zap.String("jsonBytes", string(bytes))) + + status := C.AddOneJSONToPayload(w.payloadWriterPtr, cmsg, clength) + return HandleCStatus(&status, "AddOneJSONToPayload failed") +} + // AddBinaryVectorToPayload dimension > 0 && (%8 == 0) func (w *PayloadWriter) AddBinaryVectorToPayload(binVec []byte, dim int) error { length := len(binVec) @@ -360,3 +408,22 @@ func (w *PayloadWriter) ReleasePayloadWriter() { func (w *PayloadWriter) Close() { w.ReleasePayloadWriter() } + +// HandleCStatus deal with the error returned from CGO +func HandleCStatus(status *C.CStatus, extraInfo string) error { + if status.error_code == 0 { + return nil + } + errorCode := status.error_code + errorName, ok := commonpb.ErrorCode_name[int32(errorCode)] + if !ok { + errorName = "UnknownError" + } + errorMsg := C.GoString(status.error_msg) + defer C.free(unsafe.Pointer(status.error_msg)) + + finalMsg := fmt.Sprintf("[%s] %s", errorName, errorMsg) + logMsg := fmt.Sprintf("%s, C Runtime Exception: %s\n", extraInfo, finalMsg) + log.Warn(logMsg) + return errors.New(finalMsg) +} diff --git a/internal/storage/payload_benchmark_test.go b/internal/storage/payload_benchmark_test.go deleted file mode 100644 index 53b7b63260..0000000000 --- a/internal/storage/payload_benchmark_test.go +++ /dev/null @@ -1,221 +0,0 @@ -package storage - -import ( - "math/rand" - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/milvus-io/milvus-proto/go-api/schemapb" -) - -// workload setting for benchmark -const ( - numElements = 10000 - vectorDim = 512 -) - -func BenchmarkPayloadReader_Bool(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]bool, 0, numElements) - for i := 0; i < numElements; i++ { - data = append(data, rand.Intn(2) != 0) - } - w.AddBoolToPayload(data) - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_Bool, buffer) - r.GetBoolFromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_Bool, buffer) - r.GetBoolFromPayload() - r.ReleasePayloadReader() - } - }) -} - -func BenchmarkPayloadReader_Int32(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]int32, 0, numElements) - for i := 0; i < numElements; i++ { - data = append(data, rand.Int31n(1000)) - } - w.AddInt32ToPayload(data) - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_Int32, buffer) - r.GetInt32FromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_Int32, buffer) - r.GetInt32FromPayload() - r.ReleasePayloadReader() - } - }) -} - -func BenchmarkPayloadReader_Int64(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]int64, 0, numElements) - for i := 0; i < numElements; i++ { - data = append(data, rand.Int63n(1000)) - } - w.AddInt64ToPayload(data) - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_Int64, buffer) - r.GetInt64FromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_Int64, buffer) - r.GetInt64FromPayload() - r.ReleasePayloadReader() - } - }) -} - -func BenchmarkPayloadReader_Float32(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_Float) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]float32, 0, numElements) - for i := 0; i < numElements; i++ { - data = append(data, rand.Float32()) - } - w.AddFloatToPayload(data) - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_Float, buffer) - r.GetFloatFromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_Float, buffer) - r.GetFloatFromPayload() - r.ReleasePayloadReader() - } - }) -} - -func BenchmarkPayloadReader_Float64(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_Double) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]float64, 0, numElements) - for i := 0; i < numElements; i++ { - data = append(data, rand.Float64()) - } - w.AddDoubleToPayload(data) - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_Double, buffer) - r.GetDoubleFromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_Double, buffer) - r.GetDoubleFromPayload() - r.ReleasePayloadReader() - } - }) -} - -func BenchmarkPayloadReader_FloatVector(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, vectorDim) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]float32, 0, numElements*vectorDim) - for i := 0; i < numElements; i++ { - data = append(data, rand.Float32()) - } - w.AddFloatVectorToPayload(data, vectorDim) - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_FloatVector, buffer) - r.GetFloatVectorFromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_FloatVector, buffer) - r.GetFloatVectorFromPayload() - r.ReleasePayloadReader() - } - }) -} - -func BenchmarkPayloadReader_BinaryVector(b *testing.B) { - w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, vectorDim) - assert.NoError(b, err) - defer w.ReleasePayloadWriter() - data := make([]byte, numElements*vectorDim/8) - rand.Read(data) - - err = w.AddBinaryVectorToPayload(data, vectorDim) - if err != nil { - panic(err) - } - w.FinishPayloadWriter() - buffer, _ := w.GetPayloadBufferFromWriter() - - b.Run("cgo reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReaderCgo(schemapb.DataType_BinaryVector, buffer) - r.GetBinaryVectorFromPayload() - r.ReleasePayloadReader() - } - }) - - b.Run("go reader", func(b *testing.B) { - for i := 0; i < b.N; i++ { - r, _ := NewPayloadReader(schemapb.DataType_BinaryVector, buffer) - r.GetBinaryVectorFromPayload() - r.ReleasePayloadReader() - } - }) -} diff --git a/internal/storage/payload_cgo_test.go b/internal/storage/payload_cgo_test.go deleted file mode 100644 index b641917a11..0000000000 --- a/internal/storage/payload_cgo_test.go +++ /dev/null @@ -1,827 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package storage - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/milvus-io/milvus-proto/go-api/schemapb" -) - -func TestPayload_CGO_ReaderandWriter(t *testing.T) { - - t.Run("TestBool", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, false, false, false}) - assert.Nil(t, err) - err = w.AddDataToPayload([]bool{false, false, false, false}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 8, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Bool, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 8) - bools, err := r.GetBoolFromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []bool{false, false, false, false, false, false, false, false}, bools) - ibools, _, err := r.GetDataFromPayload() - bools = ibools.([]bool) - assert.Nil(t, err) - assert.ElementsMatch(t, []bool{false, false, false, false, false, false, false, false}, bools) - defer r.ReleasePayloadReader() - - }) - - t.Run("TestInt8", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddInt8ToPayload([]int8{1, 2, 3}) - assert.Nil(t, err) - err = w.AddDataToPayload([]int8{4, 5, 6}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 6, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Int8, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 6) - - int8s, err := r.GetInt8FromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []int8{1, 2, 3, 4, 5, 6}, int8s) - - iint8s, _, err := r.GetDataFromPayload() - int8s = iint8s.([]int8) - assert.Nil(t, err) - - assert.ElementsMatch(t, []int8{1, 2, 3, 4, 5, 6}, int8s) - defer r.ReleasePayloadReader() - }) - - t.Run("TestInt16", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int16) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddInt16ToPayload([]int16{1, 2, 3}) - assert.Nil(t, err) - err = w.AddDataToPayload([]int16{1, 2, 3}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 6, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Int16, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 6) - int16s, err := r.GetInt16FromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []int16{1, 2, 3, 1, 2, 3}, int16s) - - iint16s, _, err := r.GetDataFromPayload() - int16s = iint16s.([]int16) - assert.Nil(t, err) - assert.ElementsMatch(t, []int16{1, 2, 3, 1, 2, 3}, int16s) - defer r.ReleasePayloadReader() - }) - - t.Run("TestInt32", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddInt32ToPayload([]int32{1, 2, 3}) - assert.Nil(t, err) - err = w.AddDataToPayload([]int32{1, 2, 3}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 6, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Int32, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 6) - - int32s, err := r.GetInt32FromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []int32{1, 2, 3, 1, 2, 3}, int32s) - - iint32s, _, err := r.GetDataFromPayload() - int32s = iint32s.([]int32) - assert.Nil(t, err) - assert.ElementsMatch(t, []int32{1, 2, 3, 1, 2, 3}, int32s) - defer r.ReleasePayloadReader() - }) - - t.Run("TestInt64", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddInt64ToPayload([]int64{1, 2, 3}) - assert.Nil(t, err) - err = w.AddDataToPayload([]int64{1, 2, 3}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 6, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Int64, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 6) - - int64s, err := r.GetInt64FromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []int64{1, 2, 3, 1, 2, 3}, int64s) - - iint64s, _, err := r.GetDataFromPayload() - int64s = iint64s.([]int64) - assert.Nil(t, err) - assert.ElementsMatch(t, []int64{1, 2, 3, 1, 2, 3}, int64s) - defer r.ReleasePayloadReader() - }) - - t.Run("TestFloat32", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddFloatToPayload([]float32{1.0, 2.0, 3.0}) - assert.Nil(t, err) - err = w.AddDataToPayload([]float32{1.0, 2.0, 3.0}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 6, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Float, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 6) - - float32s, err := r.GetFloatFromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float32s) - - ifloat32s, _, err := r.GetDataFromPayload() - float32s = ifloat32s.([]float32) - assert.Nil(t, err) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float32s) - defer r.ReleasePayloadReader() - }) - - t.Run("TestDouble", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Double) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddDoubleToPayload([]float64{1.0, 2.0, 3.0}) - assert.Nil(t, err) - err = w.AddDataToPayload([]float64{1.0, 2.0, 3.0}) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 6, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_Double, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 6) - - float64s, err := r.GetDoubleFromPayload() - assert.Nil(t, err) - assert.ElementsMatch(t, []float64{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float64s) - - ifloat64s, _, err := r.GetDataFromPayload() - float64s = ifloat64s.([]float64) - assert.Nil(t, err) - assert.ElementsMatch(t, []float64{1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, float64s) - defer r.ReleasePayloadReader() - }) - - t.Run("TestAddString", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddOneStringToPayload("hello0") - assert.Nil(t, err) - err = w.AddOneStringToPayload("hello1") - assert.Nil(t, err) - err = w.AddOneStringToPayload("hello2") - assert.Nil(t, err) - err = w.AddDataToPayload("hello3") - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, length, 4) - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_String, buffer) - assert.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 4) - - str, err := r.GetStringFromPayload() - assert.Nil(t, err) - - assert.Equal(t, str[0], "hello0") - assert.Equal(t, str[1], "hello1") - assert.Equal(t, str[2], "hello2") - assert.Equal(t, str[3], "hello3") - - istr, _, err := r.GetDataFromPayload() - strArray := istr.([]string) - assert.Nil(t, err) - assert.Equal(t, strArray[0], "hello0") - assert.Equal(t, strArray[1], "hello1") - assert.Equal(t, strArray[2], "hello2") - assert.Equal(t, strArray[3], "hello3") - r.ReleasePayloadReader() - w.ReleasePayloadWriter() - }) - - t.Run("TestBinaryVector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) - require.Nil(t, err) - require.NotNil(t, w) - - in := make([]byte, 16) - for i := 0; i < 16; i++ { - in[i] = 1 - } - in2 := make([]byte, 8) - for i := 0; i < 8; i++ { - in2[i] = 1 - } - - err = w.AddBinaryVectorToPayload(in, 8) - assert.Nil(t, err) - err = w.AddDataToPayload(in2, 8) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 24, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_BinaryVector, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 24) - - binVecs, dim, err := r.GetBinaryVectorFromPayload() - assert.Nil(t, err) - assert.Equal(t, 8, dim) - assert.Equal(t, 24, len(binVecs)) - t.Log(binVecs) - - ibinVecs, dim, err := r.GetDataFromPayload() - assert.Nil(t, err) - binVecs = ibinVecs.([]byte) - assert.Equal(t, 8, dim) - assert.Equal(t, 24, len(binVecs)) - defer r.ReleasePayloadReader() - }) - - t.Run("TestFloatVector", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 1) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddFloatVectorToPayload([]float32{1.0, 2.0}, 1) - assert.Nil(t, err) - err = w.AddDataToPayload([]float32{3.0, 4.0}, 1) - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - length, err := w.GetPayloadLengthFromWriter() - assert.Nil(t, err) - assert.Equal(t, 4, length) - defer w.ReleasePayloadWriter() - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - r, err := NewPayloadReaderCgo(schemapb.DataType_FloatVector, buffer) - require.Nil(t, err) - length, err = r.GetPayloadLengthFromReader() - assert.Nil(t, err) - assert.Equal(t, length, 4) - - floatVecs, dim, err := r.GetFloatVectorFromPayload() - assert.Nil(t, err) - assert.Equal(t, 1, dim) - assert.Equal(t, 4, len(floatVecs)) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 4.0}, floatVecs) - - ifloatVecs, dim, err := r.GetDataFromPayload() - assert.Nil(t, err) - floatVecs = ifloatVecs.([]float32) - assert.Equal(t, 1, dim) - assert.Equal(t, 4, len(floatVecs)) - assert.ElementsMatch(t, []float32{1.0, 2.0, 3.0, 4.0}, floatVecs) - defer r.ReleasePayloadReader() - }) - - t.Run("TestAddDataToPayload", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - w.colType = 999 - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddDataToPayload([]bool{false, false, false, false}) - assert.NotNil(t, err) - - err = w.AddDataToPayload([]bool{false, false, false, false}, 0) - assert.NotNil(t, err) - - err = w.AddDataToPayload([]bool{false, false, false, false}, 0, 0) - assert.NotNil(t, err) - - err = w.AddBoolToPayload([]bool{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddBoolToPayload([]bool{false}) - assert.NotNil(t, err) - }) - - t.Run("TestAddBoolAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddBoolToPayload([]bool{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddBoolToPayload([]bool{false}) - assert.NotNil(t, err) - }) - - t.Run("TestAddInt8AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddInt8ToPayload([]int8{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddInt8ToPayload([]int8{0}) - assert.NotNil(t, err) - }) - t.Run("TestAddInt16AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int16) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddInt16ToPayload([]int16{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddInt16ToPayload([]int16{0}) - assert.NotNil(t, err) - }) - t.Run("TestAddInt32AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int32) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddInt32ToPayload([]int32{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddInt32ToPayload([]int32{0}) - assert.NotNil(t, err) - }) - t.Run("TestAddInt64AfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int64) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddInt64ToPayload([]int64{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddInt64ToPayload([]int64{0}) - assert.NotNil(t, err) - }) - t.Run("TestAddFloatAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Float) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddFloatToPayload([]float32{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddFloatToPayload([]float32{0.0}) - assert.NotNil(t, err) - }) - t.Run("TestAddDoubleAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Double) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddDoubleToPayload([]float64{}) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddDoubleToPayload([]float64{0.0}) - assert.NotNil(t, err) - }) - t.Run("TestAddOneStringAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_String) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.AddOneStringToPayload("") - assert.Nil(t, err) - err = w.FinishPayloadWriter() - assert.Nil(t, err) - err = w.AddOneStringToPayload("c") - assert.NotNil(t, err) - }) - t.Run("TestAddBinVectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - _, err = w.GetPayloadBufferFromWriter() - assert.NotNil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - err = w.AddBinaryVectorToPayload([]byte{}, 8) - assert.NotNil(t, err) - err = w.AddBinaryVectorToPayload([]byte{1}, 0) - assert.NotNil(t, err) - - err = w.AddBinaryVectorToPayload([]byte{1, 0, 0, 0, 0, 0, 0, 0}, 8) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.NotNil(t, err) - err = w.AddBinaryVectorToPayload([]byte{1, 0, 0, 0, 0, 0, 0, 0}, 8) - assert.NotNil(t, err) - }) - t.Run("TestAddFloatVectorAfterFinish", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 8) - require.Nil(t, err) - require.NotNil(t, w) - defer w.Close() - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - err = w.AddFloatVectorToPayload([]float32{}, 8) - assert.NotNil(t, err) - err = w.AddFloatVectorToPayload([]float32{1.0}, 0) - assert.NotNil(t, err) - - err = w.AddFloatVectorToPayload([]float32{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, 8) - assert.NotNil(t, err) - err = w.FinishPayloadWriter() - assert.NotNil(t, err) - err = w.AddFloatVectorToPayload([]float32{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, 8) - assert.NotNil(t, err) - }) - - t.Run("TestNewReadError", func(t *testing.T) { - buffer := []byte{0} - r, err := NewPayloadReaderCgo(999, buffer) - assert.NotNil(t, err) - assert.Nil(t, r) - }) - t.Run("TestGetDataError", func(t *testing.T) { - r := PayloadReader{} - r.colType = 999 - - _, _, err := r.GetDataFromPayload() - assert.NotNil(t, err) - }) - t.Run("TestGetBoolError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Int8) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddInt8ToPayload([]int8{1, 2, 3}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Bool, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetInt8Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Int8, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetInt16Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Int16, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetInt32Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Int32, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetInt64Error", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Int64, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetFloatError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Float, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetDoubleError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_Double, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetStringError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_String, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetBinaryVectorError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_BinaryVector, buffer) - assert.NotNil(t, err) - }) - t.Run("TestGetFloatVectorError", func(t *testing.T) { - w, err := NewPayloadWriter(schemapb.DataType_Bool) - require.Nil(t, err) - require.NotNil(t, w) - - err = w.AddBoolToPayload([]bool{false, true, true}) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - buffer, err := w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - _, err = NewPayloadReaderCgo(schemapb.DataType_FloatVector, buffer) - assert.NotNil(t, err) - }) - - t.Run("TestWriteLargeSizeData", func(t *testing.T) { - t.Skip("Large data skip for online ut") - size := 1 << 29 // 512M - var vec []float32 - for i := 0; i < size/4; i++ { - vec = append(vec, 1) - } - - w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 128) - assert.Nil(t, err) - - err = w.AddFloatVectorToPayload(vec, 128) - assert.Nil(t, err) - - err = w.FinishPayloadWriter() - assert.Nil(t, err) - - _, err = w.GetPayloadBufferFromWriter() - assert.Nil(t, err) - - w.ReleasePayloadWriter() - }) -} diff --git a/internal/storage/payload_reader.go b/internal/storage/payload_reader.go index 47a095a362..5eb4dcebd8 100644 --- a/internal/storage/payload_reader.go +++ b/internal/storage/payload_reader.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" "github.com/apache/arrow/go/v8/arrow" "github.com/apache/arrow/go/v8/parquet" @@ -20,6 +21,8 @@ type PayloadReader struct { numRows int64 } +var _ PayloadReaderInterface = (*PayloadReader)(nil) + func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, error) { if len(buf) == 0 { return nil, errors.New("create Payload reader failed, buffer is empty") @@ -67,6 +70,12 @@ func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) { case schemapb.DataType_String, schemapb.DataType_VarChar: val, err := r.GetStringFromPayload() return val, 0, err + case schemapb.DataType_Array: + val, err := r.GetArrayFromPayload() + return val, 0, err + case schemapb.DataType_JSON: + val, err := r.GetJSONFromPayload() + return val, 0, err default: return nil, 0, errors.New("unknown type") } @@ -238,6 +247,33 @@ func (r *PayloadReader) GetStringFromPayload() ([]string, error) { return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String()) } + return readByteAndConvert(r, func(bytes parquet.ByteArray) string { + return bytes.String() + }) +} + +func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, error) { + if r.colType != schemapb.DataType_Array { + return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String()) + } + return readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField { + v := &schemapb.ScalarField{} + proto.Unmarshal(bytes, v) + return v + }) +} + +func (r *PayloadReader) GetJSONFromPayload() ([][]byte, error) { + if r.colType != schemapb.DataType_JSON { + return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String()) + } + + return readByteAndConvert(r, func(bytes parquet.ByteArray) []byte { + return bytes + }) +} + +func readByteAndConvert[T any](r *PayloadReader, convert func(parquet.ByteArray) T) ([]T, error) { values := make([]parquet.ByteArray, r.numRows) valuesRead, err := ReadDataFromAllRowGroups[parquet.ByteArray, *file.ByteArrayColumnChunkReader](r.reader, values, 0, r.numRows) if err != nil { @@ -248,9 +284,9 @@ func (r *PayloadReader) GetStringFromPayload() ([]string, error) { return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead) } - ret := make([]string, r.numRows) + ret := make([]T, r.numRows) for i := 0; i < int(r.numRows); i++ { - ret[i] = values[i].String() + ret[i] = convert(values[i]) } return ret, nil } diff --git a/internal/storage/payload_reader_cgo.go b/internal/storage/payload_reader_cgo.go deleted file mode 100644 index f4a2fdae2c..0000000000 --- a/internal/storage/payload_reader_cgo.go +++ /dev/null @@ -1,333 +0,0 @@ -package storage - -/* -#cgo pkg-config: milvus_storage - -#include -#include "storage/parquet_c.h" -*/ -import "C" -import ( - "fmt" - "unsafe" - - "github.com/cockroachdb/errors" - - "github.com/milvus-io/milvus-proto/go-api/commonpb" - "github.com/milvus-io/milvus-proto/go-api/schemapb" - "github.com/milvus-io/milvus/pkg/log" -) - -// PayloadReaderCgo reads data from payload -type PayloadReaderCgo struct { - payloadReaderPtr C.CPayloadReader - colType schemapb.DataType -} - -func NewPayloadReaderCgo(colType schemapb.DataType, buf []byte) (*PayloadReaderCgo, error) { - if len(buf) == 0 { - return nil, errors.New("create Payload reader failed, buffer is empty") - } - var r C.CPayloadReader - status := C.NewPayloadReader(C.int(colType), (*C.uint8_t)(unsafe.Pointer(&buf[0])), C.int64_t(len(buf)), &r) - if err := HandleCStatus(&status, "NewPayloadReader failed"); err != nil { - return nil, err - } - return &PayloadReaderCgo{payloadReaderPtr: r, colType: colType}, nil -} - -// GetDataFromPayload returns data,length from payload, returns err if failed -// Params: -// -// `idx`: String index -// -// Return: -// -// `interface{}`: all types. -// `int`: length, only meaningful to FLOAT/BINARY VECTOR type. -// `error`: error. -func (r *PayloadReaderCgo) GetDataFromPayload() (interface{}, int, error) { - switch r.colType { - case schemapb.DataType_Bool: - val, err := r.GetBoolFromPayload() - return val, 0, err - case schemapb.DataType_Int8: - val, err := r.GetInt8FromPayload() - return val, 0, err - case schemapb.DataType_Int16: - val, err := r.GetInt16FromPayload() - return val, 0, err - case schemapb.DataType_Int32: - val, err := r.GetInt32FromPayload() - return val, 0, err - case schemapb.DataType_Int64: - val, err := r.GetInt64FromPayload() - return val, 0, err - case schemapb.DataType_Float: - val, err := r.GetFloatFromPayload() - return val, 0, err - case schemapb.DataType_Double: - val, err := r.GetDoubleFromPayload() - return val, 0, err - case schemapb.DataType_BinaryVector: - return r.GetBinaryVectorFromPayload() - case schemapb.DataType_FloatVector: - return r.GetFloatVectorFromPayload() - case schemapb.DataType_String: - val, err := r.GetStringFromPayload() - return val, 0, err - default: - return nil, 0, errors.New("unknown type") - } -} - -// ReleasePayloadReader release payload reader. -func (r *PayloadReaderCgo) ReleasePayloadReader() error { - status := C.ReleasePayloadReader(r.payloadReaderPtr) - if err := HandleCStatus(&status, "ReleasePayloadReader failed"); err != nil { - return err - } - - return nil -} - -// GetBoolFromPayload returns bool slice from payload. -func (r *PayloadReaderCgo) GetBoolFromPayload() ([]bool, error) { - if r.colType != schemapb.DataType_Bool { - return nil, errors.New("incorrect data type") - } - - length, err := r.GetPayloadLengthFromReader() - if err != nil { - return nil, err - } - slice := make([]bool, length) - for i := 0; i < length; i++ { - status := C.GetBoolFromPayload(r.payloadReaderPtr, C.int(i), (*C.bool)(&slice[i])) - if err := HandleCStatus(&status, "GetBoolFromPayload failed"); err != nil { - return nil, err - } - } - - return slice, nil -} - -// GetByteFromPayload returns byte slice from payload -func (r *PayloadReaderCgo) GetByteFromPayload() ([]byte, error) { - if r.colType != schemapb.DataType_Int8 { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.int8_t - var cSize C.int - - status := C.GetInt8FromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetInt8FromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]byte)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -// GetInt8FromPayload returns int8 slice from payload -func (r *PayloadReaderCgo) GetInt8FromPayload() ([]int8, error) { - if r.colType != schemapb.DataType_Int8 { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.int8_t - var cSize C.int - - status := C.GetInt8FromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetInt8FromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]int8)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -func (r *PayloadReaderCgo) GetInt16FromPayload() ([]int16, error) { - if r.colType != schemapb.DataType_Int16 { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.int16_t - var cSize C.int - - status := C.GetInt16FromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetInt16FromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]int16)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -func (r *PayloadReaderCgo) GetInt32FromPayload() ([]int32, error) { - if r.colType != schemapb.DataType_Int32 { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.int32_t - var cSize C.int - - status := C.GetInt32FromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetInt32FromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]int32)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -func (r *PayloadReaderCgo) GetInt64FromPayload() ([]int64, error) { - if r.colType != schemapb.DataType_Int64 { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.int64_t - var cSize C.int - - status := C.GetInt64FromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetInt64FromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]int64)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -func (r *PayloadReaderCgo) GetFloatFromPayload() ([]float32, error) { - if r.colType != schemapb.DataType_Float { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.float - var cSize C.int - - status := C.GetFloatFromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetFloatFromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]float32)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -func (r *PayloadReaderCgo) GetDoubleFromPayload() ([]float64, error) { - if r.colType != schemapb.DataType_Double { - return nil, errors.New("incorrect data type") - } - - var cMsg *C.double - var cSize C.int - - status := C.GetDoubleFromPayload(r.payloadReaderPtr, &cMsg, &cSize) - if err := HandleCStatus(&status, "GetDoubleFromPayload failed"); err != nil { - return nil, err - } - - slice := (*[1 << 28]float64)(unsafe.Pointer(cMsg))[:cSize:cSize] - return slice, nil -} - -func (r *PayloadReaderCgo) GetStringFromPayload() ([]string, error) { - length, err := r.GetPayloadLengthFromReader() - if err != nil { - return nil, err - } - ret := make([]string, length) - for i := 0; i < length; i++ { - ret[i], err = r.GetOneStringFromPayload(i) - if err != nil { - return nil, err - } - } - return ret, nil -} - -func (r *PayloadReaderCgo) GetOneStringFromPayload(idx int) (string, error) { - if r.colType != schemapb.DataType_String { - return "", errors.New("incorrect data type") - } - - var cStr *C.char - var cSize C.int - - status := C.GetOneStringFromPayload(r.payloadReaderPtr, C.int(idx), &cStr, &cSize) - if err := HandleCStatus(&status, "GetOneStringFromPayload failed"); err != nil { - return "", err - } - return C.GoStringN(cStr, cSize), nil -} - -// GetBinaryVectorFromPayload returns vector, dimension, error -func (r *PayloadReaderCgo) GetBinaryVectorFromPayload() ([]byte, int, error) { - if r.colType != schemapb.DataType_BinaryVector { - return nil, 0, errors.New("incorrect data type") - } - - var cMsg *C.uint8_t - var cDim C.int - var cLen C.int - - status := C.GetBinaryVectorFromPayload(r.payloadReaderPtr, &cMsg, &cDim, &cLen) - if err := HandleCStatus(&status, "GetBinaryVectorFromPayload failed"); err != nil { - return nil, 0, err - } - length := (cDim / 8) * cLen - - slice := (*[1 << 28]byte)(unsafe.Pointer(cMsg))[:length:length] - return slice, int(cDim), nil -} - -// GetFloatVectorFromPayload returns vector, dimension, error -func (r *PayloadReaderCgo) GetFloatVectorFromPayload() ([]float32, int, error) { - if r.colType != schemapb.DataType_FloatVector { - return nil, 0, errors.New("incorrect data type") - } - - var cMsg *C.float - var cDim C.int - var cLen C.int - - status := C.GetFloatVectorFromPayload(r.payloadReaderPtr, &cMsg, &cDim, &cLen) - if err := HandleCStatus(&status, "GetFloatVectorFromPayload failed"); err != nil { - return nil, 0, err - } - length := cDim * cLen - - slice := (*[1 << 28]float32)(unsafe.Pointer(cMsg))[:length:length] - return slice, int(cDim), nil -} - -func (r *PayloadReaderCgo) GetPayloadLengthFromReader() (int, error) { - length := C.GetPayloadLengthFromReader(r.payloadReaderPtr) - return int(length), nil -} - -// Close closes the payload reader -func (r *PayloadReaderCgo) Close() error { - return r.ReleasePayloadReader() -} - -// HandleCStatus deal with the error returned from CGO -func HandleCStatus(status *C.CStatus, extraInfo string) error { - if status.error_code == 0 { - return nil - } - errorCode := status.error_code - errorName, ok := commonpb.ErrorCode_name[int32(errorCode)] - if !ok { - errorName = "UnknownError" - } - errorMsg := C.GoString(status.error_msg) - defer C.free(unsafe.Pointer(status.error_msg)) - - finalMsg := fmt.Sprintf("[%s] %s", errorName, errorMsg) - logMsg := fmt.Sprintf("%s, C Runtime Exception: %s\n", extraInfo, finalMsg) - log.Warn(logMsg) - return errors.New(finalMsg) -} diff --git a/internal/storage/payload_test.go b/internal/storage/payload_test.go index 87062c9eb3..844d30ad85 100644 --- a/internal/storage/payload_test.go +++ b/internal/storage/payload_test.go @@ -331,6 +331,122 @@ func TestPayload_ReaderAndWriter(t *testing.T) { w.ReleasePayloadWriter() }) + t.Run("TestAddArray", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_Array) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2}, + }, + }, + }) + assert.Nil(t, err) + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{3, 4}, + }, + }, + }) + assert.Nil(t, err) + err = w.AddOneArrayToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{5, 6}, + }, + }, + }) + assert.Nil(t, err) + err = w.AddDataToPayload(&schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{7, 8}, + }, + }, + }) + assert.Nil(t, err) + err = w.FinishPayloadWriter() + assert.Nil(t, err) + length, err := w.GetPayloadLengthFromWriter() + assert.Nil(t, err) + assert.Equal(t, length, 4) + buffer, err := w.GetPayloadBufferFromWriter() + assert.Nil(t, err) + + r, err := NewPayloadReader(schemapb.DataType_Array, buffer) + assert.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.Nil(t, err) + assert.Equal(t, length, 4) + + arrayList, err := r.GetArrayFromPayload() + assert.Nil(t, err) + + assert.EqualValues(t, []int32{1, 2}, arrayList[0].GetIntData().GetData()) + assert.EqualValues(t, []int32{3, 4}, arrayList[1].GetIntData().GetData()) + assert.EqualValues(t, []int32{5, 6}, arrayList[2].GetIntData().GetData()) + assert.EqualValues(t, []int32{7, 8}, arrayList[3].GetIntData().GetData()) + + iArrayList, _, err := r.GetDataFromPayload() + arrayList = iArrayList.([]*schemapb.ScalarField) + assert.Nil(t, err) + assert.EqualValues(t, []int32{1, 2}, arrayList[0].GetIntData().GetData()) + assert.EqualValues(t, []int32{3, 4}, arrayList[1].GetIntData().GetData()) + assert.EqualValues(t, []int32{5, 6}, arrayList[2].GetIntData().GetData()) + assert.EqualValues(t, []int32{7, 8}, arrayList[3].GetIntData().GetData()) + r.ReleasePayloadReader() + w.ReleasePayloadWriter() + }) + + t.Run("TestAddJSON", func(t *testing.T) { + w, err := NewPayloadWriter(schemapb.DataType_JSON) + require.Nil(t, err) + require.NotNil(t, w) + + err = w.AddOneJSONToPayload([]byte(`{"1":"1"}`)) + assert.Nil(t, err) + err = w.AddOneJSONToPayload([]byte(`{"2":"2"}`)) + assert.Nil(t, err) + err = w.AddOneJSONToPayload([]byte(`{"3":"3"}`)) + assert.Nil(t, err) + err = w.AddDataToPayload([]byte(`{"4":"4"}`)) + assert.Nil(t, err) + err = w.FinishPayloadWriter() + assert.Nil(t, err) + length, err := w.GetPayloadLengthFromWriter() + assert.Nil(t, err) + assert.Equal(t, length, 4) + buffer, err := w.GetPayloadBufferFromWriter() + assert.Nil(t, err) + + r, err := NewPayloadReader(schemapb.DataType_JSON, buffer) + assert.Nil(t, err) + length, err = r.GetPayloadLengthFromReader() + assert.Nil(t, err) + assert.Equal(t, length, 4) + + json, err := r.GetJSONFromPayload() + assert.Nil(t, err) + + assert.EqualValues(t, []byte(`{"1":"1"}`), json[0]) + assert.EqualValues(t, []byte(`{"2":"2"}`), json[1]) + assert.EqualValues(t, []byte(`{"3":"3"}`), json[2]) + assert.EqualValues(t, []byte(`{"4":"4"}`), json[3]) + + iJSON, _, err := r.GetDataFromPayload() + json = iJSON.([][]byte) + assert.Nil(t, err) + assert.EqualValues(t, []byte(`{"1":"1"}`), json[0]) + assert.EqualValues(t, []byte(`{"2":"2"}`), json[1]) + assert.EqualValues(t, []byte(`{"3":"3"}`), json[2]) + assert.EqualValues(t, []byte(`{"4":"4"}`), json[3]) + r.ReleasePayloadReader() + w.ReleasePayloadWriter() + }) + t.Run("TestBinaryVector", func(t *testing.T) { w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8) require.Nil(t, err) diff --git a/internal/storage/utils.go b/internal/storage/utils.go index a5bff53a37..0fd7a6d12b 100644 --- a/internal/storage/utils.go +++ b/internal/storage/utils.go @@ -531,6 +531,24 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche Data: make([]string, 0, len(srcData)), } + fieldData.Data = append(fieldData.Data, srcData...) + idata.Data[field.FieldID] = fieldData + case schemapb.DataType_Array: + srcData := srcFields[field.FieldID].GetScalars().GetArrayData().GetData() + + fieldData := &ArrayFieldData{ + Data: make([]*schemapb.ScalarField, 0, len(srcData)), + } + + fieldData.Data = append(fieldData.Data, srcData...) + idata.Data[field.FieldID] = fieldData + case schemapb.DataType_JSON: + srcData := srcFields[field.FieldID].GetScalars().GetJsonData().GetData() + + fieldData := &JSONFieldData{ + Data: make([][]byte, 0, len(srcData)), + } + fieldData.Data = append(fieldData.Data, srcData...) idata.Data[field.FieldID] = fieldData } @@ -634,6 +652,28 @@ func mergeStringField(data *InsertData, fid FieldID, field *StringFieldData) { fieldData.Data = append(fieldData.Data, field.Data...) } +func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) { + if _, ok := data.Data[fid]; !ok { + fieldData := &ArrayFieldData{ + Data: nil, + } + data.Data[fid] = fieldData + } + fieldData := data.Data[fid].(*ArrayFieldData) + fieldData.Data = append(fieldData.Data, field.Data...) +} + +func mergeJSONField(data *InsertData, fid FieldID, field *JSONFieldData) { + if _, ok := data.Data[fid]; !ok { + fieldData := &JSONFieldData{ + Data: nil, + } + data.Data[fid] = fieldData + } + fieldData := data.Data[fid].(*JSONFieldData) + fieldData.Data = append(fieldData.Data, field.Data...) +} + func mergeBinaryVectorField(data *InsertData, fid FieldID, field *BinaryVectorFieldData) { if _, ok := data.Data[fid]; !ok { fieldData := &BinaryVectorFieldData{ @@ -680,6 +720,10 @@ func MergeFieldData(data *InsertData, fid FieldID, field FieldData) { mergeDoubleField(data, fid, field) case *StringFieldData: mergeStringField(data, fid, field) + case *ArrayFieldData: + mergeArrayField(data, fid, field) + case *JSONFieldData: + mergeJSONField(data, fid, field) case *BinaryVectorFieldData: mergeBinaryVectorField(data, fid, field) case *FloatVectorFieldData: @@ -771,6 +815,16 @@ func stringFieldDataToPbBytes(field *StringFieldData) ([]byte, error) { return proto.Marshal(arr) } +func arrayFieldDataToPbBytes(field *ArrayFieldData) ([]byte, error) { + arr := &schemapb.ArrayArray{Data: field.Data} + return proto.Marshal(arr) +} + +func jsonFieldDataToPbBytes(field *JSONFieldData) ([]byte, error) { + arr := &schemapb.JSONArray{Data: field.Data} + return proto.Marshal(arr) +} + func binaryWrite(endian binary.ByteOrder, data interface{}) ([]byte, error) { buf := new(bytes.Buffer) err := binary.Write(buf, endian, data) @@ -793,6 +847,10 @@ func FieldDataToBytes(endian binary.ByteOrder, fieldData FieldData) ([]byte, err return boolFieldDataToPbBytes(field) case *StringFieldData: return stringFieldDataToPbBytes(field) + case *ArrayFieldData: + return arrayFieldDataToPbBytes(field) + case *JSONFieldData: + return jsonFieldDataToPbBytes(field) case *BinaryVectorFieldData: return field.Data, nil case *FloatVectorFieldData: @@ -939,6 +997,34 @@ func TransferInsertDataToInsertRecord(insertData *InsertData) (*segcorepb.Insert }, }, } + case *ArrayFieldData: + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldId: fieldID, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: rawData.Data, + }, + }, + }, + }, + } + case *JSONFieldData: + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_JSON, + FieldId: fieldID, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: rawData.Data, + }, + }, + }, + }, + } case *FloatVectorFieldData: fieldData = &schemapb.FieldData{ Type: schemapb.DataType_FloatVector, diff --git a/internal/storage/utils_test.go b/internal/storage/utils_test.go index 1c9eb730b4..f541d21e26 100644 --- a/internal/storage/utils_test.go +++ b/internal/storage/utils_test.go @@ -20,6 +20,7 @@ import ( "bytes" "encoding/binary" "encoding/json" + "fmt" "math/rand" "strconv" "testing" @@ -358,6 +359,12 @@ func genAllFieldsSchema(fVecDim, bVecDim int) (schema *schemapb.CollectionSchema }, }, }, + { + DataType: schemapb.DataType_Array, + }, + { + DataType: schemapb.DataType_JSON, + }, }, } fieldIDs = make([]UniqueID, 0) @@ -445,6 +452,28 @@ func generateFloat64Array(numRows int) []float64 { return ret } +func generateBytesArray(numRows int) [][]byte { + ret := make([][]byte, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, []byte(fmt.Sprint(rand.Int()))) + } + return ret +} + +func generateInt32ArrayList(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{rand.Int31(), rand.Int31()}, + }, + }, + }) + } + return ret +} + func genRowWithAllFields(fVecDim, bVecDim int) (blob *commonpb.Blob, pk int64, row []interface{}) { schema, _, _ := genAllFieldsSchema(fVecDim, bVecDim) ret := &commonpb.Blob{ @@ -499,6 +528,23 @@ func genRowWithAllFields(fVecDim, bVecDim int) (blob *commonpb.Blob, pk int64, r _ = binary.Write(&buffer, common.Endian, data) ret.Value = append(ret.Value, buffer.Bytes()...) row = append(row, data) + case schemapb.DataType_Array: + data := &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2, 3}, + }, + }, + } + bytes, _ := proto.Marshal(data) + binary.Write(&buffer, common.Endian, bytes) + ret.Value = append(ret.Value, buffer.Bytes()...) + row = append(row, data) + case schemapb.DataType_JSON: + data := []byte(`{"key":"value"}`) + binary.Write(&buffer, common.Endian, data) + ret.Value = append(ret.Value, buffer.Bytes()...) + row = append(row, data) } } return ret, pk, row @@ -749,6 +795,49 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim for nrows := 0; nrows < numRows; nrows++ { columns[idx] = append(columns[idx], data[nrows*bVecDim/8:(nrows+1)*bVecDim/8]) } + + case schemapb.DataType_Array: + data := generateInt32ArrayList(numRows) + f := &schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldName: field.GetName(), + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: data, + ElementType: schemapb.DataType_Int32, + }, + }, + }, + }, + FieldId: field.FieldID, + } + msg.FieldsData = append(msg.FieldsData, f) + for _, d := range data { + columns[idx] = append(columns[idx], d) + } + + case schemapb.DataType_JSON: + data := generateBytesArray(numRows) + f := &schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldName: field.GetName(), + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: data, + }, + }, + }, + }, + FieldId: field.FieldID, + } + msg.FieldsData = append(msg.FieldsData, f) + for _, d := range data { + columns[idx] = append(columns[idx], d) + } } } @@ -758,6 +847,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim func TestRowBasedInsertMsgToInsertData(t *testing.T) { numRows, fVecDim, bVecDim := 10, 8, 8 schema, _, fieldIDs := genAllFieldsSchema(fVecDim, bVecDim) + fieldIDs = fieldIDs[:len(fieldIDs)-2] msg, _, columns := genRowBasedInsertMsg(numRows, fVecDim, bVecDim) idata, err := RowBasedInsertMsgToInsertData(msg, schema) @@ -794,6 +884,7 @@ func TestColumnBasedInsertMsgToInsertData(t *testing.T) { func TestInsertMsgToInsertData(t *testing.T) { numRows, fVecDim, bVecDim := 10, 8, 8 schema, _, fieldIDs := genAllFieldsSchema(fVecDim, bVecDim) + fieldIDs = fieldIDs[:len(fieldIDs)-2] msg, _, columns := genRowBasedInsertMsg(numRows, fVecDim, bVecDim) idata, err := InsertMsgToInsertData(msg, schema) @@ -801,7 +892,7 @@ func TestInsertMsgToInsertData(t *testing.T) { for idx, fID := range fieldIDs { column := columns[idx] fData, ok := idata.Data[fID] - assert.True(t, ok) + assert.True(t, ok, "fID =", fID) assert.Equal(t, len(column), fData.RowNum()) for j := range column { assert.Equal(t, fData.GetRow(j), column[j]) @@ -868,6 +959,20 @@ func TestMergeInsertData(t *testing.T) { Data: []float32{0}, Dim: 1, }, + ArrayField: &ArrayFieldData{ + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{1, 2, 3}, + }, + }, + }, + }, + }, + JSONField: &JSONFieldData{ + Data: [][]byte{[]byte(`{"key":"value"}`)}, + }, }, Infos: nil, } @@ -911,6 +1016,20 @@ func TestMergeInsertData(t *testing.T) { Data: []float32{0}, Dim: 1, }, + ArrayField: &ArrayFieldData{ + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{4, 5, 6}, + }, + }, + }, + }, + }, + JSONField: &JSONFieldData{ + Data: [][]byte{[]byte(`{"hello":"world"}`)}, + }, }, Infos: nil, } @@ -964,6 +1083,15 @@ func TestMergeInsertData(t *testing.T) { f, ok = merged.Data[FloatVectorField] assert.True(t, ok) assert.Equal(t, []float32{0, 0}, f.(*FloatVectorFieldData).Data) + + f, ok = merged.Data[ArrayField] + assert.True(t, ok) + assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData()) + assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[1].GetIntData().GetData()) + + f, ok = merged.Data[JSONField] + assert.True(t, ok) + assert.EqualValues(t, [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data) } func TestGetPkFromInsertData(t *testing.T) { diff --git a/internal/util/typeutil/result_helper_test.go b/internal/util/typeutil/result_helper_test.go index ce07704f99..d5d5f34332 100644 --- a/internal/util/typeutil/result_helper_test.go +++ b/internal/util/typeutil/result_helper_test.go @@ -56,6 +56,8 @@ func TestGenEmptyFieldData(t *testing.T) { schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_VarChar, + schemapb.DataType_Array, + schemapb.DataType_JSON, } allUnsupportedTypes := []schemapb.DataType{ schemapb.DataType_String, diff --git a/pkg/go.mod b/pkg/go.mod index 066969af04..eb3c5db08e 100644 --- a/pkg/go.mod +++ b/pkg/go.mod @@ -12,7 +12,7 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/klauspost/compress v1.14.4 github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76 - github.com/milvus-io/milvus-proto/go-api v0.0.0-20230309062747-133bf302bb11 + github.com/milvus-io/milvus-proto/go-api v0.0.0-20230416064425-aec3e83865b2 github.com/panjf2000/ants/v2 v2.4.8 github.com/prometheus/client_golang v1.11.1 github.com/samber/lo v1.27.0 @@ -78,8 +78,8 @@ require ( github.com/pierrec/lz4 v2.5.2+incompatible // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect - github.com/prometheus/client_model v0.2.0 // indirect - github.com/prometheus/common v0.26.0 // indirect + github.com/prometheus/client_model v0.2.0 + github.com/prometheus/common v0.26.0 github.com/prometheus/procfs v0.6.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/soheilhy/cmux v0.1.5 // indirect diff --git a/pkg/go.sum b/pkg/go.sum index 30fa0ec03b..c6ec0bd65c 100644 --- a/pkg/go.sum +++ b/pkg/go.sum @@ -465,8 +465,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5 github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api v0.0.0-20230309062747-133bf302bb11 h1:ly7SWpwWH0ezos2xw3HPOQCqxTdTjlpUSvfpguPg91c= -github.com/milvus-io/milvus-proto/go-api v0.0.0-20230309062747-133bf302bb11/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk= +github.com/milvus-io/milvus-proto/go-api v0.0.0-20230416064425-aec3e83865b2 h1:G5uN68X/7eoCfHUkNvkbNueFhHuohCZG94te+ApLAOY= +github.com/milvus-io/milvus-proto/go-api v0.0.0-20230416064425-aec3e83865b2/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk= github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A= github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= diff --git a/pkg/util/funcutil/func.go b/pkg/util/funcutil/func.go index 23e8e7d1cd..aa8e93a41a 100644 --- a/pkg/util/funcutil/func.go +++ b/pkg/util/funcutil/func.go @@ -234,6 +234,10 @@ func GetNumRowOfFieldData(fieldData *schemapb.FieldData) (uint64, error) { fieldNumRows = getNumRowsOfScalarField(scalarField.GetDoubleData().Data) case *schemapb.ScalarField_StringData: fieldNumRows = getNumRowsOfScalarField(scalarField.GetStringData().Data) + case *schemapb.ScalarField_ArrayData: + fieldNumRows = getNumRowsOfScalarField(scalarField.GetArrayData().Data) + case *schemapb.ScalarField_JsonData: + fieldNumRows = getNumRowsOfScalarField(scalarField.GetJsonData().Data) default: return 0, fmt.Errorf("%s is not supported now", scalarType) } diff --git a/pkg/util/typeutil/data_format.go b/pkg/util/typeutil/data_format.go deleted file mode 100644 index 5924ced697..0000000000 --- a/pkg/util/typeutil/data_format.go +++ /dev/null @@ -1,258 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package typeutil - -import ( - "bytes" - "encoding/binary" - "fmt" - "reflect" - - "github.com/cockroachdb/errors" - "github.com/milvus-io/milvus-proto/go-api/commonpb" - "github.com/milvus-io/milvus-proto/go-api/schemapb" - "go.uber.org/zap" - - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" -) - -type rowsHelper = [][]interface{} - -func appendScalarField(datas *rowsHelper, rowNum *int, getDataFunc func() interface{}) error { - fieldDatas := reflect.ValueOf(getDataFunc()) - if *rowNum != 0 && *rowNum != fieldDatas.Len() { - return errors.New("the row num of different column is not equal") - } - *rowNum = fieldDatas.Len() - *datas = append(*datas, make([]interface{}, 0, *rowNum)) - idx := len(*datas) - 1 - for i := 0; i < *rowNum; i++ { - (*datas)[idx] = append((*datas)[idx], fieldDatas.Index(i).Interface()) - } - - return nil -} - -func appendFloatVectorField(datas *rowsHelper, rowNum *int, fDatas []float32, dim int64) error { - l := len(fDatas) - if int64(l)%dim != 0 { - return errors.New("invalid vectors") - } - r := int64(l) / dim - if *rowNum != 0 && *rowNum != int(r) { - return errors.New("the row num of different column is not equal") - } - *rowNum = int(r) - *datas = append(*datas, make([]interface{}, 0, *rowNum)) - idx := len(*datas) - 1 - vector := make([]float32, 0, dim) - for i := 0; i < l; i++ { - vector = append(vector, fDatas[i]) - if int64(i+1)%dim == 0 { - (*datas)[idx] = append((*datas)[idx], vector) - vector = make([]float32, 0, dim) - } - } - - return nil -} - -func appendBinaryVectorField(datas *rowsHelper, rowNum *int, bDatas []byte, dim int64) error { - l := len(bDatas) - if dim%8 != 0 { - return errors.New("invalid dim") - } - if (8*int64(l))%dim != 0 { - return errors.New("invalid vectors") - } - r := (8 * int64(l)) / dim - if *rowNum != 0 && *rowNum != int(r) { - return errors.New("the row num of different column is not equal") - } - *rowNum = int(r) - *datas = append(*datas, make([]interface{}, 0, *rowNum)) - idx := len(*datas) - 1 - vector := make([]byte, 0, dim) - for i := 0; i < l; i++ { - vector = append(vector, bDatas[i]) - if (8*int64(i+1))%dim == 0 { - (*datas)[idx] = append((*datas)[idx], vector) - vector = make([]byte, 0, dim) - } - } - - return nil -} - -func TransferColumnBasedDataToRowBasedData(schema *schemapb.CollectionSchema, columns []*schemapb.FieldData) (rows []*commonpb.Blob, err error) { - dTypes := make([]schemapb.DataType, 0, len(columns)) - data := make([][]interface{}, 0, len(columns)) - rowNum := 0 - - fieldID2FieldData := make(map[int64]schemapb.FieldData) - for _, field := range columns { - fieldID2FieldData[field.FieldId] = *field - } - - // reorder field data by schema field orider - for _, field := range schema.Fields { - if field.FieldID == common.RowIDField || field.FieldID == common.TimeStampField { - continue - } - fieldData, ok := fieldID2FieldData[field.FieldID] - if !ok { - return nil, fmt.Errorf("field %s data not exist", field.Name) - } - - switch fieldData.Field.(type) { - case *schemapb.FieldData_Scalars: - scalarField := fieldData.GetScalars() - switch scalarField.Data.(type) { - case *schemapb.ScalarField_BoolData: - err := appendScalarField(&data, &rowNum, func() interface{} { - return scalarField.GetBoolData().Data - }) - if err != nil { - return nil, err - } - case *schemapb.ScalarField_IntData: - err := appendScalarField(&data, &rowNum, func() interface{} { - return scalarField.GetIntData().Data - }) - if err != nil { - return nil, err - } - case *schemapb.ScalarField_LongData: - err := appendScalarField(&data, &rowNum, func() interface{} { - return scalarField.GetLongData().Data - }) - if err != nil { - return nil, err - } - case *schemapb.ScalarField_FloatData: - err := appendScalarField(&data, &rowNum, func() interface{} { - return scalarField.GetFloatData().Data - }) - if err != nil { - return nil, err - } - case *schemapb.ScalarField_DoubleData: - err := appendScalarField(&data, &rowNum, func() interface{} { - return scalarField.GetDoubleData().Data - }) - if err != nil { - return nil, err - } - case *schemapb.ScalarField_BytesData: - return nil, errors.New("bytes field is not supported now") - case *schemapb.ScalarField_StringData: - return nil, errors.New("string field is not supported now") - case nil: - continue - default: - continue - } - case *schemapb.FieldData_Vectors: - vectorField := fieldData.GetVectors() - switch vectorField.Data.(type) { - case *schemapb.VectorField_FloatVector: - floatVectorFieldData := vectorField.GetFloatVector().Data - dim := vectorField.GetDim() - err := appendFloatVectorField(&data, &rowNum, floatVectorFieldData, dim) - if err != nil { - return nil, err - } - case *schemapb.VectorField_BinaryVector: - binaryVectorFieldData := vectorField.GetBinaryVector() - dim := vectorField.GetDim() - err := appendBinaryVectorField(&data, &rowNum, binaryVectorFieldData, dim) - if err != nil { - return nil, err - } - case nil: - continue - default: - continue - } - case nil: - continue - default: - continue - } - - dTypes = append(dTypes, field.DataType) - } - - return parseToRowData(data, dTypes, rowNum) -} - -func parseToRowData(data [][]any, dTypes []schemapb.DataType, rowNum int) ([]*commonpb.Blob, error) { - rows := make([]*commonpb.Blob, 0, rowNum) - l := len(dTypes) - // TODO(dragondriver): big endian or little endian? - endian := common.Endian - for i := 0; i < rowNum; i++ { - blob := &commonpb.Blob{ - Value: make([]byte, 0, l), - } - - for j := 0; j < l; j++ { - var buffer bytes.Buffer - var err error - switch dTypes[j] { - case schemapb.DataType_Bool: - d := data[j][i].(bool) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_Int8: - d := int8(data[j][i].(int32)) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_Int16: - d := int16(data[j][i].(int32)) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_Int32: - d := data[j][i].(int32) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_Int64: - d := data[j][i].(int64) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_Float: - d := data[j][i].(float32) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_Double: - d := data[j][i].(float64) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_FloatVector: - d := data[j][i].([]float32) - err = binary.Write(&buffer, endian, d) - case schemapb.DataType_BinaryVector: - d := data[j][i].([]byte) - err = binary.Write(&buffer, endian, d) - default: - log.Warn("unsupported data type", zap.String("type", dTypes[j].String())) - } - if err != nil { - log.Error("failed to write to buffer", zap.Error(err)) - return nil, err - } - blob.Value = append(blob.Value, buffer.Bytes()...) - } - rows = append(rows, blob) - } - - return rows, nil -} diff --git a/pkg/util/typeutil/data_format_test.go b/pkg/util/typeutil/data_format_test.go deleted file mode 100644 index 8a5e95ab4e..0000000000 --- a/pkg/util/typeutil/data_format_test.go +++ /dev/null @@ -1,259 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package typeutil - -import ( - "encoding/binary" - "testing" - - "github.com/milvus-io/milvus-proto/go-api/commonpb" - "github.com/milvus-io/milvus-proto/go-api/schemapb" - "github.com/stretchr/testify/assert" - - "github.com/milvus-io/milvus/pkg/common" -) - -func TestTransferColumnBasedDataToRowBasedData(t *testing.T) { - fieldSchema := []*schemapb.FieldSchema{ - { - FieldID: 100, - Name: "bool_field", - DataType: schemapb.DataType_Bool, - }, - { - FieldID: 101, - Name: "int8_field", - DataType: schemapb.DataType_Int8, - }, - { - FieldID: 102, - Name: "int16_field", - DataType: schemapb.DataType_Int16, - }, - { - FieldID: 103, - Name: "int32_field", - DataType: schemapb.DataType_Int32, - }, - { - FieldID: 104, - Name: "int64_field", - DataType: schemapb.DataType_Int64, - }, - { - FieldID: 105, - Name: "float32_field", - DataType: schemapb.DataType_Float, - }, - { - FieldID: 106, - Name: "float64_field", - DataType: schemapb.DataType_Double, - }, - { - FieldID: 107, - Name: "float_vector_field", - DataType: schemapb.DataType_FloatVector, - TypeParams: []*commonpb.KeyValuePair{ - { - Key: "dim", - Value: "1", - }, - }, - }, - { - FieldID: 108, - Name: "binary_vector_field", - DataType: schemapb.DataType_BinaryVector, - TypeParams: []*commonpb.KeyValuePair{ - { - Key: "dim", - Value: "8", - }, - }, - }, - } - - columns := []*schemapb.FieldData{ - { - FieldId: 100, - Type: schemapb.DataType_Bool, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: []bool{true, false, true}, - }, - }, - }, - }, - }, - { - FieldId: 101, - Type: schemapb.DataType_Int8, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{0, 0xf, 0x1f}, - }, - }, - }, - }, - }, - { - FieldId: 102, - Type: schemapb.DataType_Int16, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{0, 0xff, 0x1fff}, - }, - }, - }, - }, - }, - { - FieldId: 103, - Type: schemapb.DataType_Int32, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{0, 0xffff, 0x1fffffff}, - }, - }, - }, - }, - }, - { - FieldId: 104, - Type: schemapb.DataType_Int64, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: []int64{0, 0xffffffff, 0x1fffffffffffffff}, - }, - }, - }, - }, - }, - { - FieldId: 105, - Type: schemapb.DataType_Float, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: []float32{0, 0, 0}, - }, - }, - }, - }, - }, - { - FieldId: 106, - Type: schemapb.DataType_Double, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: &schemapb.DoubleArray{ - Data: []float64{0, 0, 0}, - }, - }, - }, - }, - }, - { - FieldId: 107, - Type: schemapb.DataType_FloatVector, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: 1, - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: []float32{0, 0, 0}, - }, - }, - }, - }, - }, - { - FieldId: 108, - Type: schemapb.DataType_BinaryVector, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: 8, - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: []byte{1, 2, 3}, - }, - }, - }, - }, - } - rows, err := TransferColumnBasedDataToRowBasedData(&schemapb.CollectionSchema{Fields: fieldSchema}, columns) - assert.NoError(t, err) - assert.Equal(t, 3, len(rows)) - if common.Endian == binary.LittleEndian { - // low byte in high address - - assert.ElementsMatch(t, - []byte{ - 1, // true - 0, // 0 - 0, 0, // 0 - 0, 0, 0, 0, // 0 - 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 0, 0, 0, 0, // 0 - 0, 0, 0, 0, 0, 0, 0, 0, // 0 - // b + 1, // "1" - 1, // 1 - 0, 0, 0, 0, // 0 - }, - rows[0].Value) - assert.ElementsMatch(t, - []byte{ - 0, // false - 0xf, // 0xf - 0, 0xff, // 0xff - 0, 0, 0xff, 0xff, // 0xffff - 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, // 0xffffffff - 0, 0, 0, 0, // 0 - 0, 0, 0, 0, 0, 0, 0, 0, // 0 - // b + 2, // "2" - 2, // 2 - 0, 0, 0, 0, // 0 - }, - rows[1].Value) - assert.ElementsMatch(t, - []byte{ - 1, // false - 0x1f, // 0x1f - 0xff, 0x1f, // 0x1fff - 0xff, 0xff, 0xff, 0x1f, // 0x1fffffff - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, // 0x1fffffffffffffff - 0, 0, 0, 0, // 0 - 0, 0, 0, 0, 0, 0, 0, 0, // 0 - // b + 3, // "3" - 3, // 3 - 0, 0, 0, 0, // 0 - }, - rows[2].Value) - } -} diff --git a/pkg/util/typeutil/gen_empty_field_data.go b/pkg/util/typeutil/gen_empty_field_data.go index 458f91c597..e3b54148e4 100644 --- a/pkg/util/typeutil/gen_empty_field_data.go +++ b/pkg/util/typeutil/gen_empty_field_data.go @@ -71,6 +71,32 @@ func genEmptyVarCharFieldData(field *schemapb.FieldSchema) *schemapb.FieldData { } } +func genEmptyArrayFieldData(field *schemapb.FieldSchema) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: field.GetDataType(), + FieldName: field.GetName(), + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ArrayData: &schemapb.ArrayArray{Data: nil}}, + }, + }, + FieldId: field.GetFieldID(), + } +} + +func genEmptyJSONFieldData(field *schemapb.FieldSchema) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: field.GetDataType(), + FieldName: field.GetName(), + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{JsonData: &schemapb.JSONArray{Data: nil}}, + }, + }, + FieldId: field.GetFieldID(), + } +} + func genEmptyBinaryVectorFieldData(field *schemapb.FieldSchema) (*schemapb.FieldData, error) { dim, err := GetDim(field) if err != nil { @@ -124,6 +150,10 @@ func GenEmptyFieldData(field *schemapb.FieldSchema) (*schemapb.FieldData, error) return genEmptyDoubleFieldData(field), nil case schemapb.DataType_VarChar: return genEmptyVarCharFieldData(field), nil + case schemapb.DataType_Array: + return genEmptyArrayFieldData(field), nil + case schemapb.DataType_JSON: + return genEmptyJSONFieldData(field), nil case schemapb.DataType_BinaryVector: return genEmptyBinaryVectorFieldData(field) case schemapb.DataType_FloatVector: diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 3ece6aacac..21a45d7a33 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -28,6 +28,8 @@ import ( "github.com/milvus-io/milvus/pkg/log" ) +const DynamicFieldMaxLength = 512 + func GetAvgLengthOfVarLengthField(fieldSchema *schemapb.FieldSchema) (int, error) { maxLength := 0 var err error @@ -37,10 +39,9 @@ func GetAvgLengthOfVarLengthField(fieldSchema *schemapb.FieldSchema) (int, error paramsMap[p.Key] = p.Value } - maxLengthPerRowKey := "max_length" - switch fieldSchema.DataType { case schemapb.DataType_VarChar: + maxLengthPerRowKey := "max_length" maxLengthPerRowValue, ok := paramsMap[maxLengthPerRowKey] if !ok { return 0, fmt.Errorf("the max_length was not specified, field type is %s", fieldSchema.DataType.String()) @@ -49,6 +50,8 @@ func GetAvgLengthOfVarLengthField(fieldSchema *schemapb.FieldSchema) (int, error if err != nil { return 0, err } + case schemapb.DataType_Array, schemapb.DataType_JSON: + return DynamicFieldMaxLength, nil default: return 0, fmt.Errorf("field %s is not a variable-length type", fieldSchema.DataType.String()) } @@ -74,7 +77,7 @@ func EstimateSizePerRecord(schema *schemapb.CollectionSchema) (int, error) { res += 4 case schemapb.DataType_Int64, schemapb.DataType_Double: res += 8 - case schemapb.DataType_VarChar: + case schemapb.DataType_VarChar, schemapb.DataType_Array, schemapb.DataType_JSON: maxLengthPerRow, err := GetAvgLengthOfVarLengthField(fs) if err != nil { return 0, err @@ -107,6 +110,42 @@ func EstimateSizePerRecord(schema *schemapb.CollectionSchema) (int, error) { return res, nil } +func CalcColumnSize(column *schemapb.FieldData) int { + res := 0 + switch column.GetType() { + case schemapb.DataType_Bool: + res += len(column.GetScalars().GetBoolData().GetData()) + case schemapb.DataType_Int8: + res += len(column.GetScalars().GetIntData().GetData()) + case schemapb.DataType_Int16: + res += len(column.GetScalars().GetIntData().GetData()) * 2 + case schemapb.DataType_Int32: + res += len(column.GetScalars().GetIntData().GetData()) * 4 + case schemapb.DataType_Int64: + res += len(column.GetScalars().GetLongData().GetData()) * 8 + case schemapb.DataType_Float: + res += len(column.GetScalars().GetFloatData().GetData()) * 4 + case schemapb.DataType_Double: + res += len(column.GetScalars().GetDoubleData().GetData()) * 8 + case schemapb.DataType_VarChar: + for _, str := range column.GetScalars().GetStringData().GetData() { + res += len(str) + } + case schemapb.DataType_Array: + for _, array := range column.GetScalars().GetArrayData().GetData() { + res += CalcColumnSize(&schemapb.FieldData{ + Field: &schemapb.FieldData_Scalars{Scalars: array}, + Type: column.GetScalars().GetArrayData().GetElementType(), + }) + } + case schemapb.DataType_JSON: + for _, str := range column.GetScalars().GetJsonData().GetData() { + res += len(str) + } + } + return res +} + func EstimateEntitySize(fieldsData []*schemapb.FieldData, rowOffset int) (int, error) { res := 0 for _, fs := range fieldsData { @@ -123,8 +162,21 @@ func EstimateEntitySize(fieldsData []*schemapb.FieldData, rowOffset int) (int, e if rowOffset >= len(fs.GetScalars().GetStringData().GetData()) { return 0, fmt.Errorf("offset out range of field datas") } - //TODO:: check len(varChar) <= maxLengthPerRow res += len(fs.GetScalars().GetStringData().Data[rowOffset]) + case schemapb.DataType_Array: + if rowOffset >= len(fs.GetScalars().GetArrayData().GetData()) { + return 0, fmt.Errorf("offset out range of field datas") + } + array := fs.GetScalars().GetArrayData().GetData()[rowOffset] + res += CalcColumnSize(&schemapb.FieldData{ + Field: &schemapb.FieldData_Scalars{Scalars: array}, + Type: fs.GetScalars().GetArrayData().GetElementType(), + }) + case schemapb.DataType_JSON: + if rowOffset >= len(fs.GetScalars().GetJsonData().GetData()) { + return 0, fmt.Errorf("offset out range of field datas") + } + res += len(fs.GetScalars().GetJsonData().GetData()[rowOffset]) case schemapb.DataType_BinaryVector: res += int(fs.GetVectors().GetDim()) case schemapb.DataType_FloatVector: @@ -347,6 +399,26 @@ func AppendFieldData(dst []*schemapb.FieldData, src []*schemapb.FieldData, idx i } else { dstScalar.GetStringData().Data = append(dstScalar.GetStringData().Data, srcScalar.StringData.Data[idx]) } + case *schemapb.ScalarField_ArrayData: + if dstScalar.GetArrayData() == nil { + dstScalar.Data = &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: []*schemapb.ScalarField{srcScalar.ArrayData.Data[idx]}, + }, + } + } else { + dstScalar.GetArrayData().Data = append(dstScalar.GetArrayData().Data, srcScalar.ArrayData.Data[idx]) + } + case *schemapb.ScalarField_JsonData: + if dstScalar.GetJsonData() == nil { + dstScalar.Data = &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: [][]byte{srcScalar.JsonData.Data[idx]}, + }, + } + } else { + dstScalar.GetJsonData().Data = append(dstScalar.GetJsonData().Data, srcScalar.JsonData.Data[idx]) + } default: log.Error("Not supported field type", zap.String("field type", fieldData.Type.String())) } diff --git a/pkg/util/typeutil/schema_test.go b/pkg/util/typeutil/schema_test.go index 2e42280a3e..e40b4da6aa 100644 --- a/pkg/util/typeutil/schema_test.go +++ b/pkg/util/typeutil/schema_test.go @@ -17,6 +17,7 @@ package typeutil import ( + "encoding/binary" "reflect" "testing" @@ -123,12 +124,23 @@ func TestSchema(t *testing.T) { }, }, }, + { + FieldID: 109, + Name: "field_array", + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int32, + }, + { + FieldID: 110, + Name: "field_json", + DataType: schemapb.DataType_JSON, + }, }, } t.Run("EstimateSizePerRecord", func(t *testing.T) { size, err := EstimateSizePerRecord(schema) - assert.Equal(t, 680, size) + assert.Equal(t, 680+DynamicFieldMaxLength*2, size) assert.Nil(t, err) }) @@ -360,6 +372,44 @@ func genFieldData(fieldName string, fieldID int64, fieldType schemapb.DataType, }, FieldId: fieldID, } + case schemapb.DataType_Int8: + data := []int32{} + for _, v := range fieldValue.([]int8) { + data = append(data, int32(v)) + } + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_Int8, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: data, + }, + }, + }, + }, + FieldId: fieldID, + } + case schemapb.DataType_Int16: + data := []int32{} + for _, v := range fieldValue.([]int16) { + data = append(data, int32(v)) + } + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_Int16, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: data, + }, + }, + }, + }, + FieldId: fieldID, + } case schemapb.DataType_Int32: fieldData = &schemapb.FieldData{ Type: schemapb.DataType_Int32, @@ -420,6 +470,21 @@ func genFieldData(fieldName string, fieldID int64, fieldType schemapb.DataType, }, FieldId: fieldID, } + case schemapb.DataType_VarChar: + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_VarChar, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: fieldValue.([]string), + }, + }, + }, + }, + FieldId: fieldID, + } case schemapb.DataType_BinaryVector: fieldData = &schemapb.FieldData{ Type: schemapb.DataType_BinaryVector, @@ -450,6 +515,49 @@ func genFieldData(fieldName string, fieldID int64, fieldType schemapb.DataType, }, FieldId: fieldID, } + case schemapb.DataType_Array: + data := fieldValue.([][]int32) + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: []*schemapb.ScalarField{}, + ElementType: schemapb.DataType_Int32, + }, + }, + }, + }, + } + + for _, list := range data { + arrayList := fieldData.GetScalars().GetArrayData() + arrayList.Data = append(arrayList.Data, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: list, + }, + }, + }) + } + + case schemapb.DataType_JSON: + fieldData = &schemapb.FieldData{ + Type: schemapb.DataType_JSON, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: fieldValue.([][]byte), + }, + }, + }, + }, + FieldId: fieldID, + } default: log.Error("not supported field type", zap.String("field type", fieldType.String())) } @@ -766,3 +874,119 @@ func TestComparePk(t *testing.T) { less = ComparePKInSlice(strPks, 2, 1) assert.False(t, less) } + +func TestCalcColumnSize(t *testing.T) { + fieldValues := map[int64]any{ + 100: []int8{0, 1}, + 101: []int16{0, 1}, + 102: []int32{0, 1}, + 103: []int64{0, 1}, + 104: []float32{0, 1}, + 105: []float64{0, 1}, + 106: []string{"0", "1"}, + 107: []float32{0, 1, 2, 3}, + 109: [][]int32{{1, 2, 3}, {4, 5, 6}}, + 110: [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, + } + schema := &schemapb.CollectionSchema{ + Name: "testColl", + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + FieldID: 100, + Name: "field_int8", + IsPrimaryKey: false, + DataType: schemapb.DataType_Int8, + }, + { + FieldID: 101, + Name: "field_int16", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_Int16, + }, + { + FieldID: 102, + Name: "field_int32", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_Int32, + }, + { + FieldID: 103, + Name: "field_int64", + IsPrimaryKey: true, + Description: "", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: 104, + Name: "field_float", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_Float, + }, + { + FieldID: 105, + Name: "field_double", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_Double, + }, + { + FieldID: 106, + Name: "field_string", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_VarChar, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "max_length", + Value: "125", + }, + }, + }, + { + FieldID: 109, + Name: "field_array", + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int32, + }, + { + FieldID: 110, + Name: "field_json", + DataType: schemapb.DataType_JSON, + }, + }, + } + + for _, field := range schema.GetFields() { + values := fieldValues[field.GetFieldID()] + fieldData := genFieldData(field.GetName(), field.GetFieldID(), field.GetDataType(), values, 0) + size := CalcColumnSize(fieldData) + expected := 0 + switch field.GetDataType() { + case schemapb.DataType_VarChar: + data := values.([]string) + for _, v := range data { + expected += len(v) + } + case schemapb.DataType_Array: + data := values.([][]int32) + for _, v := range data { + expected += binary.Size(v) + } + case schemapb.DataType_JSON: + data := values.([][]byte) + for _, v := range data { + expected += len(v) + } + + default: + expected = binary.Size(fieldValues[field.GetFieldID()]) + } + + assert.Equal(t, expected, size, field.GetName()) + } +} diff --git a/scripts/run_go_codecov.sh b/scripts/run_go_codecov.sh index bdd8ce5641..ee30dd4e9b 100755 --- a/scripts/run_go_codecov.sh +++ b/scripts/run_go_codecov.sh @@ -42,10 +42,10 @@ for d in $(go list ./internal/... | grep -v -e vendor -e kafka -e planparserv2/g done pushd pkg for d in $(go list ./... | grep -v -e vendor -e kafka -e planparserv2/generated -e mocks); do - go test -race ${APPLE_SILICON_FLAG} -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" - if [ -f profile.out ]; then - grep -v kafka profile.out | grep -v planparserv2/generated | grep -v mocks | sed '1d' >> ${FILE_COVERAGE_INFO} - rm profile.out + go test -race ${APPLE_SILICON_FLAG} -v -coverpkg=./... -coverprofile=../profile.out -covermode=atomic "$d" + if [ -f ../profile.out ]; then + grep -v kafka ../profile.out | grep -v planparserv2/generated | grep -v mocks | sed '1d' >> ../${FILE_COVERAGE_INFO} + rm ../profile.out fi done popd