Merge utils/Types.h with common/Types.h (#16445)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
pull/16751/head
Cai Yudong 2022-05-02 22:21:51 +08:00 committed by GitHub
parent 16f8539ee1
commit d5db4ae463
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 54 additions and 91 deletions

View File

@ -26,9 +26,8 @@
#include <boost/dynamic_bitset.hpp>
#include <NamedType/named_type.hpp>
#include "common/FieldMeta.h"
#include "pb/schema.pb.h"
#include "utils/Types.h"
#include "FieldMeta.h"
namespace milvus {
struct SearchResult {

View File

@ -58,7 +58,7 @@ MetricTypeToName(MetricType metric_type) {
// Reports whether `data_type` is one of the two types this check accepts for a
// primary key: INT64 or VARCHAR.
// NOTE(review): this hunk is rendered without +/- markers. The two return
// statements below appear to be the pre-change (`engine::DataType::INT64`) and
// post-change (`DataType::INT64`) versions of the same single line, not two
// statements of the real function body.
bool
IsPrimaryKeyDataType(DataType data_type) {
return data_type == engine::DataType::INT64 || data_type == DataType::VARCHAR;
return data_type == DataType::INT64 || data_type == DataType::VARCHAR;
}
} // namespace milvus

View File

@ -30,16 +30,35 @@
#include "knowhere/common/MetricType.h"
#include "pb/schema.pb.h"
#include "pb/segcore.pb.h"
#include "utils/Types.h"
namespace milvus {
// Fixed-width scalar aliases declared directly in namespace milvus.
// idx_t is the 64-bit signed type whose numeric limit defines MAX_ROW_COUNT.
using idx_t = int64_t;
// NOTE(review): no use of offset_t, date_t or distance_t is visible in this
// view; their intended meaning is only implied by their names -- confirm
// against callers before changing their widths.
using offset_t = int32_t;
using date_t = int32_t;
using distance_t = float;
// Type tag for a field's data.
// Other code in this change builds a DataType by direct conversion from
// `field_data->type()` (e.g. `DataType(field_data->type())`), so the explicit
// numeric values matter: presumably they mirror the protobuf schema's type
// enum -- confirm before renumbering or inserting values.
enum class DataType {
NONE = 0,
// Boolean and signed integer scalars.
BOOL = 1,
INT8 = 2,
INT16 = 3,
INT32 = 4,
INT64 = 5,
// Floating-point scalars.
FLOAT = 10,
DOUBLE = 11,
// String scalars.
STRING = 20,
VARCHAR = 21,
// Vector types.
VECTOR_BINARY = 100,
VECTOR_FLOAT = 101,
};
// Timestamp is an unsigned 64-bit value; MAX_TIMESTAMP is its largest
// representable value.
using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
constexpr auto MAX_TIMESTAMP = std::numeric_limits<Timestamp>::max();
// NOTE(review): this hunk is rendered without +/- markers. The two
// `using engine::...` declarations and the `engine::idx_t` form of
// MAX_ROW_COUNT look like the removed side of the change; the unqualified
// `idx_t` form looks like the added side. Only one MAX_ROW_COUNT definition
// can exist in the real header -- confirm against the repository.
using engine::DataType;
using engine::idx_t;
// MAX_ROW_COUNT is the largest value representable by idx_t.
constexpr auto MAX_ROW_COUNT = std::numeric_limits<engine::idx_t>::max();
constexpr auto MAX_ROW_COUNT = std::numeric_limits<idx_t>::max();
// Short names for the protobuf schema message types.
using ScalarArray = proto::schema::ScalarField;
using DataArray = proto::schema::FieldData;

View File

@ -11,12 +11,12 @@
#pragma once
#include <utils/Types.h>
#include "index/Index.h"
#include <string>
#include "common/type_c.h"
#include "index/Index.h"
#include "index/ScalarIndex.h"
#include "index/StringIndex.h"
#include <string>
namespace milvus::scalar {

View File

@ -30,7 +30,7 @@ SegmentInternalInterface::FillPrimaryKeys(const query::Plan* plan, SearchResult&
AssertInfo(IsPrimaryKeyDataType(get_schema()[pk_field_id].get_data_type()),
"Primary key field is not INT64 or VARCHAR type");
auto field_data = bulk_subscript(pk_field_id, results.seg_offsets_.data(), size);
results.pk_type_ = engine::DataType(field_data->type());
results.pk_type_ = DataType(field_data->type());
std::vector<PkType> pks(size);
ParsePksFromFieldData(pks, *field_data.get());

View File

@ -165,7 +165,7 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
// prepare data
auto& field_meta = schema_->operator[](field_id);
auto data_type = field_meta.get_data_type();
AssertInfo(data_type == engine::DataType(info.field_data->type()),
AssertInfo(data_type == DataType(info.field_data->type()),
"field type of load data is inconsistent with the schema");
auto field_data = insert_record_.get_field_data_base(field_id);
AssertInfo(field_data->empty(), "already exists");

View File

@ -1,50 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// NOTE(review): the hunk header for this file (`-1,50 +0,0`) indicates the
// whole file is removed by this change. The aliases and the DataType enum
// below carry the same names and values as the declarations shown for
// namespace milvus in the common/Types.h hunk.
namespace milvus::engine {
using idx_t = int64_t;
using offset_t = int32_t;
using date_t = int32_t;
using distance_t = float;
// NOTE(review): IDNumbers has no counterpart among the aliases visible in the
// merged-header hunk -- confirm nothing still depends on it.
using IDNumbers = std::vector<idx_t>;
// Type tag for a field's data; enumerators and values match the DataType enum
// shown in the merged header hunk.
enum class DataType {
NONE = 0,
BOOL = 1,
INT8 = 2,
INT16 = 3,
INT32 = 4,
INT64 = 5,
FLOAT = 10,
DOUBLE = 11,
STRING = 20,
VARCHAR = 21,
VECTOR_BINARY = 100,
VECTOR_FLOAT = 101,
};
} // namespace milvus::engine

View File

@ -29,7 +29,6 @@
#include "segcore/reduce_c.h"
#include "segcore/Reduce.h"
#include "test_utils/DataGen.h"
#include "utils/Types.h"
namespace chrono = std::chrono;
@ -2226,7 +2225,7 @@ TEST(CApiTest, SealedSegmentTest) {
age = e() % 2000;
}
auto blob = (void*)(&ages[0]);
FieldMeta field_meta(FieldName("age"), FieldId(101), engine::DataType::INT64);
FieldMeta field_meta(FieldName("age"), FieldId(101), DataType::INT64);
auto array = CreateScalarDataArrayFrom(ages.data(), N, field_meta);
std::string age_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*array.get(), &age_data);
@ -2257,19 +2256,19 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
auto query_ptr = vec_col.data() + 42000 * DIM;
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), engine::DataType::INT64);
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
std::string counter_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data);
assert(marshal == true);
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, engine::DataType::INT64);
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_ids_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data);
assert(marshal == true);
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, engine::DataType::INT64);
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
std::string timestamps_data;
marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data);
@ -2431,19 +2430,19 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
assert(marshal == true);
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), engine::DataType::INT64);
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
std::string counter_data;
marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data);
assert(marshal == true);
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, engine::DataType::INT64);
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_ids_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data);
assert(marshal == true);
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, engine::DataType::INT64);
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
std::string timestamps_data;
marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data);
@ -2542,19 +2541,19 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
auto query_ptr = vec_col.data() + 42000 * DIM;
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), engine::DataType::INT64);
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
std::string counter_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data);
assert(marshal == true);
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, engine::DataType::INT64);
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_ids_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data);
assert(marshal == true);
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, engine::DataType::INT64);
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
std::string timestamps_data;
marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data);

View File

@ -19,7 +19,6 @@
#include "segcore/SegmentGrowing.h"
#include "segcore/AckResponder.h"
using namespace milvus::engine;
using namespace milvus::segcore;
using std::vector;

View File

@ -46,7 +46,6 @@ generate_data(int N) {
TEST(SegmentCoreTest, NormalDistributionTest) {
using namespace milvus::segcore;
using namespace milvus::engine;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
schema->AddDebugField("age", DataType::INT32);
@ -60,7 +59,6 @@ TEST(SegmentCoreTest, NormalDistributionTest) {
// Test insert column-based data
TEST(SegmentCoreTest, MockTest2) {
using namespace milvus::segcore;
using namespace milvus::engine;
// schema
auto schema = std::make_shared<Schema>();
@ -77,7 +75,6 @@ TEST(SegmentCoreTest, MockTest2) {
TEST(SegmentCoreTest, SmallIndex) {
using namespace milvus::segcore;
using namespace milvus::engine;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
schema->AddDebugField("age", DataType::INT32);

View File

@ -142,7 +142,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
for (auto field_id : schema->get_field_ids()) {
auto field_meta = schema->operator[](field_id);
switch (field_meta.get_data_type()) {
case engine::DataType::VECTOR_FLOAT: {
case DataType::VECTOR_FLOAT: {
auto dim = field_meta.get_dim();
vector<float> final(dim * N);
bool is_ip = starts_with(field_meta.get_name().get(), "normalized");
@ -169,7 +169,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(final, N, field_meta);
break;
}
case engine::DataType::VECTOR_BINARY: {
case DataType::VECTOR_BINARY: {
auto dim = field_meta.get_dim();
Assert(dim % 8 == 0);
vector<uint8_t> data(dim / 8 * N);
@ -179,7 +179,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::INT64: {
case DataType::INT64: {
vector<int64_t> data(N);
// begin with counter
if (starts_with(field_meta.get_name().get(), "counter")) {
@ -198,7 +198,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::INT32: {
case DataType::INT32: {
vector<int> data(N);
for (auto& x : data) {
x = er() % (2 * N);
@ -206,7 +206,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::INT16: {
case DataType::INT16: {
vector<int16_t> data(N);
for (auto& x : data) {
x = er() % (2 * N);
@ -214,7 +214,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::INT8: {
case DataType::INT8: {
vector<int8_t> data(N);
for (auto& x : data) {
x = er() % (2 * N);
@ -222,7 +222,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::FLOAT: {
case DataType::FLOAT: {
vector<float> data(N);
for (auto& x : data) {
x = distr(er);
@ -230,7 +230,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::DOUBLE: {
case DataType::DOUBLE: {
vector<double> data(N);
for (auto& x : data) {
x = distr(er);
@ -238,7 +238,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0)
insert_cols(data, N, field_meta);
break;
}
case engine::DataType::VARCHAR: {
case DataType::VARCHAR: {
vector<std::string> data(N);
for (auto& x : data) {
x = std::to_string(er());
@ -367,7 +367,7 @@ SealedLoader(const GeneratedData& dataset, SegmentSealed& seg) {
auto row_count = dataset.row_ids_.size();
{
LoadFieldDataInfo info;
FieldMeta field_meta(FieldName("RowID"), RowFieldID, engine::DataType::INT64);
FieldMeta field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), row_count, field_meta);
info.field_data = array.release();
info.row_count = dataset.row_ids_.size();
@ -376,7 +376,7 @@ SealedLoader(const GeneratedData& dataset, SegmentSealed& seg) {
}
{
LoadFieldDataInfo info;
FieldMeta field_meta(FieldName("Timestamp"), TimestampFieldID, engine::DataType::INT64);
FieldMeta field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), row_count, field_meta);
info.field_data = array.release();
info.row_count = dataset.timestamps_.size();

View File

@ -244,10 +244,10 @@ GenDataset(int64_t N, const knowhere::MetricType& metric_type, bool is_binary, i
auto schema = std::make_shared<milvus::Schema>();
auto faiss_metric_type = knowhere::GetMetricType(metric_type);
if (!is_binary) {
schema->AddDebugField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type);
schema->AddDebugField("fakevec", milvus::DataType::VECTOR_FLOAT, dim, faiss_metric_type);
return milvus::segcore::DataGen(schema, N);
} else {
schema->AddDebugField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type);
schema->AddDebugField("fakebinvec", milvus::DataType::VECTOR_BINARY, dim, faiss_metric_type);
return milvus::segcore::DataGen(schema, N);
}
}