Fix empty schema proto hack

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
pull/4973/head^2
FluorineDog 2021-03-17 11:35:28 +08:00 committed by yefu.chen
parent 1b4c354059
commit 2cec04ed90
11 changed files with 151 additions and 242 deletions

View File

@ -72,6 +72,12 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
schema->AddField(name, field_id, data_type);
}
}
if (schema->is_auto_id_) {
AssertInfo(!schema->primary_key_offset_opt_.has_value(), "auto id mode: shouldn't have primary key");
} else {
AssertInfo(schema->primary_key_offset_opt_.has_value(), "primary key should be specified when autoId is off");
}
return schema;
}
} // namespace milvus

View File

@ -15,8 +15,32 @@
#include <set>
#include <atomic>
namespace milvus::segcore {
// determines the largest number `ack` such that
// the consecutive range [0, ack) has been fully processed
// e.g.:
#if 0
// Usage walkthrough for AckResponder: shows how GetAck() advances as
// processed segments are reported out of order.
void
example() {
AckResponder acker; // initially empty
acker.AddSegment(10, 20); // add [10, 20)
auto ack1 = acker.GetAck(); // get 0, since acker has { [10, 20) }
acker.AddSegment(0, 5); // add [0, 5)
auto ack2 = acker.GetAck(); // get 5, since acker has { [0, 5), [10, 20) }
acker.AddSegment(5, 7); // add [5, 7), which will be concatenated with [0, 5)
auto ack3 = acker.GetAck(); // get 7, since acker has { [0, 7), [10, 20) }
acker.AddSegment(7, 10); // add [7, 10), which will be concatenated with [0, 7) & [10, 20)
auto ack4 = acker.GetAck(); // get 20, since acker has { [0, 20) }
}
#endif
class AckResponder {
public:
// specify that segment [seg_begin, seg_end) has been processed
// WARN: segments shouldn't overlap
void
AddSegment(int64_t seg_begin, int64_t seg_end) {
std::lock_guard lck(mutex_);
@ -27,6 +51,7 @@ class AckResponder {
}
}
// return ack
int64_t
GetAck() const {
return minimum_;

View File

@ -14,7 +14,9 @@ set(SEGCORE_FILES
load_index_c.cpp
SealedIndexingRecord.cpp
SegmentInterface.cpp
SegcoreConfig.h SegcoreConfig.cpp)
SegcoreConfig.cpp
SegcoreInit.cpp
)
add_library(milvus_segcore SHARED
${SEGCORE_FILES}
)

View File

@ -25,17 +25,18 @@ Collection::Collection(const std::string& collection_proto) : schema_proto_(coll
void
Collection::parse() {
if (schema_proto_.empty()) {
// TODO: remove hard code use unittests are ready
std::cout << "WARN: Use default schema" << std::endl;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
schema->AddDebugField("age", DataType::INT32);
collection_name_ = "default-collection";
schema_ = schema;
return;
}
// if (schema_proto_.empty()) {
// // TODO: remove hard code use unittests are ready
// std::cout << "WARN: Use default schema" << std::endl;
// auto schema = std::make_shared<Schema>();
// schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
// schema->AddDebugField("age", DataType::INT32);
// collection_name_ = "default-collection";
// schema_ = schema;
// return;
// }
Assert(!schema_proto_.empty());
milvus::proto::schema::CollectionSchema collection_schema;
auto suc = google::protobuf::TextFormat::ParseFromString(schema_proto_, &collection_schema);
@ -45,6 +46,7 @@ Collection::parse() {
collection_name_ = collection_schema.name();
schema_ = Schema::ParseFrom(collection_schema);
int i = 1 + 1;
}
} // namespace milvus::segcore

View File

@ -0,0 +1,21 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "index/thirdparty/faiss/FaissHook.h"
#include "segcore/SegcoreInit.h"
#include <iostream>
namespace milvus::segcore {
// Segcore start-up hook: runs the faiss hook initialization.
// The string passed to faiss::hook_init is only a scratch argument here;
// whatever is written into it is dropped when this function returns.
void
SegcoreInit() {
    std::string scratch_cpu_flags{};
    faiss::hook_init(scratch_cpu_flags);
}
} // namespace milvus::segcore

View File

@ -0,0 +1,15 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once

namespace milvus::segcore {

// Performs segcore start-up initialization.
// The implementation (SegcoreInit.cpp) invokes faiss::hook_init.
// Takes no arguments and returns nothing.
void
SegcoreInit();

}  // namespace milvus::segcore

View File

@ -22,6 +22,7 @@ set(MILVUS_TEST_FILES
test_span.cpp
test_load.cpp
init_gtest.cpp
test_init.cpp
)
add_executable(all_tests

View File

@ -35,31 +35,53 @@ using namespace milvus::segcore;
// using namespace milvus::proto;
using namespace milvus::knowhere;
// Returns the text-format collection schema shared by the C API tests:
// a collection named "default-collection" with autoID enabled, a 16-dim
// FloatVector field "fakevec" (fieldID 100, metric_type L2) and an Int32
// field "age" (fieldID 101).
//
// The returned pointer refers to a string literal, so it stays valid for the
// whole program and is identical across calls.
const char*
get_default_schema_config() {
    static constexpr const char* kDefaultSchemaConfig = R"(name: "default-collection"
autoID: true
fields: <
fieldID: 100
name: "fakevec"
data_type: FloatVector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int32
>)";
    return kDefaultSchemaConfig;
}
// Smoke test: creates a collection through the C API and deletes it again.
// There are no assertions; the test only exercises NewCollection/DeleteCollection.
TEST(CApiTest, CollectionTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
DeleteCollection(collection);
}
// Checks that GetCollectionName reports "default-collection" for a freshly
// created collection.
// NOTE(review): the check uses plain `assert`, which is compiled out when
// NDEBUG is defined, so it is only effective in builds without NDEBUG.
TEST(CApiTest, GetCollectionNameTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto name = GetCollectionName(collection);
assert(strcmp(name, "default-collection") == 0);
DeleteCollection(collection);
}
// Smoke test: creates a Growing segment (id 0) on a collection, then deletes both.
// NOTE(review): the collection is deleted before the segment that was created
// from it; confirm the segment does not depend on the collection's lifetime.
TEST(CApiTest, SegmentTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, InsertTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
std::vector<char> raw_data;
@ -94,8 +116,7 @@ TEST(CApiTest, InsertTest) {
}
TEST(CApiTest, DeleteTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
long delete_row_ids[] = {100000, 100001, 100002};
@ -111,8 +132,7 @@ TEST(CApiTest, DeleteTest) {
}
TEST(CApiTest, SearchTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
std::vector<char> raw_data;
@ -200,93 +220,8 @@ TEST(CApiTest, SearchTest) {
DeleteSegment(segment);
}
// TEST(CApiTest, BuildIndexTest) {
// auto schema_tmp_conf = "";
// auto collection = NewCollection(schema_tmp_conf);
// auto segment = NewSegment(collection, 0, Growing);
//
// std::vector<char> raw_data;
// std::vector<uint64_t> timestamps;
// std::vector<int64_t> uids;
// int N = 10000;
// std::default_random_engine e(67);
// for (int i = 0; i < N; ++i) {
// uids.push_back(100000 + i);
// timestamps.push_back(0);
// // append vec
// float vec[16];
// for (auto& x : vec) {
// x = e() % 2000 * 0.001 - 1.0;
// }
// raw_data.insert(raw_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec));
// int age = e() % 100;
// raw_data.insert(raw_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age));
// }
//
// auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
//
// auto offset = PreInsert(segment, N);
//
// auto ins_res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
// assert(ins_res == 0);
//
// // TODO: add index ptr
// Close(segment);
// BuildIndex(collection, segment);
//
// const char* dsl_string = R"(
// {
// "bool": {
// "vector": {
// "fakevec": {
// "metric_type": "L2",
// "params": {
// "nprobe": 10
// },
// "query": "$0",
// "topk": 10
// }
// }
// }
// })";
//
// namespace ser = milvus::proto::service;
// int num_queries = 10;
// int dim = 16;
// std::normal_distribution<double> dis(0, 1);
// ser::PlaceholderGroup raw_group;
// auto value = raw_group.add_placeholders();
// value->set_tag("$0");
// value->set_type(ser::PlaceholderType::VECTOR_FLOAT);
// for (int i = 0; i < num_queries; ++i) {
// std::vector<float> vec;
// for (int d = 0; d < dim; ++d) {
// vec.push_back(dis(e));
// }
// // std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float));
// value->add_values(vec.data(), vec.size() * sizeof(float));
// }
// auto blob = raw_group.SerializeAsString();
//
// auto plan = CreatePlan(collection, dsl_string);
// auto placeholderGroup = ParsePlaceholderGroup(plan, blob.data(), blob.length());
// std::vector<CPlaceholderGroup> placeholderGroups;
// placeholderGroups.push_back(placeholderGroup);
// timestamps.clear();
// timestamps.push_back(1);
//
// auto search_res = Search(segment, plan, placeholderGroups.data(), timestamps.data(), 1);
//
// DeletePlan(plan);
// DeletePlaceholderGroup(placeholderGroup);
// DeleteQueryResult(search_res);
// DeleteCollection(collection);
// DeleteSegment(segment);
//}
TEST(CApiTest, IsOpenedTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
auto is_opened = IsOpened(segment);
@ -297,8 +232,7 @@ TEST(CApiTest, IsOpenedTest) {
}
TEST(CApiTest, CloseTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
auto status = Close(segment);
@ -309,8 +243,7 @@ TEST(CApiTest, CloseTest) {
}
TEST(CApiTest, GetMemoryUsageInBytesTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
auto old_memory_usage_size = GetMemoryUsageInBytes(segment);
@ -336,7 +269,7 @@ TEST(CApiTest, GetMemoryUsageInBytesTest) {
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
// auto offset = PreInsert(segment, N);
// auto offset = PreInsert(segment, N);
int64_t offset;
PreInsert(segment, N, &offset);
@ -430,128 +363,8 @@ generate_index(
} // namespace
// TEST(CApiTest, TestSearchPreference) {
// auto schema_tmp_conf = "";
// auto collection = NewCollection(schema_tmp_conf);
// auto segment = NewSegment(collection, 0, Growing);
//
// auto beg = chrono::high_resolution_clock::now();
// auto next = beg;
// int N = 1000 * 1000 * 10;
// auto [raw_data, timestamps, uids] = generate_data(N);
// auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
//
// next = chrono::high_resolution_clock::now();
// std::cout << "generate_data: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms"
// << std::endl;
// beg = next;
//
// auto offset = PreInsert(segment, N);
// auto res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
// assert(res == 0);
// next = chrono::high_resolution_clock::now();
// std::cout << "insert: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms" << std::endl;
// beg = next;
//
// auto N_del = N / 100;
// std::vector<uint64_t> del_ts(N_del, 100);
// auto pre_off = PreDelete(segment, N_del);
// Delete(segment, pre_off, N_del, uids.data(), del_ts.data());
//
// next = chrono::high_resolution_clock::now();
// std::cout << "delete1: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms" << std::endl;
// beg = next;
//
// auto row_count = GetRowCount(segment);
// assert(row_count == N);
//
// std::vector<long> result_ids(10 * 16);
// std::vector<float> result_distances(10 * 16);
//
// CQueryInfo queryInfo{1, 10, "fakevec"};
// auto sea_res =
// Search(segment, queryInfo, 104, (float*)raw_data.data(), 16, result_ids.data(), result_distances.data());
//
// // ASSERT_EQ(sea_res, 0);
// // ASSERT_EQ(result_ids[0], 10 * N);
// // ASSERT_EQ(result_distances[0], 0);
//
// next = chrono::high_resolution_clock::now();
// std::cout << "query1: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms" << std::endl;
// beg = next;
// sea_res = Search(segment, queryInfo, 104, (float*)raw_data.data(), 16, result_ids.data(),
// result_distances.data());
//
// // ASSERT_EQ(sea_res, 0);
// // ASSERT_EQ(result_ids[0], 10 * N);
// // ASSERT_EQ(result_distances[0], 0);
//
// next = chrono::high_resolution_clock::now();
// std::cout << "query2: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms" << std::endl;
// beg = next;
//
// // Close(segment);
// // BuildIndex(segment);
//
// next = chrono::high_resolution_clock::now();
// std::cout << "build index: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms"
// << std::endl;
// beg = next;
//
// std::vector<int64_t> result_ids2(10);
// std::vector<float> result_distances2(10);
//
// sea_res =
// Search(segment, queryInfo, 104, (float*)raw_data.data(), 16, result_ids2.data(), result_distances2.data());
//
// // sea_res = Search(segment, nullptr, 104, result_ids2.data(),
// // result_distances2.data());
//
// next = chrono::high_resolution_clock::now();
// std::cout << "search10: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms" << std::endl;
// beg = next;
//
// sea_res =
// Search(segment, queryInfo, 104, (float*)raw_data.data(), 16, result_ids2.data(), result_distances2.data());
//
// next = chrono::high_resolution_clock::now();
// std::cout << "search11: " << chrono::duration_cast<chrono::milliseconds>(next - beg).count() << "ms" << std::endl;
// beg = next;
//
// // std::cout << "case 1" << std::endl;
// // for (int i = 0; i < 10; ++i) {
// // std::cout << result_ids[i] << "->" << result_distances[i] << std::endl;
// // }
// // std::cout << "case 2" << std::endl;
// // for (int i = 0; i < 10; ++i) {
// // std::cout << result_ids2[i] << "->" << result_distances2[i] << std::endl;
// // }
// //
// // for (auto x : result_ids2) {
// // ASSERT_GE(x, 10 * N + N_del);
// // ASSERT_LT(x, 10 * N + N);
// // }
//
// // auto iter = 0;
// // for(int i = 0; i < result_ids.size(); ++i) {
// // auto uid = result_ids[i];
// // auto dis = result_distances[i];
// // if(uid >= 10 * N + N_del) {
// // auto uid2 = result_ids2[iter];
// // auto dis2 = result_distances2[iter];
// // ASSERT_EQ(uid, uid2);
// // ASSERT_EQ(dis, dis2);
// // ++iter;
// // }
// // }
//
// DeleteCollection(collection);
// DeleteSegment(segment);
//}
TEST(CApiTest, GetDeletedCountTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
long delete_row_ids[] = {100000, 100001, 100002};
@ -571,14 +384,13 @@ TEST(CApiTest, GetDeletedCountTest) {
}
TEST(CApiTest, GetRowCountTest) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
int N = 10000;
auto [raw_data, timestamps, uids] = generate_data(N);
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
// auto offset = PreInsert(segment, N);
// auto offset = PreInsert(segment, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
@ -634,8 +446,7 @@ TEST(CApiTest, MergeInto) {
}
TEST(CApiTest, Reduce) {
auto schema_tmp_conf = "";
auto collection = NewCollection(schema_tmp_conf);
auto collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(collection, 0, Growing);
std::vector<char> raw_data;
@ -658,7 +469,7 @@ TEST(CApiTest, Reduce) {
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
// auto offset = PreInsert(segment, N);
// auto offset = PreInsert(segment, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto ins_res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
@ -1536,6 +1347,7 @@ TEST(CApiTest, UpdateSegmentIndex_With_binary_Predicate_Term) {
TEST(CApiTest, SealedSegmentTest) {
auto schema_tmp_conf = R"(name: "test"
autoID: true
fields: <
fieldID: 100
name: "vec"

View File

@ -120,7 +120,7 @@ TEST(Indexing, SmartBruteForce) {
}
}
TEST(Indexing, DISABLED_Naive) {
TEST(Indexing, Naive) {
constexpr int N = 10000;
constexpr int DIM = 16;
constexpr int TOPK = 10;

View File

@ -0,0 +1,23 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
//
// Created by Mike Dog on 2021/3/15.
//
#include "test_utils/DataGen.h"
#include <gtest/gtest.h>
#include "segcore/SegcoreInit.h"
// Smoke test that simply invokes SegcoreInit(). It contains no assertions,
// so it only exercises the call itself.
TEST(Init, Naive) {
    milvus::segcore::SegcoreInit();
}

View File

@ -89,6 +89,7 @@ func TestMetaService_processCollectionCreate(t *testing.T) {
id := "0"
value := `schema: <
name: "test"
autoID: true
fields: <
fieldID:100
name: "vec"
@ -151,6 +152,7 @@ func TestMetaService_processCreate(t *testing.T) {
key1 := Params.MetaRootPath + "/collection/0"
msg1 := `schema: <
name: "test"
autoID: true
fields: <
fieldID:100
name: "vec"