// Copyright (C) 2019-2020 Zilliz. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License #include #include #include #include "gtest/gtest-typed-test.h" #include "index/IndexFactory.h" #include "common/CDataType.h" #include "knowhere/comp/index_param.h" #include "test_utils/indexbuilder_test_utils.h" #include "test_utils/AssertUtils.h" #include "test_utils/DataGen.h" #include #include "test_utils/storage_test_utils.h" constexpr int64_t nb = 100; namespace indexcgo = milvus::proto::indexcgo; namespace schemapb = milvus::proto::schema; using milvus::index::ScalarIndexPtr; using milvus::segcore::GeneratedData; template class TypedScalarIndexTest : public ::testing::Test { protected: // void // SetUp() override { // } // void // TearDown() override { // } }; TYPED_TEST_CASE_P(TypedScalarIndexTest); TYPED_TEST_P(TypedScalarIndexTest, Dummy) { using T = TypeParam; std::cout << typeid(T()).name() << std::endl; std::cout << milvus::GetDType() << std::endl; } TYPED_TEST_P(TypedScalarIndexTest, Constructor) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); } } TYPED_TEST_P(TypedScalarIndexTest, Count) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); ASSERT_EQ(nb, scalar_index->Count()); } } TYPED_TEST_P(TypedScalarIndexTest, HasRawData) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); ASSERT_EQ(nb, scalar_index->Count()); ASSERT_TRUE(scalar_index->HasRawData()); } } TYPED_TEST_P(TypedScalarIndexTest, In) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); assert_in(scalar_index, arr); } } TYPED_TEST_P(TypedScalarIndexTest, NotIn) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); assert_not_in(scalar_index, arr); } } TYPED_TEST_P(TypedScalarIndexTest, Reverse) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); assert_reverse(scalar_index, arr); } } TYPED_TEST_P(TypedScalarIndexTest, Range) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); assert_range(scalar_index, arr); } } TYPED_TEST_P(TypedScalarIndexTest, Codec) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenArr(nb); scalar_index->Build(nb, arr.data()); auto binary_set = index->Serialize(nullptr); auto copy_index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info); copy_index->Load(binary_set); auto copy_scalar_index = dynamic_cast*>(copy_index.get()); ASSERT_EQ(nb, copy_scalar_index->Count()); assert_in(copy_scalar_index, arr); assert_not_in(copy_scalar_index, arr); assert_range(copy_scalar_index, arr); } } // TODO: it's easy to overflow for int8_t. Design more reasonable ut. using ScalarT = ::testing::Types; REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, Count, In, NotIn, Range, Codec, Reverse, HasRawData); INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexTest, ScalarT); template class TypedScalarIndexTestV2 : public ::testing::Test { public: struct Helper {}; protected: std::unordered_map> m_fields = {{typeid(int8_t), arrow::int8()}, {typeid(int16_t), arrow::int16()}, {typeid(int32_t), arrow::int32()}, {typeid(int64_t), arrow::int64()}, {typeid(float), arrow::float32()}, {typeid(double), arrow::float64()}}; std::shared_ptr TestSchema(int vec_size) { arrow::FieldVector fields; fields.push_back(arrow::field("pk", arrow::int64())); fields.push_back(arrow::field("ts", arrow::int64())); fields.push_back(arrow::field("scalar", m_fields[typeid(T)])); fields.push_back( arrow::field("vec", arrow::fixed_size_binary(vec_size))); return std::make_shared(fields); } std::shared_ptr TestRecords(int vec_size, GeneratedData& dataset, std::vector& scalars) { arrow::Int64Builder pk_builder; arrow::Int64Builder ts_builder; arrow::NumericBuilder scalar_builder; arrow::FixedSizeBinaryBuilder vec_builder( arrow::fixed_size_binary(vec_size)); auto xb_data = dataset.get_col(milvus::FieldId(100)); auto data = reinterpret_cast(xb_data.data()); for (auto i = 0; i < nb; ++i) { EXPECT_TRUE(pk_builder.Append(i).ok()); EXPECT_TRUE(ts_builder.Append(i).ok()); EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok()); } for (auto& v : scalars) { EXPECT_TRUE(scalar_builder.Append(v).ok()); } std::shared_ptr pk_array; EXPECT_TRUE(pk_builder.Finish(&pk_array).ok()); std::shared_ptr ts_array; EXPECT_TRUE(ts_builder.Finish(&ts_array).ok()); std::shared_ptr scalar_array; EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok()); std::shared_ptr vec_array; EXPECT_TRUE(vec_builder.Finish(&vec_array).ok()); auto schema = TestSchema(vec_size); auto rec_batch = arrow::RecordBatch::Make( schema, nb, {pk_array, ts_array, scalar_array, vec_array}); auto reader = arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie(); return reader; } std::shared_ptr TestSpace(int vec_size, GeneratedData& dataset, std::vector& scalars) { auto arrow_schema = TestSchema(vec_size); auto schema_options = std::make_shared(); schema_options->primary_column = "pk"; schema_options->version_column = "ts"; schema_options->vector_column = "vec"; auto schema = std::make_shared(arrow_schema, schema_options); EXPECT_TRUE(schema->Validate().ok()); auto space_res = milvus_storage::Space::Open( "file://" + boost::filesystem::canonical(temp_path).string(), milvus_storage::Options{schema}); EXPECT_TRUE(space_res.has_value()); auto space = std::move(space_res.value()); auto rec = TestRecords(vec_size, dataset, scalars); auto write_opt = milvus_storage::WriteOption{nb}; space->Write(rec.get(), &write_opt); return std::move(space); } void SetUp() override { temp_path = boost::filesystem::temp_directory_path() / boost::filesystem::unique_path(); boost::filesystem::create_directory(temp_path); auto vec_size = DIM * 4; auto dataset = GenDataset(nb, knowhere::metric::L2, false); auto scalars = GenArr(nb); space = TestSpace(vec_size, dataset, scalars); } void TearDown() override { boost::filesystem::remove_all(temp_path); } protected: boost::filesystem::path temp_path; std::shared_ptr space; }; template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::Int8Type; }; template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::Int16Type; }; template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::Int32Type; }; template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::Int64Type; }; template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::FloatType; }; template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::DoubleType; }; TYPED_TEST_CASE_P(TypedScalarIndexTestV2); TYPED_TEST_P(TypedScalarIndexTestV2, Base) { using T = TypeParam; auto dtype = milvus::GetDType(); auto index_types = GetIndexTypes(); for (const auto& index_type : index_types) { milvus::index::CreateIndexInfo create_index_info; create_index_info.field_type = milvus::DataType(dtype); create_index_info.index_type = index_type; create_index_info.field_name = "scalar"; auto storage_config = get_default_local_storage_config(); auto chunk_manager = milvus::storage::CreateChunkManager(storage_config); milvus::storage::FileManagerContext file_manager_context( {}, {.field_name = "scalar"}, chunk_manager, this->space); auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info, file_manager_context, this->space); auto scalar_index = dynamic_cast*>(index.get()); scalar_index->BuildV2(); scalar_index->UploadV2(); auto new_index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info, file_manager_context, this->space); auto new_scalar_index = dynamic_cast*>(new_index.get()); new_scalar_index->LoadV2(); ASSERT_EQ(nb, scalar_index->Count()); } } REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTestV2, Base); INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexTestV2, ScalarT);