mirror of https://github.com/milvus-io/milvus.git
fix build index bug (#3066)
* build index
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* fix some problem
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* build index ut
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* typo
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* refine code
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* fix ut
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* typo
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* avoid ut error
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
* refine code
  Signed-off-by: yhmo <yihua.mo@zilliz.com>
pull/3059/head^2
parent
824ff3e1ff
commit
4872fff4e0
|
@ -960,8 +960,8 @@ DBImpl::BackgroundBuildIndexTask(std::vector<std::string> collection_names) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG_ENGINE_DEBUG_ << "Create BuildIndexJob for " << segment_ids.size() << " segments of " << collection_name;
|
||||||
scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(latest_ss, options_, segment_ids);
|
scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(latest_ss, options_, segment_ids);
|
||||||
|
|
||||||
scheduler::JobMgrInst::GetInstance()->Put(job);
|
scheduler::JobMgrInst::GetInstance()->Put(job);
|
||||||
job->WaitFinish();
|
job->WaitFinish();
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ SetSnapshotIndex(const std::string& collection_name, const std::string& field_na
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
auto new_element = std::make_shared<snapshot::FieldElement>(
|
auto new_element = std::make_shared<snapshot::FieldElement>(
|
||||||
ss->GetCollectionId(), field->GetID(), "structured_index", milvus::engine::FieldElementType::FET_INDEX);
|
ss->GetCollectionId(), field->GetID(), index_info.index_name_, milvus::engine::FieldElementType::FET_INDEX);
|
||||||
ss_context.new_field_elements.push_back(new_element);
|
ss_context.new_field_elements.push_back(new_element);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,7 +109,7 @@ GetSnapshotIndex(const std::string& collection_name, const std::string& field_na
|
||||||
} else {
|
} else {
|
||||||
for (auto& field_element : field_elements) {
|
for (auto& field_element : field_elements) {
|
||||||
if (field_element->GetFtype() == (int64_t)milvus::engine::FieldElementType::FET_INDEX) {
|
if (field_element->GetFtype() == (int64_t)milvus::engine::FieldElementType::FET_INDEX) {
|
||||||
index_info.index_name_ = "SORTED";
|
index_info.index_name_ = DEFAULT_STRUCTURED_INDEX_NAME;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
#include "db/Types.h"
|
#include "db/Types.h"
|
||||||
|
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace engine {
|
namespace engine {
|
||||||
|
@ -19,13 +20,14 @@ const char* DEFAULT_UID_NAME = "_uid";
|
||||||
const char* DEFAULT_RAW_DATA_NAME = "_raw";
|
const char* DEFAULT_RAW_DATA_NAME = "_raw";
|
||||||
const char* DEFAULT_BLOOM_FILTER_NAME = "_blf";
|
const char* DEFAULT_BLOOM_FILTER_NAME = "_blf";
|
||||||
const char* DEFAULT_DELETED_DOCS_NAME = "_del";
|
const char* DEFAULT_DELETED_DOCS_NAME = "_del";
|
||||||
const char* DEFAULT_INDEX_NAME = "_idx";
|
|
||||||
const char* DEFAULT_INDEX_COMPRESS_NAME = "_compress";
|
const char* DEFAULT_INDEX_COMPRESS_NAME = "_compress";
|
||||||
|
|
||||||
const char* PARAM_DIMENSION = "dim";
|
const char* DEFAULT_STRUCTURED_INDEX_NAME = "SORTED"; // this string should be defined in knowhere::IndexEnum
|
||||||
const char* PARAM_INDEX_METRIC_TYPE = "metric_type";
|
|
||||||
|
const char* PARAM_DIMENSION = knowhere::meta::DIM;
|
||||||
|
const char* PARAM_INDEX_METRIC_TYPE = knowhere::Metric::TYPE;
|
||||||
const char* PARAM_INDEX_EXTRA_PARAMS = "extra_params";
|
const char* PARAM_INDEX_EXTRA_PARAMS = "extra_params";
|
||||||
const char* PARAM_SEGMENT_SIZE = "segment_size";
|
const char* PARAM_SEGMENT_ROW_COUNT = "segment_row_count";
|
||||||
|
|
||||||
} // namespace engine
|
} // namespace engine
|
||||||
} // namespace milvus
|
} // namespace milvus
|
||||||
|
|
|
@ -82,13 +82,13 @@ extern const char* DEFAULT_UID_NAME;
|
||||||
extern const char* DEFAULT_RAW_DATA_NAME;
|
extern const char* DEFAULT_RAW_DATA_NAME;
|
||||||
extern const char* DEFAULT_BLOOM_FILTER_NAME;
|
extern const char* DEFAULT_BLOOM_FILTER_NAME;
|
||||||
extern const char* DEFAULT_DELETED_DOCS_NAME;
|
extern const char* DEFAULT_DELETED_DOCS_NAME;
|
||||||
extern const char* DEFAULT_INDEX_NAME;
|
|
||||||
extern const char* DEFAULT_INDEX_COMPRESS_NAME;
|
extern const char* DEFAULT_INDEX_COMPRESS_NAME;
|
||||||
|
extern const char* DEFAULT_STRUCTURED_INDEX_NAME;
|
||||||
|
|
||||||
extern const char* PARAM_DIMENSION;
|
extern const char* PARAM_DIMENSION;
|
||||||
extern const char* PARAM_INDEX_METRIC_TYPE;
|
extern const char* PARAM_INDEX_METRIC_TYPE;
|
||||||
extern const char* PARAM_INDEX_EXTRA_PARAMS;
|
extern const char* PARAM_INDEX_EXTRA_PARAMS;
|
||||||
extern const char* PARAM_SEGMENT_SIZE;
|
extern const char* PARAM_SEGMENT_ROW_COUNT;
|
||||||
|
|
||||||
using FieldType = meta::DataType;
|
using FieldType = meta::DataType;
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#endif
|
#endif
|
||||||
#include "config/ServerConfig.h"
|
#include "config/ServerConfig.h"
|
||||||
//#include "storage/s3/S3ClientWrapper.h"
|
//#include "storage/s3/S3ClientWrapper.h"
|
||||||
|
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||||
#include "utils/CommonUtil.h"
|
#include "utils/CommonUtil.h"
|
||||||
#include "utils/Log.h"
|
#include "utils/Log.h"
|
||||||
|
|
||||||
|
@ -53,12 +54,10 @@ IsSameIndex(const CollectionIndex& index1, const CollectionIndex& index2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
IsBinaryMetricType(int32_t metric_type) {
|
IsBinaryMetricType(const std::string& metric_type) {
|
||||||
return (metric_type == (int32_t)engine::MetricType::HAMMING) ||
|
return (metric_type == knowhere::Metric::HAMMING) || (metric_type == knowhere::Metric::JACCARD) ||
|
||||||
(metric_type == (int32_t)engine::MetricType::JACCARD) ||
|
(metric_type == knowhere::Metric::SUBSTRUCTURE) || (metric_type == knowhere::Metric::SUPERSTRUCTURE) ||
|
||||||
(metric_type == (int32_t)engine::MetricType::SUBSTRUCTURE) ||
|
(metric_type == knowhere::Metric::TANIMOTO);
|
||||||
(metric_type == (int32_t)engine::MetricType::SUPERSTRUCTURE) ||
|
|
||||||
(metric_type == (int32_t)engine::MetricType::TANIMOTO);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
meta::DateT
|
meta::DateT
|
||||||
|
|
|
@ -35,7 +35,7 @@ bool
|
||||||
IsSameIndex(const CollectionIndex& index1, const CollectionIndex& index2);
|
IsSameIndex(const CollectionIndex& index1, const CollectionIndex& index2);
|
||||||
|
|
||||||
bool
|
bool
|
||||||
IsBinaryMetricType(int32_t metric_type);
|
IsBinaryMetricType(const std::string& metric_type);
|
||||||
|
|
||||||
meta::DateT
|
meta::DateT
|
||||||
GetDate(const std::time_t& t, int day_delta = 0);
|
GetDate(const std::time_t& t, int day_delta = 0);
|
||||||
|
|
|
@ -518,26 +518,30 @@ ExecutionEngineImpl::BuildIndex() {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
segment_writer_ptr->SetVectorIndex(field_name, new_index);
|
segment_writer_ptr->SetVectorIndex(field_name, new_index);
|
||||||
rc.RecordSection("build structured index");
|
|
||||||
|
rc.RecordSection("build vector index for field: " + field_name);
|
||||||
|
|
||||||
// serialize index files
|
// serialize index files
|
||||||
status = segment_writer_ptr->WriteVectorIndex(field_name);
|
status = segment_writer_ptr->WriteVectorIndex(field_name);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
rc.RecordSection("serialize vector index");
|
|
||||||
|
rc.RecordSection("serialize vector index for field: " + field_name);
|
||||||
} else {
|
} else {
|
||||||
knowhere::IndexPtr index_ptr;
|
knowhere::IndexPtr index_ptr;
|
||||||
segment_ptr->GetStructuredIndex(field_name, index_ptr);
|
segment_ptr->GetStructuredIndex(field_name, index_ptr);
|
||||||
segment_writer_ptr->SetStructuredIndex(field_name, index_ptr);
|
segment_writer_ptr->SetStructuredIndex(field_name, index_ptr);
|
||||||
rc.RecordSection("build structured index");
|
|
||||||
|
rc.RecordSection("build structured index for field: " + field_name);
|
||||||
|
|
||||||
// serialize index files
|
// serialize index files
|
||||||
status = segment_writer_ptr->WriteStructuredIndex(field_name);
|
status = segment_writer_ptr->WriteStructuredIndex(field_name);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
rc.RecordSection("serialize structured index");
|
|
||||||
|
rc.RecordSection("serialize structured index for field: " + field_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -554,6 +558,7 @@ ExecutionEngineImpl::CreateSnapshotIndexFile(AddSegmentFileOperation& operation,
|
||||||
auto& segment = segment_visitor->GetSegment();
|
auto& segment = segment_visitor->GetSegment();
|
||||||
auto field_visitor = segment_visitor->GetFieldVisitor(field_name);
|
auto field_visitor = segment_visitor->GetFieldVisitor(field_name);
|
||||||
auto& field = field_visitor->GetField();
|
auto& field = field_visitor->GetField();
|
||||||
|
bool is_vector = IsVectorField(field);
|
||||||
|
|
||||||
auto element_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_INDEX);
|
auto element_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_INDEX);
|
||||||
if (element_visitor == nullptr) {
|
if (element_visitor == nullptr) {
|
||||||
|
@ -575,7 +580,8 @@ ExecutionEngineImpl::CreateSnapshotIndexFile(AddSegmentFileOperation& operation,
|
||||||
// index already build?
|
// index already build?
|
||||||
std::string file_path = engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(
|
std::string file_path = engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(
|
||||||
segment_reader_->GetCollectionsPath(), seg_file);
|
segment_reader_->GetCollectionsPath(), seg_file);
|
||||||
file_path += codec::VectorIndexFormat::FilePostfix();
|
file_path +=
|
||||||
|
(is_vector ? codec::VectorIndexFormat::FilePostfix() : codec::StructuredIndexFormat::FilePostfix());
|
||||||
if (CommonUtil::IsFileExist(file_path)) {
|
if (CommonUtil::IsFileExist(file_path)) {
|
||||||
return Status(DB_ERROR, "Could not build index: Index file already exist"); // index already build
|
return Status(DB_ERROR, "Could not build index: Index file already exist"); // index already build
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,8 +23,6 @@
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace engine {
|
namespace engine {
|
||||||
|
|
||||||
static const char* DIMENSION = "dim";
|
|
||||||
|
|
||||||
// TODO(linxj): replace with VecIndex::IndexType
|
// TODO(linxj): replace with VecIndex::IndexType
|
||||||
enum class EngineType {
|
enum class EngineType {
|
||||||
INVALID = 0,
|
INVALID = 0,
|
||||||
|
@ -107,11 +105,6 @@ static std::map<std::string, MetricType> s_map_metric_type = {
|
||||||
{"SUPERSTRUCTURE", MetricType::SUPERSTRUCTURE},
|
{"SUPERSTRUCTURE", MetricType::SUPERSTRUCTURE},
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StructuredIndexType {
|
|
||||||
INVALID = 0,
|
|
||||||
SORTED = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace meta {
|
namespace meta {
|
||||||
|
|
||||||
using DateT = int;
|
using DateT = int;
|
||||||
|
|
|
@ -20,11 +20,11 @@ namespace milvus {
|
||||||
namespace scheduler {
|
namespace scheduler {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
// each vector field create one group
|
||||||
// each vector field in one group
|
|
||||||
// all structured fields put into one group
|
// all structured fields put into one group
|
||||||
void
|
void
|
||||||
WhichFieldsToBuild(const engine::snapshot::ScopedSnapshotT& snapshot, std::vector<engine::TargetFields>& field_groups) {
|
WhichFieldsToBuild(const engine::snapshot::ScopedSnapshotT& snapshot, engine::snapshot::ID_TYPE segment_id,
|
||||||
|
std::vector<engine::TargetFields>& field_groups) {
|
||||||
auto field_names = snapshot->GetFieldNames();
|
auto field_names = snapshot->GetFieldNames();
|
||||||
engine::TargetFields structured_fields;
|
engine::TargetFields structured_fields;
|
||||||
for (auto& field_name : field_names) {
|
for (auto& field_name : field_names) {
|
||||||
|
@ -33,16 +33,22 @@ WhichFieldsToBuild(const engine::snapshot::ScopedSnapshotT& snapshot, std::vecto
|
||||||
bool is_vector = (ftype == engine::FIELD_TYPE::VECTOR_FLOAT || ftype == engine::FIELD_TYPE::VECTOR_BINARY);
|
bool is_vector = (ftype == engine::FIELD_TYPE::VECTOR_FLOAT || ftype == engine::FIELD_TYPE::VECTOR_BINARY);
|
||||||
auto elements = snapshot->GetFieldElementsByField(field_name);
|
auto elements = snapshot->GetFieldElementsByField(field_name);
|
||||||
for (auto& element : elements) {
|
for (auto& element : elements) {
|
||||||
if (element->GetFtype() == engine::FieldElementType::FET_INDEX) {
|
if (element->GetFtype() != engine::FieldElementType::FET_INDEX) {
|
||||||
// index has been defined
|
continue; // only check index element
|
||||||
|
}
|
||||||
|
|
||||||
|
auto element_file = snapshot->GetSegmentFile(segment_id, element->GetID());
|
||||||
|
if (element_file != nullptr) {
|
||||||
|
continue; // index file has been created, no need to build index for this field
|
||||||
|
}
|
||||||
|
|
||||||
|
// index has been defined, but the index file is not yet created, so an index needs to be built for this field
|
||||||
if (is_vector) {
|
if (is_vector) {
|
||||||
engine::TargetFields fields = {field_name};
|
engine::TargetFields fields = {field_name};
|
||||||
field_groups.emplace_back(fields);
|
field_groups.emplace_back(fields);
|
||||||
} else {
|
} else {
|
||||||
structured_fields.insert(field_name);
|
structured_fields.insert(field_name);
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,11 +66,11 @@ BuildIndexJob::BuildIndexJob(const engine::snapshot::ScopedSnapshotT& snapshot,
|
||||||
|
|
||||||
void
|
void
|
||||||
BuildIndexJob::OnCreateTasks(JobTasks& tasks) {
|
BuildIndexJob::OnCreateTasks(JobTasks& tasks) {
|
||||||
|
for (auto& segment_id : segment_ids_) {
|
||||||
std::vector<engine::TargetFields> field_groups;
|
std::vector<engine::TargetFields> field_groups;
|
||||||
WhichFieldsToBuild(snapshot_, field_groups);
|
WhichFieldsToBuild(snapshot_, segment_id, field_groups);
|
||||||
for (auto& id : segment_ids_) {
|
|
||||||
for (auto& group : field_groups) {
|
for (auto& group : field_groups) {
|
||||||
auto task = std::make_shared<BuildIndexTask>(snapshot_, options_, id, group, nullptr);
|
auto task = std::make_shared<BuildIndexTask>(snapshot_, options_, segment_id, group, nullptr);
|
||||||
task->job_ = this;
|
task->job_ = this;
|
||||||
tasks.emplace_back(task);
|
tasks.emplace_back(task);
|
||||||
}
|
}
|
||||||
|
@ -80,6 +86,5 @@ BuildIndexJob::Dump() const {
|
||||||
ret.insert(base.begin(), base.end());
|
ret.insert(base.begin(), base.end());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace scheduler
|
} // namespace scheduler
|
||||||
} // namespace milvus
|
} // namespace milvus
|
||||||
|
|
|
@ -378,7 +378,6 @@ SegmentWriter::WriteStructuredIndex(const std::string& field_name) {
|
||||||
return Status(DB_ERROR, "Structured index doesn't exist: " + status.message());
|
return Status(DB_ERROR, "Structured index doesn't exist: " + status.message());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& field_visitors_map = segment_visitor_->GetFieldVisitors();
|
|
||||||
auto field = segment_visitor_->GetFieldVisitor(field_name);
|
auto field = segment_visitor_->GetFieldVisitor(field_name);
|
||||||
if (field == nullptr) {
|
if (field == nullptr) {
|
||||||
return Status(DB_ERROR, "Invalid filed name: " + field_name);
|
return Status(DB_ERROR, "Invalid filed name: " + field_name);
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
#include <fiu-local.h>
|
#include <fiu-local.h>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
|
@ -27,7 +28,7 @@ namespace {
|
||||||
|
|
||||||
constexpr size_t NAME_SIZE_LIMIT = 255;
|
constexpr size_t NAME_SIZE_LIMIT = 255;
|
||||||
constexpr int64_t COLLECTION_DIMENSION_LIMIT = 32768;
|
constexpr int64_t COLLECTION_DIMENSION_LIMIT = 32768;
|
||||||
constexpr int32_t INDEX_FILE_SIZE_LIMIT = 4096; // index trigger size max = 4096 MB
|
constexpr int32_t SEGMENT_ROW_COUNT_LIMIT = 4 * 1024 * 1024;
|
||||||
constexpr int64_t M_BYTE = 1024 * 1024;
|
constexpr int64_t M_BYTE = 1024 * 1024;
|
||||||
constexpr int64_t MAX_INSERT_DATA_SIZE = 256 * M_BYTE;
|
constexpr int64_t MAX_INSERT_DATA_SIZE = 256 * M_BYTE;
|
||||||
|
|
||||||
|
@ -167,45 +168,41 @@ ValidateFieldName(const std::string& field_name) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Status
|
Status
|
||||||
ValidateIndexName(const std::string& index_name) {
|
ValidateIndexType(const std::string& index_type) {
|
||||||
// Index name shouldn't be empty.
|
// Index name shouldn't be empty.
|
||||||
if (index_name.empty()) {
|
if (index_type.empty()) {
|
||||||
std::string msg = "Index name should not be empty.";
|
std::string msg = "Index name should not be empty.";
|
||||||
LOG_SERVER_ERROR_ << msg;
|
LOG_SERVER_ERROR_ << msg;
|
||||||
return Status(SERVER_INVALID_FIELD_NAME, msg);
|
return Status(SERVER_INVALID_FIELD_NAME, msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string invalid_msg = "Invalid index name: " + index_name + ". ";
|
static std::set<std::string> s_valid_index_names = {
|
||||||
// Index name size shouldn't exceed 255.
|
knowhere::IndexEnum::INVALID,
|
||||||
if (index_name.size() > NAME_SIZE_LIMIT) {
|
knowhere::IndexEnum::INDEX_FAISS_IDMAP,
|
||||||
std::string msg = invalid_msg + "The length of a field name must be less than 255 characters.";
|
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||||
LOG_SERVER_ERROR_ << msg;
|
knowhere::IndexEnum::INDEX_FAISS_IVFPQ,
|
||||||
return Status(SERVER_INVALID_FIELD_NAME, msg);
|
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
|
||||||
}
|
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8NR,
|
||||||
|
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8H,
|
||||||
|
knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
||||||
|
knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||||
|
knowhere::IndexEnum::INDEX_NSG,
|
||||||
|
knowhere::IndexEnum::INDEX_HNSW,
|
||||||
|
knowhere::IndexEnum::INDEX_ANNOY,
|
||||||
|
knowhere::IndexEnum::INDEX_HNSW_SQ8NM,
|
||||||
|
};
|
||||||
|
|
||||||
// Field name first character should be underscore or character.
|
if (s_valid_index_names.find(index_type) == s_valid_index_names.end()) {
|
||||||
char first_char = index_name[0];
|
std::string msg = "Invalid index name: " + index_type;
|
||||||
if (first_char != '_' && std::isalpha(first_char) == 0) {
|
|
||||||
std::string msg = invalid_msg + "The first character of a field name must be an underscore or letter.";
|
|
||||||
LOG_SERVER_ERROR_ << msg;
|
LOG_SERVER_ERROR_ << msg;
|
||||||
return Status(SERVER_INVALID_FIELD_NAME, msg);
|
return Status(SERVER_INVALID_INDEX_TYPE, msg);
|
||||||
}
|
|
||||||
|
|
||||||
int64_t field_name_size = index_name.size();
|
|
||||||
for (int64_t i = 1; i < field_name_size; ++i) {
|
|
||||||
char name_char = index_name[i];
|
|
||||||
if (name_char != '_' && std::isalnum(name_char) == 0) {
|
|
||||||
std::string msg = invalid_msg + "Field name cannot only contain numbers, letters, and underscores.";
|
|
||||||
LOG_SERVER_ERROR_ << msg;
|
|
||||||
return Status(SERVER_INVALID_FIELD_NAME, msg);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status
|
Status
|
||||||
ValidateTableDimension(int64_t dimension, int64_t metric_type) {
|
ValidateVectorDimension(int64_t dimension, const std::string& metric_type) {
|
||||||
if (dimension <= 0 || dimension > COLLECTION_DIMENSION_LIMIT) {
|
if (dimension <= 0 || dimension > COLLECTION_DIMENSION_LIMIT) {
|
||||||
std::string msg = "Invalid collection dimension: " + std::to_string(dimension) + ". " +
|
std::string msg = "Invalid collection dimension: " + std::to_string(dimension) + ". " +
|
||||||
"The collection dimension must be within the range of 1 ~ " +
|
"The collection dimension must be within the range of 1 ~ " +
|
||||||
|
@ -226,16 +223,6 @@ ValidateTableDimension(int64_t dimension, int64_t metric_type) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status
|
|
||||||
ValidateIndexType(const std::string& index_type) {
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
Status
|
|
||||||
ValidateMetricType(const std::string& metric_type) {
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
Status
|
Status
|
||||||
ValidateIndexParams(const milvus::json& index_params, int64_t dimension, const std::string& index_type) {
|
ValidateIndexParams(const milvus::json& index_params, int64_t dimension, const std::string& index_type) {
|
||||||
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IDMAP ||
|
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IDMAP ||
|
||||||
|
@ -322,23 +309,32 @@ ValidateIndexParams(const milvus::json& index_params, int64_t dimension, const s
|
||||||
}
|
}
|
||||||
|
|
||||||
Status
|
Status
|
||||||
ValidateCollectionIndexFileSize(int64_t index_file_size) {
|
ValidateSegmentRowCount(int64_t segment_row_count) {
|
||||||
if (index_file_size <= 0 || index_file_size > INDEX_FILE_SIZE_LIMIT) {
|
if (segment_row_count <= 0 || segment_row_count > SEGMENT_ROW_COUNT_LIMIT) {
|
||||||
std::string msg = "Invalid index file size: " + std::to_string(index_file_size) + ". " +
|
std::string msg = "Invalid segment row count: " + std::to_string(segment_row_count) + ". " +
|
||||||
"The index file size must be within the range of 1 ~ " +
|
"The segment row count must be within the range of 1 ~ " +
|
||||||
std::to_string(INDEX_FILE_SIZE_LIMIT) + ".";
|
std::to_string(SEGMENT_ROW_COUNT_LIMIT) + ".";
|
||||||
LOG_SERVER_ERROR_ << msg;
|
LOG_SERVER_ERROR_ << msg;
|
||||||
return Status(SERVER_INVALID_INDEX_FILE_SIZE, msg);
|
return Status(SERVER_INVALID_SEGMENT_ROW_COUNT, msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status
|
Status
|
||||||
ValidateCollectionIndexMetricType(int32_t metric_type) {
|
ValidateIndexMetricType(const std::string& metric_type) {
|
||||||
if (metric_type <= 0 || metric_type > static_cast<int32_t>(engine::MetricType::MAX_VALUE)) {
|
static std::set<std::string> s_valid_metric = {
|
||||||
std::string msg = "Invalid index metric type: " + std::to_string(metric_type) + ". " +
|
milvus::knowhere::Metric::L2,
|
||||||
"Make sure the metric type is in MetricType list.";
|
milvus::knowhere::Metric::IP,
|
||||||
|
milvus::knowhere::Metric::HAMMING,
|
||||||
|
milvus::knowhere::Metric::JACCARD,
|
||||||
|
milvus::knowhere::Metric::TANIMOTO,
|
||||||
|
milvus::knowhere::Metric::SUBSTRUCTURE,
|
||||||
|
milvus::knowhere::Metric::SUPERSTRUCTURE,
|
||||||
|
};
|
||||||
|
if (s_valid_metric.find(metric_type) == s_valid_metric.end()) {
|
||||||
|
std::string msg =
|
||||||
|
"Invalid index metric type: " + metric_type + ". " + "Make sure the metric type is in MetricType list.";
|
||||||
LOG_SERVER_ERROR_ << msg;
|
LOG_SERVER_ERROR_ << msg;
|
||||||
return Status(SERVER_INVALID_INDEX_METRIC_TYPE, msg);
|
return Status(SERVER_INVALID_INDEX_METRIC_TYPE, msg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,23 +30,20 @@ ValidateCollectionName(const std::string& collection_name);
|
||||||
extern Status
|
extern Status
|
||||||
ValidateFieldName(const std::string& field_name);
|
ValidateFieldName(const std::string& field_name);
|
||||||
|
|
||||||
extern Status
|
|
||||||
ValidateIndexName(const std::string& index_name);
|
|
||||||
|
|
||||||
extern Status
|
|
||||||
ValidateTableDimension(int64_t dimension, int64_t metric_type);
|
|
||||||
|
|
||||||
extern Status
|
extern Status
|
||||||
ValidateIndexType(const std::string& index_type);
|
ValidateIndexType(const std::string& index_type);
|
||||||
|
|
||||||
extern Status
|
extern Status
|
||||||
ValidateMetricType(const std::string& metric_type);
|
ValidateVectorDimension(int64_t dimension, const std::string& metric_type);
|
||||||
|
|
||||||
extern Status
|
extern Status
|
||||||
ValidateIndexParams(const milvus::json& index_params, int64_t dimension, const std::string& index_type);
|
ValidateIndexParams(const milvus::json& index_params, int64_t dimension, const std::string& index_type);
|
||||||
|
|
||||||
extern Status
|
extern Status
|
||||||
ValidateCollectionIndexFileSize(int64_t index_file_size);
|
ValidateSegmentRowCount(int64_t segment_row_count);
|
||||||
|
|
||||||
|
extern Status
|
||||||
|
ValidateIndexMetricType(const std::string& metric_type);
|
||||||
|
|
||||||
extern Status
|
extern Status
|
||||||
ValidateSearchTopk(int64_t top_k);
|
ValidateSearchTopk(int64_t top_k);
|
||||||
|
|
|
@ -91,11 +91,11 @@ CreateCollectionReq::OnExecute() {
|
||||||
create_collection_context.fields_schema[field] = {field_element};
|
create_collection_context.fields_schema[field] = {field_element};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!extra_params_.contains(engine::PARAM_SEGMENT_SIZE)) {
|
if (!extra_params_.contains(engine::PARAM_SEGMENT_ROW_COUNT)) {
|
||||||
return Status(SERVER_UNEXPECTED_ERROR, "Segment size not defined");
|
return Status(SERVER_UNEXPECTED_ERROR, "Segment row count not defined");
|
||||||
} else {
|
} else {
|
||||||
auto segment_size = extra_params_[engine::PARAM_SEGMENT_SIZE].get<int64_t>();
|
auto segment_row = extra_params_[engine::PARAM_SEGMENT_ROW_COUNT].get<int64_t>();
|
||||||
STATUS_CHECK(ValidateCollectionIndexFileSize(segment_size));
|
STATUS_CHECK(ValidateSegmentRowCount(segment_row));
|
||||||
}
|
}
|
||||||
|
|
||||||
// step 3: create collection
|
// step 3: create collection
|
||||||
|
|
|
@ -59,7 +59,7 @@ CreateIndexReq::OnExecute() {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
status = ValidateIndexName(index_name_);
|
status = ValidateIndexType(index_name_);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
@ -92,7 +92,7 @@ CreateIndexReq::OnExecute() {
|
||||||
if (engine::IsVectorField(field)) {
|
if (engine::IsVectorField(field)) {
|
||||||
int32_t field_type = field->GetFtype();
|
int32_t field_type = field->GetFtype();
|
||||||
auto params = field->GetParams();
|
auto params = field->GetParams();
|
||||||
int64_t dimension = params[engine::DIMENSION].get<int64_t>();
|
int64_t dimension = params[engine::PARAM_DIMENSION].get<int64_t>();
|
||||||
|
|
||||||
// validate index type
|
// validate index type
|
||||||
std::string index_type = 0;
|
std::string index_type = 0;
|
||||||
|
@ -109,7 +109,7 @@ CreateIndexReq::OnExecute() {
|
||||||
if (json_params_.contains("metric_type")) {
|
if (json_params_.contains("metric_type")) {
|
||||||
metric_type = json_params_["metric_type"].get<std::string>();
|
metric_type = json_params_["metric_type"].get<std::string>();
|
||||||
}
|
}
|
||||||
status = ValidateMetricType(metric_type);
|
status = ValidateIndexMetricType(metric_type);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,7 +72,7 @@ SearchReq::OnExecute() {
|
||||||
field_types.insert(std::make_pair(field->GetName(), (engine::meta::DataType)field->GetFtype()));
|
field_types.insert(std::make_pair(field->GetName(), (engine::meta::DataType)field->GetFtype()));
|
||||||
if (field->GetFtype() == (int)engine::meta::DataType::VECTOR_FLOAT ||
|
if (field->GetFtype() == (int)engine::meta::DataType::VECTOR_FLOAT ||
|
||||||
field->GetFtype() == (int)engine::meta::DataType::VECTOR_BINARY) {
|
field->GetFtype() == (int)engine::meta::DataType::VECTOR_BINARY) {
|
||||||
dimension = field->GetParams()[engine::DIMENSION];
|
dimension = field->GetParams()[engine::PARAM_DIMENSION];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ ErrorMap(ErrorCode code) {
|
||||||
{SERVER_INVALID_NPROBE, ::milvus::grpc::ErrorCode::ILLEGAL_ARGUMENT},
|
{SERVER_INVALID_NPROBE, ::milvus::grpc::ErrorCode::ILLEGAL_ARGUMENT},
|
||||||
{SERVER_INVALID_INDEX_NLIST, ::milvus::grpc::ErrorCode::ILLEGAL_NLIST},
|
{SERVER_INVALID_INDEX_NLIST, ::milvus::grpc::ErrorCode::ILLEGAL_NLIST},
|
||||||
{SERVER_INVALID_INDEX_METRIC_TYPE, ::milvus::grpc::ErrorCode::ILLEGAL_METRIC_TYPE},
|
{SERVER_INVALID_INDEX_METRIC_TYPE, ::milvus::grpc::ErrorCode::ILLEGAL_METRIC_TYPE},
|
||||||
{SERVER_INVALID_INDEX_FILE_SIZE, ::milvus::grpc::ErrorCode::ILLEGAL_ARGUMENT},
|
{SERVER_INVALID_SEGMENT_ROW_COUNT, ::milvus::grpc::ErrorCode::ILLEGAL_ARGUMENT},
|
||||||
{SERVER_ILLEGAL_VECTOR_ID, ::milvus::grpc::ErrorCode::ILLEGAL_VECTOR_ID},
|
{SERVER_ILLEGAL_VECTOR_ID, ::milvus::grpc::ErrorCode::ILLEGAL_VECTOR_ID},
|
||||||
{SERVER_ILLEGAL_SEARCH_RESULT, ::milvus::grpc::ErrorCode::ILLEGAL_SEARCH_RESULT},
|
{SERVER_ILLEGAL_SEARCH_RESULT, ::milvus::grpc::ErrorCode::ILLEGAL_SEARCH_RESULT},
|
||||||
{SERVER_CACHE_FULL, ::milvus::grpc::ErrorCode::CACHE_FAILED},
|
{SERVER_CACHE_FULL, ::milvus::grpc::ErrorCode::CACHE_FAILED},
|
||||||
|
|
|
@ -60,7 +60,7 @@ WebErrorMap(ErrorCode code) {
|
||||||
{SERVER_INVALID_NPROBE, StatusCode::ILLEGAL_ARGUMENT},
|
{SERVER_INVALID_NPROBE, StatusCode::ILLEGAL_ARGUMENT},
|
||||||
{SERVER_INVALID_INDEX_NLIST, StatusCode::ILLEGAL_NLIST},
|
{SERVER_INVALID_INDEX_NLIST, StatusCode::ILLEGAL_NLIST},
|
||||||
{SERVER_INVALID_INDEX_METRIC_TYPE, StatusCode::ILLEGAL_METRIC_TYPE},
|
{SERVER_INVALID_INDEX_METRIC_TYPE, StatusCode::ILLEGAL_METRIC_TYPE},
|
||||||
{SERVER_INVALID_INDEX_FILE_SIZE, StatusCode::ILLEGAL_ARGUMENT},
|
{SERVER_INVALID_SEGMENT_ROW_COUNT, StatusCode::ILLEGAL_ARGUMENT},
|
||||||
{SERVER_ILLEGAL_VECTOR_ID, StatusCode::ILLEGAL_VECTOR_ID},
|
{SERVER_ILLEGAL_VECTOR_ID, StatusCode::ILLEGAL_VECTOR_ID},
|
||||||
{SERVER_ILLEGAL_SEARCH_RESULT, StatusCode::ILLEGAL_SEARCH_RESULT},
|
{SERVER_ILLEGAL_SEARCH_RESULT, StatusCode::ILLEGAL_SEARCH_RESULT},
|
||||||
{SERVER_CACHE_FULL, StatusCode::CACHE_FAILED},
|
{SERVER_CACHE_FULL, StatusCode::CACHE_FAILED},
|
||||||
|
@ -159,7 +159,7 @@ WebRequestHandler::GetCollectionMetaInfo(const std::string& collection_name, nlo
|
||||||
|
|
||||||
json_out["collection_name"] = schema.collection_name_;
|
json_out["collection_name"] = schema.collection_name_;
|
||||||
json_out["dimension"] = schema.extra_params_[engine::PARAM_DIMENSION].get<int64_t>();
|
json_out["dimension"] = schema.extra_params_[engine::PARAM_DIMENSION].get<int64_t>();
|
||||||
json_out["index_file_size"] = schema.extra_params_[engine::PARAM_SEGMENT_SIZE].get<int64_t>();
|
json_out["segment_row_count"] = schema.extra_params_[engine::PARAM_SEGMENT_ROW_COUNT].get<int64_t>();
|
||||||
json_out["metric_type"] = schema.extra_params_[engine::PARAM_INDEX_METRIC_TYPE].get<int64_t>();
|
json_out["metric_type"] = schema.extra_params_[engine::PARAM_INDEX_METRIC_TYPE].get<int64_t>();
|
||||||
json_out["index_params"] = schema.extra_params_[engine::PARAM_INDEX_EXTRA_PARAMS].get<std::string>();
|
json_out["index_params"] = schema.extra_params_[engine::PARAM_INDEX_EXTRA_PARAMS].get<std::string>();
|
||||||
json_out["count"] = count;
|
json_out["count"] = count;
|
||||||
|
|
|
@ -88,7 +88,7 @@ constexpr ErrorCode SERVER_WRITE_ERROR = ToServerErrorCode(112);
|
||||||
constexpr ErrorCode SERVER_INVALID_NPROBE = ToServerErrorCode(113);
|
constexpr ErrorCode SERVER_INVALID_NPROBE = ToServerErrorCode(113);
|
||||||
constexpr ErrorCode SERVER_INVALID_INDEX_NLIST = ToServerErrorCode(114);
|
constexpr ErrorCode SERVER_INVALID_INDEX_NLIST = ToServerErrorCode(114);
|
||||||
constexpr ErrorCode SERVER_INVALID_INDEX_METRIC_TYPE = ToServerErrorCode(115);
|
constexpr ErrorCode SERVER_INVALID_INDEX_METRIC_TYPE = ToServerErrorCode(115);
|
||||||
constexpr ErrorCode SERVER_INVALID_INDEX_FILE_SIZE = ToServerErrorCode(116);
|
constexpr ErrorCode SERVER_INVALID_SEGMENT_ROW_COUNT = ToServerErrorCode(116);
|
||||||
constexpr ErrorCode SERVER_OUT_OF_MEMORY = ToServerErrorCode(117);
|
constexpr ErrorCode SERVER_OUT_OF_MEMORY = ToServerErrorCode(117);
|
||||||
constexpr ErrorCode SERVER_INVALID_PARTITION_TAG = ToServerErrorCode(118);
|
constexpr ErrorCode SERVER_INVALID_PARTITION_TAG = ToServerErrorCode(118);
|
||||||
constexpr ErrorCode SERVER_INVALID_BINARY_QUERY = ToServerErrorCode(119);
|
constexpr ErrorCode SERVER_INVALID_BINARY_QUERY = ToServerErrorCode(119);
|
||||||
|
|
|
@ -503,13 +503,13 @@ TEST_F(DBTest, IndexTest) {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// milvus::engine::CollectionIndex index;
|
milvus::engine::CollectionIndex index;
|
||||||
// index.index_name_ = "SORTED";
|
index.index_name_ = "SORTED";
|
||||||
// status = db_->CreateIndex(dummy_context_, collection_name, "field_0", index);
|
status = db_->CreateIndex(dummy_context_, collection_name, "field_0", index);
|
||||||
// ASSERT_TRUE(status.ok());
|
ASSERT_TRUE(status.ok());
|
||||||
// status = db_->CreateIndex(dummy_context_, collection_name, "field_1", index);
|
status = db_->CreateIndex(dummy_context_, collection_name, "field_1", index);
|
||||||
// ASSERT_TRUE(status.ok());
|
ASSERT_TRUE(status.ok());
|
||||||
// status = db_->CreateIndex(dummy_context_, collection_name, "field_2", index);
|
status = db_->CreateIndex(dummy_context_, collection_name, "field_2", index);
|
||||||
// ASSERT_TRUE(status.ok());
|
ASSERT_TRUE(status.ok());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ CreateCollection(std::shared_ptr<DBImpl> db, const std::string& collection_name,
|
||||||
auto vector_field = std::make_shared<Field>("vector", 0, milvus::engine::FieldType::VECTOR_FLOAT, vector_param,
|
auto vector_field = std::make_shared<Field>("vector", 0, milvus::engine::FieldType::VECTOR_FLOAT, vector_param,
|
||||||
field_id);
|
field_id);
|
||||||
auto vector_field_element_index = std::make_shared<FieldElement>(collection_id, field_id,
|
auto vector_field_element_index = std::make_shared<FieldElement>(collection_id, field_id,
|
||||||
milvus::engine::DEFAULT_INDEX_NAME, milvus::engine::FieldElementType::FET_INDEX);
|
milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, milvus::engine::FieldElementType::FET_INDEX);
|
||||||
|
|
||||||
context.fields_schema[uid_field] = {uid_field_element_blt, uid_field_element_del};
|
context.fields_schema[uid_field] = {uid_field_element_blt, uid_field_element_del};
|
||||||
context.fields_schema[vector_field] = {vector_field_element_index};
|
context.fields_schema[vector_field] = {vector_field_element_index};
|
||||||
|
|
Loading…
Reference in New Issue