MS-644 - Search crashed with index-type: flat

Former-commit-id: cf79b2ed46c1f33ba62e60a0ec77f769354c5a24
pull/191/head
Yu Kun 2019-10-14 17:09:37 +08:00
commit ed93701efd
14 changed files with 98 additions and 50 deletions

View File

@ -5,7 +5,7 @@ container('milvus-build-env') {
try {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
dir ("cpp") {
dir ("core") {
sh "git config --global user.email \"test@zilliz.com\""
sh "git config --global user.name \"test\""
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {

View File

@ -5,7 +5,7 @@ container('milvus-build-env') {
try {
checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]])
dir ("cpp") {
dir ("core") {
sh "git config --global user.email \"test@zilliz.com\""
sh "git config --global user.name \"test\""
withCredentials([usernamePassword(credentialsId: "${params.JFROG_USER}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {

View File

@ -1,7 +1,7 @@
container('milvus-build-env') {
timeout(time: 5, unit: 'MINUTES') {
dir ("milvus_engine") {
dir ("cpp") {
dir ("core") {
gitlabCommitStatus(name: 'Packaged Engine') {
if (fileExists('milvus')) {
try {

View File

@ -1,7 +1,7 @@
container('milvus-build-env') {
timeout(time: 5, unit: 'MINUTES') {
dir ("milvus_engine") {
dir ("cpp") {
dir ("core") {
gitlabCommitStatus(name: 'Packaged Engine') {
if (fileExists('milvus')) {
try {

View File

@ -9,16 +9,17 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-572 - Milvus crashes when receiving SIGINT
- MS-577 - Unittest Query randomly hung
- MS-587 - Count get wrong result after adding vectors and index built immediately
- MS-599 - search wrong result when table created with metric_type: IP
- MS-599 - Search wrong result when table created with metric_type: IP
- MS-601 - Docker logs error caused by get CPUTemperature error
- MS-622 - Delete vectors should fail if date range is invalid
- MS-620 - Get table row counts display wrong error code
- MS-637 - out of memory when load too many tasks
- MS-637 - Out of memory when load too many tasks
- MS-640 - Cache object size calculated incorrectly
- MS-641 - Segment fault(signal 11) in PickToLoad
- MS-639 - SQ8H index creation failed and server hung
- MS-647 - [monitor] grafana display average cpu-temp
- MS-644 - Search crashed with index-type: flat
- MS-624 - Search vectors failed if time ranges long enough
## Improvement
- MS-552 - Add and change the easylogging library

View File

@ -81,7 +81,7 @@ class Meta {
UpdateTableFiles(TableFilesSchema& files) = 0;
virtual Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) = 0;
virtual Status

View File

@ -41,11 +41,6 @@ using DateT = int;
const DateT EmptyDate = -1;
using DatesT = std::vector<DateT>;
struct DateRange {
DateT start_date_ = 0x1 << 32;
DateT end_date_ = 0;
};
struct TableSchema {
typedef enum {
NORMAL,

View File

@ -1088,7 +1088,7 @@ MySQLMetaImpl::FilesToIndex(TableFilesSchema& files) {
}
Status
MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) {
files.clear();
@ -1108,9 +1108,9 @@ MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector<size
<< META_TABLEFILES << " "
<< "WHERE table_id = " << mysqlpp::quote << table_id;
if (!partition.empty()) {
if (!dates.empty()) {
std::stringstream partitionListSS;
for (auto& date : partition) {
for (auto& date : dates) {
partitionListSS << std::to_string(date) << ", ";
}
std::string partitionListStr = partitionListSS.str();

View File

@ -89,7 +89,7 @@ class MySQLMetaImpl : public Meta {
UpdateTableFiles(TableFilesSchema& files) override;
Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) override;
Status

View File

@ -159,7 +159,7 @@ SqliteMetaImpl::Initialize() {
Status
SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id,
const DatesT &dates) {
if (dates.size() == 0) {
if (dates.empty()) {
return Status::OK();
}
@ -171,16 +171,35 @@ SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id,
}
try {
//sqlite_orm has a bug, 'in' statement cannot handle too many elements
//so we split one query into multi-queries, this is a work-around!!
std::vector<DatesT> split_dates;
split_dates.push_back(DatesT());
const size_t batch_size = 30;
for(DateT date : dates) {
DatesT& last_batch = *split_dates.rbegin();
last_batch.push_back(date);
if(last_batch.size() > batch_size) {
split_dates.push_back(DatesT());
}
}
//multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here
std::lock_guard<std::mutex> meta_lock(meta_mutex_);
ConnectorPtr->update_all(
set(
c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE,
c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()),
where(
c(&TableFileSchema::table_id_) == table_id and
in(&TableFileSchema::date_, dates)));
for(auto& batch_dates : split_dates) {
if(batch_dates.empty()) {
continue;
}
ConnectorPtr->update_all(
set(
c(&TableFileSchema::file_type_) = (int)TableFileSchema::TO_DELETE,
c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()),
where(
c(&TableFileSchema::table_id_) == table_id and
in(&TableFileSchema::date_, batch_dates)));
}
ENGINE_LOG_DEBUG << "Successfully drop partitions, table id = " << table_schema.table_id_;
} catch (std::exception &e) {
@ -673,7 +692,7 @@ SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) {
Status
SqliteMetaImpl::FilesToSearch(const std::string &table_id,
const std::vector<size_t> &ids,
const DatesT &partition,
const DatesT &dates,
DatePartionedTableFilesSchema &files) {
files.clear();
server::MetricCollector metric;
@ -702,23 +721,54 @@ SqliteMetaImpl::FilesToSearch(const std::string &table_id,
auto status = DescribeTable(table_schema);
if (!status.ok()) { return status; }
//sqlite_orm has a bug, 'in' statement cannot handle too many elements
//so we split one query into multi-queries, this is a work-around!!
std::vector<DatesT> split_dates;
split_dates.push_back(DatesT());
const size_t batch_size = 30;
for(DateT date : dates) {
DatesT& last_batch = *split_dates.rbegin();
last_batch.push_back(date);
if(last_batch.size() > batch_size) {
split_dates.push_back(DatesT());
}
}
//perform query
decltype(ConnectorPtr->select(select_columns)) selected;
if (partition.empty() && ids.empty()) {
if (dates.empty() && ids.empty()) {
auto filter = where(match_tableid and match_type);
selected = ConnectorPtr->select(select_columns, filter);
} else if (partition.empty() && !ids.empty()) {
} else if (dates.empty() && !ids.empty()) {
auto match_fileid = in(&TableFileSchema::id_, ids);
auto filter = where(match_tableid and match_fileid and match_type);
selected = ConnectorPtr->select(select_columns, filter);
} else if (!partition.empty() && ids.empty()) {
auto match_date = in(&TableFileSchema::date_, partition);
auto filter = where(match_tableid and match_date and match_type);
selected = ConnectorPtr->select(select_columns, filter);
} else if (!partition.empty() && !ids.empty()) {
auto match_fileid = in(&TableFileSchema::id_, ids);
auto match_date = in(&TableFileSchema::date_, partition);
auto filter = where(match_tableid and match_fileid and match_date and match_type);
selected = ConnectorPtr->select(select_columns, filter);
} else if (!dates.empty() && ids.empty()) {
for(auto& batch_dates : split_dates) {
if(batch_dates.empty()) {
continue;
}
auto match_date = in(&TableFileSchema::date_, batch_dates);
auto filter = where(match_tableid and match_date and match_type);
auto batch_selected = ConnectorPtr->select(select_columns, filter);
for (auto &file : batch_selected) {
selected.push_back(file);
}
}
} else if (!dates.empty() && !ids.empty()) {
for(auto& batch_dates : split_dates) {
if(batch_dates.empty()) {
continue;
}
auto match_fileid = in(&TableFileSchema::id_, ids);
auto match_date = in(&TableFileSchema::date_, batch_dates);
auto filter = where(match_tableid and match_fileid and match_date and match_type);
auto batch_selected = ConnectorPtr->select(select_columns, filter);
for (auto &file : batch_selected) {
selected.push_back(file);
}
}
}
Status ret;

View File

@ -89,7 +89,7 @@ class SqliteMetaImpl : public Meta {
UpdateTableFiles(TableFilesSchema& files) override;
Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) override;
Status

View File

@ -79,12 +79,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) {
VectorIndexPtr
IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
auto p = CopyCpuToGpuWithQuantizer(device_id, config);
return p.first;
} else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
}
auto p = CopyCpuToGpuWithQuantizer(device_id, config);
return p.first;
}
void
@ -256,8 +252,14 @@ void
IVFSQHybrid::UnsetQuantizer() {
}
void
// Stub: neither the quantizer `q` nor the config `conf` is read; the function
// unconditionally returns a null index pointer.
// NOTE(review): presumably a placeholder for builds without hybrid GPU
// support -- confirm against the surrounding preprocessor guards. Callers
// must be prepared for a null result.
VectorIndexPtr
IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
return nullptr;
}
// Placeholder: `device_id` and `config` are not used; the body consists solely
// of a KNOWHERE_THROW_MSG invocation carrying the text "Not yet implemented".
// NOTE(review): presumably the macro raises an exception, so no
// (index, quantizer) pair is ever returned from this variant -- confirm
// against the macro's definition.
std::pair<VectorIndexPtr, QuantizerPtr>
IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) {
KNOWHERE_THROW_MSG("Not yet implemented");
}
IndexModelPtr

View File

@ -231,10 +231,10 @@ PrometheusMetrics::CPUTemperature() {
prometheus::Gauge& cpu_temp = CPU_temperature_.Add({{"CPU", std::to_string(0)}});
cpu_temp.Set(avg_cpu_temp);
// for (int i = 0; i < CPU_temperatures.size(); ++i) {
// prometheus::Gauge& cpu_temp = CPU_temperature_.Add({{"CPU", std::to_string(i)}});
// cpu_temp.Set(CPU_temperatures[i]);
// }
// for (int i = 0; i < CPU_temperatures.size(); ++i) {
// prometheus::Gauge& cpu_temp = CPU_temperature_.Add({{"CPU", std::to_string(i)}});
// cpu_temp.Set(CPU_temperatures[i]);
// }
}
void

View File

@ -423,8 +423,8 @@ InsertTask::OnExecute() {
return Status(SERVER_INVALID_ROWRECORD_ARRAY, "Row record array is empty");
}
if (!record_ids_->vector_id_array().empty()) {
if (record_ids_->vector_id_array().size() != insert_param_->row_record_array_size()) {
if (!insert_param_->row_id_array().empty()) {
if (insert_param_->row_id_array().size() != insert_param_->row_record_array_size()) {
return Status(SERVER_ILLEGAL_VECTOR_ID, "Size of vector ids is not equal to row record array size");
}
}