#1873 Fix index file being serialized to an incorrect path (#1874)

* #1873 Fix index file being serialized to an incorrect path

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* Do not create the SQ8H index when GPU is disabled

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
pull/1877/head
Cai Yudong 2020-04-07 16:25:46 +08:00 committed by GitHub
parent c6f4660b15
commit bf6d22e2f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 29 additions and 57 deletions

View File

@ -5,7 +5,8 @@ Please mark all change in change log and use the issue from GitHub
# Milvus 0.8.0 (TBD)
## Bug
- \#1762 Server is not forbidden to create new partition which tag is "_default"
- \#1762 Server is not forbidden to create new partition which tag is `_default`
- \#1873 Fix index file being serialized to an incorrect path
## Feature
- \#261 Integrate ANNOY into Milvus

View File

@ -29,10 +29,11 @@ namespace codec {
// Abstract codec interface for reading and writing a vector index.
// Every member is pure virtual, so a concrete format has to supply the I/O.
// This view lists both the two-argument overloads and the overloads that take
// an explicit location string.
class VectorIndexFormat {
public:
// Two-argument read: fs_ptr is the filesystem handler; vector_index is passed
// by non-const reference so the implementation can populate it.
virtual void
read(const storage::FSHandlerPtr& fs_ptr, segment::VectorIndexPtr& vector_index) = 0;
// Read overload with an explicit location.
// NOTE(review): location is presumably the full path of the index file — confirm against callers.
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, segment::VectorIndexPtr& vector_index) = 0;
// Two-argument write: vector_index is taken by const reference (read-only input).
virtual void
write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorIndexPtr& vector_index) = 0;
// Write overload with an explicit location.
// NOTE(review): location is presumably the destination file path — confirm against callers.
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) = 0;
};
using VectorIndexFormatPtr = std::shared_ptr<VectorIndexFormat>;

View File

@ -98,7 +98,8 @@ DefaultVectorIndexFormat::read_internal(const storage::FSHandlerPtr& fs_ptr, con
}
void
DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::VectorIndexPtr& vector_index) {
DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
segment::VectorIndexPtr& vector_index) {
const std::lock_guard<std::mutex> lock(mutex_);
std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
@ -108,42 +109,17 @@ DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::Vec
throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
}
boost::filesystem::path target_path(dir_path);
typedef boost::filesystem::directory_iterator d_it;
d_it it_end;
d_it it(target_path);
for (; it != it_end; ++it) {
const auto& path = it->path();
// if (path.extension().string() == vector_index_extension_) {
/* tmp solution, should be replaced when use .idx as index extension name */
const std::string& location = path.string();
if (location.substr(location.length() - 3) == "000") {
knowhere::VecIndexPtr index = read_internal(fs_ptr, location);
vector_index->SetVectorIndex(index);
vector_index->SetName(path.stem().string());
return;
}
}
}
std::string
GenerateFileName() {
auto now = std::chrono::system_clock::now();
auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
return std::to_string(micros * 1000);
knowhere::VecIndexPtr index = read_internal(fs_ptr, location);
vector_index->SetVectorIndex(index);
}
void
DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorIndexPtr& vector_index) {
DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) {
const std::lock_guard<std::mutex> lock(mutex_);
std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
const std::string index_file_path = dir_path + "/" + GenerateFileName();
// const std::string index_file_path = dir_path + "/" + vector_index->GetName() + vector_index_extension_;
milvus::TimeRecorder recorder("write_index");
knowhere::VecIndexPtr index = vector_index->GetVectorIndex();
@ -152,7 +128,7 @@ DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const segme
int32_t index_type = knowhere::StrToOldIndexType(index->index_type());
recorder.RecordSection("Start");
fs_ptr->writer_ptr_->open(index_file_path);
fs_ptr->writer_ptr_->open(location);
fs_ptr->writer_ptr_->write(&index_type, sizeof(index_type));
@ -171,7 +147,7 @@ DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const segme
double span = recorder.RecordSection("End");
double rate = fs_ptr->writer_ptr_->length() * 1000000.0 / span / 1024 / 1024;
ENGINE_LOG_DEBUG << "write_index(" << index_file_path << ") rate " << rate << "MB/s";
ENGINE_LOG_DEBUG << "write_index(" << location << ") rate " << rate << "MB/s";
}
} // namespace codec

View File

@ -30,10 +30,12 @@ class DefaultVectorIndexFormat : public VectorIndexFormat {
DefaultVectorIndexFormat() = default;
void
read(const storage::FSHandlerPtr& fs_ptr, segment::VectorIndexPtr& vector_index) override;
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
segment::VectorIndexPtr& vector_index) override;
void
write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorIndexPtr& vector_index) override;
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) override;
// No copy and move
DefaultVectorIndexFormat(const DefaultVectorIndexFormat&) = delete;

View File

@ -358,7 +358,7 @@ ExecutionEngineImpl::Serialize() {
utils::GetParentPath(location_, segment_dir);
auto segment_writer_ptr = std::make_shared<segment::SegmentWriter>(segment_dir);
segment_writer_ptr->SetVectorIndex(index_);
segment_writer_ptr->WriteVectorIndex();
segment_writer_ptr->WriteVectorIndex(location_);
// here we reset index size by file size,
// since some index type(such as SQ8) data size become smaller after serialized
@ -443,7 +443,7 @@ ExecutionEngineImpl::Load(bool to_cache) {
try {
segment::SegmentPtr segment_ptr;
segment_reader_ptr->GetSegment(segment_ptr);
auto status = segment_reader_ptr->LoadVectorIndex(segment_ptr->vector_index_ptr_);
auto status = segment_reader_ptr->LoadVectorIndex(location_, segment_ptr->vector_index_ptr_);
index_ = segment_ptr->vector_index_ptr_->GetVectorIndex();
if (index_ == nullptr) {

View File

@ -25,7 +25,7 @@ AdapterMgr::GetAdapter(const IndexType type) {
try {
return collection_.at(type)();
} catch (...) {
KNOWHERE_THROW_MSG("Can not find this type of confadapter");
KNOWHERE_THROW_MSG("Can not find confadapter: " + type);
}
}

View File

@ -65,6 +65,9 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
return std::make_shared<knowhere::IVFSQ>();
#ifdef MILVUS_GPU_VERSION
} else if (type == IndexEnum::INDEX_FAISS_IVFSQ8H) {
if (mode == IndexMode::MODE_CPU) {
return nullptr;
}
return std::make_shared<knowhere::IVFSQHybrid>(gpu_device);
#endif
} else if (type == IndexEnum::INDEX_FAISS_BIN_IDMAP) {

View File

@ -93,11 +93,11 @@ SegmentReader::GetSegment(SegmentPtr& segment_ptr) {
}
Status
SegmentReader::LoadVectorIndex(segment::VectorIndexPtr& vector_index_ptr) {
SegmentReader::LoadVectorIndex(const std::string& location, segment::VectorIndexPtr& vector_index_ptr) {
codec::DefaultCodec default_codec;
try {
fs_ptr_->operation_ptr_->CreateDirectory();
default_codec.GetVectorIndexFormat()->read(fs_ptr_, vector_index_ptr);
default_codec.GetVectorIndexFormat()->read(fs_ptr_, location, vector_index_ptr);
} catch (std::exception& e) {
std::string err_msg = "Failed to load vector index: " + std::string(e.what());
ENGINE_LOG_ERROR << err_msg;

View File

@ -46,7 +46,7 @@ class SegmentReader {
LoadUids(std::vector<doc_id_t>& uids);
Status
LoadVectorIndex(segment::VectorIndexPtr& vector_index_ptr);
LoadVectorIndex(const std::string& location, segment::VectorIndexPtr& vector_index_ptr);
Status
LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr);

View File

@ -106,11 +106,11 @@ SegmentWriter::WriteVectors() {
}
Status
SegmentWriter::WriteVectorIndex() {
SegmentWriter::WriteVectorIndex(const std::string& location) {
codec::DefaultCodec default_codec;
try {
fs_ptr_->operation_ptr_->CreateDirectory();
default_codec.GetVectorIndexFormat()->write(fs_ptr_, segment_ptr_->vector_index_ptr_);
default_codec.GetVectorIndexFormat()->write(fs_ptr_, location, segment_ptr_->vector_index_ptr_);
} catch (std::exception& e) {
std::string err_msg = "Failed to write vector index: " + std::string(e.what());
ENGINE_LOG_ERROR << err_msg;

View File

@ -63,7 +63,7 @@ class SegmentWriter {
VectorCount();
Status
WriteVectorIndex();
WriteVectorIndex(const std::string& location);
private:
Status

View File

@ -41,16 +41,6 @@ class VectorIndex {
index_ptr_ = index_ptr;
}
// Stores a copy of the given name in the name_ member.
void
SetName(const std::string& name) {
    name_.assign(name);
}
// Returns the stored name by const reference; no copy is made.
const std::string&
GetName() const {
    const std::string& stored_name = name_;
    return stored_name;
}
// No copy and move
VectorIndex(const VectorIndex&) = delete;
VectorIndex(VectorIndex&&) = delete;
@ -62,7 +52,6 @@ class VectorIndex {
private:
knowhere::VecIndexPtr index_ptr_ = nullptr;
std::string name_;
};
using VectorIndexPtr = std::shared_ptr<VectorIndex>;