fix: Use `text_log` prefix for TextMatchIndex null offset file (#39935)

Related to #39933

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/39943/head
congqixia 2025-02-17 20:17:25 +08:00 committed by GitHub
parent a4dbbc2e52
commit 7ccde3300e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 27 additions and 12 deletions

View File

@ -510,7 +510,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
};
} else {
auto size_per_chunk = segment_->size_per_chunk();
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{
retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
auto chunk_idx = offset / size_per_chunk;
auto chunk_offset = offset % size_per_chunk;
const auto& chunk =

View File

@ -101,7 +101,7 @@ TextMatchIndex::Upload(const Config& config) {
auto remote_paths_to_size = disk_file_manager_->GetRemotePathsToFileSize();
auto binary_set = Serialize(config);
mem_file_manager_->AddFile(binary_set);
mem_file_manager_->AddTextLog(binary_set);
auto remote_mem_path_to_size =
mem_file_manager_->GetRemotePathsToFileSize();

View File

@ -39,7 +39,8 @@ MemFileManagerImpl::AddFile(const std::string& filename /* unused */) noexcept {
}
bool
MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
MemFileManagerImpl::AddBinarySet(const BinarySet& binary_set,
const std::string& prefix) {
std::vector<const uint8_t*> data_slices;
std::vector<int64_t> slice_sizes;
std::vector<std::string> slice_names;
@ -56,11 +57,8 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
}
};
auto remotePrefix = GetRemoteIndexObjectPrefix();
int64_t batch_size = 0;
for (auto iter = binary_set.binary_map_.begin();
iter != binary_set.binary_map_.end();
iter++) {
for (const auto& iter : binary_set.binary_map_) {
if (batch_size >= DEFAULT_FIELD_MAX_MEMORY_LIMIT) {
AddBatchIndexFiles();
data_slices.clear();
@ -69,11 +67,11 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
batch_size = 0;
}
data_slices.emplace_back(iter->second->data.get());
slice_sizes.emplace_back(iter->second->size);
slice_names.emplace_back(remotePrefix + "/" + iter->first);
batch_size += iter->second->size;
added_total_mem_size_ += iter->second->size;
data_slices.emplace_back(iter.second->data.get());
slice_sizes.emplace_back(iter.second->size);
slice_names.emplace_back(prefix + "/" + iter.first);
batch_size += iter.second->size;
added_total_mem_size_ += iter.second->size;
}
if (data_slices.size() > 0) {
@ -83,6 +81,16 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
return true;
}
// Adds every entry of `binary_set` under the remote index-object prefix.
// Delegates to AddBinarySet and forwards its result unchanged.
bool
MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
    const std::string index_prefix = GetRemoteIndexObjectPrefix();
    const bool added = AddBinarySet(binary_set, index_prefix);
    return added;
}
// Adds every entry of `binary_set` under the remote text-log prefix.
// Delegates to AddBinarySet and forwards its result unchanged.
bool
MemFileManagerImpl::AddTextLog(const BinarySet& binary_set) {
    const std::string text_log_prefix = GetRemoteTextLogPrefix();
    const bool added = AddBinarySet(binary_set, text_log_prefix);
    return added;
}
bool
MemFileManagerImpl::LoadFile(const std::string& filename) noexcept {
return true;

View File

@ -60,6 +60,9 @@ class MemFileManagerImpl : public FileManagerImpl {
bool
AddFile(const BinarySet& binary_set);
bool
AddTextLog(const BinarySet& binary_set);
std::map<std::string, int64_t>
GetRemotePathsToFileSize() const {
return remote_paths_to_size_;
@ -73,6 +76,10 @@ class MemFileManagerImpl : public FileManagerImpl {
std::unordered_map<int64_t, std::vector<std::vector<uint32_t>>>
CacheOptFieldToMemory(OptFieldT& fields_map);
private:
bool
AddBinarySet(const BinarySet& binary_set, const std::string& prefix);
private:
// remote file path
std::map<std::string, int64_t> remote_paths_to_size_;