feat: support mmap for marisa trie (#29613)

This adds mmap support for the marisa trie index.
Related: https://github.com/milvus-io/milvus/issues/21866

Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/29884/head
yah01 2024-01-11 10:22:50 +08:00 committed by GitHub
parent d6429933a7
commit 031243fee7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 15 deletions

View File

@ -13,6 +13,7 @@
#include <string>
#include "common/EasyAssert.h"
#include "common/Types.h"
#include "fmt/core.h"
#include <fcntl.h>
#include <unistd.h>
@ -51,6 +52,11 @@ class File {
return write(fd_, buf, size);
}
// Forwards `offset` and `whence` to lseek(2) on the wrapped descriptor
// `fd_` and returns whatever lseek reports.
offset_t
Seek(offset_t offset, int whence) {
    const auto position = lseek(fd_, offset, whence);
    return position;
}
void
Close() {
close(fd_);

View File

@ -17,11 +17,16 @@
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <cstring>
#include <memory>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <unistd.h>
#include "common/File.h"
#include "common/Types.h"
#include "common/EasyAssert.h"
#include "common/Exception.h"
@ -249,28 +254,29 @@ StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
const Config& config) {
auto uuid = boost::uuids::random_generator()();
auto uuid_string = boost::uuids::to_string(uuid);
auto file = std::string("/tmp/") + uuid_string;
auto file_name = std::string("/tmp/") + uuid_string;
auto index = set.GetByName(MARISA_TRIE_INDEX);
auto len = index->size;
auto fd = open(
file.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IXUSR);
lseek(fd, 0, SEEK_SET);
auto status = write(fd, index->data.get(), len);
if (status != len) {
close(fd);
remove(file.c_str());
auto file = File::Open(file_name, O_RDWR | O_CREAT | O_EXCL);
auto written = file.Write(index->data.get(), len);
if (written != len) {
file.Close();
remove(file_name.c_str());
throw SegcoreError(
ErrorCode::UnistdError,
"write index to fd error, errorCode is " + std::to_string(status));
fmt::format("write index to fd error: {}", strerror(errno)));
}
lseek(fd, 0, SEEK_SET);
trie_.read(fd);
close(fd);
remove(file.c_str());
file.Seek(0, SEEK_SET);
if (config.contains(kEnableMmap)) {
trie_.mmap(file_name.c_str());
} else {
trie_.read(file.Descriptor());
}
// unlink now so that the file is removed once it has been unmapped and closed
unlink(file_name.c_str());
auto str_ids = set.GetByName(MARISA_STR_IDS);
auto str_ids_len = str_ids->size;

View File

@ -31,7 +31,7 @@ beginTime=`date +%s`
for d in $(go list ./tests/integration/...); do
echo "$d"
go test -race -tags dynamic -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" -timeout=20m
go test -race -tags dynamic -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" -timeout=30m
if [ -f profile.out ]; then
grep -v kafka profile.out | grep -v planparserv2/generated | grep -v mocks | sed '1d' >> ${FILE_COVERAGE_INFO}
rm profile.out