mirror of https://github.com/milvus-io/milvus.git
enhance: Handle rust error in c++ (#38113)
https://github.com/milvus-io/milvus/issues/37930
Signed-off-by: sunby <sunbingyi1992@gmail.com>
pull/38505/head
parent
659847c11f
commit
3e2a2f278b
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include "tantivy-binding.h"
|
||||
|
@ -11,7 +12,13 @@ namespace milvus::tantivy {
|
|||
struct RustArrayWrapper {
|
||||
NO_COPY_OR_ASSIGN(RustArrayWrapper);
|
||||
|
||||
explicit RustArrayWrapper(RustArray array) : array_(array) {
|
||||
explicit RustArrayWrapper(RustArray&& array) {
|
||||
array_.array = array.array;
|
||||
array_.len = array.len;
|
||||
array_.cap = array.cap;
|
||||
array.array = nullptr;
|
||||
array.len = 0;
|
||||
array.cap = 0;
|
||||
}
|
||||
|
||||
RustArrayWrapper(RustArrayWrapper&& other) noexcept {
|
||||
|
@ -62,4 +69,42 @@ struct RustArrayWrapper {
|
|||
}
|
||||
}
|
||||
};
|
||||
struct RustResultWrapper {
|
||||
NO_COPY_OR_ASSIGN(RustResultWrapper);
|
||||
|
||||
RustResultWrapper() = default;
|
||||
explicit RustResultWrapper(RustResult result)
|
||||
: result_(std::make_unique<RustResult>(result)) {
|
||||
}
|
||||
|
||||
RustResultWrapper(RustResultWrapper&& other) noexcept {
|
||||
result_ = std::move(other.result_);
|
||||
}
|
||||
|
||||
RustResultWrapper&
|
||||
operator=(RustResultWrapper&& other) noexcept {
|
||||
if (this != &other) {
|
||||
free();
|
||||
result_ = std::move(other.result_);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~RustResultWrapper() {
|
||||
free();
|
||||
}
|
||||
|
||||
std::unique_ptr<RustResult> result_;
|
||||
|
||||
private:
|
||||
void
|
||||
free() {
|
||||
if (result_) {
|
||||
free_rust_result(*result_);
|
||||
result_.reset();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace milvus::tantivy
|
||||
|
|
|
@ -20,10 +20,59 @@ struct RustArray {
|
|||
size_t cap;
|
||||
};
|
||||
|
||||
struct Value {
|
||||
enum class Tag {
|
||||
None,
|
||||
RustArray,
|
||||
U32,
|
||||
Ptr,
|
||||
};
|
||||
|
||||
struct None_Body {
|
||||
|
||||
};
|
||||
|
||||
struct RustArray_Body {
|
||||
RustArray _0;
|
||||
};
|
||||
|
||||
struct U32_Body {
|
||||
uint32_t _0;
|
||||
};
|
||||
|
||||
struct Ptr_Body {
|
||||
void *_0;
|
||||
};
|
||||
|
||||
Tag tag;
|
||||
union {
|
||||
None_Body none;
|
||||
RustArray_Body rust_array;
|
||||
U32_Body u32;
|
||||
Ptr_Body ptr;
|
||||
};
|
||||
};
|
||||
|
||||
struct RustResult {
|
||||
bool success;
|
||||
Value value;
|
||||
const char *error;
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
|
||||
void free_rust_array(RustArray array);
|
||||
|
||||
void free_rust_result(RustResult result);
|
||||
|
||||
void free_rust_error(const char *error);
|
||||
|
||||
RustResult test_enum_with_array();
|
||||
|
||||
RustResult test_enum_with_ptr();
|
||||
|
||||
void free_test_ptr(void *ptr);
|
||||
|
||||
void print_vector_of_strings(const char *const *ptr, uintptr_t len);
|
||||
|
||||
void *create_hashmap();
|
||||
|
@ -32,120 +81,164 @@ void hashmap_set_value(void *map, const char *key, const char *value);
|
|||
|
||||
void free_hashmap(void *map);
|
||||
|
||||
void *tantivy_load_index(const char *path);
|
||||
RustResult tantivy_load_index(const char *path);
|
||||
|
||||
void tantivy_free_index_reader(void *ptr);
|
||||
|
||||
void tantivy_reload_index(void *ptr);
|
||||
RustResult tantivy_reload_index(void *ptr);
|
||||
|
||||
uint32_t tantivy_index_count(void *ptr);
|
||||
RustResult tantivy_index_count(void *ptr);
|
||||
|
||||
RustArray tantivy_term_query_i64(void *ptr, int64_t term);
|
||||
RustResult tantivy_term_query_i64(void *ptr, int64_t term);
|
||||
|
||||
RustArray tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive);
|
||||
RustResult tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive);
|
||||
|
||||
RustArray tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive);
|
||||
RustResult tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive);
|
||||
|
||||
RustArray tantivy_range_query_i64(void *ptr,
|
||||
int64_t lower_bound,
|
||||
int64_t upper_bound,
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive);
|
||||
RustResult tantivy_range_query_i64(void *ptr,
|
||||
int64_t lower_bound,
|
||||
int64_t upper_bound,
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive);
|
||||
|
||||
RustArray tantivy_term_query_f64(void *ptr, double term);
|
||||
RustResult tantivy_term_query_f64(void *ptr, double term);
|
||||
|
||||
RustArray tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive);
|
||||
RustResult tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive);
|
||||
|
||||
RustArray tantivy_upper_bound_range_query_f64(void *ptr, double upper_bound, bool inclusive);
|
||||
RustResult tantivy_upper_bound_range_query_f64(void *ptr, double upper_bound, bool inclusive);
|
||||
|
||||
RustArray tantivy_range_query_f64(void *ptr,
|
||||
double lower_bound,
|
||||
double upper_bound,
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive);
|
||||
RustResult tantivy_range_query_f64(void *ptr,
|
||||
double lower_bound,
|
||||
double upper_bound,
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive);
|
||||
|
||||
RustArray tantivy_term_query_bool(void *ptr, bool term);
|
||||
RustResult tantivy_term_query_bool(void *ptr, bool term);
|
||||
|
||||
RustArray tantivy_term_query_keyword(void *ptr, const char *term);
|
||||
RustResult tantivy_term_query_keyword(void *ptr, const char *term);
|
||||
|
||||
RustArray tantivy_lower_bound_range_query_keyword(void *ptr,
|
||||
const char *lower_bound,
|
||||
bool inclusive);
|
||||
RustResult tantivy_lower_bound_range_query_keyword(void *ptr,
|
||||
const char *lower_bound,
|
||||
bool inclusive);
|
||||
|
||||
RustArray tantivy_upper_bound_range_query_keyword(void *ptr,
|
||||
const char *upper_bound,
|
||||
bool inclusive);
|
||||
RustResult tantivy_upper_bound_range_query_keyword(void *ptr,
|
||||
const char *upper_bound,
|
||||
bool inclusive);
|
||||
|
||||
RustArray tantivy_range_query_keyword(void *ptr,
|
||||
const char *lower_bound,
|
||||
const char *upper_bound,
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive);
|
||||
RustResult tantivy_range_query_keyword(void *ptr,
|
||||
const char *lower_bound,
|
||||
const char *upper_bound,
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive);
|
||||
|
||||
RustArray tantivy_prefix_query_keyword(void *ptr, const char *prefix);
|
||||
RustResult tantivy_prefix_query_keyword(void *ptr, const char *prefix);
|
||||
|
||||
RustArray tantivy_regex_query(void *ptr, const char *pattern);
|
||||
RustResult tantivy_regex_query(void *ptr, const char *pattern);
|
||||
|
||||
RustArray tantivy_match_query(void *ptr, const char *query);
|
||||
RustResult tantivy_match_query(void *ptr, const char *query);
|
||||
|
||||
void tantivy_register_tokenizer(void *ptr, const char *tokenizer_name, const char *analyzer_params);
|
||||
RustResult tantivy_register_tokenizer(void *ptr,
|
||||
const char *tokenizer_name,
|
||||
const char *analyzer_params);
|
||||
|
||||
void *tantivy_create_index(const char *field_name,
|
||||
TantivyDataType data_type,
|
||||
const char *path,
|
||||
uintptr_t num_threads,
|
||||
uintptr_t overall_memory_budget_in_bytes);
|
||||
RustResult tantivy_create_index(const char *field_name,
|
||||
TantivyDataType data_type,
|
||||
const char *path,
|
||||
uintptr_t num_threads,
|
||||
uintptr_t overall_memory_budget_in_bytes);
|
||||
|
||||
void tantivy_free_index_writer(void *ptr);
|
||||
|
||||
void tantivy_finish_index(void *ptr);
|
||||
RustResult tantivy_finish_index(void *ptr);
|
||||
|
||||
void tantivy_commit_index(void *ptr);
|
||||
RustResult tantivy_commit_index(void *ptr);
|
||||
|
||||
void *tantivy_create_reader_from_writer(void *ptr);
|
||||
RustResult tantivy_create_reader_from_writer(void *ptr);
|
||||
|
||||
void tantivy_index_add_int8s(void *ptr, const int8_t *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_int8s(void *ptr,
|
||||
const int8_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_int16s(void *ptr, const int16_t *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_int16s(void *ptr,
|
||||
const int16_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_int32s(void *ptr, const int32_t *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_int32s(void *ptr,
|
||||
const int32_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_int64s(void *ptr, const int64_t *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_int64s(void *ptr,
|
||||
const int64_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_f32s(void *ptr, const float *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_f32s(void *ptr,
|
||||
const float *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_f64s(void *ptr, const double *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_f64s(void *ptr,
|
||||
const double *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_bools(void *ptr, const bool *array, uintptr_t len, int64_t offset_begin);
|
||||
RustResult tantivy_index_add_bools(void *ptr,
|
||||
const bool *array,
|
||||
uintptr_t len,
|
||||
int64_t offset_begin);
|
||||
|
||||
void tantivy_index_add_string(void *ptr, const char *s, int64_t offset);
|
||||
RustResult tantivy_index_add_string(void *ptr, const char *s, int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_int8s(void *ptr, const int8_t *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_int8s(void *ptr,
|
||||
const int8_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_int16s(void *ptr, const int16_t *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_int16s(void *ptr,
|
||||
const int16_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_int32s(void *ptr, const int32_t *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_int32s(void *ptr,
|
||||
const int32_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_int64s(void *ptr, const int64_t *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_int64s(void *ptr,
|
||||
const int64_t *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_f32s(void *ptr, const float *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_f32s(void *ptr,
|
||||
const float *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_f64s(void *ptr, const double *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_f64s(void *ptr,
|
||||
const double *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_bools(void *ptr, const bool *array, uintptr_t len, int64_t offset);
|
||||
RustResult tantivy_index_add_multi_bools(void *ptr,
|
||||
const bool *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void tantivy_index_add_multi_keywords(void *ptr,
|
||||
const char *const *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
RustResult tantivy_index_add_multi_keywords(void *ptr,
|
||||
const char *const *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
|
||||
void *tantivy_create_text_writer(const char *field_name,
|
||||
const char *path,
|
||||
const char *tokenizer_name,
|
||||
const char *analyzer_params,
|
||||
uintptr_t num_threads,
|
||||
uintptr_t overall_memory_budget_in_bytes,
|
||||
bool in_ram);
|
||||
RustResult tantivy_create_text_writer(const char *field_name,
|
||||
const char *path,
|
||||
const char *tokenizer_name,
|
||||
const char *analyzer_params,
|
||||
uintptr_t num_threads,
|
||||
uintptr_t overall_memory_budget_in_bytes,
|
||||
bool in_ram);
|
||||
|
||||
void free_rust_string(const char *ptr);
|
||||
|
||||
|
@ -157,7 +250,7 @@ bool tantivy_token_stream_advance(void *token_stream);
|
|||
|
||||
const char *tantivy_token_stream_get_token(void *token_stream);
|
||||
|
||||
void *tantivy_create_tokenizer(const char *analyzer_params);
|
||||
RustResult tantivy_create_tokenizer(const char *analyzer_params);
|
||||
|
||||
void *tantivy_clone_tokenizer(void *ptr);
|
||||
|
||||
|
|
|
@ -1,5 +1,16 @@
|
|||
use std::default;
|
||||
use std::ffi::c_void;
|
||||
use std::ptr::null;
|
||||
|
||||
use libc::c_char;
|
||||
use libc::size_t;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::string_c::create_string;
|
||||
use crate::string_c::free_rust_string;
|
||||
use crate::util::free_binding;
|
||||
|
||||
#[repr(C)]
|
||||
pub struct RustArray {
|
||||
array: *mut u32,
|
||||
|
@ -20,6 +31,22 @@ impl RustArray {
|
|||
}
|
||||
}
|
||||
|
||||
impl std::default::Default for RustArray {
|
||||
fn default() -> Self {
|
||||
RustArray {
|
||||
array: std::ptr::null_mut(),
|
||||
len: 0,
|
||||
cap: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<u32>> for RustArray {
|
||||
fn from(vec: Vec<u32>) -> Self {
|
||||
RustArray::from_vec(vec)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn free_rust_array(array: RustArray) {
|
||||
let RustArray { array, len, cap } = array;
|
||||
|
@ -27,3 +54,129 @@ pub extern "C" fn free_rust_array(array: RustArray) {
|
|||
Vec::from_raw_parts(array, len, cap);
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub enum Value {
|
||||
None(()),
|
||||
RustArray(RustArray),
|
||||
U32(u32),
|
||||
Ptr(*mut c_void),
|
||||
}
|
||||
|
||||
macro_rules! impl_from_for_enum {
|
||||
($enum_name:ident, $($variant:ident => $type:ty),*) => {
|
||||
$(
|
||||
impl From<$type> for $enum_name {
|
||||
fn from(value: $type) -> Self {
|
||||
$enum_name::$variant(value.into())
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
impl_from_for_enum!(Value, None => (), RustArray => RustArray, RustArray => Vec<u32>, U32 => u32, Ptr => *mut c_void);
|
||||
|
||||
#[repr(C)]
|
||||
pub struct RustResult {
|
||||
pub success: bool,
|
||||
pub value: Value,
|
||||
pub error: *const c_char,
|
||||
}
|
||||
|
||||
impl RustResult {
|
||||
pub fn from_ptr(value: *mut c_void) -> Self {
|
||||
RustResult {
|
||||
success: true,
|
||||
value: Value::Ptr(value),
|
||||
error: std::ptr::null(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_error(error: String) -> Self {
|
||||
RustResult {
|
||||
success: false,
|
||||
value: Value::None(()),
|
||||
error: create_string(&error),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<Result<T>> for RustResult
|
||||
where
|
||||
T: Into<Value>,
|
||||
{
|
||||
fn from(value: error::Result<T>) -> Self {
|
||||
match value {
|
||||
Ok(v) => RustResult {
|
||||
success: true,
|
||||
value: v.into(),
|
||||
error: null(),
|
||||
},
|
||||
Err(e) => RustResult {
|
||||
success: false,
|
||||
value: Value::None(()),
|
||||
error: create_string(&e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn free_rust_result(result: RustResult) {
|
||||
match result.value {
|
||||
Value::RustArray(array) => {
|
||||
if !array.array.is_null() {
|
||||
free_rust_array(array);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
unsafe {
|
||||
if !result.error.is_null() {
|
||||
free_rust_string(result.error as *mut c_char);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn free_rust_error(error: *const c_char) {
|
||||
unsafe {
|
||||
if !error.is_null() {
|
||||
free_rust_string(error as *mut c_char);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: move to common
/// Converts a C string pointer into a `&str`, or returns early from the
/// *enclosing function* with a failed `RustResult`.
///
/// The enclosing function must therefore return `RustResult`. An early
/// return happens when the pointer is null or when the bytes are not valid
/// UTF-8. The null check matters because `CStr::from_ptr` requires a valid,
/// NUL-terminated pointer.
///
/// All paths are fully qualified so that call sites do not need `CStr` or
/// `RustResult` in scope.
#[macro_export]
macro_rules! cstr_to_str {
    ($cstr:expr) => {
        unsafe {
            let raw: *const ::std::ffi::c_char = $cstr;
            if raw.is_null() {
                return $crate::array::RustResult::from_error(
                    "null pointer passed where a C string was expected".to_string(),
                );
            }
            match ::std::ffi::CStr::from_ptr(raw).to_str() {
                Ok(s) => s,
                Err(e) => return $crate::array::RustResult::from_error(e.to_string()),
            }
        }
    };
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn test_enum_with_array() -> RustResult {
|
||||
let array = vec![1, 2, 3];
|
||||
RustResult::from(Result::Ok(array))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn test_enum_with_ptr() -> RustResult {
|
||||
let ptr = Box::into_raw(Box::new(1 as u32));
|
||||
RustResult::from(Result::Ok(ptr as *mut c_void))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn free_test_ptr(ptr: *mut c_void) {
|
||||
if ptr.is_null() {
|
||||
return;
|
||||
}
|
||||
free_binding::<u32>(ptr);
|
||||
}
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
use core::fmt;
|
||||
use core::{fmt, str};
|
||||
|
||||
use serde_json as json;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TantivyBindingError {
|
||||
JsonError(serde_json::Error),
|
||||
TantivyError(tantivy::TantivyError),
|
||||
InvalidArgument(String),
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
|
@ -14,10 +16,18 @@ impl From<serde_json::Error> for TantivyBindingError {
|
|||
}
|
||||
}
|
||||
|
||||
/// Wraps a tantivy error in `TantivyBindingError::TantivyError`, which lets
/// `?` be used on tantivy results.
impl From<tantivy::TantivyError> for TantivyBindingError {
    fn from(value: tantivy::TantivyError) -> Self {
        Self::TantivyError(value)
    }
}
|
||||
|
||||
impl fmt::Display for TantivyBindingError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
TantivyBindingError::JsonError(e) => write!(f, "JsonError: {}", e),
|
||||
TantivyBindingError::TantivyError(e) => write!(f, "TantivyError: {}", e),
|
||||
TantivyBindingError::InvalidArgument(e) => write!(f, "InvalidArgument: {}", e),
|
||||
TantivyBindingError::InternalError(e) => write!(f, "InternalError: {}", e),
|
||||
}
|
||||
}
|
||||
|
@ -27,9 +37,17 @@ impl std::error::Error for TantivyBindingError {
|
|||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
TantivyBindingError::JsonError(e) => Some(e),
|
||||
TantivyBindingError::TantivyError(e) => Some(e),
|
||||
TantivyBindingError::InvalidArgument(_) => None,
|
||||
TantivyBindingError::InternalError(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<str::Utf8Error> for TantivyBindingError {
|
||||
fn from(value: str::Utf8Error) -> Self {
|
||||
TantivyBindingError::InternalError(value.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, TantivyBindingError>;
|
||||
|
|
|
@ -10,6 +10,8 @@ use crate::log::init_log;
|
|||
use crate::util::make_bounds;
|
||||
use crate::vec_collector::VecCollector;
|
||||
|
||||
use crate::error::{Result, TantivyBindingError};
|
||||
|
||||
pub(crate) struct IndexReaderWrapper {
|
||||
pub(crate) field_name: String,
|
||||
pub(crate) field: Field,
|
||||
|
@ -19,15 +21,15 @@ pub(crate) struct IndexReaderWrapper {
|
|||
}
|
||||
|
||||
impl IndexReaderWrapper {
|
||||
pub fn load(path: &str) -> IndexReaderWrapper {
|
||||
pub fn load(path: &str) -> Result<IndexReaderWrapper> {
|
||||
init_log();
|
||||
|
||||
let index = Index::open_in_dir(path).unwrap();
|
||||
let index = Index::open_in_dir(path)?;
|
||||
|
||||
IndexReaderWrapper::from_index(Arc::new(index))
|
||||
}
|
||||
|
||||
pub fn from_index(index: Arc<Index>) -> IndexReaderWrapper {
|
||||
pub fn from_index(index: Arc<Index>) -> Result<IndexReaderWrapper> {
|
||||
let field = index.schema().fields().next().unwrap().0;
|
||||
let schema = index.schema();
|
||||
let field_name = String::from(schema.get_field_name(field));
|
||||
|
@ -39,47 +41,51 @@ impl IndexReaderWrapper {
|
|||
let reader = index
|
||||
.reader_builder()
|
||||
.reload_policy(ReloadPolicy::OnCommit) // OnCommit serve for growing segment.
|
||||
.try_into()
|
||||
.unwrap();
|
||||
reader.reload().unwrap();
|
||||
.try_into()?;
|
||||
reader.reload()?;
|
||||
|
||||
IndexReaderWrapper {
|
||||
Ok(IndexReaderWrapper {
|
||||
field_name,
|
||||
field,
|
||||
reader,
|
||||
index,
|
||||
id_field,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn reload(&self) {
|
||||
self.reader.reload().unwrap();
|
||||
pub fn reload(&self) -> Result<()> {
|
||||
self.reader.reload()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn count(&self) -> u32 {
|
||||
let metas = self.index.searchable_segment_metas().unwrap();
|
||||
pub fn count(&self) -> Result<u32> {
|
||||
let metas = self.index.searchable_segment_metas()?;
|
||||
let mut sum: u32 = 0;
|
||||
for meta in metas {
|
||||
sum += meta.max_doc();
|
||||
}
|
||||
sum
|
||||
Ok(sum)
|
||||
}
|
||||
|
||||
pub(crate) fn search(&self, q: &dyn Query) -> Vec<u32> {
|
||||
pub(crate) fn search(&self, q: &dyn Query) -> Result<Vec<u32>> {
|
||||
let searcher = self.reader.searcher();
|
||||
match self.id_field {
|
||||
Some(_) => {
|
||||
// newer version with doc_id.
|
||||
searcher.search(q, &DocIdCollector {}).unwrap()
|
||||
searcher
|
||||
.search(q, &DocIdCollector {})
|
||||
.map_err(TantivyBindingError::TantivyError)
|
||||
}
|
||||
None => {
|
||||
// older version without doc_id, only one segment.
|
||||
searcher.search(q, &VecCollector {}).unwrap()
|
||||
searcher
|
||||
.search(q, &VecCollector {})
|
||||
.map_err(TantivyBindingError::TantivyError)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn term_query_i64(&self, term: i64) -> Vec<u32> {
|
||||
pub fn term_query_i64(&self, term: i64) -> Result<Vec<u32>> {
|
||||
let q = TermQuery::new(
|
||||
Term::from_field_i64(self.field, term),
|
||||
IndexRecordOption::Basic,
|
||||
|
@ -87,7 +93,11 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn lower_bound_range_query_i64(&self, lower_bound: i64, inclusive: bool) -> Vec<u32> {
|
||||
pub fn lower_bound_range_query_i64(
|
||||
&self,
|
||||
lower_bound: i64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_i64_bounds(
|
||||
self.field_name.to_string(),
|
||||
make_bounds(lower_bound, inclusive),
|
||||
|
@ -96,7 +106,11 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn upper_bound_range_query_i64(&self, upper_bound: i64, inclusive: bool) -> Vec<u32> {
|
||||
pub fn upper_bound_range_query_i64(
|
||||
&self,
|
||||
upper_bound: i64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_i64_bounds(
|
||||
self.field_name.to_string(),
|
||||
Bound::Unbounded,
|
||||
|
@ -111,14 +125,14 @@ impl IndexReaderWrapper {
|
|||
upper_bound: i64,
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> Vec<u32> {
|
||||
) -> Result<Vec<u32>> {
|
||||
let lb = make_bounds(lower_bound, lb_inclusive);
|
||||
let ub = make_bounds(upper_bound, ub_inclusive);
|
||||
let q = RangeQuery::new_i64_bounds(self.field_name.to_string(), lb, ub);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn term_query_f64(&self, term: f64) -> Vec<u32> {
|
||||
pub fn term_query_f64(&self, term: f64) -> Result<Vec<u32>> {
|
||||
let q = TermQuery::new(
|
||||
Term::from_field_f64(self.field, term),
|
||||
IndexRecordOption::Basic,
|
||||
|
@ -126,7 +140,11 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn lower_bound_range_query_f64(&self, lower_bound: f64, inclusive: bool) -> Vec<u32> {
|
||||
pub fn lower_bound_range_query_f64(
|
||||
&self,
|
||||
lower_bound: f64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_f64_bounds(
|
||||
self.field_name.to_string(),
|
||||
make_bounds(lower_bound, inclusive),
|
||||
|
@ -135,7 +153,11 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn upper_bound_range_query_f64(&self, upper_bound: f64, inclusive: bool) -> Vec<u32> {
|
||||
pub fn upper_bound_range_query_f64(
|
||||
&self,
|
||||
upper_bound: f64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_f64_bounds(
|
||||
self.field_name.to_string(),
|
||||
Bound::Unbounded,
|
||||
|
@ -150,14 +172,14 @@ impl IndexReaderWrapper {
|
|||
upper_bound: f64,
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> Vec<u32> {
|
||||
) -> Result<Vec<u32>> {
|
||||
let lb = make_bounds(lower_bound, lb_inclusive);
|
||||
let ub = make_bounds(upper_bound, ub_inclusive);
|
||||
let q = RangeQuery::new_f64_bounds(self.field_name.to_string(), lb, ub);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn term_query_bool(&self, term: bool) -> Vec<u32> {
|
||||
pub fn term_query_bool(&self, term: bool) -> Result<Vec<u32>> {
|
||||
let q = TermQuery::new(
|
||||
Term::from_field_bool(self.field, term),
|
||||
IndexRecordOption::Basic,
|
||||
|
@ -165,7 +187,7 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn term_query_keyword(&self, term: &str) -> Vec<u32> {
|
||||
pub fn term_query_keyword(&self, term: &str) -> Result<Vec<u32>> {
|
||||
let q = TermQuery::new(
|
||||
Term::from_field_text(self.field, term),
|
||||
IndexRecordOption::Basic,
|
||||
|
@ -173,7 +195,11 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn lower_bound_range_query_keyword(&self, lower_bound: &str, inclusive: bool) -> Vec<u32> {
|
||||
pub fn lower_bound_range_query_keyword(
|
||||
&self,
|
||||
lower_bound: &str,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_str_bounds(
|
||||
self.field_name.to_string(),
|
||||
make_bounds(lower_bound, inclusive),
|
||||
|
@ -182,7 +208,11 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn upper_bound_range_query_keyword(&self, upper_bound: &str, inclusive: bool) -> Vec<u32> {
|
||||
pub fn upper_bound_range_query_keyword(
|
||||
&self,
|
||||
upper_bound: &str,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_str_bounds(
|
||||
self.field_name.to_string(),
|
||||
Bound::Unbounded,
|
||||
|
@ -197,21 +227,21 @@ impl IndexReaderWrapper {
|
|||
upper_bound: &str,
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> Vec<u32> {
|
||||
) -> Result<Vec<u32>> {
|
||||
let lb = make_bounds(lower_bound, lb_inclusive);
|
||||
let ub = make_bounds(upper_bound, ub_inclusive);
|
||||
let q = RangeQuery::new_str_bounds(self.field_name.to_string(), lb, ub);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn prefix_query_keyword(&self, prefix: &str) -> Vec<u32> {
|
||||
pub fn prefix_query_keyword(&self, prefix: &str) -> Result<Vec<u32>> {
|
||||
let escaped = regex::escape(prefix);
|
||||
let pattern = format!("{}(.|\n)*", escaped);
|
||||
self.regex_query(&pattern)
|
||||
}
|
||||
|
||||
pub fn regex_query(&self, pattern: &str) -> Vec<u32> {
|
||||
let q = RegexQuery::from_pattern(&pattern, self.field).unwrap();
|
||||
pub fn regex_query(&self, pattern: &str) -> Result<Vec<u32>> {
|
||||
let q = RegexQuery::from_pattern(&pattern, self.field)?;
|
||||
self.search(&q)
|
||||
}
|
||||
}
|
||||
|
@ -244,10 +274,10 @@ mod test {
|
|||
index_writer.commit().unwrap();
|
||||
|
||||
let index_shared = Arc::new(index);
|
||||
let index_reader_wrapper = IndexReaderWrapper::from_index(index_shared);
|
||||
let mut res = index_reader_wrapper.prefix_query_keyword("^");
|
||||
let index_reader_wrapper = IndexReaderWrapper::from_index(index_shared).unwrap();
|
||||
let mut res = index_reader_wrapper.prefix_query_keyword("^").unwrap();
|
||||
assert_eq!(res.len(), 1);
|
||||
res = index_reader_wrapper.prefix_query_keyword("$");
|
||||
res = index_reader_wrapper.prefix_query_keyword("$").unwrap();
|
||||
assert_eq!(res.len(), 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,18 +1,25 @@
|
|||
use std::ffi::{c_char, c_void, CStr};
|
||||
use std::{
|
||||
ffi::{c_char, c_void, CStr},
|
||||
ptr::null,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
array::RustArray,
|
||||
array::{RustArray, RustResult},
|
||||
cstr_to_str,
|
||||
index_reader::IndexReaderWrapper,
|
||||
string_c::create_string,
|
||||
util::{create_binding, free_binding},
|
||||
util_c::tantivy_index_exist,
|
||||
};
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_load_index(path: *const c_char) -> *mut c_void {
|
||||
pub extern "C" fn tantivy_load_index(path: *const c_char) -> RustResult {
|
||||
assert!(tantivy_index_exist(path));
|
||||
let path_str = unsafe { CStr::from_ptr(path) };
|
||||
let wrapper = IndexReaderWrapper::load(path_str.to_str().unwrap());
|
||||
create_binding(wrapper)
|
||||
let path_str = cstr_to_str!(path);
|
||||
match IndexReaderWrapper::load(path_str) {
|
||||
Ok(w) => RustResult::from_ptr(create_binding(w)),
|
||||
Err(e) => RustResult::from_error(e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -22,26 +29,21 @@ pub extern "C" fn tantivy_free_index_reader(ptr: *mut c_void) {
|
|||
|
||||
// -------------------------query--------------------
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_reload_index(ptr: *mut c_void) {
|
||||
pub extern "C" fn tantivy_reload_index(ptr: *mut c_void) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
(*real).reload();
|
||||
}
|
||||
unsafe { (*real).reload().into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_index_count(ptr: *mut c_void) -> u32 {
|
||||
pub extern "C" fn tantivy_index_count(ptr: *mut c_void) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe { (*real).count() }
|
||||
unsafe { (*real).count().into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_term_query_i64(ptr: *mut c_void, term: i64) -> RustArray {
|
||||
pub extern "C" fn tantivy_term_query_i64(ptr: *mut c_void, term: i64) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).term_query_i64(term);
|
||||
RustArray::from_vec(hits)
|
||||
}
|
||||
unsafe { (*real).term_query_i64(term).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -49,11 +51,12 @@ pub extern "C" fn tantivy_lower_bound_range_query_i64(
|
|||
ptr: *mut c_void,
|
||||
lower_bound: i64,
|
||||
inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).lower_bound_range_query_i64(lower_bound, inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.lower_bound_range_query_i64(lower_bound, inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,11 +65,12 @@ pub extern "C" fn tantivy_upper_bound_range_query_i64(
|
|||
ptr: *mut c_void,
|
||||
upper_bound: i64,
|
||||
inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).upper_bound_range_query_i64(upper_bound, inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.upper_bound_range_query_i64(upper_bound, inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,21 +81,19 @@ pub extern "C" fn tantivy_range_query_i64(
|
|||
upper_bound: i64,
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).range_query_i64(lower_bound, upper_bound, lb_inclusive, ub_inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.range_query_i64(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustArray {
|
||||
pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).term_query_f64(term);
|
||||
RustArray::from_vec(hits)
|
||||
}
|
||||
unsafe { (*real).term_query_f64(term).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -99,11 +101,12 @@ pub extern "C" fn tantivy_lower_bound_range_query_f64(
|
|||
ptr: *mut c_void,
|
||||
lower_bound: f64,
|
||||
inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).lower_bound_range_query_f64(lower_bound, inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.lower_bound_range_query_f64(lower_bound, inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -112,11 +115,12 @@ pub extern "C" fn tantivy_upper_bound_range_query_f64(
|
|||
ptr: *mut c_void,
|
||||
upper_bound: f64,
|
||||
inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).upper_bound_range_query_f64(upper_bound, inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.upper_bound_range_query_f64(upper_bound, inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,31 +131,26 @@ pub extern "C" fn tantivy_range_query_f64(
|
|||
upper_bound: f64,
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).range_query_f64(lower_bound, upper_bound, lb_inclusive, ub_inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.range_query_f64(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_term_query_bool(ptr: *mut c_void, term: bool) -> RustArray {
|
||||
pub extern "C" fn tantivy_term_query_bool(ptr: *mut c_void, term: bool) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let hits = (*real).term_query_bool(term);
|
||||
RustArray::from_vec(hits)
|
||||
}
|
||||
unsafe { (*real).term_query_bool(term).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_char) -> RustArray {
|
||||
pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_char) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let c_str = CStr::from_ptr(term);
|
||||
let hits = (*real).term_query_keyword(c_str.to_str().unwrap());
|
||||
RustArray::from_vec(hits)
|
||||
}
|
||||
let term = cstr_to_str!(term);
|
||||
unsafe { (*real).term_query_keyword(term).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -159,13 +158,13 @@ pub extern "C" fn tantivy_lower_bound_range_query_keyword(
|
|||
ptr: *mut c_void,
|
||||
lower_bound: *const c_char,
|
||||
inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
let lower_bound = cstr_to_str!(lower_bound);
|
||||
unsafe {
|
||||
let c_lower_bound = CStr::from_ptr(lower_bound);
|
||||
let hits =
|
||||
(*real).lower_bound_range_query_keyword(c_lower_bound.to_str().unwrap(), inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.lower_bound_range_query_keyword(lower_bound, inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,13 +173,13 @@ pub extern "C" fn tantivy_upper_bound_range_query_keyword(
|
|||
ptr: *mut c_void,
|
||||
upper_bound: *const c_char,
|
||||
inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
let upper_bound = cstr_to_str!(upper_bound);
|
||||
unsafe {
|
||||
let c_upper_bound = CStr::from_ptr(upper_bound);
|
||||
let hits =
|
||||
(*real).upper_bound_range_query_keyword(c_upper_bound.to_str().unwrap(), inclusive);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.upper_bound_range_query_keyword(upper_bound, inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -191,18 +190,14 @@ pub extern "C" fn tantivy_range_query_keyword(
|
|||
upper_bound: *const c_char,
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
let lower_bound = cstr_to_str!(lower_bound);
|
||||
let upper_bound = cstr_to_str!(upper_bound);
|
||||
unsafe {
|
||||
let c_lower_bound = CStr::from_ptr(lower_bound);
|
||||
let c_upper_bound = CStr::from_ptr(upper_bound);
|
||||
let hits = (*real).range_query_keyword(
|
||||
c_lower_bound.to_str().unwrap(),
|
||||
c_upper_bound.to_str().unwrap(),
|
||||
lb_inclusive,
|
||||
ub_inclusive,
|
||||
);
|
||||
RustArray::from_vec(hits)
|
||||
(*real)
|
||||
.range_query_keyword(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -210,21 +205,15 @@ pub extern "C" fn tantivy_range_query_keyword(
|
|||
pub extern "C" fn tantivy_prefix_query_keyword(
|
||||
ptr: *mut c_void,
|
||||
prefix: *const c_char,
|
||||
) -> RustArray {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let c_str = CStr::from_ptr(prefix);
|
||||
let hits = (*real).prefix_query_keyword(c_str.to_str().unwrap());
|
||||
RustArray::from_vec(hits)
|
||||
}
|
||||
let prefix = cstr_to_str!(prefix);
|
||||
unsafe { (*real).prefix_query_keyword(prefix).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_regex_query(ptr: *mut c_void, pattern: *const c_char) -> RustArray {
|
||||
pub extern "C" fn tantivy_regex_query(ptr: *mut c_void, pattern: *const c_char) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let c_str = CStr::from_ptr(pattern);
|
||||
let hits = (*real).regex_query(c_str.to_str().unwrap());
|
||||
RustArray::from_vec(hits)
|
||||
}
|
||||
let pattern = cstr_to_str!(pattern);
|
||||
unsafe { (*real).regex_query(pattern).into() }
|
||||
}
|
||||
|
|
|
@ -4,12 +4,13 @@ use tantivy::{
|
|||
Term,
|
||||
};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::{index_reader::IndexReaderWrapper, tokenizer::standard_analyzer};
|
||||
|
||||
impl IndexReaderWrapper {
|
||||
// split the query string into multiple tokens using index's default tokenizer,
|
||||
// and then execute the disconjunction of term query.
|
||||
pub(crate) fn match_query(&self, q: &str) -> Vec<u32> {
|
||||
pub(crate) fn match_query(&self, q: &str) -> Result<Vec<u32>> {
|
||||
// clone the tokenizer to make `match_query` thread-safe.
|
||||
let mut tokenizer = self
|
||||
.index
|
||||
|
|
|
@ -1,22 +1,22 @@
|
|||
use std::{ffi::CStr};
|
||||
use std::{ffi::CStr, ptr::null};
|
||||
|
||||
use libc::{c_char, c_void};
|
||||
|
||||
use crate::{
|
||||
array::RustArray,
|
||||
string_c::c_str_to_str,
|
||||
array::{RustArray, RustResult},
|
||||
cstr_to_str,
|
||||
index_reader::IndexReaderWrapper,
|
||||
tokenizer::create_tokenizer,
|
||||
log::init_log,
|
||||
string_c::{c_str_to_str, create_string},
|
||||
tokenizer::create_tokenizer,
|
||||
};
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_match_query(ptr: *mut c_void, query: *const c_char) -> RustArray {
|
||||
pub extern "C" fn tantivy_match_query(ptr: *mut c_void, query: *const c_char) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
unsafe {
|
||||
let c_str = CStr::from_ptr(query);
|
||||
let hits = (*real).match_query(c_str.to_str().unwrap());
|
||||
RustArray::from_vec(hits)
|
||||
let query = cstr_to_str!(query);
|
||||
(*real).match_query(query).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,21 +25,17 @@ pub extern "C" fn tantivy_register_tokenizer(
|
|||
ptr: *mut c_void,
|
||||
tokenizer_name: *const c_char,
|
||||
analyzer_params: *const c_char,
|
||||
) {
|
||||
) -> RustResult {
|
||||
init_log();
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
let tokenizer_name_str = unsafe { CStr::from_ptr(tokenizer_name) };
|
||||
let params = unsafe{c_str_to_str(analyzer_params).to_string()};
|
||||
let analyzer = create_tokenizer(¶ms);
|
||||
let tokenizer_name = cstr_to_str!(tokenizer_name);
|
||||
let params = cstr_to_str!(analyzer_params);
|
||||
let analyzer = create_tokenizer(params);
|
||||
match analyzer {
|
||||
Ok(text_analyzer) => unsafe {
|
||||
(*real).register_tokenizer(
|
||||
String::from(tokenizer_name_str.to_str().unwrap()),
|
||||
text_analyzer,
|
||||
);
|
||||
},
|
||||
Err(err) => {
|
||||
panic!("create tokenizer failed with error: {} param: {}", err.to_string(), params);
|
||||
(*real).register_tokenizer(String::from(tokenizer_name), text_analyzer);
|
||||
Ok(()).into()
|
||||
},
|
||||
Err(err) => RustResult::from_error(err.to_string()),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ use tantivy::{doc, tokenizer, Document, Index, IndexWriter};
|
|||
|
||||
use crate::data_type::TantivyDataType;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::index_reader::IndexReaderWrapper;
|
||||
use crate::log::init_log;
|
||||
|
||||
|
@ -27,7 +28,7 @@ impl IndexWriterWrapper {
|
|||
path: String,
|
||||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
) -> IndexWriterWrapper {
|
||||
) -> Result<IndexWriterWrapper> {
|
||||
init_log();
|
||||
|
||||
let field: Field;
|
||||
|
@ -55,171 +56,170 @@ impl IndexWriterWrapper {
|
|||
}
|
||||
let id_field = schema_builder.add_i64_field("doc_id", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_dir(path.clone(), schema).unwrap();
|
||||
let index_writer = index
|
||||
.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)
|
||||
.unwrap();
|
||||
IndexWriterWrapper {
|
||||
let index = Index::create_in_dir(path.clone(), schema)?;
|
||||
let index_writer =
|
||||
index.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)?;
|
||||
Ok(IndexWriterWrapper {
|
||||
field,
|
||||
index_writer,
|
||||
id_field,
|
||||
index: Arc::new(index),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_reader(&self) -> IndexReaderWrapper {
|
||||
pub fn create_reader(&self) -> Result<IndexReaderWrapper> {
|
||||
IndexReaderWrapper::from_index(self.index.clone())
|
||||
}
|
||||
|
||||
pub fn add_i8(&mut self, data: i8, offset: i64) {
|
||||
pub fn add_i8(&mut self, data: i8, offset: i64) -> Result<()> {
|
||||
self.add_i64(data.into(), offset)
|
||||
}
|
||||
|
||||
pub fn add_i16(&mut self, data: i16, offset: i64) {
|
||||
pub fn add_i16(&mut self, data: i16, offset: i64) -> Result<()> {
|
||||
self.add_i64(data.into(), offset)
|
||||
}
|
||||
|
||||
pub fn add_i32(&mut self, data: i32, offset: i64) {
|
||||
pub fn add_i32(&mut self, data: i32, offset: i64) -> Result<()> {
|
||||
self.add_i64(data.into(), offset)
|
||||
}
|
||||
|
||||
pub fn add_i64(&mut self, data: i64, offset: i64) {
|
||||
self.index_writer
|
||||
.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))
|
||||
.unwrap();
|
||||
pub fn add_i64(&mut self, data: i64, offset: i64) -> Result<()> {
|
||||
let _ = self.index_writer.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_f32(&mut self, data: f32, offset: i64) {
|
||||
pub fn add_f32(&mut self, data: f32, offset: i64) -> Result<()> {
|
||||
self.add_f64(data.into(), offset)
|
||||
}
|
||||
|
||||
pub fn add_f64(&mut self, data: f64, offset: i64) {
|
||||
self.index_writer
|
||||
.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))
|
||||
.unwrap();
|
||||
pub fn add_f64(&mut self, data: f64, offset: i64) -> Result<()> {
|
||||
let _ = self.index_writer.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_bool(&mut self, data: bool, offset: i64) {
|
||||
self.index_writer
|
||||
.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))
|
||||
.unwrap();
|
||||
pub fn add_bool(&mut self, data: bool, offset: i64) -> Result<()> {
|
||||
let _ = self.index_writer.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_string(&mut self, data: &str, offset: i64) {
|
||||
self.index_writer
|
||||
.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))
|
||||
.unwrap();
|
||||
pub fn add_string(&mut self, data: &str, offset: i64) -> Result<()> {
|
||||
let _ = self.index_writer.add_document(doc!(
|
||||
self.field => data,
|
||||
self.id_field => offset,
|
||||
))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) {
|
||||
pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) {
|
||||
pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) {
|
||||
pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) {
|
||||
pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) {
|
||||
pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as f64);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) {
|
||||
pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) {
|
||||
pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) {
|
||||
pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
for element in datas {
|
||||
let data = unsafe { CStr::from_ptr(*element) };
|
||||
document.add_field_value(self.field, data.to_str().unwrap());
|
||||
document.add_field_value(self.field, data.to_str()?);
|
||||
}
|
||||
document.add_i64(self.id_field, offset);
|
||||
self.index_writer.add_document(document).unwrap();
|
||||
let _ = self.index_writer.add_document(document)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn manual_merge(&mut self) {
|
||||
let metas = self
|
||||
.index_writer
|
||||
.index()
|
||||
.searchable_segment_metas()
|
||||
.unwrap();
|
||||
fn manual_merge(&mut self) -> Result<()> {
|
||||
let metas = self.index_writer.index().searchable_segment_metas()?;
|
||||
let policy = self.index_writer.get_merge_policy();
|
||||
let candidates = policy.compute_merge_candidates(metas.as_slice());
|
||||
for candidate in candidates {
|
||||
self.index_writer
|
||||
.merge(candidate.0.as_slice())
|
||||
.wait()
|
||||
.unwrap();
|
||||
self.index_writer.merge(candidate.0.as_slice()).wait()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn finish(mut self) {
|
||||
self.index_writer.commit().unwrap();
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
self.index_writer.commit()?;
|
||||
// self.manual_merge();
|
||||
block_on(self.index_writer.garbage_collect_files()).unwrap();
|
||||
self.index_writer.wait_merging_threads().unwrap();
|
||||
block_on(self.index_writer.garbage_collect_files())?;
|
||||
self.index_writer.wait_merging_threads()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn commit(&mut self) {
|
||||
self.index_writer.commit().unwrap();
|
||||
pub(crate) fn commit(&mut self) -> Result<()> {
|
||||
self.index_writer.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
use core::slice;
|
||||
use std::ffi::{c_char, c_void, CStr};
|
||||
|
||||
use tantivy::Index;
|
||||
|
||||
use crate::{
|
||||
array::RustResult,
|
||||
cstr_to_str,
|
||||
data_type::TantivyDataType,
|
||||
error::Result,
|
||||
index_writer::IndexWriterWrapper,
|
||||
util::{create_binding, free_binding},
|
||||
};
|
||||
|
@ -14,17 +19,19 @@ pub extern "C" fn tantivy_create_index(
|
|||
path: *const c_char,
|
||||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
) -> *mut c_void {
|
||||
let field_name_str = unsafe { CStr::from_ptr(field_name) };
|
||||
let path_str = unsafe { CStr::from_ptr(path) };
|
||||
let wrapper = IndexWriterWrapper::new(
|
||||
String::from(field_name_str.to_str().unwrap()),
|
||||
) -> RustResult {
|
||||
let field_name_str = cstr_to_str!(field_name);
|
||||
let path_str = cstr_to_str!(path);
|
||||
match IndexWriterWrapper::new(
|
||||
String::from(field_name_str),
|
||||
data_type,
|
||||
String::from(path_str.to_str().unwrap()),
|
||||
String::from(path_str),
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes,
|
||||
);
|
||||
create_binding(wrapper)
|
||||
) {
|
||||
Ok(wrapper) => RustResult::from_ptr(create_binding(wrapper)),
|
||||
Err(e) => RustResult::from_error(e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -35,24 +42,25 @@ pub extern "C" fn tantivy_free_index_writer(ptr: *mut c_void) {
|
|||
// tantivy_finish_index will finish the index writer, and the index writer can't be used any more.
|
||||
// After this was called, you should reset the pointer to null.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_finish_index(ptr: *mut c_void) {
|
||||
pub extern "C" fn tantivy_finish_index(ptr: *mut c_void) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe { Box::from_raw(real).finish() }
|
||||
unsafe { Box::from_raw(real).finish().into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_commit_index(ptr: *mut c_void) {
|
||||
pub extern "C" fn tantivy_commit_index(ptr: *mut c_void) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
(*real).commit();
|
||||
}
|
||||
unsafe { (*real).commit().into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> *mut c_void {
|
||||
pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> RustResult {
|
||||
let writer = ptr as *mut IndexWriterWrapper;
|
||||
let reader = unsafe { (*writer).create_reader() };
|
||||
create_binding(reader)
|
||||
match reader {
|
||||
Ok(r) => RustResult::from_ptr(create_binding(r)),
|
||||
Err(e) => RustResult::from_error(e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------build--------------------
|
||||
|
@ -62,14 +70,10 @@ pub extern "C" fn tantivy_index_add_int8s(
|
|||
array: *const i8,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_i8(*data, offset_begin + (index as i64));
|
||||
}
|
||||
}
|
||||
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i8, &mut (*real)).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -78,14 +82,10 @@ pub extern "C" fn tantivy_index_add_int16s(
|
|||
array: *const i16,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_i16(*data, offset_begin + (index as i64));
|
||||
}
|
||||
}
|
||||
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i16, &mut (*real)).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -94,14 +94,10 @@ pub extern "C" fn tantivy_index_add_int32s(
|
|||
array: *const i32,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_i32(*data, offset_begin + (index as i64));
|
||||
}
|
||||
}
|
||||
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i32, &mut (*real)).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -110,14 +106,25 @@ pub extern "C" fn tantivy_index_add_int64s(
|
|||
array: *const i64,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
|
||||
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i64, &mut (*real)).into() }
|
||||
}
|
||||
|
||||
fn execute<T: Copy>(
|
||||
arr: &[T],
|
||||
offset: i64,
|
||||
mut e: fn(&mut IndexWriterWrapper, T, i64) -> Result<()>,
|
||||
w: &mut IndexWriterWrapper,
|
||||
) -> Result<()> {
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_i64(*data, offset_begin + (index as i64));
|
||||
e(w, *data, offset + (index as i64))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -126,14 +133,10 @@ pub extern "C" fn tantivy_index_add_f32s(
|
|||
array: *const f32,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_f32(*data, offset_begin + (index as i64));
|
||||
}
|
||||
}
|
||||
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f32, &mut (*real)).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -142,14 +145,10 @@ pub extern "C" fn tantivy_index_add_f64s(
|
|||
array: *const f64,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_f64(*data, offset_begin + (index as i64));
|
||||
}
|
||||
}
|
||||
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f64, &mut (*real)).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -158,23 +157,31 @@ pub extern "C" fn tantivy_index_add_bools(
|
|||
array: *const bool,
|
||||
len: usize,
|
||||
offset_begin: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
for (index, data) in arr.iter().enumerate() {
|
||||
(*real).add_bool(*data, offset_begin + (index as i64));
|
||||
}
|
||||
execute(
|
||||
arr,
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add_bool,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this is not a very efficient way, since we must call this function many times, which
|
||||
// will bring a lot of overhead caused by the rust binding.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_index_add_string(ptr: *mut c_void, s: *const c_char, offset: i64) {
|
||||
pub extern "C" fn tantivy_index_add_string(
|
||||
ptr: *mut c_void,
|
||||
s: *const c_char,
|
||||
offset: i64,
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let c_str = unsafe { CStr::from_ptr(s) };
|
||||
unsafe { (*real).add_string(c_str.to_str().unwrap(), offset) }
|
||||
let s = cstr_to_str!(s);
|
||||
unsafe { (*real).add_string(s, offset).into() }
|
||||
}
|
||||
|
||||
// --------------------------------------------- array ------------------------------------------
|
||||
|
@ -185,11 +192,11 @@ pub extern "C" fn tantivy_index_add_multi_int8s(
|
|||
array: *const i8,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_i8s(arr, offset)
|
||||
(*real).add_multi_i8s(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -199,11 +206,11 @@ pub extern "C" fn tantivy_index_add_multi_int16s(
|
|||
array: *const i16,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_i16s(arr, offset);
|
||||
(*real).add_multi_i16s(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -213,11 +220,11 @@ pub extern "C" fn tantivy_index_add_multi_int32s(
|
|||
array: *const i32,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_i32s(arr, offset);
|
||||
(*real).add_multi_i32s(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -227,11 +234,11 @@ pub extern "C" fn tantivy_index_add_multi_int64s(
|
|||
array: *const i64,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_i64s(arr, offset);
|
||||
(*real).add_multi_i64s(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -241,11 +248,11 @@ pub extern "C" fn tantivy_index_add_multi_f32s(
|
|||
array: *const f32,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_f32s(arr, offset);
|
||||
(*real).add_multi_f32s(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,11 +262,11 @@ pub extern "C" fn tantivy_index_add_multi_f64s(
|
|||
array: *const f64,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_f64s(arr, offset);
|
||||
(*real).add_multi_f64s(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -269,11 +276,11 @@ pub extern "C" fn tantivy_index_add_multi_bools(
|
|||
array: *const bool,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_bools(arr, offset);
|
||||
(*real).add_multi_bools(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -283,10 +290,10 @@ pub extern "C" fn tantivy_index_add_multi_keywords(
|
|||
array: *const *const c_char,
|
||||
len: usize,
|
||||
offset: i64,
|
||||
) {
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
unsafe {
|
||||
let arr = slice::from_raw_parts(array, len);
|
||||
(*real).add_multi_keywords(arr, offset)
|
||||
(*real).add_multi_keywords(arr, offset).into()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,14 @@ use std::ffi::c_char;
|
|||
use std::ffi::c_void;
|
||||
use std::ffi::CStr;
|
||||
|
||||
use crate::array::RustResult;
|
||||
use crate::cstr_to_str;
|
||||
use crate::error::Result;
|
||||
use crate::index_writer::IndexWriterWrapper;
|
||||
use crate::log::init_log;
|
||||
use crate::string_c::c_str_to_str;
|
||||
use crate::tokenizer::create_tokenizer;
|
||||
use crate::util::create_binding;
|
||||
use crate::string_c::c_str_to_str;
|
||||
use crate::log::init_log;
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_create_text_writer(
|
||||
|
@ -17,13 +20,13 @@ pub extern "C" fn tantivy_create_text_writer(
|
|||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
in_ram: bool,
|
||||
) -> *mut c_void {
|
||||
) -> RustResult {
|
||||
init_log();
|
||||
let field_name_str = unsafe { CStr::from_ptr(field_name).to_str().unwrap() };
|
||||
let path_str = unsafe { CStr::from_ptr(path).to_str().unwrap() };
|
||||
let tokenizer_name_str = unsafe { CStr::from_ptr(tokenizer_name).to_str().unwrap() };
|
||||
let params = unsafe{c_str_to_str(analyzer_params).to_string()};
|
||||
let analyzer = create_tokenizer(¶ms);
|
||||
let field_name_str = cstr_to_str!(field_name);
|
||||
let path_str = cstr_to_str!(path);
|
||||
let tokenizer_name_str = cstr_to_str!(tokenizer_name);
|
||||
let params = cstr_to_str!(analyzer_params);
|
||||
let analyzer = create_tokenizer(params);
|
||||
match analyzer {
|
||||
Ok(text_analyzer) => {
|
||||
let wrapper = IndexWriterWrapper::create_text_writer(
|
||||
|
@ -35,11 +38,12 @@ pub extern "C" fn tantivy_create_text_writer(
|
|||
overall_memory_budget_in_bytes,
|
||||
in_ram,
|
||||
);
|
||||
create_binding(wrapper)
|
||||
RustResult::from_ptr(create_binding(wrapper))
|
||||
}
|
||||
Err(err) => {
|
||||
log::warn!("create tokenizer failed with error: {} param: {}", err.to_string(), params);
|
||||
std::ptr::null_mut()
|
||||
},
|
||||
Err(err) => RustResult::from_error(format!(
|
||||
"create tokenizer failed with error: {} param: {}",
|
||||
err.to_string(),
|
||||
params,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -280,7 +280,7 @@ pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyz
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer> {
|
||||
pub(crate) fn create_tokenizer(params: &str) -> Result<TextAnalyzer> {
|
||||
if params.len() == 0 {
|
||||
return Ok(standard_analyzer(vec![]));
|
||||
}
|
||||
|
|
|
@ -1,31 +1,33 @@
|
|||
use libc::{c_void,c_char};
|
||||
use libc::{c_char, c_void};
|
||||
use tantivy::tokenizer::TextAnalyzer;
|
||||
|
||||
use crate::{
|
||||
array::RustResult,
|
||||
log::init_log,
|
||||
string_c::c_str_to_str,
|
||||
tokenizer::create_tokenizer,
|
||||
util::{create_binding, free_binding},
|
||||
log::init_log,
|
||||
};
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_create_tokenizer(analyzer_params: *const c_char) -> *mut c_void {
|
||||
pub extern "C" fn tantivy_create_tokenizer(analyzer_params: *const c_char) -> RustResult {
|
||||
init_log();
|
||||
let params = unsafe{c_str_to_str(analyzer_params).to_string()};
|
||||
let params = unsafe { c_str_to_str(analyzer_params).to_string() };
|
||||
let analyzer = create_tokenizer(&params);
|
||||
match analyzer {
|
||||
Ok(text_analyzer) => create_binding(text_analyzer),
|
||||
Err(err) => {
|
||||
log::warn!("create tokenizer failed with error: {} param: {}", err.to_string(), params);
|
||||
std::ptr::null_mut()
|
||||
},
|
||||
Ok(text_analyzer) => RustResult::from_ptr(create_binding(text_analyzer)),
|
||||
Err(err) => RustResult::from_error(format!(
|
||||
"create tokenizer failed with error: {} param: {}",
|
||||
err.to_string(),
|
||||
params,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_clone_tokenizer(ptr: *mut c_void) -> *mut c_void {
|
||||
let analyzer=ptr as *mut TextAnalyzer;
|
||||
let clone = unsafe {(*analyzer).clone()};
|
||||
let analyzer = ptr as *mut TextAnalyzer;
|
||||
let clone = unsafe { (*analyzer).clone() };
|
||||
create_binding(clone)
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
#include <set>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/EasyAssert.h"
|
||||
#include "tantivy-binding.h"
|
||||
#include "rust-binding.h"
|
||||
#include "rust-array.h"
|
||||
|
@ -82,18 +84,27 @@ struct TantivyIndexWrapper {
|
|||
uintptr_t num_threads = DEFAULT_NUM_THREADS,
|
||||
uintptr_t overall_memory_budget_in_bytes =
|
||||
DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) {
|
||||
writer_ = tantivy_create_index(field_name,
|
||||
data_type,
|
||||
path,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_create_index(field_name,
|
||||
data_type,
|
||||
path,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to create index: {}",
|
||||
res.result_->error);
|
||||
writer_ = res.result_->value.ptr._0;
|
||||
path_ = std::string(path);
|
||||
}
|
||||
|
||||
// load index. create index reader.
|
||||
explicit TantivyIndexWrapper(const char* path) {
|
||||
assert(tantivy_index_exist(path));
|
||||
reader_ = tantivy_load_index(path);
|
||||
auto res = RustResultWrapper(tantivy_load_index(path));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to load index: {}",
|
||||
res.result_->error);
|
||||
reader_ = res.result_->value.ptr._0;
|
||||
path_ = std::string(path);
|
||||
}
|
||||
|
||||
|
@ -106,13 +117,18 @@ struct TantivyIndexWrapper {
|
|||
uintptr_t num_threads = DEFAULT_NUM_THREADS,
|
||||
uintptr_t overall_memory_budget_in_bytes =
|
||||
DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) {
|
||||
writer_ = tantivy_create_text_writer(field_name,
|
||||
path,
|
||||
tokenizer_name,
|
||||
analyzer_params,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes,
|
||||
in_ram);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_create_text_writer(field_name,
|
||||
path,
|
||||
tokenizer_name,
|
||||
analyzer_params,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes,
|
||||
in_ram));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to create text writer: {}",
|
||||
res.result_->error);
|
||||
writer_ = res.result_->value.ptr._0;
|
||||
path_ = std::string(path);
|
||||
}
|
||||
|
||||
|
@ -120,10 +136,19 @@ struct TantivyIndexWrapper {
|
|||
void
|
||||
create_reader() {
|
||||
if (writer_ != nullptr) {
|
||||
reader_ = tantivy_create_reader_from_writer(writer_);
|
||||
auto res =
|
||||
RustResultWrapper(tantivy_create_reader_from_writer(writer_));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to create reader from writer: {}",
|
||||
res.result_->error);
|
||||
reader_ = res.result_->value.ptr._0;
|
||||
} else if (!path_.empty()) {
|
||||
assert(tantivy_index_exist(path_.c_str()));
|
||||
reader_ = tantivy_load_index(path_.c_str());
|
||||
auto res = RustResultWrapper(tantivy_load_index(path_.c_str()));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to load index: {}",
|
||||
res.result_->error);
|
||||
reader_ = res.result_->value.ptr._0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,8 +160,11 @@ struct TantivyIndexWrapper {
|
|||
register_tokenizer(const char* tokenizer_name,
|
||||
const char* analyzer_params) {
|
||||
if (reader_ != nullptr) {
|
||||
tantivy_register_tokenizer(
|
||||
reader_, tokenizer_name, analyzer_params);
|
||||
auto res = RustResultWrapper(tantivy_register_tokenizer(
|
||||
reader_, tokenizer_name, analyzer_params));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to register tokenizer: {}",
|
||||
res.result_->error);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -146,47 +174,78 @@ struct TantivyIndexWrapper {
|
|||
assert(!finished_);
|
||||
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
tantivy_index_add_bools(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_bools(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add bools: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int8_t>) {
|
||||
tantivy_index_add_int8s(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_int8s(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add int8s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int16_t>) {
|
||||
tantivy_index_add_int16s(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_int16s(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add int16s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int32_t>) {
|
||||
tantivy_index_add_int32s(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_int32s(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add int32s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int64_t>) {
|
||||
tantivy_index_add_int64s(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_int64s(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add int64s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, float>) {
|
||||
tantivy_index_add_f32s(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_f32s(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add f32s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, double>) {
|
||||
tantivy_index_add_f64s(writer_, array, len, offset_begin);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_f64s(writer_, array, len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add f64s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
// TODO: not very efficient, a lot of overhead due to rust-ffi call.
|
||||
for (uintptr_t i = 0; i < len; i++) {
|
||||
tantivy_index_add_string(
|
||||
auto res = RustResultWrapper(tantivy_index_add_string(
|
||||
writer_,
|
||||
static_cast<const std::string*>(array)[i].c_str(),
|
||||
offset_begin + i);
|
||||
offset_begin + i));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add string: {}",
|
||||
res.result_->error);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -201,37 +260,65 @@ struct TantivyIndexWrapper {
|
|||
assert(!finished_);
|
||||
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
tantivy_index_add_multi_bools(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_bools(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi bools: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int8_t>) {
|
||||
tantivy_index_add_multi_int8s(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_int8s(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi int8s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int16_t>) {
|
||||
tantivy_index_add_multi_int16s(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_int16s(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi int16s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int32_t>) {
|
||||
tantivy_index_add_multi_int32s(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_int32s(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi int32s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, int64_t>) {
|
||||
tantivy_index_add_multi_int64s(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_int64s(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi int64s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, float>) {
|
||||
tantivy_index_add_multi_f32s(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_f32s(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi f32s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, double>) {
|
||||
tantivy_index_add_multi_f64s(writer_, array, len, offset);
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_multi_f64s(writer_, array, len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi f64s: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -240,8 +327,11 @@ struct TantivyIndexWrapper {
|
|||
for (uintptr_t i = 0; i < len; i++) {
|
||||
views.push_back(array[i].c_str());
|
||||
}
|
||||
tantivy_index_add_multi_keywords(
|
||||
writer_, views.data(), len, offset);
|
||||
auto res = RustResultWrapper(tantivy_index_add_multi_keywords(
|
||||
writer_, views.data(), len, offset));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add multi keywords: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -256,7 +346,10 @@ struct TantivyIndexWrapper {
|
|||
return;
|
||||
}
|
||||
|
||||
tantivy_finish_index(writer_);
|
||||
auto res = RustResultWrapper(tantivy_finish_index(writer_));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to finish index: {}",
|
||||
res.result_->error);
|
||||
writer_ = nullptr;
|
||||
finished_ = true;
|
||||
}
|
||||
|
@ -264,20 +357,30 @@ struct TantivyIndexWrapper {
|
|||
inline void
|
||||
commit() {
|
||||
if (writer_ != nullptr) {
|
||||
tantivy_commit_index(writer_);
|
||||
auto res = RustResultWrapper(tantivy_commit_index(writer_));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to commit index: {}",
|
||||
res.result_->error);
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
reload() {
|
||||
if (reader_ != nullptr) {
|
||||
tantivy_reload_index(reader_);
|
||||
auto res = RustResultWrapper(tantivy_reload_index(reader_));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to reload index: {}",
|
||||
res.result_->error);
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t
|
||||
count() {
|
||||
return tantivy_index_count(reader_);
|
||||
auto res = RustResultWrapper(tantivy_index_count(reader_));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to get count: {}",
|
||||
res.result_->error);
|
||||
return res.result_->value.u32._0;
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -308,7 +411,14 @@ struct TantivyIndexWrapper {
|
|||
"InvertedIndex.term_query: unsupported data type: {}",
|
||||
typeid(T).name());
|
||||
}();
|
||||
return RustArrayWrapper(array);
|
||||
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.term_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.term_query: invalid result type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -337,7 +447,15 @@ struct TantivyIndexWrapper {
|
|||
"{}",
|
||||
typeid(T).name());
|
||||
}();
|
||||
return RustArrayWrapper(array);
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.lower_bound_range_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(
|
||||
res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.lower_bound_range_query: invalid result "
|
||||
"type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -366,7 +484,15 @@ struct TantivyIndexWrapper {
|
|||
"{}",
|
||||
typeid(T).name());
|
||||
}();
|
||||
return RustArrayWrapper(array);
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.upper_bound_range_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(
|
||||
res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.upper_bound_range_query: invalid result "
|
||||
"type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -406,25 +532,49 @@ struct TantivyIndexWrapper {
|
|||
"InvertedIndex.range_query: unsupported data type: {}",
|
||||
typeid(T).name());
|
||||
}();
|
||||
return RustArrayWrapper(array);
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.range_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.range_query: invalid result type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
RustArrayWrapper
|
||||
prefix_query(const std::string& prefix) {
|
||||
auto array = tantivy_prefix_query_keyword(reader_, prefix.c_str());
|
||||
return RustArrayWrapper(array);
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.prefix_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.prefix_query: invalid result type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
RustArrayWrapper
|
||||
regex_query(const std::string& pattern) {
|
||||
auto array = tantivy_regex_query(reader_, pattern.c_str());
|
||||
return RustArrayWrapper(array);
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.regex_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.regex_query: invalid result type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
RustArrayWrapper
|
||||
match_query(const std::string& query) {
|
||||
auto array = tantivy_match_query(reader_, query.c_str());
|
||||
return RustArrayWrapper(array);
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.match_query: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
|
||||
"TantivyIndexWrapper.match_query: invalid result type");
|
||||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
public:
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "tantivy-binding.h"
|
||||
#include "rust-binding.h"
|
||||
#include "rust-hashmap.h"
|
||||
#include "tantivy/rust-array.h"
|
||||
#include "token-stream.h"
|
||||
|
||||
namespace milvus::tantivy {
|
||||
|
@ -13,10 +14,12 @@ struct Tokenizer {
|
|||
|
||||
explicit Tokenizer(std::string&& params) {
|
||||
auto shared_params = std::make_shared<std::string>(std::move(params));
|
||||
ptr_ = tantivy_create_tokenizer(shared_params->c_str());
|
||||
if (ptr_ == nullptr) {
|
||||
throw std::invalid_argument("invalid tokenizer parameters");
|
||||
}
|
||||
auto res =
|
||||
RustResultWrapper(tantivy_create_tokenizer(shared_params->c_str()));
|
||||
AssertInfo(res.result_->success,
|
||||
"Tokenizer creation failed: {}",
|
||||
res.result_->error);
|
||||
ptr_ = res.result_->value.ptr._0;
|
||||
}
|
||||
|
||||
explicit Tokenizer(void* _ptr) : ptr_(_ptr) {
|
||||
|
|
|
@ -87,6 +87,7 @@ set(MILVUS_TEST_FILES
|
|||
test_utils.cpp
|
||||
test_chunked_segment.cpp
|
||||
test_chunked_column.cpp
|
||||
test_rust_result.cpp
|
||||
)
|
||||
|
||||
if ( INDEX_ENGINE STREQUAL "cardinal" )
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <cstdint>
|
||||
#include "gtest/gtest.h"
|
||||
#include "tantivy-binding.h"
|
||||
TEST(RustResultTest, TestResult) {
|
||||
auto arr = test_enum_with_array();
|
||||
auto len = arr.value.rust_array._0.len;
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
EXPECT_EQ(i + 1, arr.value.rust_array._0.array[i]);
|
||||
}
|
||||
free_rust_result(arr);
|
||||
|
||||
auto ptr = test_enum_with_ptr();
|
||||
EXPECT_EQ(1, *static_cast<uint32_t*>(ptr.value.ptr._0));
|
||||
free_rust_result(ptr);
|
||||
free_test_ptr(ptr.value.ptr._0);
|
||||
}
|
|
@ -6679,7 +6679,7 @@ class TestQueryTextMatchNegative(TestcaseBase):
|
|||
default_schema = CollectionSchema(
|
||||
fields=default_fields, description="test collection"
|
||||
)
|
||||
error = {ct.err_code: 2000, ct.err_msg: "invalid tokenizer parameters"}
|
||||
error = {ct.err_code: 2000, ct.err_msg: "unsupported tokenizer"}
|
||||
self.init_collection_wrap(
|
||||
name=cf.gen_unique_str(prefix),
|
||||
schema=default_schema,
|
||||
|
|
Loading…
Reference in New Issue