enhance: Handle rust error in c++ (#38113)

https://github.com/milvus-io/milvus/issues/37930

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
pull/38505/head
Bingyi Sun 2024-12-16 19:40:45 +08:00 committed by GitHub
parent 659847c11f
commit 3e2a2f278b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 950 additions and 431 deletions

View File

@ -1,6 +1,7 @@
#pragma once
#include <iostream>
#include <memory>
#include <sstream>
#include "tantivy-binding.h"
@ -11,7 +12,13 @@ namespace milvus::tantivy {
struct RustArrayWrapper {
NO_COPY_OR_ASSIGN(RustArrayWrapper);
explicit RustArrayWrapper(RustArray array) : array_(array) {
explicit RustArrayWrapper(RustArray&& array) {
array_.array = array.array;
array_.len = array.len;
array_.cap = array.cap;
array.array = nullptr;
array.len = 0;
array.cap = 0;
}
RustArrayWrapper(RustArrayWrapper&& other) noexcept {
@ -62,4 +69,42 @@ struct RustArrayWrapper {
}
}
};
// Owning wrapper around a RustResult produced by the tantivy binding.
// A held result is passed to free_rust_result() when the wrapper is
// destroyed or just before it is overwritten by move-assignment.
// NOTE(review): NO_COPY_OR_ASSIGN presumably deletes the copy operations,
// making this type move-only -- confirm against the macro definition.
struct RustResultWrapper {
    NO_COPY_OR_ASSIGN(RustResultWrapper);

    // An empty wrapper holds nothing and releases nothing.
    RustResultWrapper() = default;

    // Stores a heap copy of `result` and becomes responsible for freeing it.
    explicit RustResultWrapper(RustResult result)
        : result_(std::make_unique<RustResult>(result)) {
    }

    // Steals the result held by `other`, leaving `other` empty.
    RustResultWrapper(RustResultWrapper&& other) noexcept
        : result_(std::move(other.result_)) {
    }

    // Releases the currently held result (if any) and then steals the one
    // held by `other`. Self-assignment is a no-op.
    RustResultWrapper&
    operator=(RustResultWrapper&& other) noexcept {
        if (this == &other) {
            return *this;
        }
        free();
        result_ = std::move(other.result_);
        return *this;
    }

    ~RustResultWrapper() {
        free();
    }

    // The wrapped result; null when the wrapper is empty or moved-from.
    std::unique_ptr<RustResult> result_;

 private:
    // Hands the held result back to Rust and clears the pointer so the
    // same result is not released again by this wrapper.
    void
    free() {
        if (!result_) {
            return;
        }
        free_rust_result(*result_);
        result_.reset();
    }
};
} // namespace milvus::tantivy

View File

@ -20,10 +20,59 @@ struct RustArray {
size_t cap;
};
// Tagged union carrying the payload of a RustResult across the FFI boundary.
// It is the C++ counterpart of the Rust `#[repr(C)] enum Value`
// (None / RustArray / U32 / Ptr).
struct Value {
// Discriminant: indicates which member of the anonymous union is active.
enum class Tag {
None,
RustArray,
U32,
Ptr,
};
// Body for Tag::None: carries no data.
struct None_Body {
};
// Body for Tag::RustArray: an array of u32 values produced on the Rust side.
struct RustArray_Body {
RustArray _0;
};
// Body for Tag::U32: a single 32-bit unsigned value.
struct U32_Body {
uint32_t _0;
};
// Body for Tag::Ptr: an opaque pointer created on the Rust side.
struct Ptr_Body {
void *_0;
};
Tag tag;
// Only the member matching `tag` should be read.
union {
None_Body none;
RustArray_Body rust_array;
U32_Body u32;
Ptr_Body ptr;
};
};
// Outcome of a call into the Rust binding: either a value or an error message.
// Release with free_rust_result() once the contents are no longer needed.
struct RustResult {
// True when the call succeeded; false when `error` describes a failure.
bool success;
// Payload of a successful call; the Rust constructors set it to None on failure.
Value value;
// Error message on failure; the Rust constructors set it to null on success.
const char *error;
};
extern "C" {
void free_rust_array(RustArray array);
void free_rust_result(RustResult result);
void free_rust_error(const char *error);
RustResult test_enum_with_array();
RustResult test_enum_with_ptr();
void free_test_ptr(void *ptr);
void print_vector_of_strings(const char *const *ptr, uintptr_t len);
void *create_hashmap();
@ -32,120 +81,164 @@ void hashmap_set_value(void *map, const char *key, const char *value);
void free_hashmap(void *map);
void *tantivy_load_index(const char *path);
RustResult tantivy_load_index(const char *path);
void tantivy_free_index_reader(void *ptr);
void tantivy_reload_index(void *ptr);
RustResult tantivy_reload_index(void *ptr);
uint32_t tantivy_index_count(void *ptr);
RustResult tantivy_index_count(void *ptr);
RustArray tantivy_term_query_i64(void *ptr, int64_t term);
RustResult tantivy_term_query_i64(void *ptr, int64_t term);
RustArray tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive);
RustResult tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive);
RustArray tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive);
RustResult tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive);
RustArray tantivy_range_query_i64(void *ptr,
int64_t lower_bound,
int64_t upper_bound,
bool lb_inclusive,
bool ub_inclusive);
RustResult tantivy_range_query_i64(void *ptr,
int64_t lower_bound,
int64_t upper_bound,
bool lb_inclusive,
bool ub_inclusive);
RustArray tantivy_term_query_f64(void *ptr, double term);
RustResult tantivy_term_query_f64(void *ptr, double term);
RustArray tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive);
RustResult tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive);
RustArray tantivy_upper_bound_range_query_f64(void *ptr, double upper_bound, bool inclusive);
RustResult tantivy_upper_bound_range_query_f64(void *ptr, double upper_bound, bool inclusive);
RustArray tantivy_range_query_f64(void *ptr,
double lower_bound,
double upper_bound,
bool lb_inclusive,
bool ub_inclusive);
RustResult tantivy_range_query_f64(void *ptr,
double lower_bound,
double upper_bound,
bool lb_inclusive,
bool ub_inclusive);
RustArray tantivy_term_query_bool(void *ptr, bool term);
RustResult tantivy_term_query_bool(void *ptr, bool term);
RustArray tantivy_term_query_keyword(void *ptr, const char *term);
RustResult tantivy_term_query_keyword(void *ptr, const char *term);
RustArray tantivy_lower_bound_range_query_keyword(void *ptr,
const char *lower_bound,
bool inclusive);
RustResult tantivy_lower_bound_range_query_keyword(void *ptr,
const char *lower_bound,
bool inclusive);
RustArray tantivy_upper_bound_range_query_keyword(void *ptr,
const char *upper_bound,
bool inclusive);
RustResult tantivy_upper_bound_range_query_keyword(void *ptr,
const char *upper_bound,
bool inclusive);
RustArray tantivy_range_query_keyword(void *ptr,
const char *lower_bound,
const char *upper_bound,
bool lb_inclusive,
bool ub_inclusive);
RustResult tantivy_range_query_keyword(void *ptr,
const char *lower_bound,
const char *upper_bound,
bool lb_inclusive,
bool ub_inclusive);
RustArray tantivy_prefix_query_keyword(void *ptr, const char *prefix);
RustResult tantivy_prefix_query_keyword(void *ptr, const char *prefix);
RustArray tantivy_regex_query(void *ptr, const char *pattern);
RustResult tantivy_regex_query(void *ptr, const char *pattern);
RustArray tantivy_match_query(void *ptr, const char *query);
RustResult tantivy_match_query(void *ptr, const char *query);
void tantivy_register_tokenizer(void *ptr, const char *tokenizer_name, const char *analyzer_params);
RustResult tantivy_register_tokenizer(void *ptr,
const char *tokenizer_name,
const char *analyzer_params);
void *tantivy_create_index(const char *field_name,
TantivyDataType data_type,
const char *path,
uintptr_t num_threads,
uintptr_t overall_memory_budget_in_bytes);
RustResult tantivy_create_index(const char *field_name,
TantivyDataType data_type,
const char *path,
uintptr_t num_threads,
uintptr_t overall_memory_budget_in_bytes);
void tantivy_free_index_writer(void *ptr);
void tantivy_finish_index(void *ptr);
RustResult tantivy_finish_index(void *ptr);
void tantivy_commit_index(void *ptr);
RustResult tantivy_commit_index(void *ptr);
void *tantivy_create_reader_from_writer(void *ptr);
RustResult tantivy_create_reader_from_writer(void *ptr);
void tantivy_index_add_int8s(void *ptr, const int8_t *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_int8s(void *ptr,
const int8_t *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_int16s(void *ptr, const int16_t *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_int16s(void *ptr,
const int16_t *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_int32s(void *ptr, const int32_t *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_int32s(void *ptr,
const int32_t *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_int64s(void *ptr, const int64_t *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_int64s(void *ptr,
const int64_t *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_f32s(void *ptr, const float *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_f32s(void *ptr,
const float *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_f64s(void *ptr, const double *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_f64s(void *ptr,
const double *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_bools(void *ptr, const bool *array, uintptr_t len, int64_t offset_begin);
RustResult tantivy_index_add_bools(void *ptr,
const bool *array,
uintptr_t len,
int64_t offset_begin);
void tantivy_index_add_string(void *ptr, const char *s, int64_t offset);
RustResult tantivy_index_add_string(void *ptr, const char *s, int64_t offset);
void tantivy_index_add_multi_int8s(void *ptr, const int8_t *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_int8s(void *ptr,
const int8_t *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_int16s(void *ptr, const int16_t *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_int16s(void *ptr,
const int16_t *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_int32s(void *ptr, const int32_t *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_int32s(void *ptr,
const int32_t *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_int64s(void *ptr, const int64_t *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_int64s(void *ptr,
const int64_t *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_f32s(void *ptr, const float *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_f32s(void *ptr,
const float *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_f64s(void *ptr, const double *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_f64s(void *ptr,
const double *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_bools(void *ptr, const bool *array, uintptr_t len, int64_t offset);
RustResult tantivy_index_add_multi_bools(void *ptr,
const bool *array,
uintptr_t len,
int64_t offset);
void tantivy_index_add_multi_keywords(void *ptr,
const char *const *array,
uintptr_t len,
int64_t offset);
RustResult tantivy_index_add_multi_keywords(void *ptr,
const char *const *array,
uintptr_t len,
int64_t offset);
void *tantivy_create_text_writer(const char *field_name,
const char *path,
const char *tokenizer_name,
const char *analyzer_params,
uintptr_t num_threads,
uintptr_t overall_memory_budget_in_bytes,
bool in_ram);
RustResult tantivy_create_text_writer(const char *field_name,
const char *path,
const char *tokenizer_name,
const char *analyzer_params,
uintptr_t num_threads,
uintptr_t overall_memory_budget_in_bytes,
bool in_ram);
void free_rust_string(const char *ptr);
@ -157,7 +250,7 @@ bool tantivy_token_stream_advance(void *token_stream);
const char *tantivy_token_stream_get_token(void *token_stream);
void *tantivy_create_tokenizer(const char *analyzer_params);
RustResult tantivy_create_tokenizer(const char *analyzer_params);
void *tantivy_clone_tokenizer(void *ptr);

View File

@ -1,5 +1,16 @@
use std::default;
use std::ffi::c_void;
use std::ptr::null;
use libc::c_char;
use libc::size_t;
use crate::error;
use crate::error::Result;
use crate::string_c::create_string;
use crate::string_c::free_rust_string;
use crate::util::free_binding;
#[repr(C)]
pub struct RustArray {
array: *mut u32,
@ -20,6 +31,22 @@ impl RustArray {
}
}
impl std::default::Default for RustArray {
fn default() -> Self {
RustArray {
array: std::ptr::null_mut(),
len: 0,
cap: 0,
}
}
}
impl From<Vec<u32>> for RustArray {
fn from(vec: Vec<u32>) -> Self {
RustArray::from_vec(vec)
}
}
#[no_mangle]
pub extern "C" fn free_rust_array(array: RustArray) {
let RustArray { array, len, cap } = array;
@ -27,3 +54,129 @@ pub extern "C" fn free_rust_array(array: RustArray) {
Vec::from_raw_parts(array, len, cap);
}
}
/// Payload carried by a `RustResult` across the FFI boundary.
/// `#[repr(C)]` gives it a C-compatible tagged-union layout.
#[repr(C)]
pub enum Value {
/// No payload; used for calls that return nothing and for failed calls.
None(()),
/// An array of `u32` values.
RustArray(RustArray),
/// A single `u32` value.
U32(u32),
/// An opaque raw pointer handed to the caller.
Ptr(*mut c_void),
}
// Generates `impl From<$type> for $enum_name` for every `$variant => $type`
// pair. Each generated `from` wraps the input in `$enum_name::$variant`,
// converting it with `.into()` first so the source type may differ from the
// variant's field type.
macro_rules! impl_from_for_enum {
($enum_name:ident, $($variant:ident => $type:ty),*) => {
$(
impl From<$type> for $enum_name {
fn from(value: $type) -> Self {
// `.into()` is the identity when `$type` already is the field type.
$enum_name::$variant(value.into())
}
}
)*
};
}
// `RustArray` is listed twice: once for `RustArray` itself and once for
// `Vec<u32>`, which is converted through `From<Vec<u32>> for RustArray`.
impl_from_for_enum!(Value, None => (), RustArray => RustArray, RustArray => Vec<u32>, U32 => u32, Ptr => *mut c_void);
/// C-compatible result returned by the exported binding functions.
/// Release it with `free_rust_result`.
#[repr(C)]
pub struct RustResult {
/// `true` when the call succeeded.
pub success: bool,
/// Payload of a successful call; `Value::None(())` on failure.
pub value: Value,
/// Error message created with `create_string` on failure; null on success.
pub error: *const c_char,
}
impl RustResult {
pub fn from_ptr(value: *mut c_void) -> Self {
RustResult {
success: true,
value: Value::Ptr(value),
error: std::ptr::null(),
}
}
pub fn from_error(error: String) -> Self {
RustResult {
success: false,
value: Value::None(()),
error: create_string(&error),
}
}
}
impl<T> From<Result<T>> for RustResult
where
T: Into<Value>,
{
fn from(value: error::Result<T>) -> Self {
match value {
Ok(v) => RustResult {
success: true,
value: v.into(),
error: null(),
},
Err(e) => RustResult {
success: false,
value: Value::None(()),
error: create_string(&e.to_string()),
},
}
}
}
#[no_mangle]
pub extern "C" fn free_rust_result(result: RustResult) {
match result.value {
Value::RustArray(array) => {
if !array.array.is_null() {
free_rust_array(array);
}
}
_ => {}
}
unsafe {
if !result.error.is_null() {
free_rust_string(result.error as *mut c_char);
}
}
}
/// Releases an error string through `free_rust_string`.
/// A null pointer is ignored.
#[no_mangle]
pub extern "C" fn free_rust_error(error: *const c_char) {
    if error.is_null() {
        return;
    }
    unsafe {
        free_rust_string(error as *mut c_char);
    }
}
// TODO: move to common
/// Converts a C string pointer into a `&str` inside a function that returns
/// `RustResult`.
///
/// On failure the macro makes the *enclosing function* return
/// `RustResult::from_error(..)`:
/// - when the pointer is null (`CStr::from_ptr` must never be given a null
///   pointer, so this is checked first), or
/// - when the bytes are not valid UTF-8.
///
/// The call site must have `CStr` and `RustResult` in scope. The argument
/// expression is evaluated exactly once.
#[macro_export]
macro_rules! cstr_to_str {
    ($cstr:expr) => {
        unsafe {
            let raw = $cstr;
            if raw.is_null() {
                return RustResult::from_error(String::from(
                    "unexpected null pointer passed as C string",
                ));
            }
            match CStr::from_ptr(raw).to_str() {
                Ok(s) => s,
                Err(e) => return RustResult::from_error(e.to_string()),
            }
        }
    };
}
#[no_mangle]
pub extern "C" fn test_enum_with_array() -> RustResult {
let array = vec![1, 2, 3];
RustResult::from(Result::Ok(array))
}
#[no_mangle]
pub extern "C" fn test_enum_with_ptr() -> RustResult {
let ptr = Box::into_raw(Box::new(1 as u32));
RustResult::from(Result::Ok(ptr as *mut c_void))
}
/// Test helper exported to C: passes a non-null `ptr` to
/// `free_binding::<u32>`. A null pointer is ignored.
#[no_mangle]
pub extern "C" fn free_test_ptr(ptr: *mut c_void) {
    if !ptr.is_null() {
        free_binding::<u32>(ptr);
    }
}

View File

@ -1,10 +1,12 @@
use core::fmt;
use core::{fmt, str};
use serde_json as json;
/// Error type shared by the tantivy binding; `Result<T>` in this module is an
/// alias for `std::result::Result<T, TantivyBindingError>`.
#[derive(Debug)]
pub enum TantivyBindingError {
/// A `serde_json` (de)serialization error.
JsonError(serde_json::Error),
/// An error reported by tantivy itself.
TantivyError(tantivy::TantivyError),
/// An invalid-argument error described by a message.
InvalidArgument(String),
/// An internal error described by a message; also used for UTF-8
/// decoding failures (see `From<str::Utf8Error>`).
InternalError(String),
}
@ -14,10 +16,18 @@ impl From<serde_json::Error> for TantivyBindingError {
}
}
/// Wraps a tantivy error in the `TantivyError` variant, which lets `?`
/// convert tantivy failures into `TantivyBindingError`.
impl From<tantivy::TantivyError> for TantivyBindingError {
    fn from(source: tantivy::TantivyError) -> Self {
        Self::TantivyError(source)
    }
}
impl fmt::Display for TantivyBindingError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TantivyBindingError::JsonError(e) => write!(f, "JsonError: {}", e),
TantivyBindingError::TantivyError(e) => write!(f, "TantivyError: {}", e),
TantivyBindingError::InvalidArgument(e) => write!(f, "InvalidArgument: {}", e),
TantivyBindingError::InternalError(e) => write!(f, "InternalError: {}", e),
}
}
@ -27,9 +37,17 @@ impl std::error::Error for TantivyBindingError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
TantivyBindingError::JsonError(e) => Some(e),
TantivyBindingError::TantivyError(e) => Some(e),
TantivyBindingError::InvalidArgument(_) => None,
TantivyBindingError::InternalError(_) => None,
}
}
}
/// Maps a UTF-8 decoding failure to `InternalError`, keeping only its
/// textual description.
impl From<str::Utf8Error> for TantivyBindingError {
    fn from(source: str::Utf8Error) -> Self {
        let message = source.to_string();
        Self::InternalError(message)
    }
}
pub type Result<T> = std::result::Result<T, TantivyBindingError>;

View File

@ -10,6 +10,8 @@ use crate::log::init_log;
use crate::util::make_bounds;
use crate::vec_collector::VecCollector;
use crate::error::{Result, TantivyBindingError};
pub(crate) struct IndexReaderWrapper {
pub(crate) field_name: String,
pub(crate) field: Field,
@ -19,15 +21,15 @@ pub(crate) struct IndexReaderWrapper {
}
impl IndexReaderWrapper {
pub fn load(path: &str) -> IndexReaderWrapper {
pub fn load(path: &str) -> Result<IndexReaderWrapper> {
init_log();
let index = Index::open_in_dir(path).unwrap();
let index = Index::open_in_dir(path)?;
IndexReaderWrapper::from_index(Arc::new(index))
}
pub fn from_index(index: Arc<Index>) -> IndexReaderWrapper {
pub fn from_index(index: Arc<Index>) -> Result<IndexReaderWrapper> {
let field = index.schema().fields().next().unwrap().0;
let schema = index.schema();
let field_name = String::from(schema.get_field_name(field));
@ -39,47 +41,51 @@ impl IndexReaderWrapper {
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit) // OnCommit serve for growing segment.
.try_into()
.unwrap();
reader.reload().unwrap();
.try_into()?;
reader.reload()?;
IndexReaderWrapper {
Ok(IndexReaderWrapper {
field_name,
field,
reader,
index,
id_field,
}
})
}
pub fn reload(&self) {
self.reader.reload().unwrap();
pub fn reload(&self) -> Result<()> {
self.reader.reload()?;
Ok(())
}
pub fn count(&self) -> u32 {
let metas = self.index.searchable_segment_metas().unwrap();
pub fn count(&self) -> Result<u32> {
let metas = self.index.searchable_segment_metas()?;
let mut sum: u32 = 0;
for meta in metas {
sum += meta.max_doc();
}
sum
Ok(sum)
}
pub(crate) fn search(&self, q: &dyn Query) -> Vec<u32> {
pub(crate) fn search(&self, q: &dyn Query) -> Result<Vec<u32>> {
let searcher = self.reader.searcher();
match self.id_field {
Some(_) => {
// newer version with doc_id.
searcher.search(q, &DocIdCollector {}).unwrap()
searcher
.search(q, &DocIdCollector {})
.map_err(TantivyBindingError::TantivyError)
}
None => {
// older version without doc_id, only one segment.
searcher.search(q, &VecCollector {}).unwrap()
searcher
.search(q, &VecCollector {})
.map_err(TantivyBindingError::TantivyError)
}
}
}
pub fn term_query_i64(&self, term: i64) -> Vec<u32> {
pub fn term_query_i64(&self, term: i64) -> Result<Vec<u32>> {
let q = TermQuery::new(
Term::from_field_i64(self.field, term),
IndexRecordOption::Basic,
@ -87,7 +93,11 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn lower_bound_range_query_i64(&self, lower_bound: i64, inclusive: bool) -> Vec<u32> {
pub fn lower_bound_range_query_i64(
&self,
lower_bound: i64,
inclusive: bool,
) -> Result<Vec<u32>> {
let q = RangeQuery::new_i64_bounds(
self.field_name.to_string(),
make_bounds(lower_bound, inclusive),
@ -96,7 +106,11 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn upper_bound_range_query_i64(&self, upper_bound: i64, inclusive: bool) -> Vec<u32> {
pub fn upper_bound_range_query_i64(
&self,
upper_bound: i64,
inclusive: bool,
) -> Result<Vec<u32>> {
let q = RangeQuery::new_i64_bounds(
self.field_name.to_string(),
Bound::Unbounded,
@ -111,14 +125,14 @@ impl IndexReaderWrapper {
upper_bound: i64,
lb_inclusive: bool,
ub_inclusive: bool,
) -> Vec<u32> {
) -> Result<Vec<u32>> {
let lb = make_bounds(lower_bound, lb_inclusive);
let ub = make_bounds(upper_bound, ub_inclusive);
let q = RangeQuery::new_i64_bounds(self.field_name.to_string(), lb, ub);
self.search(&q)
}
pub fn term_query_f64(&self, term: f64) -> Vec<u32> {
pub fn term_query_f64(&self, term: f64) -> Result<Vec<u32>> {
let q = TermQuery::new(
Term::from_field_f64(self.field, term),
IndexRecordOption::Basic,
@ -126,7 +140,11 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn lower_bound_range_query_f64(&self, lower_bound: f64, inclusive: bool) -> Vec<u32> {
pub fn lower_bound_range_query_f64(
&self,
lower_bound: f64,
inclusive: bool,
) -> Result<Vec<u32>> {
let q = RangeQuery::new_f64_bounds(
self.field_name.to_string(),
make_bounds(lower_bound, inclusive),
@ -135,7 +153,11 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn upper_bound_range_query_f64(&self, upper_bound: f64, inclusive: bool) -> Vec<u32> {
pub fn upper_bound_range_query_f64(
&self,
upper_bound: f64,
inclusive: bool,
) -> Result<Vec<u32>> {
let q = RangeQuery::new_f64_bounds(
self.field_name.to_string(),
Bound::Unbounded,
@ -150,14 +172,14 @@ impl IndexReaderWrapper {
upper_bound: f64,
lb_inclusive: bool,
ub_inclusive: bool,
) -> Vec<u32> {
) -> Result<Vec<u32>> {
let lb = make_bounds(lower_bound, lb_inclusive);
let ub = make_bounds(upper_bound, ub_inclusive);
let q = RangeQuery::new_f64_bounds(self.field_name.to_string(), lb, ub);
self.search(&q)
}
pub fn term_query_bool(&self, term: bool) -> Vec<u32> {
pub fn term_query_bool(&self, term: bool) -> Result<Vec<u32>> {
let q = TermQuery::new(
Term::from_field_bool(self.field, term),
IndexRecordOption::Basic,
@ -165,7 +187,7 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn term_query_keyword(&self, term: &str) -> Vec<u32> {
pub fn term_query_keyword(&self, term: &str) -> Result<Vec<u32>> {
let q = TermQuery::new(
Term::from_field_text(self.field, term),
IndexRecordOption::Basic,
@ -173,7 +195,11 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn lower_bound_range_query_keyword(&self, lower_bound: &str, inclusive: bool) -> Vec<u32> {
pub fn lower_bound_range_query_keyword(
&self,
lower_bound: &str,
inclusive: bool,
) -> Result<Vec<u32>> {
let q = RangeQuery::new_str_bounds(
self.field_name.to_string(),
make_bounds(lower_bound, inclusive),
@ -182,7 +208,11 @@ impl IndexReaderWrapper {
self.search(&q)
}
pub fn upper_bound_range_query_keyword(&self, upper_bound: &str, inclusive: bool) -> Vec<u32> {
pub fn upper_bound_range_query_keyword(
&self,
upper_bound: &str,
inclusive: bool,
) -> Result<Vec<u32>> {
let q = RangeQuery::new_str_bounds(
self.field_name.to_string(),
Bound::Unbounded,
@ -197,21 +227,21 @@ impl IndexReaderWrapper {
upper_bound: &str,
lb_inclusive: bool,
ub_inclusive: bool,
) -> Vec<u32> {
) -> Result<Vec<u32>> {
let lb = make_bounds(lower_bound, lb_inclusive);
let ub = make_bounds(upper_bound, ub_inclusive);
let q = RangeQuery::new_str_bounds(self.field_name.to_string(), lb, ub);
self.search(&q)
}
pub fn prefix_query_keyword(&self, prefix: &str) -> Vec<u32> {
pub fn prefix_query_keyword(&self, prefix: &str) -> Result<Vec<u32>> {
let escaped = regex::escape(prefix);
let pattern = format!("{}(.|\n)*", escaped);
self.regex_query(&pattern)
}
pub fn regex_query(&self, pattern: &str) -> Vec<u32> {
let q = RegexQuery::from_pattern(&pattern, self.field).unwrap();
pub fn regex_query(&self, pattern: &str) -> Result<Vec<u32>> {
let q = RegexQuery::from_pattern(&pattern, self.field)?;
self.search(&q)
}
}
@ -244,10 +274,10 @@ mod test {
index_writer.commit().unwrap();
let index_shared = Arc::new(index);
let index_reader_wrapper = IndexReaderWrapper::from_index(index_shared);
let mut res = index_reader_wrapper.prefix_query_keyword("^");
let index_reader_wrapper = IndexReaderWrapper::from_index(index_shared).unwrap();
let mut res = index_reader_wrapper.prefix_query_keyword("^").unwrap();
assert_eq!(res.len(), 1);
res = index_reader_wrapper.prefix_query_keyword("$");
res = index_reader_wrapper.prefix_query_keyword("$").unwrap();
assert_eq!(res.len(), 1);
}
}

View File

@ -1,18 +1,25 @@
use std::ffi::{c_char, c_void, CStr};
use std::{
ffi::{c_char, c_void, CStr},
ptr::null,
};
use crate::{
array::RustArray,
array::{RustArray, RustResult},
cstr_to_str,
index_reader::IndexReaderWrapper,
string_c::create_string,
util::{create_binding, free_binding},
util_c::tantivy_index_exist,
};
#[no_mangle]
pub extern "C" fn tantivy_load_index(path: *const c_char) -> *mut c_void {
pub extern "C" fn tantivy_load_index(path: *const c_char) -> RustResult {
assert!(tantivy_index_exist(path));
let path_str = unsafe { CStr::from_ptr(path) };
let wrapper = IndexReaderWrapper::load(path_str.to_str().unwrap());
create_binding(wrapper)
let path_str = cstr_to_str!(path);
match IndexReaderWrapper::load(path_str) {
Ok(w) => RustResult::from_ptr(create_binding(w)),
Err(e) => RustResult::from_error(e.to_string()),
}
}
#[no_mangle]
@ -22,26 +29,21 @@ pub extern "C" fn tantivy_free_index_reader(ptr: *mut c_void) {
// -------------------------query--------------------
#[no_mangle]
pub extern "C" fn tantivy_reload_index(ptr: *mut c_void) {
pub extern "C" fn tantivy_reload_index(ptr: *mut c_void) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
(*real).reload();
}
unsafe { (*real).reload().into() }
}
#[no_mangle]
pub extern "C" fn tantivy_index_count(ptr: *mut c_void) -> u32 {
pub extern "C" fn tantivy_index_count(ptr: *mut c_void) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe { (*real).count() }
unsafe { (*real).count().into() }
}
#[no_mangle]
pub extern "C" fn tantivy_term_query_i64(ptr: *mut c_void, term: i64) -> RustArray {
pub extern "C" fn tantivy_term_query_i64(ptr: *mut c_void, term: i64) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).term_query_i64(term);
RustArray::from_vec(hits)
}
unsafe { (*real).term_query_i64(term).into() }
}
#[no_mangle]
@ -49,11 +51,12 @@ pub extern "C" fn tantivy_lower_bound_range_query_i64(
ptr: *mut c_void,
lower_bound: i64,
inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).lower_bound_range_query_i64(lower_bound, inclusive);
RustArray::from_vec(hits)
(*real)
.lower_bound_range_query_i64(lower_bound, inclusive)
.into()
}
}
@ -62,11 +65,12 @@ pub extern "C" fn tantivy_upper_bound_range_query_i64(
ptr: *mut c_void,
upper_bound: i64,
inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).upper_bound_range_query_i64(upper_bound, inclusive);
RustArray::from_vec(hits)
(*real)
.upper_bound_range_query_i64(upper_bound, inclusive)
.into()
}
}
@ -77,21 +81,19 @@ pub extern "C" fn tantivy_range_query_i64(
upper_bound: i64,
lb_inclusive: bool,
ub_inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).range_query_i64(lower_bound, upper_bound, lb_inclusive, ub_inclusive);
RustArray::from_vec(hits)
(*real)
.range_query_i64(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
.into()
}
}
#[no_mangle]
pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustArray {
pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).term_query_f64(term);
RustArray::from_vec(hits)
}
unsafe { (*real).term_query_f64(term).into() }
}
#[no_mangle]
@ -99,11 +101,12 @@ pub extern "C" fn tantivy_lower_bound_range_query_f64(
ptr: *mut c_void,
lower_bound: f64,
inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).lower_bound_range_query_f64(lower_bound, inclusive);
RustArray::from_vec(hits)
(*real)
.lower_bound_range_query_f64(lower_bound, inclusive)
.into()
}
}
@ -112,11 +115,12 @@ pub extern "C" fn tantivy_upper_bound_range_query_f64(
ptr: *mut c_void,
upper_bound: f64,
inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).upper_bound_range_query_f64(upper_bound, inclusive);
RustArray::from_vec(hits)
(*real)
.upper_bound_range_query_f64(upper_bound, inclusive)
.into()
}
}
@ -127,31 +131,26 @@ pub extern "C" fn tantivy_range_query_f64(
upper_bound: f64,
lb_inclusive: bool,
ub_inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).range_query_f64(lower_bound, upper_bound, lb_inclusive, ub_inclusive);
RustArray::from_vec(hits)
(*real)
.range_query_f64(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
.into()
}
}
#[no_mangle]
pub extern "C" fn tantivy_term_query_bool(ptr: *mut c_void, term: bool) -> RustArray {
pub extern "C" fn tantivy_term_query_bool(ptr: *mut c_void, term: bool) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let hits = (*real).term_query_bool(term);
RustArray::from_vec(hits)
}
unsafe { (*real).term_query_bool(term).into() }
}
#[no_mangle]
pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_char) -> RustArray {
pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_char) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let c_str = CStr::from_ptr(term);
let hits = (*real).term_query_keyword(c_str.to_str().unwrap());
RustArray::from_vec(hits)
}
let term = cstr_to_str!(term);
unsafe { (*real).term_query_keyword(term).into() }
}
#[no_mangle]
@ -159,13 +158,13 @@ pub extern "C" fn tantivy_lower_bound_range_query_keyword(
ptr: *mut c_void,
lower_bound: *const c_char,
inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
let lower_bound = cstr_to_str!(lower_bound);
unsafe {
let c_lower_bound = CStr::from_ptr(lower_bound);
let hits =
(*real).lower_bound_range_query_keyword(c_lower_bound.to_str().unwrap(), inclusive);
RustArray::from_vec(hits)
(*real)
.lower_bound_range_query_keyword(lower_bound, inclusive)
.into()
}
}
@ -174,13 +173,13 @@ pub extern "C" fn tantivy_upper_bound_range_query_keyword(
ptr: *mut c_void,
upper_bound: *const c_char,
inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
let upper_bound = cstr_to_str!(upper_bound);
unsafe {
let c_upper_bound = CStr::from_ptr(upper_bound);
let hits =
(*real).upper_bound_range_query_keyword(c_upper_bound.to_str().unwrap(), inclusive);
RustArray::from_vec(hits)
(*real)
.upper_bound_range_query_keyword(upper_bound, inclusive)
.into()
}
}
@ -191,18 +190,14 @@ pub extern "C" fn tantivy_range_query_keyword(
upper_bound: *const c_char,
lb_inclusive: bool,
ub_inclusive: bool,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
let lower_bound = cstr_to_str!(lower_bound);
let upper_bound = cstr_to_str!(upper_bound);
unsafe {
let c_lower_bound = CStr::from_ptr(lower_bound);
let c_upper_bound = CStr::from_ptr(upper_bound);
let hits = (*real).range_query_keyword(
c_lower_bound.to_str().unwrap(),
c_upper_bound.to_str().unwrap(),
lb_inclusive,
ub_inclusive,
);
RustArray::from_vec(hits)
(*real)
.range_query_keyword(lower_bound, upper_bound, lb_inclusive, ub_inclusive)
.into()
}
}
@ -210,21 +205,15 @@ pub extern "C" fn tantivy_range_query_keyword(
pub extern "C" fn tantivy_prefix_query_keyword(
ptr: *mut c_void,
prefix: *const c_char,
) -> RustArray {
) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let c_str = CStr::from_ptr(prefix);
let hits = (*real).prefix_query_keyword(c_str.to_str().unwrap());
RustArray::from_vec(hits)
}
let prefix = cstr_to_str!(prefix);
unsafe { (*real).prefix_query_keyword(prefix).into() }
}
#[no_mangle]
pub extern "C" fn tantivy_regex_query(ptr: *mut c_void, pattern: *const c_char) -> RustArray {
pub extern "C" fn tantivy_regex_query(ptr: *mut c_void, pattern: *const c_char) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let c_str = CStr::from_ptr(pattern);
let hits = (*real).regex_query(c_str.to_str().unwrap());
RustArray::from_vec(hits)
}
let pattern = cstr_to_str!(pattern);
unsafe { (*real).regex_query(pattern).into() }
}

View File

@ -4,12 +4,13 @@ use tantivy::{
Term,
};
use crate::error::Result;
use crate::{index_reader::IndexReaderWrapper, tokenizer::standard_analyzer};
impl IndexReaderWrapper {
// Split the query string into tokens using the index's default tokenizer,
// then execute the disjunction of the resulting term queries.
pub(crate) fn match_query(&self, q: &str) -> Vec<u32> {
pub(crate) fn match_query(&self, q: &str) -> Result<Vec<u32>> {
// clone the tokenizer to make `match_query` thread-safe.
let mut tokenizer = self
.index

View File

@ -1,22 +1,22 @@
use std::{ffi::CStr};
use std::{ffi::CStr, ptr::null};
use libc::{c_char, c_void};
use crate::{
array::RustArray,
string_c::c_str_to_str,
array::{RustArray, RustResult},
cstr_to_str,
index_reader::IndexReaderWrapper,
tokenizer::create_tokenizer,
log::init_log,
string_c::{c_str_to_str, create_string},
tokenizer::create_tokenizer,
};
#[no_mangle]
pub extern "C" fn tantivy_match_query(ptr: *mut c_void, query: *const c_char) -> RustArray {
pub extern "C" fn tantivy_match_query(ptr: *mut c_void, query: *const c_char) -> RustResult {
let real = ptr as *mut IndexReaderWrapper;
unsafe {
let c_str = CStr::from_ptr(query);
let hits = (*real).match_query(c_str.to_str().unwrap());
RustArray::from_vec(hits)
let query = cstr_to_str!(query);
(*real).match_query(query).into()
}
}
@ -25,21 +25,17 @@ pub extern "C" fn tantivy_register_tokenizer(
ptr: *mut c_void,
tokenizer_name: *const c_char,
analyzer_params: *const c_char,
) {
) -> RustResult {
init_log();
let real = ptr as *mut IndexReaderWrapper;
let tokenizer_name_str = unsafe { CStr::from_ptr(tokenizer_name) };
let params = unsafe{c_str_to_str(analyzer_params).to_string()};
let analyzer = create_tokenizer(&params);
let tokenizer_name = cstr_to_str!(tokenizer_name);
let params = cstr_to_str!(analyzer_params);
let analyzer = create_tokenizer(params);
match analyzer {
Ok(text_analyzer) => unsafe {
(*real).register_tokenizer(
String::from(tokenizer_name_str.to_str().unwrap()),
text_analyzer,
);
},
Err(err) => {
panic!("create tokenizer failed with error: {} param: {}", err.to_string(), params);
(*real).register_tokenizer(String::from(tokenizer_name), text_analyzer);
Ok(()).into()
},
Err(err) => RustResult::from_error(err.to_string()),
}
}

View File

@ -10,6 +10,7 @@ use tantivy::{doc, tokenizer, Document, Index, IndexWriter};
use crate::data_type::TantivyDataType;
use crate::error::Result;
use crate::index_reader::IndexReaderWrapper;
use crate::log::init_log;
@ -27,7 +28,7 @@ impl IndexWriterWrapper {
path: String,
num_threads: usize,
overall_memory_budget_in_bytes: usize,
) -> IndexWriterWrapper {
) -> Result<IndexWriterWrapper> {
init_log();
let field: Field;
@ -55,171 +56,170 @@ impl IndexWriterWrapper {
}
let id_field = schema_builder.add_i64_field("doc_id", FAST);
let schema = schema_builder.build();
let index = Index::create_in_dir(path.clone(), schema).unwrap();
let index_writer = index
.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)
.unwrap();
IndexWriterWrapper {
let index = Index::create_in_dir(path.clone(), schema)?;
let index_writer =
index.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)?;
Ok(IndexWriterWrapper {
field,
index_writer,
id_field,
index: Arc::new(index),
}
})
}
pub fn create_reader(&self) -> IndexReaderWrapper {
pub fn create_reader(&self) -> Result<IndexReaderWrapper> {
IndexReaderWrapper::from_index(self.index.clone())
}
pub fn add_i8(&mut self, data: i8, offset: i64) {
pub fn add_i8(&mut self, data: i8, offset: i64) -> Result<()> {
self.add_i64(data.into(), offset)
}
pub fn add_i16(&mut self, data: i16, offset: i64) {
pub fn add_i16(&mut self, data: i16, offset: i64) -> Result<()> {
self.add_i64(data.into(), offset)
}
pub fn add_i32(&mut self, data: i32, offset: i64) {
pub fn add_i32(&mut self, data: i32, offset: i64) -> Result<()> {
self.add_i64(data.into(), offset)
}
pub fn add_i64(&mut self, data: i64, offset: i64) {
self.index_writer
.add_document(doc!(
self.field => data,
self.id_field => offset,
))
.unwrap();
pub fn add_i64(&mut self, data: i64, offset: i64) -> Result<()> {
let _ = self.index_writer.add_document(doc!(
self.field => data,
self.id_field => offset,
))?;
Ok(())
}
pub fn add_f32(&mut self, data: f32, offset: i64) {
pub fn add_f32(&mut self, data: f32, offset: i64) -> Result<()> {
self.add_f64(data.into(), offset)
}
pub fn add_f64(&mut self, data: f64, offset: i64) {
self.index_writer
.add_document(doc!(
self.field => data,
self.id_field => offset,
))
.unwrap();
pub fn add_f64(&mut self, data: f64, offset: i64) -> Result<()> {
let _ = self.index_writer.add_document(doc!(
self.field => data,
self.id_field => offset,
))?;
Ok(())
}
pub fn add_bool(&mut self, data: bool, offset: i64) {
self.index_writer
.add_document(doc!(
self.field => data,
self.id_field => offset,
))
.unwrap();
pub fn add_bool(&mut self, data: bool, offset: i64) -> Result<()> {
let _ = self.index_writer.add_document(doc!(
self.field => data,
self.id_field => offset,
))?;
Ok(())
}
pub fn add_string(&mut self, data: &str, offset: i64) {
self.index_writer
.add_document(doc!(
self.field => data,
self.id_field => offset,
))
.unwrap();
pub fn add_string(&mut self, data: &str, offset: i64) -> Result<()> {
let _ = self.index_writer.add_document(doc!(
self.field => data,
self.id_field => offset,
))?;
Ok(())
}
pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) {
pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data as i64);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) {
pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data as i64);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) {
pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data as i64);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) {
pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) {
pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data as f64);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) {
pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) {
pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) -> Result<()> {
let mut document = Document::default();
for data in datas {
document.add_field_value(self.field, *data);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) {
pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) -> Result<()> {
let mut document = Document::default();
for element in datas {
let data = unsafe { CStr::from_ptr(*element) };
document.add_field_value(self.field, data.to_str().unwrap());
document.add_field_value(self.field, data.to_str()?);
}
document.add_i64(self.id_field, offset);
self.index_writer.add_document(document).unwrap();
let _ = self.index_writer.add_document(document)?;
Ok(())
}
fn manual_merge(&mut self) {
let metas = self
.index_writer
.index()
.searchable_segment_metas()
.unwrap();
fn manual_merge(&mut self) -> Result<()> {
let metas = self.index_writer.index().searchable_segment_metas()?;
let policy = self.index_writer.get_merge_policy();
let candidates = policy.compute_merge_candidates(metas.as_slice());
for candidate in candidates {
self.index_writer
.merge(candidate.0.as_slice())
.wait()
.unwrap();
self.index_writer.merge(candidate.0.as_slice()).wait()?;
}
Ok(())
}
pub fn finish(mut self) {
self.index_writer.commit().unwrap();
pub fn finish(mut self) -> Result<()> {
self.index_writer.commit()?;
// self.manual_merge();
block_on(self.index_writer.garbage_collect_files()).unwrap();
self.index_writer.wait_merging_threads().unwrap();
block_on(self.index_writer.garbage_collect_files())?;
self.index_writer.wait_merging_threads()?;
Ok(())
}
pub(crate) fn commit(&mut self) {
self.index_writer.commit().unwrap();
pub(crate) fn commit(&mut self) -> Result<()> {
self.index_writer.commit()?;
Ok(())
}
}

View File

@ -1,8 +1,13 @@
use core::slice;
use std::ffi::{c_char, c_void, CStr};
use tantivy::Index;
use crate::{
array::RustResult,
cstr_to_str,
data_type::TantivyDataType,
error::Result,
index_writer::IndexWriterWrapper,
util::{create_binding, free_binding},
};
@ -14,17 +19,19 @@ pub extern "C" fn tantivy_create_index(
path: *const c_char,
num_threads: usize,
overall_memory_budget_in_bytes: usize,
) -> *mut c_void {
let field_name_str = unsafe { CStr::from_ptr(field_name) };
let path_str = unsafe { CStr::from_ptr(path) };
let wrapper = IndexWriterWrapper::new(
String::from(field_name_str.to_str().unwrap()),
) -> RustResult {
let field_name_str = cstr_to_str!(field_name);
let path_str = cstr_to_str!(path);
match IndexWriterWrapper::new(
String::from(field_name_str),
data_type,
String::from(path_str.to_str().unwrap()),
String::from(path_str),
num_threads,
overall_memory_budget_in_bytes,
);
create_binding(wrapper)
) {
Ok(wrapper) => RustResult::from_ptr(create_binding(wrapper)),
Err(e) => RustResult::from_error(e.to_string()),
}
}
#[no_mangle]
@ -35,24 +42,25 @@ pub extern "C" fn tantivy_free_index_writer(ptr: *mut c_void) {
// tantivy_finish_index will finish the index writer, and the index writer can't be used any more.
// After this was called, you should reset the pointer to null.
#[no_mangle]
pub extern "C" fn tantivy_finish_index(ptr: *mut c_void) {
pub extern "C" fn tantivy_finish_index(ptr: *mut c_void) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe { Box::from_raw(real).finish() }
unsafe { Box::from_raw(real).finish().into() }
}
#[no_mangle]
pub extern "C" fn tantivy_commit_index(ptr: *mut c_void) {
pub extern "C" fn tantivy_commit_index(ptr: *mut c_void) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
(*real).commit();
}
unsafe { (*real).commit().into() }
}
#[no_mangle]
pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> *mut c_void {
pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> RustResult {
let writer = ptr as *mut IndexWriterWrapper;
let reader = unsafe { (*writer).create_reader() };
create_binding(reader)
match reader {
Ok(r) => RustResult::from_ptr(create_binding(r)),
Err(e) => RustResult::from_error(e.to_string()),
}
}
// -------------------------build--------------------
@ -62,14 +70,10 @@ pub extern "C" fn tantivy_index_add_int8s(
array: *const i8,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_i8(*data, offset_begin + (index as i64));
}
}
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i8, &mut (*real)).into() }
}
#[no_mangle]
@ -78,14 +82,10 @@ pub extern "C" fn tantivy_index_add_int16s(
array: *const i16,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_i16(*data, offset_begin + (index as i64));
}
}
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i16, &mut (*real)).into() }
}
#[no_mangle]
@ -94,14 +94,10 @@ pub extern "C" fn tantivy_index_add_int32s(
array: *const i32,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_i32(*data, offset_begin + (index as i64));
}
}
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i32, &mut (*real)).into() }
}
#[no_mangle]
@ -110,14 +106,25 @@ pub extern "C" fn tantivy_index_add_int64s(
array: *const i64,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i64, &mut (*real)).into() }
}
fn execute<T: Copy>(
arr: &[T],
offset: i64,
mut e: fn(&mut IndexWriterWrapper, T, i64) -> Result<()>,
w: &mut IndexWriterWrapper,
) -> Result<()> {
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_i64(*data, offset_begin + (index as i64));
e(w, *data, offset + (index as i64))?;
}
}
Ok(())
}
#[no_mangle]
@ -126,14 +133,10 @@ pub extern "C" fn tantivy_index_add_f32s(
array: *const f32,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_f32(*data, offset_begin + (index as i64));
}
}
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f32, &mut (*real)).into() }
}
#[no_mangle]
@ -142,14 +145,10 @@ pub extern "C" fn tantivy_index_add_f64s(
array: *const f64,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_f64(*data, offset_begin + (index as i64));
}
}
unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f64, &mut (*real)).into() }
}
#[no_mangle]
@ -158,23 +157,31 @@ pub extern "C" fn tantivy_index_add_bools(
array: *const bool,
len: usize,
offset_begin: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
for (index, data) in arr.iter().enumerate() {
(*real).add_bool(*data, offset_begin + (index as i64));
}
execute(
arr,
offset_begin,
IndexWriterWrapper::add_bool,
&mut (*real),
)
.into()
}
}
// TODO: this is not a very efficient way, since we must call this function many times, which
// will bring a lot of overhead caused by the rust binding.
#[no_mangle]
pub extern "C" fn tantivy_index_add_string(ptr: *mut c_void, s: *const c_char, offset: i64) {
pub extern "C" fn tantivy_index_add_string(
ptr: *mut c_void,
s: *const c_char,
offset: i64,
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let c_str = unsafe { CStr::from_ptr(s) };
unsafe { (*real).add_string(c_str.to_str().unwrap(), offset) }
let s = cstr_to_str!(s);
unsafe { (*real).add_string(s, offset).into() }
}
// --------------------------------------------- array ------------------------------------------
@ -185,11 +192,11 @@ pub extern "C" fn tantivy_index_add_multi_int8s(
array: *const i8,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_i8s(arr, offset)
(*real).add_multi_i8s(arr, offset).into()
}
}
@ -199,11 +206,11 @@ pub extern "C" fn tantivy_index_add_multi_int16s(
array: *const i16,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_i16s(arr, offset);
(*real).add_multi_i16s(arr, offset).into()
}
}
@ -213,11 +220,11 @@ pub extern "C" fn tantivy_index_add_multi_int32s(
array: *const i32,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_i32s(arr, offset);
(*real).add_multi_i32s(arr, offset).into()
}
}
@ -227,11 +234,11 @@ pub extern "C" fn tantivy_index_add_multi_int64s(
array: *const i64,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_i64s(arr, offset);
(*real).add_multi_i64s(arr, offset).into()
}
}
@ -241,11 +248,11 @@ pub extern "C" fn tantivy_index_add_multi_f32s(
array: *const f32,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_f32s(arr, offset);
(*real).add_multi_f32s(arr, offset).into()
}
}
@ -255,11 +262,11 @@ pub extern "C" fn tantivy_index_add_multi_f64s(
array: *const f64,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_f64s(arr, offset);
(*real).add_multi_f64s(arr, offset).into()
}
}
@ -269,11 +276,11 @@ pub extern "C" fn tantivy_index_add_multi_bools(
array: *const bool,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_bools(arr, offset);
(*real).add_multi_bools(arr, offset).into()
}
}
@ -283,10 +290,10 @@ pub extern "C" fn tantivy_index_add_multi_keywords(
array: *const *const c_char,
len: usize,
offset: i64,
) {
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
unsafe {
let arr = slice::from_raw_parts(array, len);
(*real).add_multi_keywords(arr, offset)
(*real).add_multi_keywords(arr, offset).into()
}
}

View File

@ -2,11 +2,14 @@ use std::ffi::c_char;
use std::ffi::c_void;
use std::ffi::CStr;
use crate::array::RustResult;
use crate::cstr_to_str;
use crate::error::Result;
use crate::index_writer::IndexWriterWrapper;
use crate::log::init_log;
use crate::string_c::c_str_to_str;
use crate::tokenizer::create_tokenizer;
use crate::util::create_binding;
use crate::string_c::c_str_to_str;
use crate::log::init_log;
#[no_mangle]
pub extern "C" fn tantivy_create_text_writer(
@ -17,13 +20,13 @@ pub extern "C" fn tantivy_create_text_writer(
num_threads: usize,
overall_memory_budget_in_bytes: usize,
in_ram: bool,
) -> *mut c_void {
) -> RustResult {
init_log();
let field_name_str = unsafe { CStr::from_ptr(field_name).to_str().unwrap() };
let path_str = unsafe { CStr::from_ptr(path).to_str().unwrap() };
let tokenizer_name_str = unsafe { CStr::from_ptr(tokenizer_name).to_str().unwrap() };
let params = unsafe{c_str_to_str(analyzer_params).to_string()};
let analyzer = create_tokenizer(&params);
let field_name_str = cstr_to_str!(field_name);
let path_str = cstr_to_str!(path);
let tokenizer_name_str = cstr_to_str!(tokenizer_name);
let params = cstr_to_str!(analyzer_params);
let analyzer = create_tokenizer(params);
match analyzer {
Ok(text_analyzer) => {
let wrapper = IndexWriterWrapper::create_text_writer(
@ -35,11 +38,12 @@ pub extern "C" fn tantivy_create_text_writer(
overall_memory_budget_in_bytes,
in_ram,
);
create_binding(wrapper)
RustResult::from_ptr(create_binding(wrapper))
}
Err(err) => {
log::warn!("create tokenizer failed with error: {} param: {}", err.to_string(), params);
std::ptr::null_mut()
},
Err(err) => RustResult::from_error(format!(
"create tokenizer failed with error: {} param: {}",
err.to_string(),
params,
)),
}
}

View File

@ -280,7 +280,7 @@ pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyz
}
}
pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer> {
pub(crate) fn create_tokenizer(params: &str) -> Result<TextAnalyzer> {
if params.len() == 0 {
return Ok(standard_analyzer(vec![]));
}

View File

@ -1,31 +1,33 @@
use libc::{c_void,c_char};
use libc::{c_char, c_void};
use tantivy::tokenizer::TextAnalyzer;
use crate::{
array::RustResult,
log::init_log,
string_c::c_str_to_str,
tokenizer::create_tokenizer,
util::{create_binding, free_binding},
log::init_log,
};
#[no_mangle]
pub extern "C" fn tantivy_create_tokenizer(analyzer_params: *const c_char) -> *mut c_void {
pub extern "C" fn tantivy_create_tokenizer(analyzer_params: *const c_char) -> RustResult {
init_log();
let params = unsafe{c_str_to_str(analyzer_params).to_string()};
let params = unsafe { c_str_to_str(analyzer_params).to_string() };
let analyzer = create_tokenizer(&params);
match analyzer {
Ok(text_analyzer) => create_binding(text_analyzer),
Err(err) => {
log::warn!("create tokenizer failed with error: {} param: {}", err.to_string(), params);
std::ptr::null_mut()
},
Ok(text_analyzer) => RustResult::from_ptr(create_binding(text_analyzer)),
Err(err) => RustResult::from_error(format!(
"create tokenizer failed with error: {} param: {}",
err.to_string(),
params,
)),
}
}
#[no_mangle]
pub extern "C" fn tantivy_clone_tokenizer(ptr: *mut c_void) -> *mut c_void {
let analyzer=ptr as *mut TextAnalyzer;
let clone = unsafe {(*analyzer).clone()};
let analyzer = ptr as *mut TextAnalyzer;
let clone = unsafe { (*analyzer).clone() };
create_binding(clone)
}

View File

@ -4,7 +4,9 @@
#include <set>
#include <iostream>
#include <map>
#include <vector>
#include "common/EasyAssert.h"
#include "tantivy-binding.h"
#include "rust-binding.h"
#include "rust-array.h"
@ -82,18 +84,27 @@ struct TantivyIndexWrapper {
uintptr_t num_threads = DEFAULT_NUM_THREADS,
uintptr_t overall_memory_budget_in_bytes =
DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) {
writer_ = tantivy_create_index(field_name,
data_type,
path,
num_threads,
overall_memory_budget_in_bytes);
auto res = RustResultWrapper(
tantivy_create_index(field_name,
data_type,
path,
num_threads,
overall_memory_budget_in_bytes));
AssertInfo(res.result_->success,
"failed to create index: {}",
res.result_->error);
writer_ = res.result_->value.ptr._0;
path_ = std::string(path);
}
// load index. create index reader.
explicit TantivyIndexWrapper(const char* path) {
assert(tantivy_index_exist(path));
reader_ = tantivy_load_index(path);
auto res = RustResultWrapper(tantivy_load_index(path));
AssertInfo(res.result_->success,
"failed to load index: {}",
res.result_->error);
reader_ = res.result_->value.ptr._0;
path_ = std::string(path);
}
@ -106,13 +117,18 @@ struct TantivyIndexWrapper {
uintptr_t num_threads = DEFAULT_NUM_THREADS,
uintptr_t overall_memory_budget_in_bytes =
DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) {
writer_ = tantivy_create_text_writer(field_name,
path,
tokenizer_name,
analyzer_params,
num_threads,
overall_memory_budget_in_bytes,
in_ram);
auto res = RustResultWrapper(
tantivy_create_text_writer(field_name,
path,
tokenizer_name,
analyzer_params,
num_threads,
overall_memory_budget_in_bytes,
in_ram));
AssertInfo(res.result_->success,
"failed to create text writer: {}",
res.result_->error);
writer_ = res.result_->value.ptr._0;
path_ = std::string(path);
}
@ -120,10 +136,19 @@ struct TantivyIndexWrapper {
void
create_reader() {
if (writer_ != nullptr) {
reader_ = tantivy_create_reader_from_writer(writer_);
auto res =
RustResultWrapper(tantivy_create_reader_from_writer(writer_));
AssertInfo(res.result_->success,
"failed to create reader from writer: {}",
res.result_->error);
reader_ = res.result_->value.ptr._0;
} else if (!path_.empty()) {
assert(tantivy_index_exist(path_.c_str()));
reader_ = tantivy_load_index(path_.c_str());
auto res = RustResultWrapper(tantivy_load_index(path_.c_str()));
AssertInfo(res.result_->success,
"failed to load index: {}",
res.result_->error);
reader_ = res.result_->value.ptr._0;
}
}
@ -135,8 +160,11 @@ struct TantivyIndexWrapper {
register_tokenizer(const char* tokenizer_name,
const char* analyzer_params) {
if (reader_ != nullptr) {
tantivy_register_tokenizer(
reader_, tokenizer_name, analyzer_params);
auto res = RustResultWrapper(tantivy_register_tokenizer(
reader_, tokenizer_name, analyzer_params));
AssertInfo(res.result_->success,
"failed to register tokenizer: {}",
res.result_->error);
}
}
@ -146,47 +174,78 @@ struct TantivyIndexWrapper {
assert(!finished_);
if constexpr (std::is_same_v<T, bool>) {
tantivy_index_add_bools(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_bools(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add bools: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int8_t>) {
tantivy_index_add_int8s(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_int8s(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add int8s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int16_t>) {
tantivy_index_add_int16s(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_int16s(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add int16s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int32_t>) {
tantivy_index_add_int32s(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_int32s(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add int32s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int64_t>) {
tantivy_index_add_int64s(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_int64s(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add int64s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, float>) {
tantivy_index_add_f32s(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_f32s(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add f32s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, double>) {
tantivy_index_add_f64s(writer_, array, len, offset_begin);
auto res = RustResultWrapper(
tantivy_index_add_f64s(writer_, array, len, offset_begin));
AssertInfo(res.result_->success,
"failed to add f64s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, std::string>) {
// TODO: not very efficient, a lot of overhead due to rust-ffi call.
for (uintptr_t i = 0; i < len; i++) {
tantivy_index_add_string(
auto res = RustResultWrapper(tantivy_index_add_string(
writer_,
static_cast<const std::string*>(array)[i].c_str(),
offset_begin + i);
offset_begin + i));
AssertInfo(res.result_->success,
"failed to add string: {}",
res.result_->error);
}
return;
}
@ -201,37 +260,65 @@ struct TantivyIndexWrapper {
assert(!finished_);
if constexpr (std::is_same_v<T, bool>) {
tantivy_index_add_multi_bools(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_bools(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi bools: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int8_t>) {
tantivy_index_add_multi_int8s(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_int8s(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi int8s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int16_t>) {
tantivy_index_add_multi_int16s(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_int16s(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi int16s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int32_t>) {
tantivy_index_add_multi_int32s(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_int32s(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi int32s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, int64_t>) {
tantivy_index_add_multi_int64s(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_int64s(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi int64s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, float>) {
tantivy_index_add_multi_f32s(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_f32s(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi f32s: {}",
res.result_->error);
return;
}
if constexpr (std::is_same_v<T, double>) {
tantivy_index_add_multi_f64s(writer_, array, len, offset);
auto res = RustResultWrapper(
tantivy_index_add_multi_f64s(writer_, array, len, offset));
AssertInfo(res.result_->success,
"failed to add multi f64s: {}",
res.result_->error);
return;
}
@ -240,8 +327,11 @@ struct TantivyIndexWrapper {
for (uintptr_t i = 0; i < len; i++) {
views.push_back(array[i].c_str());
}
tantivy_index_add_multi_keywords(
writer_, views.data(), len, offset);
auto res = RustResultWrapper(tantivy_index_add_multi_keywords(
writer_, views.data(), len, offset));
AssertInfo(res.result_->success,
"failed to add multi keywords: {}",
res.result_->error);
return;
}
@ -256,7 +346,10 @@ struct TantivyIndexWrapper {
return;
}
tantivy_finish_index(writer_);
auto res = RustResultWrapper(tantivy_finish_index(writer_));
AssertInfo(res.result_->success,
"failed to finish index: {}",
res.result_->error);
writer_ = nullptr;
finished_ = true;
}
@ -264,20 +357,30 @@ struct TantivyIndexWrapper {
inline void
commit() {
if (writer_ != nullptr) {
tantivy_commit_index(writer_);
auto res = RustResultWrapper(tantivy_commit_index(writer_));
AssertInfo(res.result_->success,
"failed to commit index: {}",
res.result_->error);
}
}
inline void
reload() {
if (reader_ != nullptr) {
tantivy_reload_index(reader_);
auto res = RustResultWrapper(tantivy_reload_index(reader_));
AssertInfo(res.result_->success,
"failed to reload index: {}",
res.result_->error);
}
}
inline uint32_t
count() {
return tantivy_index_count(reader_);
auto res = RustResultWrapper(tantivy_index_count(reader_));
AssertInfo(res.result_->success,
"failed to get count: {}",
res.result_->error);
return res.result_->value.u32._0;
}
public:
@ -308,7 +411,14 @@ struct TantivyIndexWrapper {
"InvertedIndex.term_query: unsupported data type: {}",
typeid(T).name());
}();
return RustArrayWrapper(array);
auto res = RustResultWrapper(array);
AssertInfo(res.result_->success,
"TantivyIndexWrapper.term_query: {}",
res.result_->error);
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
"TantivyIndexWrapper.term_query: invalid result type");
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
template <typename T>
@ -337,7 +447,15 @@ struct TantivyIndexWrapper {
"{}",
typeid(T).name());
}();
return RustArrayWrapper(array);
auto res = RustResultWrapper(array);
AssertInfo(res.result_->success,
"TantivyIndexWrapper.lower_bound_range_query: {}",
res.result_->error);
AssertInfo(
res.result_->value.tag == Value::Tag::RustArray,
"TantivyIndexWrapper.lower_bound_range_query: invalid result "
"type");
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
template <typename T>
@ -366,7 +484,15 @@ struct TantivyIndexWrapper {
"{}",
typeid(T).name());
}();
return RustArrayWrapper(array);
auto res = RustResultWrapper(array);
AssertInfo(res.result_->success,
"TantivyIndexWrapper.upper_bound_range_query: {}",
res.result_->error);
AssertInfo(
res.result_->value.tag == Value::Tag::RustArray,
"TantivyIndexWrapper.upper_bound_range_query: invalid result "
"type");
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
template <typename T>
@ -406,25 +532,49 @@ struct TantivyIndexWrapper {
"InvertedIndex.range_query: unsupported data type: {}",
typeid(T).name());
}();
return RustArrayWrapper(array);
auto res = RustResultWrapper(array);
AssertInfo(res.result_->success,
"TantivyIndexWrapper.range_query: {}",
res.result_->error);
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
"TantivyIndexWrapper.range_query: invalid result type");
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
RustArrayWrapper
prefix_query(const std::string& prefix) {
auto array = tantivy_prefix_query_keyword(reader_, prefix.c_str());
return RustArrayWrapper(array);
auto res = RustResultWrapper(array);
AssertInfo(res.result_->success,
"TantivyIndexWrapper.prefix_query: {}",
res.result_->error);
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
"TantivyIndexWrapper.prefix_query: invalid result type");
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
RustArrayWrapper
regex_query(const std::string& pattern) {
auto array = tantivy_regex_query(reader_, pattern.c_str());
return RustArrayWrapper(array);
auto res = RustResultWrapper(array);
AssertInfo(res.result_->success,
"TantivyIndexWrapper.regex_query: {}",
res.result_->error);
AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
"TantivyIndexWrapper.regex_query: invalid result type");
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
// Runs a match query for `query` through reader_ and returns the resulting
// array.
//
// The binding call returns a RustResult. It is placed in a RustResultWrapper,
// whose destructor calls free_rust_result, and is validated before the payload
// is read: the call must have succeeded and the value must be tagged as a
// RustArray.
RustArrayWrapper
match_query(const std::string& query) {
    auto res = RustResultWrapper(tantivy_match_query(reader_, query.c_str()));
    AssertInfo(res.result_->success,
               "TantivyIndexWrapper.match_query: {}",
               res.result_->error);
    AssertInfo(res.result_->value.tag == Value::Tag::RustArray,
               "TantivyIndexWrapper.match_query: invalid result type");
    // RustArrayWrapper's RustArray&& constructor takes over the buffer and
    // resets the source fields, so the array left inside res is empty when
    // res is destroyed.
    return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
}
public:

View File

@ -3,6 +3,7 @@
#include "tantivy-binding.h"
#include "rust-binding.h"
#include "rust-hashmap.h"
#include "tantivy/rust-array.h"
#include "token-stream.h"
namespace milvus::tantivy {
@ -13,10 +14,12 @@ struct Tokenizer {
explicit Tokenizer(std::string&& params) {
auto shared_params = std::make_shared<std::string>(std::move(params));
ptr_ = tantivy_create_tokenizer(shared_params->c_str());
if (ptr_ == nullptr) {
throw std::invalid_argument("invalid tokenizer parameters");
}
auto res =
RustResultWrapper(tantivy_create_tokenizer(shared_params->c_str()));
AssertInfo(res.result_->success,
"Tokenizer creation failed: {}",
res.result_->error);
ptr_ = res.result_->value.ptr._0;
}
explicit Tokenizer(void* _ptr) : ptr_(_ptr) {

View File

@ -87,6 +87,7 @@ set(MILVUS_TEST_FILES
test_utils.cpp
test_chunked_segment.cpp
test_chunked_column.cpp
test_rust_result.cpp
)
if ( INDEX_ENGINE STREQUAL "cardinal" )

View File

@ -0,0 +1,27 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <cstdint>
#include "gtest/gtest.h"
#include "tantivy-binding.h"
// Checks that RustResult values returned by the test-only binding functions
// can be read from C++: one carrying an array and one carrying a raw pointer.
TEST(RustResultTest, TestResult) {
    auto arr = test_enum_with_array();
    // Fatal checks: reading rust_array from a failed or differently-tagged
    // result would access an inactive union member.
    ASSERT_TRUE(arr.success);
    ASSERT_TRUE(arr.value.tag == Value::Tag::RustArray);
    auto len = arr.value.rust_array._0.len;
    // Element i is expected to hold i + 1.
    for (size_t i = 0; i < len; i++) {
        EXPECT_EQ(i + 1, arr.value.rust_array._0.array[i]);
    }
    free_rust_result(arr);

    auto ptr = test_enum_with_ptr();
    ASSERT_TRUE(ptr.success);
    ASSERT_TRUE(ptr.value.tag == Value::Tag::Ptr);
    EXPECT_EQ(1, *static_cast<uint32_t*>(ptr.value.ptr._0));
    free_rust_result(ptr);
    // `ptr` is a by-value struct, so its pointer field is still readable here.
    // NOTE(review): presumably free_rust_result does not release the pointee
    // for pointer results, which is why it is freed explicitly — confirm
    // against the Rust side.
    free_test_ptr(ptr.value.ptr._0);
}

View File

@ -6679,7 +6679,7 @@ class TestQueryTextMatchNegative(TestcaseBase):
default_schema = CollectionSchema(
fields=default_fields, description="test collection"
)
error = {ct.err_code: 2000, ct.err_msg: "invalid tokenizer parameters"}
error = {ct.err_code: 2000, ct.err_msg: "unsupported tokenizer"}
self.init_collection_wrap(
name=cf.gen_unique_str(prefix),
schema=default_schema,