mirror of https://github.com/milvus-io/milvus.git
enhance: add tantivy collector for i64 (#39850)
issue: #39852 Signed-off-by: SpadeA <tangchenjie1210@gmail.com>pull/39893/head
parent
36e5b545b5
commit
f7d9587720
|
@ -148,7 +148,8 @@ struct ArithCompareOperator {
|
|||
} else if constexpr (AOp == ArithOpType::Div) {
|
||||
return CompareOperator<CmpOp>::compare(left / right, value);
|
||||
} else if constexpr (AOp == ArithOpType::Mod) {
|
||||
return CompareOperator<CmpOp>::compare(long(left) % long(right), value);
|
||||
return CompareOperator<CmpOp>::compare(long(left) % long(right),
|
||||
value);
|
||||
} else {
|
||||
// unimplemented
|
||||
static_assert(always_false_v<T>, "unimplemented");
|
||||
|
|
|
@ -122,7 +122,8 @@ struct ArithOpElementFunc {
|
|||
res[i] = (src[offset] / right_operand) == val;
|
||||
} else if constexpr (arith_op ==
|
||||
proto::plan::ArithOpType::Mod) {
|
||||
res[i] = (long(src[offset]) % long(right_operand)) == val;
|
||||
res[i] =
|
||||
(long(src[offset]) % long(right_operand)) == val;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported arith type:{} for "
|
||||
|
@ -143,7 +144,8 @@ struct ArithOpElementFunc {
|
|||
res[i] = (src[offset] / right_operand) != val;
|
||||
} else if constexpr (arith_op ==
|
||||
proto::plan::ArithOpType::Mod) {
|
||||
res[i] = (long(src[offset]) % long(right_operand)) != val;
|
||||
res[i] =
|
||||
(long(src[offset]) % long(right_operand)) != val;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported arith type:{} for "
|
||||
|
@ -165,7 +167,8 @@ struct ArithOpElementFunc {
|
|||
res[i] = (src[offset] / right_operand) > val;
|
||||
} else if constexpr (arith_op ==
|
||||
proto::plan::ArithOpType::Mod) {
|
||||
res[i] = (long(src[offset]) % long(right_operand)) > val;
|
||||
res[i] =
|
||||
(long(src[offset]) % long(right_operand)) > val;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported arith type:{} for "
|
||||
|
@ -187,7 +190,8 @@ struct ArithOpElementFunc {
|
|||
res[i] = (src[offset] / right_operand) >= val;
|
||||
} else if constexpr (arith_op ==
|
||||
proto::plan::ArithOpType::Mod) {
|
||||
res[i] = (long(src[offset]) % long(right_operand)) >= val;
|
||||
res[i] =
|
||||
(long(src[offset]) % long(right_operand)) >= val;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported arith type:{} for "
|
||||
|
@ -208,7 +212,8 @@ struct ArithOpElementFunc {
|
|||
res[i] = (src[offset] / right_operand) < val;
|
||||
} else if constexpr (arith_op ==
|
||||
proto::plan::ArithOpType::Mod) {
|
||||
res[i] = (long(src[offset]) % long(right_operand)) < val;
|
||||
res[i] =
|
||||
(long(src[offset]) % long(right_operand)) < val;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported arith type:{} for "
|
||||
|
@ -229,7 +234,8 @@ struct ArithOpElementFunc {
|
|||
res[i] = (src[offset] / right_operand) <= val;
|
||||
} else if constexpr (arith_op ==
|
||||
proto::plan::ArithOpType::Mod) {
|
||||
res[i] = (long(src[offset]) % long(right_operand)) <= val;
|
||||
res[i] =
|
||||
(long(src[offset]) % long(right_operand)) <= val;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported arith type:{} for "
|
||||
|
|
|
@ -20,10 +20,17 @@ struct RustArray {
|
|||
size_t cap;
|
||||
};
|
||||
|
||||
struct RustArrayI64 {
|
||||
int64_t *array;
|
||||
size_t len;
|
||||
size_t cap;
|
||||
};
|
||||
|
||||
struct Value {
|
||||
enum class Tag {
|
||||
None,
|
||||
RustArray,
|
||||
RustArrayI64,
|
||||
U32,
|
||||
Ptr,
|
||||
};
|
||||
|
@ -36,6 +43,10 @@ struct Value {
|
|||
RustArray _0;
|
||||
};
|
||||
|
||||
struct RustArrayI64_Body {
|
||||
RustArrayI64 _0;
|
||||
};
|
||||
|
||||
struct U32_Body {
|
||||
uint32_t _0;
|
||||
};
|
||||
|
@ -48,6 +59,7 @@ struct Value {
|
|||
union {
|
||||
None_Body none;
|
||||
RustArray_Body rust_array;
|
||||
RustArrayI64_Body rust_array_i64;
|
||||
U32_Body u32;
|
||||
Ptr_Body ptr;
|
||||
};
|
||||
|
@ -63,6 +75,8 @@ extern "C" {
|
|||
|
||||
void free_rust_array(RustArray array);
|
||||
|
||||
void free_rust_array_i64(RustArrayI64 array);
|
||||
|
||||
void free_rust_result(RustResult result);
|
||||
|
||||
void free_rust_error(const char *error);
|
||||
|
|
|
@ -54,10 +54,55 @@ pub extern "C" fn free_rust_array(array: RustArray) {
|
|||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct RustArrayI64 {
|
||||
array: *mut i64,
|
||||
len: size_t,
|
||||
cap: size_t,
|
||||
}
|
||||
|
||||
impl RustArrayI64 {
|
||||
pub fn from_vec(vec: Vec<i64>) -> RustArrayI64 {
|
||||
let len = vec.len();
|
||||
let cap = vec.capacity();
|
||||
let v = vec.leak();
|
||||
RustArrayI64 {
|
||||
array: v.as_mut_ptr(),
|
||||
len,
|
||||
cap,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::default::Default for RustArrayI64 {
|
||||
fn default() -> Self {
|
||||
RustArrayI64 {
|
||||
array: std::ptr::null_mut(),
|
||||
len: 0,
|
||||
cap: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<i64>> for RustArrayI64 {
|
||||
fn from(vec: Vec<i64>) -> Self {
|
||||
RustArrayI64::from_vec(vec)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn free_rust_array_i64(array: RustArrayI64) {
|
||||
let RustArrayI64 { array, len, cap } = array;
|
||||
unsafe {
|
||||
Vec::from_raw_parts(array, len, cap);
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub enum Value {
|
||||
None(()),
|
||||
RustArray(RustArray),
|
||||
RustArrayI64(RustArrayI64),
|
||||
U32(u32),
|
||||
Ptr(*mut c_void),
|
||||
}
|
||||
|
@ -74,7 +119,7 @@ macro_rules! impl_from_for_enum {
|
|||
};
|
||||
}
|
||||
|
||||
impl_from_for_enum!(Value, None => (), RustArray => RustArray, RustArray => Vec<u32>, U32 => u32, Ptr => *mut c_void);
|
||||
impl_from_for_enum!(Value, None => (), RustArrayI64 => RustArrayI64, RustArray => RustArray, RustArray => Vec<u32>, U32 => u32, Ptr => *mut c_void);
|
||||
|
||||
#[repr(C)]
|
||||
pub struct RustResult {
|
||||
|
|
|
@ -4,11 +4,19 @@ use tantivy::{
|
|||
DocId, Score, SegmentOrdinal, SegmentReader,
|
||||
};
|
||||
|
||||
pub(crate) struct DocIdCollector;
|
||||
#[derive(Default)]
|
||||
pub(crate) struct DocIdCollector<T> {
|
||||
_phantom: std::marker::PhantomData<T>,
|
||||
}
|
||||
|
||||
impl Collector for DocIdCollector {
|
||||
pub(crate) struct DocIdChildCollector<T> {
|
||||
docs: Vec<T>,
|
||||
column: Column<i64>,
|
||||
}
|
||||
|
||||
impl Collector for DocIdCollector<u32> {
|
||||
type Fruit = Vec<u32>;
|
||||
type Child = DocIdChildCollector;
|
||||
type Child = DocIdChildCollector<u32>;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
|
@ -40,12 +48,7 @@ impl Collector for DocIdCollector {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) struct DocIdChildCollector {
|
||||
docs: Vec<u32>,
|
||||
column: Column<i64>,
|
||||
}
|
||||
|
||||
impl SegmentCollector for DocIdChildCollector {
|
||||
impl SegmentCollector for DocIdChildCollector<u32> {
|
||||
type Fruit = Vec<u32>;
|
||||
|
||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||
|
@ -58,3 +61,51 @@ impl SegmentCollector for DocIdChildCollector {
|
|||
self.docs
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for DocIdCollector<i64> {
|
||||
type Fruit = Vec<i64>;
|
||||
type Child = DocIdChildCollector<i64>;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
_segment_local_id: SegmentOrdinal,
|
||||
segment: &SegmentReader,
|
||||
) -> tantivy::Result<Self::Child> {
|
||||
Ok(DocIdChildCollector {
|
||||
docs: Vec::new(),
|
||||
column: segment.fast_fields().i64("doc_id").unwrap(),
|
||||
})
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(
|
||||
&self,
|
||||
segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>,
|
||||
) -> tantivy::Result<Self::Fruit> {
|
||||
let len: usize = segment_fruits.iter().map(|docset| docset.len()).sum();
|
||||
let mut result = Vec::with_capacity(len);
|
||||
for docs in segment_fruits {
|
||||
for doc in docs {
|
||||
result.push(doc);
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCollector for DocIdChildCollector<i64> {
|
||||
type Fruit = Vec<i64>;
|
||||
|
||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||
self.column.values_for_doc(doc).for_each(|doc_id| {
|
||||
self.docs.push(doc_id);
|
||||
})
|
||||
}
|
||||
|
||||
fn harvest(self) -> Self::Fruit {
|
||||
self.docs
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,7 +73,7 @@ impl IndexReaderWrapper {
|
|||
Some(_) => {
|
||||
// newer version with doc_id.
|
||||
searcher
|
||||
.search(q, &DocIdCollector {})
|
||||
.search(q, &DocIdCollector::<u32>::default())
|
||||
.map_err(TantivyBindingError::TantivyError)
|
||||
}
|
||||
None => {
|
||||
|
@ -85,6 +85,16 @@ impl IndexReaderWrapper {
|
|||
}
|
||||
}
|
||||
|
||||
// Generally, we should use [`crate::search`], except for some special scenarios where the doc_id could be beyond
|
||||
// the range of u32.
|
||||
pub(crate) fn search_i64(&self, q: &dyn Query) -> Result<Vec<i64>> {
|
||||
assert!(self.id_field.is_some());
|
||||
let searcher = self.reader.searcher();
|
||||
searcher
|
||||
.search(q, &DocIdCollector::<i64>::default())
|
||||
.map_err(TantivyBindingError::TantivyError)
|
||||
}
|
||||
|
||||
pub fn term_query_i64(&self, term: i64) -> Result<Vec<u32>> {
|
||||
let q = TermQuery::new(
|
||||
Term::from_field_i64(self.field, term),
|
||||
|
|
|
@ -2,6 +2,7 @@ mod array;
|
|||
mod data_type;
|
||||
mod demo_c;
|
||||
mod docid_collector;
|
||||
mod error;
|
||||
mod hashmap_c;
|
||||
mod index_reader;
|
||||
mod index_reader_c;
|
||||
|
@ -11,18 +12,17 @@ mod index_writer;
|
|||
mod index_writer_c;
|
||||
mod index_writer_text;
|
||||
mod index_writer_text_c;
|
||||
mod jieba_tokenizer;
|
||||
mod log;
|
||||
mod stop_words;
|
||||
mod string_c;
|
||||
mod token_stream_c;
|
||||
mod tokenizer;
|
||||
mod tokenizer_filter;
|
||||
mod tokenizer_c;
|
||||
mod tokenizer_filter;
|
||||
mod util;
|
||||
mod error;
|
||||
mod util_c;
|
||||
mod vec_collector;
|
||||
mod stop_words;
|
||||
mod jieba_tokenizer;
|
||||
|
||||
pub fn add(left: usize, right: usize) -> usize {
|
||||
left + right
|
||||
|
|
Loading…
Reference in New Issue