diff --git a/internal/core/src/bitset/common.h b/internal/core/src/bitset/common.h index a8b1d4e4e3..f747f52246 100644 --- a/internal/core/src/bitset/common.h +++ b/internal/core/src/bitset/common.h @@ -148,7 +148,8 @@ struct ArithCompareOperator { } else if constexpr (AOp == ArithOpType::Div) { return CompareOperator::compare(left / right, value); } else if constexpr (AOp == ArithOpType::Mod) { - return CompareOperator::compare(long(left) % long(right), value); + return CompareOperator::compare(long(left) % long(right), + value); } else { // unimplemented static_assert(always_false_v, "unimplemented"); diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h index 59e281da05..5e950cbc46 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h @@ -122,7 +122,8 @@ struct ArithOpElementFunc { res[i] = (src[offset] / right_operand) == val; } else if constexpr (arith_op == proto::plan::ArithOpType::Mod) { - res[i] = (long(src[offset]) % long(right_operand)) == val; + res[i] = + (long(src[offset]) % long(right_operand)) == val; } else { PanicInfo(OpTypeInvalid, fmt::format("unsupported arith type:{} for " @@ -143,7 +144,8 @@ struct ArithOpElementFunc { res[i] = (src[offset] / right_operand) != val; } else if constexpr (arith_op == proto::plan::ArithOpType::Mod) { - res[i] = (long(src[offset]) % long(right_operand)) != val; + res[i] = + (long(src[offset]) % long(right_operand)) != val; } else { PanicInfo(OpTypeInvalid, fmt::format("unsupported arith type:{} for " @@ -165,7 +167,8 @@ struct ArithOpElementFunc { res[i] = (src[offset] / right_operand) > val; } else if constexpr (arith_op == proto::plan::ArithOpType::Mod) { - res[i] = (long(src[offset]) % long(right_operand)) > val; + res[i] = + (long(src[offset]) % long(right_operand)) > val; } else { PanicInfo(OpTypeInvalid, fmt::format("unsupported arith type:{} for " @@ -187,7 +190,8 @@ struct ArithOpElementFunc { res[i] = (src[offset] / right_operand) >= val; } else if constexpr (arith_op == proto::plan::ArithOpType::Mod) { - res[i] = (long(src[offset]) % long(right_operand)) >= val; + res[i] = + (long(src[offset]) % long(right_operand)) >= val; } else { PanicInfo(OpTypeInvalid, fmt::format("unsupported arith type:{} for " @@ -208,7 +212,8 @@ struct ArithOpElementFunc { res[i] = (src[offset] / right_operand) < val; } else if constexpr (arith_op == proto::plan::ArithOpType::Mod) { - res[i] = (long(src[offset]) % long(right_operand)) < val; + res[i] = + (long(src[offset]) % long(right_operand)) < val; } else { PanicInfo(OpTypeInvalid, fmt::format("unsupported arith type:{} for " @@ -229,7 +234,8 @@ struct ArithOpElementFunc { res[i] = (src[offset] / right_operand) <= val; } else if constexpr (arith_op == proto::plan::ArithOpType::Mod) { - res[i] = (long(src[offset]) % long(right_operand)) <= val; + res[i] = + (long(src[offset]) % long(right_operand)) <= val; } else { PanicInfo(OpTypeInvalid, fmt::format("unsupported arith type:{} for " diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h index d1a8c54fa2..d0153154fa 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h +++ b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h @@ -20,10 +20,17 @@ struct RustArray { size_t cap; }; +struct RustArrayI64 { + int64_t *array; + size_t len; + size_t cap; +}; + struct Value { enum class Tag { None, RustArray, + RustArrayI64, U32, Ptr, }; @@ -36,6 +43,10 @@ struct Value { RustArray _0; }; + struct RustArrayI64_Body { + RustArrayI64 _0; + }; + struct U32_Body { uint32_t _0; }; @@ -48,6 +59,7 @@ struct Value { union { None_Body none; RustArray_Body rust_array; + RustArrayI64_Body rust_array_i64; U32_Body u32; Ptr_Body ptr; }; @@ -63,6 +75,8 @@ extern "C" { void free_rust_array(RustArray array); +void free_rust_array_i64(RustArrayI64 array); + void free_rust_result(RustResult result); void free_rust_error(const char *error); diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs index 69612d0d36..f0e2553f77 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs @@ -54,10 +54,55 @@ pub extern "C" fn free_rust_array(array: RustArray) { } } +#[repr(C)] +pub struct RustArrayI64 { + array: *mut i64, + len: size_t, + cap: size_t, +} + +impl RustArrayI64 { + pub fn from_vec(vec: Vec) -> RustArrayI64 { + let len = vec.len(); + let cap = vec.capacity(); + let v = vec.leak(); + RustArrayI64 { + array: v.as_mut_ptr(), + len, + cap, + } + } +} + +impl std::default::Default for RustArrayI64 { + fn default() -> Self { + RustArrayI64 { + array: std::ptr::null_mut(), + len: 0, + cap: 0, + } + } +} + +impl From> for RustArrayI64 { + fn from(vec: Vec) -> Self { + RustArrayI64::from_vec(vec) + } +} + +#[no_mangle] +pub extern "C" fn free_rust_array_i64(array: RustArrayI64) { + let RustArrayI64 { array, len, cap } = array; + unsafe { + Vec::from_raw_parts(array, len, cap); + } +} + #[repr(C)] pub enum Value { None(()), RustArray(RustArray), + RustArrayI64(RustArrayI64), U32(u32), Ptr(*mut c_void), } @@ -74,7 +119,7 @@ macro_rules! impl_from_for_enum { }; } -impl_from_for_enum!(Value, None => (), RustArray => RustArray, RustArray => Vec, U32 => u32, Ptr => *mut c_void); +impl_from_for_enum!(Value, None => (), RustArrayI64 => RustArrayI64, RustArray => RustArray, RustArray => Vec, U32 => u32, Ptr => *mut c_void); #[repr(C)] pub struct RustResult { diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/docid_collector.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/docid_collector.rs index 95d585b436..92541831b0 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/docid_collector.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/docid_collector.rs @@ -4,11 +4,19 @@ use tantivy::{ DocId, Score, SegmentOrdinal, SegmentReader, }; -pub(crate) struct DocIdCollector; +#[derive(Default)] +pub(crate) struct DocIdCollector { + _phantom: std::marker::PhantomData, +} -impl Collector for DocIdCollector { +pub(crate) struct DocIdChildCollector { + docs: Vec, + column: Column, +} + +impl Collector for DocIdCollector { type Fruit = Vec; - type Child = DocIdChildCollector; + type Child = DocIdChildCollector; fn for_segment( &self, @@ -40,12 +48,7 @@ impl Collector for DocIdCollector { } } -pub(crate) struct DocIdChildCollector { - docs: Vec, - column: Column, -} - -impl SegmentCollector for DocIdChildCollector { +impl SegmentCollector for DocIdChildCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { @@ -58,3 +61,51 @@ impl SegmentCollector for DocIdChildCollector { self.docs } } + +impl Collector for DocIdCollector { + type Fruit = Vec; + type Child = DocIdChildCollector; + + fn for_segment( + &self, + _segment_local_id: SegmentOrdinal, + segment: &SegmentReader, + ) -> tantivy::Result { + Ok(DocIdChildCollector { + docs: Vec::new(), + column: segment.fast_fields().i64("doc_id").unwrap(), + }) + } + + fn requires_scoring(&self) -> bool { + false + } + + fn merge_fruits( + &self, + segment_fruits: Vec<::Fruit>, + ) -> tantivy::Result { + let len: usize = segment_fruits.iter().map(|docset| docset.len()).sum(); + let mut result = Vec::with_capacity(len); + for docs in segment_fruits { + for doc in docs { + result.push(doc); + } + } + Ok(result) + } +} + +impl SegmentCollector for DocIdChildCollector { + type Fruit = Vec; + + fn collect(&mut self, doc: DocId, _score: Score) { + self.column.values_for_doc(doc).for_each(|doc_id| { + self.docs.push(doc_id); + }) + } + + fn harvest(self) -> Self::Fruit { + self.docs + } +} diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs index cf3a5dd928..d840c3358a 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs @@ -73,7 +73,7 @@ impl IndexReaderWrapper { Some(_) => { // newer version with doc_id. searcher - .search(q, &DocIdCollector {}) + .search(q, &DocIdCollector::::default()) .map_err(TantivyBindingError::TantivyError) } None => { @@ -85,6 +85,16 @@ impl IndexReaderWrapper { } } + // Generally, we should use [`crate::search`], except for some special senarios where the doc_id could beyound + // the score of u32. + pub(crate) fn search_i64(&self, q: &dyn Query) -> Result> { + assert!(self.id_field.is_some()); + let searcher = self.reader.searcher(); + searcher + .search(q, &DocIdCollector::::default()) + .map_err(TantivyBindingError::TantivyError) + } + pub fn term_query_i64(&self, term: i64) -> Result> { let q = TermQuery::new( Term::from_field_i64(self.field, term), diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs index c789e66de8..110c16d6ab 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs @@ -2,6 +2,7 @@ mod array; mod data_type; mod demo_c; mod docid_collector; +mod error; mod hashmap_c; mod index_reader; mod index_reader_c; @@ -11,18 +12,17 @@ mod index_writer; mod index_writer_c; mod index_writer_text; mod index_writer_text_c; +mod jieba_tokenizer; mod log; +mod stop_words; mod string_c; mod token_stream_c; mod tokenizer; -mod tokenizer_filter; mod tokenizer_c; +mod tokenizer_filter; mod util; -mod error; mod util_c; mod vec_collector; -mod stop_words; -mod jieba_tokenizer; pub fn add(left: usize, right: usize) -> usize { left + right