refactor: spike on arrow encoding
parent
9f299461ed
commit
47b2f7940b
|
@ -819,6 +819,7 @@ dependencies = [
|
||||||
"datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)",
|
"datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)",
|
||||||
"delorean_table",
|
"delorean_table",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
|
"heapsize",
|
||||||
"human_format",
|
"human_format",
|
||||||
"log",
|
"log",
|
||||||
"parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)",
|
"parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)",
|
||||||
|
@ -1356,6 +1357,15 @@ dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heapsize"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1679e6ea370dee694f91f1dc469bf94cf8f52051d147aec3e1f9497c6fc22461"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.3.8",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.3.1"
|
version = "0.3.1"
|
||||||
|
|
|
@ -12,9 +12,7 @@ delorean_table = { path = "../delorean_table" }
|
||||||
arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" }
|
arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" }
|
||||||
parquet = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" }
|
parquet = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" }
|
||||||
datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" }
|
datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" }
|
||||||
#arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" }
|
heapsize = "0.4.2"
|
||||||
#parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" }
|
|
||||||
#datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" }
|
|
||||||
snafu = "0.6.8"
|
snafu = "0.6.8"
|
||||||
croaring = "0.4.5"
|
croaring = "0.4.5"
|
||||||
crossbeam = "0.7.3"
|
crossbeam = "0.7.3"
|
||||||
|
|
|
@ -134,6 +134,7 @@ fn build_store(
|
||||||
let mut segment = Segment::new(rb.num_rows(), schema);
|
let mut segment = Segment::new(rb.num_rows(), schema);
|
||||||
convert_record_batch(rb, &mut segment)?;
|
convert_record_batch(rb, &mut segment)?;
|
||||||
|
|
||||||
|
println!("{}", &segment);
|
||||||
store.add_segment(segment);
|
store.add_segment(segment);
|
||||||
}
|
}
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
|
|
|
@ -873,6 +873,23 @@ impl Column {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Column {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match &self {
|
||||||
|
Column::String(c) => {
|
||||||
|
write!(f, "{}", c)?;
|
||||||
|
}
|
||||||
|
Column::Float(c) => {
|
||||||
|
write!(f, "{}", c)?;
|
||||||
|
}
|
||||||
|
Column::Integer(c) => {
|
||||||
|
write!(f, "{}", c)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl AggregatableByRange for &Column {
|
impl AggregatableByRange for &Column {
|
||||||
fn aggregate_by_id_range(
|
fn aggregate_by_id_range(
|
||||||
&self,
|
&self,
|
||||||
|
@ -964,6 +981,12 @@ impl String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for String {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Meta: {}, Data: {}", self.meta, self.data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Float {
|
pub struct Float {
|
||||||
meta: metadata::F64,
|
meta: metadata::F64,
|
||||||
|
@ -1015,6 +1038,12 @@ impl Float {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Float {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Meta: {}, Data: {}", self.meta, self.data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<&[f64]> for Float {
|
impl From<&[f64]> for Float {
|
||||||
fn from(values: &[f64]) -> Self {
|
fn from(values: &[f64]) -> Self {
|
||||||
let len = values.len();
|
let len = values.len();
|
||||||
|
@ -1034,6 +1063,32 @@ impl From<&[f64]> for Float {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// use arrow::array::Array;
|
||||||
|
// impl From<arrow::array::PrimitiveArray<arrow::datatypes::Float64Type>> for Float {
|
||||||
|
// fn from(arr: arrow::array::PrimitiveArray<arrow::datatypes::Float64Type>) -> Self {
|
||||||
|
// let len = arr.len();
|
||||||
|
// let mut min = std::f64::MAX;
|
||||||
|
// let mut max = std::f64::MIN;
|
||||||
|
|
||||||
|
// // calculate min/max for meta data
|
||||||
|
// // TODO(edd): can use compute kernels for this.
|
||||||
|
// for i in 0..arr.len() {
|
||||||
|
// if arr.is_null(i) {
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// let v = arr.value(i);
|
||||||
|
// min = min.min(v);
|
||||||
|
// max = max.max(v);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Self {
|
||||||
|
// meta: metadata::F64::new((min, max), len),
|
||||||
|
// data: Box::new(encoding::PlainArrow { arr }),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Integer {
|
pub struct Integer {
|
||||||
meta: metadata::I64,
|
meta: metadata::I64,
|
||||||
|
@ -1080,6 +1135,12 @@ impl Integer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Integer {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Meta: {}, Data: {}", self.meta, self.data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<&[i64]> for Integer {
|
impl From<&[i64]> for Integer {
|
||||||
fn from(values: &[i64]) -> Self {
|
fn from(values: &[i64]) -> Self {
|
||||||
let len = values.len();
|
let len = values.len();
|
||||||
|
@ -1100,11 +1161,12 @@ impl From<&[i64]> for Integer {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod metadata {
|
pub mod metadata {
|
||||||
|
use std::mem::size_of;
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct Str {
|
pub struct Str {
|
||||||
range: (Option<String>, Option<String>),
|
range: (Option<String>, Option<String>),
|
||||||
num_rows: usize,
|
num_rows: usize,
|
||||||
// sparse_index: BTreeMap<String, usize>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Str {
|
impl Str {
|
||||||
|
@ -1145,8 +1207,20 @@ pub mod metadata {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn size(&self) -> usize {
|
pub fn size(&self) -> usize {
|
||||||
// TODO!!!!
|
// size of types for num_rows and range
|
||||||
0 //self.range.0.len() + self.range.1.len() + std::mem::size_of::<usize>()
|
let base_size = size_of::<usize>() + (2 * size_of::<Option<String>>());
|
||||||
|
match &self.range {
|
||||||
|
(None, None) => base_size,
|
||||||
|
(Some(min), None) => base_size + min.len(),
|
||||||
|
(None, Some(max)) => base_size + max.len(),
|
||||||
|
(Some(min), Some(max)) => base_size + min.len() + max.len(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Str {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Range: ({:?})", self.range)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1184,7 +1258,13 @@ pub mod metadata {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn size(&self) -> usize {
|
pub fn size(&self) -> usize {
|
||||||
std::mem::size_of::<(f64, f64)>() + std::mem::size_of::<usize>()
|
size_of::<usize>() + (size_of::<(f64, f64)>())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for F64 {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Range: ({:?})", self.range)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1219,7 +1299,13 @@ pub mod metadata {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn size(&self) -> usize {
|
pub fn size(&self) -> usize {
|
||||||
std::mem::size_of::<(i64, i64)>() + std::mem::size_of::<usize>()
|
size_of::<usize>() + (size_of::<(i64, i64)>())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for I64 {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Range: ({:?})", self.range)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
use std::iter;
|
use std::iter;
|
||||||
|
use std::mem::size_of;
|
||||||
|
|
||||||
use arrow::array::{Array, PrimitiveArray};
|
use arrow::array::{Array, PrimitiveArray};
|
||||||
use arrow::datatypes::ArrowNumericType;
|
use arrow::datatypes::ArrowNumericType;
|
||||||
use arrow::datatypes::*;
|
|
||||||
|
|
||||||
pub trait Encoding: Send + Sync {
|
pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug {
|
||||||
type Item;
|
type Item;
|
||||||
|
|
||||||
fn size(&self) -> usize;
|
fn size(&self) -> usize;
|
||||||
|
@ -15,39 +15,15 @@ pub trait Encoding: Send + Sync {
|
||||||
fn all_encoded_values(&self) -> Vec<Self::Item>;
|
fn all_encoded_values(&self) -> Vec<Self::Item>;
|
||||||
fn scan_from(&self, row_id: usize) -> &[Self::Item];
|
fn scan_from(&self, row_id: usize) -> &[Self::Item];
|
||||||
|
|
||||||
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize;
|
|
||||||
|
|
||||||
// TODO(edd): clean up the API for getting row ids that match predicates.
|
|
||||||
//
|
|
||||||
// Ideally you should be able to provide a collection of predicates to
|
|
||||||
// match on.
|
|
||||||
//
|
|
||||||
// A simpler approach would be to provide a method that matches on a single
|
|
||||||
// predicate and then call that multiple times, unioning or intersecting the
|
|
||||||
// resulting row sets.
|
|
||||||
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize>;
|
|
||||||
fn row_ids_single_cmp_roaring(
|
|
||||||
&self,
|
|
||||||
wanted: &Self::Item,
|
|
||||||
order: std::cmp::Ordering,
|
|
||||||
) -> croaring::Bitmap;
|
|
||||||
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait NumericEncoding: Send + Sync {
|
|
||||||
type Item;
|
|
||||||
|
|
||||||
fn size(&self) -> usize;
|
|
||||||
fn value(&self, row_id: usize) -> Self::Item;
|
|
||||||
fn values(&self, row_ids: &[usize]) -> Vec<Self::Item>;
|
|
||||||
fn encoded_values(&self, row_ids: &[usize]) -> Vec<Self::Item>;
|
|
||||||
fn all_encoded_values(&self) -> Vec<Self::Item>;
|
|
||||||
fn scan_from(&self, row_id: usize) -> &[Self::Item];
|
|
||||||
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item;
|
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item;
|
||||||
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item;
|
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item;
|
||||||
|
|
||||||
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize;
|
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize;
|
||||||
|
fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64;
|
||||||
|
|
||||||
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize>;
|
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize>;
|
||||||
|
fn row_id_ge_value(&self, v: Self::Item) -> Option<usize>;
|
||||||
|
|
||||||
fn row_ids_single_cmp_roaring(
|
fn row_ids_single_cmp_roaring(
|
||||||
&self,
|
&self,
|
||||||
wanted: &Self::Item,
|
wanted: &Self::Item,
|
||||||
|
@ -56,39 +32,37 @@ pub trait NumericEncoding: Send + Sync {
|
||||||
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap;
|
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for dyn NumericEncoding<Item = f64> {
|
#[derive(Debug)]
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(f, "{}", "todo")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Debug for dyn NumericEncoding<Item = i64> {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(f, "{}", "todo")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct PlainArrow<T>
|
pub struct PlainArrow<T>
|
||||||
where
|
where
|
||||||
// T: ArrowNumericType + std::ops::Add,
|
|
||||||
T: ArrowNumericType,
|
T: ArrowNumericType,
|
||||||
// T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign,
|
T::Native: Default
|
||||||
|
+ PartialEq
|
||||||
|
+ PartialOrd
|
||||||
|
+ Copy
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ std::ops::Add<Output = T::Native>,
|
||||||
{
|
{
|
||||||
arr: PrimitiveArray<T>,
|
arr: PrimitiveArray<T>,
|
||||||
// _phantom: T,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> PlainArrow<T>
|
impl<T> NumericEncoding for PlainArrow<T>
|
||||||
where
|
where
|
||||||
// T: ArrowNumericType + std::ops::Add,
|
T: ArrowNumericType + std::fmt::Debug,
|
||||||
T: ArrowNumericType,
|
T::Native: Default
|
||||||
// T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign,
|
+ PartialEq
|
||||||
|
+ PartialOrd
|
||||||
|
+ Copy
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ std::ops::Add<Output = T::Native>,
|
||||||
{
|
{
|
||||||
pub fn size(&self) -> usize {
|
type Item = Option<T::Native>;
|
||||||
|
|
||||||
|
fn size(&self) -> usize {
|
||||||
self.arr.len()
|
self.arr.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn value(&self, row_id: usize) -> Option<T::Native> {
|
fn value(&self, row_id: usize) -> Option<T::Native> {
|
||||||
if self.arr.is_null(row_id) {
|
if self.arr.is_null(row_id) {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
@ -109,12 +83,12 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Well this is terribly slow
|
/// Well this is terribly slow
|
||||||
pub fn encoded_values(&self, row_ids: &[usize]) -> Vec<Option<T::Native>> {
|
fn encoded_values(&self, row_ids: &[usize]) -> Vec<Option<T::Native>> {
|
||||||
self.values(row_ids)
|
self.values(row_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO(edd): there must be a more efficient way.
|
/// TODO(edd): there must be a more efficient way.
|
||||||
pub fn all_encoded_values(&self) -> Vec<Option<T::Native>> {
|
fn all_encoded_values(&self) -> Vec<Option<T::Native>> {
|
||||||
let mut out = Vec::with_capacity(self.arr.len());
|
let mut out = Vec::with_capacity(self.arr.len());
|
||||||
for i in 0..self.arr.len() {
|
for i in 0..self.arr.len() {
|
||||||
if self.arr.is_null(i) {
|
if self.arr.is_null(i) {
|
||||||
|
@ -127,11 +101,10 @@ where
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn scan_from(&self, row_id: usize) -> &[Option<T::Native>] {
|
// TODO(edd): problem here is returning a slice because we need to own the
|
||||||
// todo
|
// backing vector.
|
||||||
|
fn scan_from(&self, row_id: usize) -> &[Option<T::Native>] {
|
||||||
&[]
|
unimplemented!("need to figure out returning a slice");
|
||||||
|
|
||||||
// let mut out = Vec::with_capacity(self.arr.len() - row_id);
|
// let mut out = Vec::with_capacity(self.arr.len() - row_id);
|
||||||
// for i in row_id..self.arr.len() {
|
// for i in row_id..self.arr.len() {
|
||||||
// if self.arr.is_null(i) {
|
// if self.arr.is_null(i) {
|
||||||
|
@ -144,18 +117,22 @@ where
|
||||||
// out.as_slice()
|
// out.as_slice()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option<T::Native> {
|
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option<T::Native> {
|
||||||
// let mut res = T::Native::default();
|
// TODO(edd): this is expensive - may pay to expose method to do this
|
||||||
|
// where you accept an array.
|
||||||
// // HMMMMM - materialising which has a memory cost.
|
let mut res = T::Native::default();
|
||||||
// let vec = row_ids.to_vec();
|
let vec = row_ids.to_vec();
|
||||||
// for v in vec {
|
for row_id in vec {
|
||||||
// res += self.arr.value(v as usize);
|
let i = row_id as usize;
|
||||||
// }
|
if self.arr.is_null(i) {
|
||||||
None // todo
|
return None;
|
||||||
|
}
|
||||||
|
res = res + self.arr.value(i);
|
||||||
|
}
|
||||||
|
Some(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option<T::Native> {
|
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option<T::Native> {
|
||||||
// if the column contains a null value between the range then the result
|
// if the column contains a null value between the range then the result
|
||||||
// will be None.
|
// will be None.
|
||||||
for i in from_row_id..to_row_id {
|
for i in from_row_id..to_row_id {
|
||||||
|
@ -165,57 +142,35 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise sum all the values between in the range.
|
// Otherwise sum all the values between in the range.
|
||||||
// let mut res = f64::from(self.arr.value(from_row_id));
|
let mut res = T::Native::default();
|
||||||
// for i in from_row_id + 1..to_row_id {
|
for i in from_row_id..to_row_id {
|
||||||
// res = res + self.arr.value(i);
|
res = res + self.arr.value(i);
|
||||||
// }
|
|
||||||
// Some(res)
|
|
||||||
None
|
|
||||||
}
|
}
|
||||||
|
Some(res)
|
||||||
pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
|
||||||
// TODO - count values that are not null in the row range.
|
|
||||||
0 // todo
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl NumericEncoding for PlainArrow<Float64Type> {
|
|
||||||
type Item = Option<f64>;
|
|
||||||
|
|
||||||
fn size(&self) -> usize {
|
|
||||||
self.size()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn value(&self, row_id: usize) -> Self::Item {
|
|
||||||
self.value(row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn values(&self, row_ids: &[usize]) -> Vec<Self::Item> {
|
|
||||||
self.values(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn encoded_values(&self, row_ids: &[usize]) -> Vec<Self::Item> {
|
|
||||||
self.encoded_values(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn all_encoded_values(&self) -> Vec<Self::Item> {
|
|
||||||
self.all_encoded_values()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn scan_from(&self, row_id: usize) -> &[Self::Item] {
|
|
||||||
self.scan_from(row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item {
|
|
||||||
self.sum_by_ids(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item {
|
|
||||||
self.sum_by_id_range(from_row_id, to_row_id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
||||||
self.count_by_id_range(from_row_id, to_row_id)
|
// TODO - count values that are not null in the row range.
|
||||||
|
let mut count = 0;
|
||||||
|
for i in from_row_id..to_row_id {
|
||||||
|
if self.arr.is_null(i) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
count
|
||||||
|
}
|
||||||
|
|
||||||
|
fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn row_id_ge_value(&self, v: Self::Item) -> Option<usize> {
|
||||||
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn row_ids_single_cmp_roaring(
|
fn row_ids_single_cmp_roaring(
|
||||||
|
@ -223,15 +178,26 @@ impl NumericEncoding for PlainArrow<Float64Type> {
|
||||||
wanted: &Self::Item,
|
wanted: &Self::Item,
|
||||||
order: std::cmp::Ordering,
|
order: std::cmp::Ordering,
|
||||||
) -> croaring::Bitmap {
|
) -> croaring::Bitmap {
|
||||||
self.row_ids_single_cmp_roaring(wanted, order)
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap {
|
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap {
|
||||||
self.row_ids_gte_lt_roaring(from, to)
|
todo!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize> {
|
impl<T: ArrowNumericType> std::fmt::Display for PlainArrow<T>
|
||||||
self.row_id_eq_value(v)
|
where
|
||||||
|
T: ArrowNumericType + std::fmt::Debug,
|
||||||
|
T::Native: Default
|
||||||
|
+ PartialEq
|
||||||
|
+ PartialOrd
|
||||||
|
+ Copy
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ std::ops::Add<Output = T::Native>,
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "[PlainArrow<T>] size: {}", self.size())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -242,31 +208,60 @@ pub struct PlainFixed<T> {
|
||||||
// total_order can be used as a hint to stop scanning the column early when
|
// total_order can be used as a hint to stop scanning the column early when
|
||||||
// applying a comparison predicate to the column.
|
// applying a comparison predicate to the column.
|
||||||
total_order: bool,
|
total_order: bool,
|
||||||
|
|
||||||
|
size: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> PlainFixed<T>
|
impl<T> std::fmt::Display for PlainFixed<T>
|
||||||
where
|
where
|
||||||
T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign,
|
T: Default
|
||||||
|
+ PartialEq
|
||||||
|
+ PartialOrd
|
||||||
|
+ Copy
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ std::fmt::Display
|
||||||
|
+ Sync
|
||||||
|
+ Send
|
||||||
|
+ std::ops::AddAssign,
|
||||||
{
|
{
|
||||||
pub fn size(&self) -> usize {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
self.values.len() * std::mem::size_of::<T>()
|
write!(f, "[PlainFixed<T>] size: {}", self.size(),)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn row_id_eq_value(&self, v: T) -> Option<usize> {
|
impl<T> NumericEncoding for PlainFixed<T>
|
||||||
|
where
|
||||||
|
T: Default
|
||||||
|
+ PartialEq
|
||||||
|
+ PartialOrd
|
||||||
|
+ Copy
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ std::fmt::Display
|
||||||
|
+ Sync
|
||||||
|
+ Send
|
||||||
|
+ std::ops::AddAssign,
|
||||||
|
{
|
||||||
|
type Item = T;
|
||||||
|
|
||||||
|
fn size(&self) -> usize {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
|
|
||||||
|
fn row_id_eq_value(&self, v: T) -> Option<usize> {
|
||||||
self.values.iter().position(|x| *x == v)
|
self.values.iter().position(|x| *x == v)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn row_id_ge_value(&self, v: T) -> Option<usize> {
|
fn row_id_ge_value(&self, v: T) -> Option<usize> {
|
||||||
self.values.iter().position(|x| *x >= v)
|
self.values.iter().position(|x| *x >= v)
|
||||||
}
|
}
|
||||||
|
|
||||||
// get value at row_id. Panics if out of bounds.
|
// get value at row_id. Panics if out of bounds.
|
||||||
pub fn value(&self, row_id: usize) -> T {
|
fn value(&self, row_id: usize) -> T {
|
||||||
self.values[row_id]
|
self.values[row_id]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the decoded values for the provided logical row ids.
|
/// Return the decoded values for the provided logical row ids.
|
||||||
pub fn values(&self, row_ids: &[usize]) -> Vec<T> {
|
fn values(&self, row_ids: &[usize]) -> Vec<T> {
|
||||||
let mut out = Vec::with_capacity(row_ids.len());
|
let mut out = Vec::with_capacity(row_ids.len());
|
||||||
for chunks in row_ids.chunks_exact(4) {
|
for chunks in row_ids.chunks_exact(4) {
|
||||||
out.push(self.values[chunks[3]]);
|
out.push(self.values[chunks[3]]);
|
||||||
|
@ -286,24 +281,24 @@ where
|
||||||
|
|
||||||
/// Return the raw encoded values for the provided logical row ids. For Plain
|
/// Return the raw encoded values for the provided logical row ids. For Plain
|
||||||
/// encoding this is just the decoded values.
|
/// encoding this is just the decoded values.
|
||||||
pub fn encoded_values(&self, row_ids: &[usize]) -> Vec<T> {
|
fn encoded_values(&self, row_ids: &[usize]) -> Vec<T> {
|
||||||
self.values(row_ids)
|
self.values(row_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return all encoded values. For this encoding this is just the decoded
|
/// Return all encoded values. For this encoding this is just the decoded
|
||||||
/// values
|
/// values
|
||||||
pub fn all_encoded_values(&self) -> Vec<T> {
|
fn all_encoded_values(&self) -> Vec<T> {
|
||||||
self.values.clone()
|
self.values.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn scan_from(&self, row_id: usize) -> &[T] {
|
fn scan_from(&self, row_id: usize) -> &[T] {
|
||||||
&self.values[row_id..]
|
&self.values[row_id..]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// returns a set of row ids that match a single ordering on a desired value
|
/// returns a set of row ids that match a single ordering on a desired value
|
||||||
///
|
///
|
||||||
/// This supports `value = x` , `value < x` or `value > x`.
|
/// This supports `value = x` , `value < x` or `value > x`.
|
||||||
pub fn row_ids_single_cmp_roaring(
|
fn row_ids_single_cmp_roaring(
|
||||||
&self,
|
&self,
|
||||||
wanted: &T,
|
wanted: &T,
|
||||||
order: std::cmp::Ordering,
|
order: std::cmp::Ordering,
|
||||||
|
@ -343,7 +338,7 @@ where
|
||||||
/// returns a set of row ids that match the half open interval `[from, to)`.
|
/// returns a set of row ids that match the half open interval `[from, to)`.
|
||||||
///
|
///
|
||||||
/// The main use-case for this is time range filtering.
|
/// The main use-case for this is time range filtering.
|
||||||
pub fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap {
|
fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap {
|
||||||
let mut bm = croaring::Bitmap::create();
|
let mut bm = croaring::Bitmap::create();
|
||||||
|
|
||||||
let mut found = false; //self.values[0];
|
let mut found = false; //self.values[0];
|
||||||
|
@ -376,7 +371,7 @@ where
|
||||||
bm
|
bm
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T {
|
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T {
|
||||||
let mut res = T::default();
|
let mut res = T::default();
|
||||||
for v in self.values[from_row_id..to_row_id].iter() {
|
for v in self.values[from_row_id..to_row_id].iter() {
|
||||||
res += *v;
|
res += *v;
|
||||||
|
@ -384,12 +379,12 @@ where
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
||||||
to_row_id - from_row_id
|
to_row_id - from_row_id
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(edd): make faster
|
// TODO(edd): make faster
|
||||||
pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T {
|
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T {
|
||||||
let mut res = T::default();
|
let mut res = T::default();
|
||||||
// println!(
|
// println!(
|
||||||
// "cardinality is {:?} out of {:?}",
|
// "cardinality is {:?} out of {:?}",
|
||||||
|
@ -449,7 +444,7 @@ where
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 {
|
fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 {
|
||||||
row_ids.cardinality()
|
row_ids.cardinality()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -460,6 +455,10 @@ impl From<&[i64]> for PlainFixed<i64> {
|
||||||
values: v.to_vec(),
|
values: v.to_vec(),
|
||||||
// buf: Vec::with_capacity(v.len()),
|
// buf: Vec::with_capacity(v.len()),
|
||||||
total_order: false,
|
total_order: false,
|
||||||
|
size: size_of::<Vec<i64>>()
|
||||||
|
+ (size_of::<i64>() * v.len())
|
||||||
|
+ size_of::<bool>()
|
||||||
|
+ size_of::<usize>(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -470,122 +469,14 @@ impl From<&[f64]> for PlainFixed<f64> {
|
||||||
values: v.to_vec(),
|
values: v.to_vec(),
|
||||||
// buf: Vec::with_capacity(v.len()),
|
// buf: Vec::with_capacity(v.len()),
|
||||||
total_order: false,
|
total_order: false,
|
||||||
|
size: size_of::<Vec<f64>>()
|
||||||
|
+ (size_of::<f64>() * v.len())
|
||||||
|
+ size_of::<bool>()
|
||||||
|
+ size_of::<usize>(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NumericEncoding for PlainFixed<f64> {
|
|
||||||
type Item = f64;
|
|
||||||
|
|
||||||
fn size(&self) -> usize {
|
|
||||||
self.size()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn value(&self, row_id: usize) -> Self::Item {
|
|
||||||
self.value(row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn values(&self, row_ids: &[usize]) -> Vec<Self::Item> {
|
|
||||||
self.values(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn encoded_values(&self, row_ids: &[usize]) -> Vec<Self::Item> {
|
|
||||||
self.encoded_values(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn all_encoded_values(&self) -> Vec<Self::Item> {
|
|
||||||
self.all_encoded_values()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn scan_from(&self, row_id: usize) -> &[Self::Item] {
|
|
||||||
self.scan_from(row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item {
|
|
||||||
self.sum_by_ids(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item {
|
|
||||||
self.sum_by_id_range(from_row_id, to_row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
|
||||||
self.count_by_id_range(from_row_id, to_row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn row_ids_single_cmp_roaring(
|
|
||||||
&self,
|
|
||||||
wanted: &Self::Item,
|
|
||||||
order: std::cmp::Ordering,
|
|
||||||
) -> croaring::Bitmap {
|
|
||||||
self.row_ids_single_cmp_roaring(wanted, order)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap {
|
|
||||||
self.row_ids_gte_lt_roaring(from, to)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize> {
|
|
||||||
self.row_id_eq_value(v)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl NumericEncoding for PlainFixed<i64> {
|
|
||||||
type Item = i64;
|
|
||||||
|
|
||||||
fn size(&self) -> usize {
|
|
||||||
self.size()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn value(&self, row_id: usize) -> Self::Item {
|
|
||||||
self.value(row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn values(&self, row_ids: &[usize]) -> Vec<Self::Item> {
|
|
||||||
self.values(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn encoded_values(&self, row_ids: &[usize]) -> Vec<Self::Item> {
|
|
||||||
self.encoded_values(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn all_encoded_values(&self) -> Vec<Self::Item> {
|
|
||||||
self.all_encoded_values()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn scan_from(&self, row_id: usize) -> &[Self::Item] {
|
|
||||||
self.scan_from(row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item {
|
|
||||||
self.sum_by_ids(row_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item {
|
|
||||||
self.sum_by_id_range(from_row_id, to_row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize {
|
|
||||||
self.count_by_id_range(from_row_id, to_row_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn row_ids_single_cmp_roaring(
|
|
||||||
&self,
|
|
||||||
wanted: &Self::Item,
|
|
||||||
order: std::cmp::Ordering,
|
|
||||||
) -> croaring::Bitmap {
|
|
||||||
self.row_ids_single_cmp_roaring(wanted, order)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap {
|
|
||||||
self.row_ids_gte_lt_roaring(from, to)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn row_id_eq_value(&self, v: Self::Item) -> Option<usize> {
|
|
||||||
self.row_id_eq_value(v)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct DictionaryRLE {
|
pub struct DictionaryRLE {
|
||||||
// stores the mapping between an entry and its assigned index.
|
// stores the mapping between an entry and its assigned index.
|
||||||
|
@ -602,7 +493,6 @@ pub struct DictionaryRLE {
|
||||||
// stores tuples where each pair refers to a dictionary entry and the number
|
// stores tuples where each pair refers to a dictionary entry and the number
|
||||||
// of times the entry repeats.
|
// of times the entry repeats.
|
||||||
run_lengths: Vec<(usize, u64)>,
|
run_lengths: Vec<(usize, u64)>,
|
||||||
run_length_size: usize,
|
|
||||||
|
|
||||||
total: u64,
|
total: u64,
|
||||||
}
|
}
|
||||||
|
@ -615,7 +505,6 @@ impl DictionaryRLE {
|
||||||
index_entry: BTreeMap::new(),
|
index_entry: BTreeMap::new(),
|
||||||
map_size: 0,
|
map_size: 0,
|
||||||
run_lengths: Vec::new(),
|
run_lengths: Vec::new(),
|
||||||
run_length_size: 0,
|
|
||||||
total: 0,
|
total: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -627,7 +516,6 @@ impl DictionaryRLE {
|
||||||
index_entry: BTreeMap::new(),
|
index_entry: BTreeMap::new(),
|
||||||
map_size: 0,
|
map_size: 0,
|
||||||
run_lengths: Vec::new(),
|
run_lengths: Vec::new(),
|
||||||
run_length_size: 0,
|
|
||||||
total: 0,
|
total: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -663,7 +551,6 @@ impl DictionaryRLE {
|
||||||
} else {
|
} else {
|
||||||
// start a new run-length
|
// start a new run-length
|
||||||
self.run_lengths.push((*idx, additional));
|
self.run_lengths.push((*idx, additional));
|
||||||
self.run_length_size += std::mem::size_of::<(usize, u64)>();
|
|
||||||
}
|
}
|
||||||
self.index_row_ids
|
self.index_row_ids
|
||||||
.get_mut(&(*idx as u32))
|
.get_mut(&(*idx as u32))
|
||||||
|
@ -690,7 +577,6 @@ impl DictionaryRLE {
|
||||||
.get_mut(&(idx as u32))
|
.get_mut(&(idx as u32))
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.add_range(self.total..self.total + additional);
|
.add_range(self.total..self.total + additional);
|
||||||
self.run_length_size += std::mem::size_of::<(usize, u64)>();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -927,8 +813,28 @@ impl DictionaryRLE {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn size(&self) -> usize {
|
pub fn size(&self) -> usize {
|
||||||
// mapping and reverse mapping then the rles
|
// entry_index: BTreeMap<Option<String>, usize>,
|
||||||
2 * self.map_size + self.run_length_size
|
|
||||||
|
// // stores the mapping between an index and its entry.
|
||||||
|
// index_entry: BTreeMap<usize, Option<String>>,
|
||||||
|
|
||||||
|
(self.index_entry.len() * size_of::<BTreeMap<usize, Option<String>>>())
|
||||||
|
+ (self.index_row_ids.len() * size_of::<BTreeMap<u32, croaring::Bitmap>>())
|
||||||
|
+ size_of::<usize>()
|
||||||
|
+ (self.run_lengths.len() * size_of::<Vec<(usize, u64)>>())
|
||||||
|
+ size_of::<u64>()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for DictionaryRLE {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"[DictionaryRLE] size: {}, dict entries: {}, runs: {} ",
|
||||||
|
self.size(),
|
||||||
|
self.index_entry.len(),
|
||||||
|
self.run_lengths.len()
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -959,6 +865,7 @@ impl std::convert::From<&delorean_table::Packer<delorean_table::ByteArray>> for
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
|
use super::NumericEncoding;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn plain_arrow() {
|
fn plain_arrow() {
|
||||||
|
|
|
@ -934,6 +934,21 @@ impl Segment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Segment {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
writeln!(
|
||||||
|
f,
|
||||||
|
"Rows: {}\nSize: {} Columns: ",
|
||||||
|
self.num_rows(),
|
||||||
|
self.size()
|
||||||
|
)?;
|
||||||
|
for (c, name) in self.columns.iter().zip(self.column_names().iter()) {
|
||||||
|
writeln!(f, "{} {}", name, c)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Meta data for a segment. This data is mainly used to determine if a segment
|
/// Meta data for a segment. This data is mainly used to determine if a segment
|
||||||
/// may contain a value that can answer a query.
|
/// may contain a value that can answer a query.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
Loading…
Reference in New Issue