Merge branch 'main' into er/test/read_buffer/null_column

pull/24376/head
Edd Robinson 2021-10-22 15:28:39 +01:00 committed by GitHub
commit 8d267e5840
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 205 additions and 120 deletions

View File

@ -494,12 +494,21 @@ where
} }
/// Create statistics for a column that only has nulls up to now /// Create statistics for a column that only has nulls up to now
pub fn new_all_null(total_count: u64) -> Self { pub fn new_all_null(total_count: u64, distinct_count: Option<u64>) -> Self {
let min = None; let min = None;
let max = None; let max = None;
let null_count = total_count; let null_count = total_count;
let distinct_count = NonZeroU64::new(1);
Self::new_with_distinct(min, max, total_count, null_count, distinct_count) if let Some(count) = distinct_count {
assert!(count > 0);
}
Self::new_with_distinct(
min,
max,
total_count,
null_count,
distinct_count.map(|c| NonZeroU64::new(c).unwrap()),
)
} }
pub fn update_from(&mut self, other: &Self) { pub fn update_from(&mut self, other: &Self) {
@ -660,6 +669,8 @@ impl IsNan for f64 {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::convert::TryFrom;
use super::*; use super::*;
#[test] #[test]
@ -677,13 +688,25 @@ mod tests {
#[test] #[test]
fn statistics_new_all_null() { fn statistics_new_all_null() {
let actual = StatValues::<i64>::new_all_null(3); // i64 values do not have a distinct count
let actual = StatValues::<i64>::new_all_null(3, None);
let expected = StatValues { let expected = StatValues {
min: None, min: None,
max: None, max: None,
total_count: 3, total_count: 3,
null_count: 3, null_count: 3,
distinct_count: NonZeroU64::new(1), distinct_count: None,
};
assert_eq!(actual, expected);
// string columns can have a distinct count
let actual = StatValues::<i64>::new_all_null(3, Some(1_u64));
let expected = StatValues {
min: None,
max: None,
total_count: 3,
null_count: 3,
distinct_count: Some(NonZeroU64::try_from(1_u64).unwrap()),
}; };
assert_eq!(actual, expected); assert_eq!(actual, expected);
} }

View File

@ -113,30 +113,33 @@ impl Column {
| InfluxColumnType::Field(InfluxFieldType::Boolean) => { | InfluxColumnType::Field(InfluxFieldType::Boolean) => {
let mut data = BitSet::new(); let mut data = BitSet::new();
data.append_unset(row_count); data.append_unset(row_count);
ColumnData::Bool(data, StatValues::new_all_null(total_count)) ColumnData::Bool(data, StatValues::new_all_null(total_count, None))
} }
InfluxColumnType::IOx(IOxValueType::U64) InfluxColumnType::IOx(IOxValueType::U64)
| InfluxColumnType::Field(InfluxFieldType::UInteger) => { | InfluxColumnType::Field(InfluxFieldType::UInteger) => ColumnData::U64(
ColumnData::U64(vec![0; row_count], StatValues::new_all_null(total_count)) vec![0; row_count],
} StatValues::new_all_null(total_count, None),
),
InfluxColumnType::IOx(IOxValueType::F64) InfluxColumnType::IOx(IOxValueType::F64)
| InfluxColumnType::Field(InfluxFieldType::Float) => { | InfluxColumnType::Field(InfluxFieldType::Float) => ColumnData::F64(
ColumnData::F64(vec![0.0; row_count], StatValues::new_all_null(total_count)) vec![0.0; row_count],
} StatValues::new_all_null(total_count, None),
),
InfluxColumnType::IOx(IOxValueType::I64) InfluxColumnType::IOx(IOxValueType::I64)
| InfluxColumnType::Field(InfluxFieldType::Integer) | InfluxColumnType::Field(InfluxFieldType::Integer)
| InfluxColumnType::Timestamp => { | InfluxColumnType::Timestamp => ColumnData::I64(
ColumnData::I64(vec![0; row_count], StatValues::new_all_null(total_count)) vec![0; row_count],
} StatValues::new_all_null(total_count, None),
),
InfluxColumnType::IOx(IOxValueType::String) InfluxColumnType::IOx(IOxValueType::String)
| InfluxColumnType::Field(InfluxFieldType::String) => ColumnData::String( | InfluxColumnType::Field(InfluxFieldType::String) => ColumnData::String(
PackedStringArray::new_empty(row_count), PackedStringArray::new_empty(row_count),
StatValues::new_all_null(total_count), StatValues::new_all_null(total_count, Some(1)),
), ),
InfluxColumnType::Tag => ColumnData::Tag( InfluxColumnType::Tag => ColumnData::Tag(
vec![INVALID_DID; row_count], vec![INVALID_DID; row_count],
Default::default(), Default::default(),
StatValues::new_all_null(total_count), StatValues::new_all_null(total_count, Some(1)),
), ),
InfluxColumnType::IOx(IOxValueType::Bytes) => todo!(), InfluxColumnType::IOx(IOxValueType::Bytes) => todo!(),
}; };

View File

@ -191,8 +191,6 @@ async fn test_field_name_plan() {
} }
} }
// BUG: https://github.com/influxdata/influxdb_iox/issues/2860
#[ignore]
#[tokio::test] #[tokio::test]
async fn test_field_name_plan_with_delete() { async fn test_field_name_plan_with_delete() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();

View File

@ -495,6 +495,7 @@ mod test {
use data_types::partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics}; use data_types::partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics};
use metric::{MetricKind, Observation, ObservationSet, RawReporter}; use metric::{MetricKind, Observation, ObservationSet, RawReporter};
use schema::builder::SchemaBuilder; use schema::builder::SchemaBuilder;
use std::iter::FromIterator;
use std::{num::NonZeroU64, sync::Arc}; use std::{num::NonZeroU64, sync::Arc};
// helper to make the `add_remove_tables` test simpler to read. // helper to make the `add_remove_tables` test simpler to read.
@ -854,6 +855,8 @@ mod test {
#[test] #[test]
fn table_summaries() { fn table_summaries() {
use std::iter::repeat;
let schema = SchemaBuilder::new() let schema = SchemaBuilder::new()
.non_null_tag("env") .non_null_tag("env")
.tag("host") .tag("host")
@ -862,6 +865,11 @@ mod test {
.non_null_field("icounter", Int64) .non_null_field("icounter", Int64)
.non_null_field("active", Boolean) .non_null_field("active", Boolean)
.non_null_field("msg", Utf8) .non_null_field("msg", Utf8)
.field("zf64", Float64)
.field("zu64", UInt64)
.field("zi64", Int64)
.field("zbool", Boolean)
.field("zstr", Utf8)
.timestamp() .timestamp()
.build() .build()
.unwrap(); .unwrap();
@ -873,7 +881,7 @@ mod test {
.collect::<DictionaryArray<Int32Type>>(), .collect::<DictionaryArray<Int32Type>>(),
), ),
Arc::new( Arc::new(
(vec![None, None, None] as Vec<Option<&str>>) (vec![Some("host a"), None, Some("host b")] as Vec<Option<&str>>)
.into_iter() .into_iter()
.collect::<DictionaryArray<Int32Type>>(), .collect::<DictionaryArray<Int32Type>>(),
), ),
@ -881,7 +889,20 @@ mod test {
Arc::new(UInt64Array::from(vec![1000, 3000, 5000])), Arc::new(UInt64Array::from(vec![1000, 3000, 5000])),
Arc::new(Int64Array::from(vec![1000, -1000, 4000])), Arc::new(Int64Array::from(vec![1000, -1000, 4000])),
Arc::new(BooleanArray::from(vec![true, true, false])), Arc::new(BooleanArray::from(vec![true, true, false])),
Arc::new(StringArray::from(vec![Some("msg a"), Some("msg b"), None])), Arc::new(StringArray::from(vec![
Some("msg a"),
Some("msg b"),
Some("msg b"),
])),
// all null columns
Arc::new(Float64Array::from_iter(repeat(None).take(3))),
Arc::new(UInt64Array::from_iter(repeat(None).take(3))),
Arc::new(Int64Array::from_iter(repeat(None).take(3))),
Arc::new(BooleanArray::from_iter(repeat(None).take(3))),
Arc::new(StringArray::from_iter(
repeat::<Option<String>>(None).take(3),
)),
// timestamp column
Arc::new(TimestampNanosecondArray::from_vec( Arc::new(TimestampNanosecondArray::from_vec(
vec![11111111, 222222, 3333], vec![11111111, 222222, 3333],
None, None,
@ -925,7 +946,13 @@ mod test {
ColumnSummary { ColumnSummary {
name: "host".into(), name: "host".into(),
influxdb_type: Some(InfluxDbType::Tag), influxdb_type: Some(InfluxDbType::Tag),
stats: Statistics::String(StatValues::new_all_null(3)), stats: Statistics::String(StatValues {
min: Some("host a".into()),
max: Some("host b".into()),
total_count: 3,
null_count: 1,
distinct_count: Some(NonZeroU64::new(3).unwrap()),
}),
}, },
ColumnSummary { ColumnSummary {
name: "icounter".into(), name: "icounter".into(),
@ -939,8 +966,8 @@ mod test {
min: Some("msg a".into()), min: Some("msg a".into()),
max: Some("msg b".into()), max: Some("msg b".into()),
total_count: 3, total_count: 3,
null_count: 1, null_count: 0,
distinct_count: Some(NonZeroU64::new(3).unwrap()), distinct_count: Some(NonZeroU64::new(2).unwrap()),
}), }),
}, },
ColumnSummary { ColumnSummary {
@ -953,6 +980,31 @@ mod test {
influxdb_type: Some(InfluxDbType::Timestamp), influxdb_type: Some(InfluxDbType::Timestamp),
stats: Statistics::I64(StatValues::new_non_null(Some(3333), Some(11111111), 3)), stats: Statistics::I64(StatValues::new_non_null(Some(3333), Some(11111111), 3)),
}, },
ColumnSummary {
name: "zbool".into(),
influxdb_type: Some(InfluxDbType::Field),
stats: Statistics::Bool(StatValues::new_all_null(3, None)),
},
ColumnSummary {
name: "zf64".into(),
influxdb_type: Some(InfluxDbType::Field),
stats: Statistics::F64(StatValues::new_all_null(3, None)),
},
ColumnSummary {
name: "zi64".into(),
influxdb_type: Some(InfluxDbType::Field),
stats: Statistics::I64(StatValues::new_all_null(3, None)),
},
ColumnSummary {
name: "zstr".into(),
influxdb_type: Some(InfluxDbType::Field),
stats: Statistics::String(StatValues::new_all_null(3, Some(1))),
},
ColumnSummary {
name: "zu64".into(),
influxdb_type: Some(InfluxDbType::Field),
stats: Statistics::U64(StatValues::new_all_null(3, None)),
},
]; ];
assert_eq!( assert_eq!(

View File

@ -785,90 +785,43 @@ impl MetaData {
let null_count = column_meta.null_count as u64; let null_count = column_meta.null_count as u64;
let distinct_count = column_meta.distinct_count; let distinct_count = column_meta.distinct_count;
let stats = match &column_meta.range { let stats = match column_meta.logical_data_type {
(OwnedValue::String(min), OwnedValue::String(max)) => { LogicalDataType::Integer => Statistics::I64(StatValues {
Statistics::String(StatValues { min: column_meta.range.0.as_i64(),
min: Some(min.to_string()), max: column_meta.range.1.as_i64(),
max: Some(max.to_string()), total_count,
total_count, null_count,
null_count, distinct_count,
distinct_count, }),
}) LogicalDataType::Unsigned => Statistics::U64(StatValues {
} min: column_meta.range.0.as_u64(),
(OwnedValue::String(_), mismatch) => { max: column_meta.range.1.as_u64(),
panic!("inconsistent min/max expected String got {}", mismatch) total_count,
} null_count,
(OwnedValue::Boolean(min), OwnedValue::Boolean(max)) => { distinct_count,
Statistics::Bool(StatValues { }),
min: Some(*min), LogicalDataType::Float => Statistics::F64(StatValues {
max: Some(*max), min: column_meta.range.0.as_f64(),
total_count, max: column_meta.range.1.as_f64(),
null_count, total_count,
distinct_count, null_count,
}) distinct_count,
} }),
(OwnedValue::Boolean(_), mismatch) => { LogicalDataType::String => Statistics::String(StatValues {
panic!("inconsistent min/max expected Boolean got {}", mismatch) min: column_meta.range.0.as_string(),
} max: column_meta.range.1.as_string(),
(OwnedValue::Scalar(min), OwnedValue::Scalar(max)) => match (min, max) { total_count,
(Scalar::I64(min), Scalar::I64(max)) => Statistics::I64(StatValues { null_count,
min: Some(*min), distinct_count,
max: Some(*max), }),
total_count, LogicalDataType::Binary => panic!("unsupported type statistcs type ByteArray"),
null_count, LogicalDataType::Boolean => Statistics::Bool(StatValues {
distinct_count, min: column_meta.range.0.as_bool(),
}), max: column_meta.range.1.as_bool(),
(Scalar::I64(_), mismatch) => { total_count,
panic!("inconsistent min/max expected I64 got {}", mismatch) null_count,
} distinct_count,
(Scalar::U64(min), Scalar::U64(max)) => Statistics::U64(StatValues { }),
min: Some(*min),
max: Some(*max),
total_count,
null_count,
distinct_count,
}),
(Scalar::U64(_), mismatch) => {
panic!("inconsistent min/max expected U64 got {}", mismatch)
}
(Scalar::F64(min), Scalar::F64(max)) => Statistics::F64(StatValues {
min: Some(*min),
max: Some(*max),
total_count,
null_count,
distinct_count,
}),
(Scalar::F64(_), mismatch) => {
panic!("inconsistent min/max expected F64 got {}", mismatch)
}
(Scalar::Null, Scalar::Null) => {
assert_eq!(
total_count, null_count,
"expected only null values: {:?}",
column_meta,
);
assert_eq!(
distinct_count,
std::num::NonZeroU64::new(1),
"distinct count for all null was not 1: {:?}",
column_meta,
);
make_null_stats(total_count, &column_meta.logical_data_type)
}
(Scalar::Null, mismatch) => {
panic!("inconsistent min/max expected NULL got {}", mismatch)
}
},
(OwnedValue::Scalar(_), mismatch) => {
panic!("inconsistent min/max expected Scalar got {}", mismatch)
}
(OwnedValue::ByteArray(_), OwnedValue::ByteArray(_)) => {
panic!("unsupported type statistcs type ByteArray")
}
(OwnedValue::ByteArray(_), mismatch) => {
panic!("inconsistent min/max expected ByteArray got {}", mismatch)
}
}; };
ColumnSummary { ColumnSummary {
@ -899,12 +852,12 @@ fn make_null_stats(
use LogicalDataType::*; use LogicalDataType::*;
match logical_data_type { match logical_data_type {
Integer => Statistics::I64(StatValues::new_all_null(total_count)), Integer => Statistics::I64(StatValues::new_all_null(total_count, None)),
Unsigned => Statistics::U64(StatValues::new_all_null(total_count)), Unsigned => Statistics::U64(StatValues::new_all_null(total_count, None)),
Float => Statistics::F64(StatValues::new_all_null(total_count)), Float => Statistics::F64(StatValues::new_all_null(total_count, None)),
String => Statistics::String(StatValues::new_all_null(total_count)), String => Statistics::String(StatValues::new_all_null(total_count, Some(1))),
Binary => panic!("Binary statistics not supported"), Binary => panic!("Binary statistics not supported"),
Boolean => Statistics::Bool(StatValues::new_all_null(total_count)), Boolean => Statistics::Bool(StatValues::new_all_null(total_count, None)),
} }
} }
@ -1847,25 +1800,28 @@ west,host-b,100
#[test] #[test]
fn null_stats_ifield() { fn null_stats_ifield() {
let actual = make_null_stats(12, &LogicalDataType::Integer); let actual = make_null_stats(12, &LogicalDataType::Integer);
assert_eq!(actual, Statistics::I64(StatValues::new_all_null(12))); assert_eq!(actual, Statistics::I64(StatValues::new_all_null(12, None)));
} }
#[test] #[test]
fn null_stats_ufield() { fn null_stats_ufield() {
let actual = make_null_stats(12, &LogicalDataType::Unsigned); let actual = make_null_stats(12, &LogicalDataType::Unsigned);
assert_eq!(actual, Statistics::U64(StatValues::new_all_null(12))); assert_eq!(actual, Statistics::U64(StatValues::new_all_null(12, None)));
} }
#[test] #[test]
fn null_stats_float() { fn null_stats_float() {
let actual = make_null_stats(12, &LogicalDataType::Float); let actual = make_null_stats(12, &LogicalDataType::Float);
assert_eq!(actual, Statistics::F64(StatValues::new_all_null(12))); assert_eq!(actual, Statistics::F64(StatValues::new_all_null(12, None)));
} }
#[test] #[test]
fn null_stats_string() { fn null_stats_string() {
let actual = make_null_stats(12, &LogicalDataType::String); let actual = make_null_stats(12, &LogicalDataType::String);
assert_eq!(actual, Statistics::String(StatValues::new_all_null(12))); assert_eq!(
actual,
Statistics::String(StatValues::new_all_null(12, Some(1_u64)))
);
} }
#[test] #[test]
@ -1877,6 +1833,6 @@ west,host-b,100
#[test] #[test]
fn null_stats_boolean() { fn null_stats_boolean() {
let actual = make_null_stats(12, &LogicalDataType::Boolean); let actual = make_null_stats(12, &LogicalDataType::Boolean);
assert_eq!(actual, Statistics::Bool(StatValues::new_all_null(12))); assert_eq!(actual, Statistics::Bool(StatValues::new_all_null(12, None)));
} }
} }

View File

@ -1139,6 +1139,9 @@ impl std::fmt::Display for &Scalar {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum OwnedValue { pub enum OwnedValue {
// Represents a NULL value in a column row.
Null,
/// A UTF-8 valid string. /// A UTF-8 valid string.
String(String), String(String),
@ -1167,11 +1170,35 @@ impl PartialOrd for OwnedValue {
impl OwnedValue { impl OwnedValue {
pub fn new_null() -> Self { pub fn new_null() -> Self {
Self::Scalar(Scalar::Null) Self::Null
} }
pub fn is_null(&self) -> bool { pub fn is_null(&self) -> bool {
matches!(self, Self::Scalar(Scalar::Null)) matches!(self, Self::Null)
}
pub fn as_string(&self) -> Option<String> {
match self {
OwnedValue::Null => None,
OwnedValue::String(s) => Some(s.clone()),
v => panic!("{:?} cannot be unwrapped as string", v),
}
}
pub fn as_byte_array(&self) -> Option<Vec<u8>> {
match self {
OwnedValue::Null => None,
OwnedValue::ByteArray(arr) => Some(arr.clone()),
v => panic!("{:?} cannot be unwrapped as byte array", v),
}
}
pub fn as_bool(&self) -> Option<bool> {
match self {
OwnedValue::Null => None,
OwnedValue::Boolean(b) => Some(*b),
v => panic!("{:?} cannot be unwrapped as string", v),
}
} }
/// Update self to the min of self and other, taking into /// Update self to the min of self and other, taking into
@ -1203,9 +1230,35 @@ impl OwnedValue {
} }
} }
// Implementations of as_type for various scalar types.
macro_rules! owned_value_as_impls {
($(($type:ident, $name:ident),)*) => {
$(
impl OwnedValue {
pub fn $name(&self) -> Option<$type> {
match self {
OwnedValue::Null => None,
OwnedValue::Scalar(s) => {
(!s.is_null()).then(|| s.$name())
}
v => panic!("{:?} cannot be unwrapped as {:?}", v, stringify!($type)),
}
}
}
)*
};
}
owned_value_as_impls! {
(i64, as_i64),
(f64, as_f64),
(u64, as_u64),
}
impl std::fmt::Display for &OwnedValue { impl std::fmt::Display for &OwnedValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
OwnedValue::Null => write!(f, "NULL"),
OwnedValue::String(s) => s.fmt(f), OwnedValue::String(s) => s.fmt(f),
OwnedValue::ByteArray(s) => write!(f, "{}", String::from_utf8_lossy(s)), OwnedValue::ByteArray(s) => write!(f, "{}", String::from_utf8_lossy(s)),
OwnedValue::Boolean(b) => b.fmt(f), OwnedValue::Boolean(b) => b.fmt(f),