refactor: make `SchemaBuilder::field` fallible

It would be nice if the IOx data type were not optional; this is a
preparatory clean-up to achieve that.
pull/24376/head
Marco Neumann 2022-10-24 18:12:42 +02:00
parent c9b1066b89
commit 3e4db81bc6
15 changed files with 84 additions and 35 deletions

View File

@ -2006,6 +2006,7 @@ mod test {
// request just the field and timestamp
let schema = SchemaBuilder::new()
.field("field_int", DataType::Int64)
.unwrap()
.timestamp()
.build()
.unwrap();
@ -2101,7 +2102,9 @@ mod test {
// request just the fields
let schema = SchemaBuilder::new()
.field("field_int", DataType::Int64)
.unwrap()
.field("other_field_int", DataType::Int64)
.unwrap()
.build()
.unwrap();
@ -2416,6 +2419,7 @@ mod test {
// request just the field and timestamp
let schema = SchemaBuilder::new()
.field("field_int", DataType::Int64)
.unwrap()
.timestamp()
.build()
.unwrap();

View File

@ -267,6 +267,7 @@ macro_rules! impl_with_column {
let new_column_schema = SchemaBuilder::new()
.field(&column_name, DataType::$DATA_TYPE)
.unwrap()
.build()
.unwrap();
self.add_schema_to_table(new_column_schema, true, None)
@ -282,6 +283,7 @@ macro_rules! impl_with_column_no_stats {
let new_column_schema = SchemaBuilder::new()
.field(&column_name, DataType::$DATA_TYPE)
.unwrap()
.build()
.unwrap();
@ -303,6 +305,7 @@ macro_rules! impl_with_column_with_stats {
let new_column_schema = SchemaBuilder::new()
.field(&column_name, DataType::$DATA_TYPE)
.unwrap()
.build()
.unwrap();
@ -525,6 +528,7 @@ impl TestChunk {
// merge it in to any existing schema
let new_column_schema = SchemaBuilder::new()
.field(&column_name, DataType::Utf8)
.unwrap()
.build()
.unwrap();

View File

@ -288,10 +288,15 @@ mod tests {
let schema = SchemaBuilder::new()
.tag("tag")
.field("str", DataType::Utf8)
.unwrap()
.field("int", DataType::Int64)
.unwrap()
.field("uint", DataType::UInt64)
.unwrap()
.field("float", DataType::Float64)
.unwrap()
.field("bool", DataType::Boolean)
.unwrap()
.build()
.unwrap();

View File

@ -789,7 +789,9 @@ mod tests {
let schema = SchemaBuilder::new()
.field("foo", ArrowDataType::Int64)
.unwrap()
.field("bar", ArrowDataType::Int64)
.unwrap()
.timestamp()
.build()
.unwrap();
@ -896,6 +898,7 @@ mod tests {
let schema = SchemaBuilder::new()
.field("foo", ArrowDataType::Int64)
.unwrap()
.timestamp()
.build()
.unwrap();

View File

@ -454,7 +454,9 @@ mod tests {
.tag("t1")
.tag("t2")
.field("f1", DataType::Int64)
.unwrap()
.field("f2", DataType::Int64)
.unwrap()
.build()
.unwrap();

View File

@ -91,6 +91,7 @@ mod tests {
let schema = SchemaBuilder::new()
.tag("t1")
.field("f1", DataType::Int64)
.unwrap()
.build()
.unwrap();

View File

@ -505,9 +505,13 @@ mod tests {
.tag("foo")
.tag("bar")
.field("f1", DataType::Float64)
.unwrap()
.field("f2", DataType::Float64)
.unwrap()
.field("f3", DataType::Float64)
.unwrap()
.field("f4", DataType::Float64)
.unwrap()
.build()
.map(Arc::new)
.unwrap()

View File

@ -327,6 +327,7 @@ mod tests {
schema: Arc::new(
SchemaBuilder::new()
.field("col1", DataType::Int64)
.unwrap()
.tag("col2")
.timestamp()
.build()
@ -346,6 +347,7 @@ mod tests {
schema: Arc::new(
SchemaBuilder::new()
.field("col1", DataType::Float64)
.unwrap()
.timestamp()
.build()
.unwrap(),

View File

@ -510,6 +510,7 @@ pub mod tests {
fn assert_schema(chunk: &QuerierChunk) {
let expected_schema = SchemaBuilder::new()
.field("field_int", DataType::Int64)
.unwrap()
.tag("tag1")
.timestamp()
.build()

View File

@ -1955,6 +1955,7 @@ mod tests {
let expected_schema = Arc::new(
SchemaBuilder::new()
.field("b", DataType::Boolean)
.unwrap()
.timestamp()
.build()
.unwrap(),

View File

@ -85,6 +85,7 @@ async fn list_schema_cpu_all() {
.tag("region")
.timestamp()
.field("user", DataType::Float64)
.unwrap()
.build()
.unwrap();
@ -107,6 +108,7 @@ async fn list_schema_cpu_all_set_sort_key() {
.tag("region")
.timestamp()
.field("user", DataType::Float64)
.unwrap()
.build()
.unwrap();
@ -127,6 +129,7 @@ async fn list_schema_disk_all() {
// we expect columns to come out in lexicographic order by name
let expected_schema = SchemaBuilder::new()
.field("bytes", DataType::Int64)
.unwrap()
.tag("region")
.timestamp()
.build()
@ -146,6 +149,7 @@ async fn list_schema_disk_all() {
async fn list_schema_cpu_selection() {
let expected_schema = SchemaBuilder::new()
.field("user", DataType::Float64)
.unwrap()
.tag("region")
.build()
.unwrap();
@ -162,6 +166,7 @@ async fn list_schema_disk_selection() {
let expected_schema = SchemaBuilder::new()
.timestamp()
.field("bytes", DataType::Int64)
.unwrap()
.build()
.unwrap();
@ -176,6 +181,7 @@ async fn list_schema_location_all() {
// we expect columns to come out in lexicographic order by name
let expected_schema = SchemaBuilder::new()
.field("count", DataType::UInt64)
.unwrap()
.timestamp()
.tag("town")
.build()

View File

@ -70,34 +70,34 @@ impl SchemaBuilder {
pub fn influx_column(&mut self, column_name: &str, column_type: InfluxColumnType) -> &mut Self {
match column_type {
InfluxColumnType::Tag => self.tag(column_name),
InfluxColumnType::Field(influx_field_type) => {
self.field(column_name, influx_field_type.into())
}
InfluxColumnType::Field(influx_field_type) => self
.field(column_name, influx_field_type.into())
.expect("just converted this from a valid type"),
InfluxColumnType::Timestamp => self.timestamp(),
}
}
/// Add a new nullable field column with the specified Arrow datatype.
pub fn field(&mut self, column_name: &str, arrow_type: ArrowDataType) -> &mut Self {
let influxdb_column_type = arrow_type
.clone()
.try_into()
.map(InfluxColumnType::Field)
.ok();
pub fn field(
&mut self,
column_name: &str,
arrow_type: ArrowDataType,
) -> Result<&mut Self, &'static str> {
let influxdb_column_type = arrow_type.clone().try_into().map(InfluxColumnType::Field)?;
self.add_column(column_name, true, influxdb_column_type, arrow_type)
Ok(self.add_column(column_name, true, Some(influxdb_column_type), arrow_type))
}
/// Add a new field column with the specified Arrow datatype that can not be
/// null
pub fn non_null_field(&mut self, column_name: &str, arrow_type: ArrowDataType) -> &mut Self {
let influxdb_column_type = arrow_type
.clone()
.try_into()
.map(InfluxColumnType::Field)
.ok();
pub fn non_null_field(
&mut self,
column_name: &str,
arrow_type: ArrowDataType,
) -> Result<&mut Self, &'static str> {
let influxdb_column_type = arrow_type.clone().try_into().map(InfluxColumnType::Field)?;
self.add_column(column_name, false, influxdb_column_type, arrow_type)
Ok(self.add_column(column_name, false, Some(influxdb_column_type), arrow_type))
}
/// Add the InfluxDB data model timestamp column
@ -241,8 +241,9 @@ mod test {
fn test_builder_field() {
let s = SchemaBuilder::new()
.field("the_influx_field", ArrowDataType::Float64)
// can't represent with lp
.field("the_no_influx_field", ArrowDataType::Decimal128(10, 0))
.unwrap()
.field("the_other_influx_field", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();
@ -253,10 +254,10 @@ mod test {
assert_eq!(influxdb_column_type, Some(Field(Float)));
let (influxdb_column_type, field) = s.field(1);
assert_eq!(field.name(), "the_no_influx_field");
assert_eq!(field.data_type(), &ArrowDataType::Decimal128(10, 0));
assert_eq!(field.name(), "the_other_influx_field");
assert_eq!(field.data_type(), &ArrowDataType::Int64);
assert!(field.is_nullable());
assert_eq!(influxdb_column_type, None);
assert_eq!(influxdb_column_type, Some(Field(Integer)));
assert_eq!(s.len(), 2);
}
@ -281,8 +282,9 @@ mod test {
fn test_builder_non_field() {
let s = SchemaBuilder::new()
.non_null_field("the_influx_field", ArrowDataType::Float64)
// can't represent with lp
.non_null_field("the_no_influx_field", ArrowDataType::Decimal128(10, 0))
.unwrap()
.non_null_field("the_other_influx_field", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();
@ -293,10 +295,10 @@ mod test {
assert_eq!(influxdb_column_type, Some(Field(Float)));
let (influxdb_column_type, field) = s.field(1);
assert_eq!(field.name(), "the_no_influx_field");
assert_eq!(field.data_type(), &ArrowDataType::Decimal128(10, 0));
assert_eq!(field.name(), "the_other_influx_field");
assert_eq!(field.data_type(), &ArrowDataType::Int64);
assert!(!field.is_nullable());
assert_eq!(influxdb_column_type, None);
assert_eq!(influxdb_column_type, Some(Field(Integer)));
assert_eq!(s.len(), 2);
}

View File

@ -1026,8 +1026,11 @@ mod test {
fn test_sort_fields_by_name_already_sorted() {
let schema = SchemaBuilder::new()
.field("field_a", ArrowDataType::Int64)
.unwrap()
.field("field_b", ArrowDataType::Int64)
.unwrap()
.field("field_c", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();
@ -1044,8 +1047,11 @@ mod test {
fn test_sort_fields_by_name() {
let schema = SchemaBuilder::new()
.field("field_b", ArrowDataType::Int64)
.unwrap()
.field("field_a", ArrowDataType::Int64)
.unwrap()
.field("field_c", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();
@ -1053,8 +1059,11 @@ mod test {
let expected_schema = SchemaBuilder::new()
.field("field_a", ArrowDataType::Int64)
.unwrap()
.field("field_b", ArrowDataType::Int64)
.unwrap()
.field("field_c", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();

View File

@ -418,16 +418,16 @@ mod tests {
#[test]
fn test_merge_incompatible_data_types() {
// same field name with different type
let schema1 = SchemaBuilder::new()
.field("the_field", ArrowDataType::Int16)
.build()
.unwrap();
let schema1 = Schema::try_from_arrow(Arc::new(arrow::datatypes::Schema::new(vec![
arrow::datatypes::Field::new("the_field", ArrowDataType::Int16, true),
])))
.unwrap();
// same field name with different type
let schema2 = SchemaBuilder::new()
.field("the_field", ArrowDataType::Int8)
.build()
.unwrap();
let schema2 = Schema::try_from_arrow(Arc::new(arrow::datatypes::Schema::new(vec![
arrow::datatypes::Field::new("the_field", ArrowDataType::Int8, true),
])))
.unwrap();
let merged_schema_error = SchemaMerger::new()
.merge(&schema1)
@ -461,12 +461,14 @@ mod tests {
fn test_merge_incompatible_schema_nullability() {
let schema1 = SchemaBuilder::new()
.non_null_field("int_field", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();
// same field name with different nullability
let schema2 = SchemaBuilder::new()
.field("int_field", ArrowDataType::Int64)
.unwrap()
.build()
.unwrap();

View File

@ -908,7 +908,9 @@ mod tests {
.tag("t2")
.tag("host")
.field("foo", DataType::Int64)
.unwrap()
.field("bar", DataType::Int64)
.unwrap()
.build()
.unwrap();
@ -918,6 +920,7 @@ mod tests {
let schema = SchemaBuilder::new()
.tag("t3")
.field("baz", DataType::Int64)
.unwrap()
.build()
.unwrap();