use arrow_util::assert_batches_eq;
use data_types::{PartitionTemplate, TemplatePart};
use mutable_batch::{writer::Writer, MutableBatch, PartitionWrite, WritePayload};
use mutable_batch_pb::{decode::write_table_batch, encode::encode_batch};
use schema::selection::Selection;

// Round-trips a batch through the protobuf encoding: the batch is parsed from
// line protocol, rendered to Arrow, encoded with `encode_batch`, decoded with
// `write_table_batch` into a fresh `MutableBatch`, and rendered again. Both
// renderings must match the same expected table.
#[test]
fn test_encode_decode() {
    // One line-protocol record per line; each row omits a different subset of
    // the columns so that every column contains at least one NULL.
    let (_, batch) = mutable_batch_lp::test_helpers::lp_to_mutable_batch(
        r#"
            foo,t1=asdf iv=1i,uv=774u,fv=1.0,bv=true,sv="hi" 1
            foo,t1=bar uv=1u,fv=32.0,bv=true 2
            foo,t1=bar iv=1i,uv=1u,fv=1.0,sv="bye" 3
            foo iv=-3405i,uv=566u,bv=false,sv="hi" 4
            foo,t1=asdf iv=1i,fv=1.23,bv=true,sv="hi" 5
        "#,
    );

    // Cells are padded to the column width given by the border rows.
    let expected = &[
        "+-------+------+-------+-----+------+--------------------------------+-----+",
        "| bv    | fv   | iv    | sv  | t1   | time                           | uv  |",
        "+-------+------+-------+-----+------+--------------------------------+-----+",
        "| true  | 1    | 1     | hi  | asdf | 1970-01-01T00:00:00.000000001Z | 774 |",
        "| true  | 32   |       |     | bar  | 1970-01-01T00:00:00.000000002Z | 1   |",
        "|       | 1    | 1     | bye | bar  | 1970-01-01T00:00:00.000000003Z | 1   |",
        "| false |      | -3405 | hi  |      | 1970-01-01T00:00:00.000000004Z | 566 |",
        "| true  | 1.23 | 1     | hi  | asdf | 1970-01-01T00:00:00.000000005Z |     |",
        "+-------+------+-------+-----+------+--------------------------------+-----+",
    ];

    // Sanity check the input before encoding.
    assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);

    let encoded = encode_batch("foo", &batch);

    // Decode into an empty batch and check nothing was lost or altered.
    let mut decoded = MutableBatch::new();
    write_table_batch(&mut decoded, &encoded).unwrap();

    assert_batches_eq!(expected, &[decoded.to_arrow(Selection::All).unwrap()]);
}

// This test asserts columns containing no values do not prevent an encoded
// batch from being deserialized:
//
// https://github.com/influxdata/influxdb_iox/issues/4272
//
// The test constructs a table such that after partitioning, one entire column
// is NULL within a partition.
// In this test case, the following table is
// partitioned by YMD:
//
//
//      | time       | A    | B    |
//      | ---------- | ---- | ---- |
//      | 1970-01-01 | 1    | NULL |
//      | 1970-07-05 | NULL | 1    |
//
// Yielding two partitions:
//
//      | time       | A    | B    |
//      | ---------- | ---- | ---- |
//      | 1970-01-01 | 1    | NULL |
//
// and:
//
//      | time       | A    | B    |
//      | ---------- | ---- | ---- |
//      | 1970-07-05 | NULL | 1    |
//
// In both partitions, one column is composed entirely of NULL values.
//
// Encoding each of these partitions succeeds, but decoding the partition fails
// due to the inability to infer a column type from the serialized format which
// contains no values:
//
// ```
//  Column {
//      column_name: "B",
//      semantic_type: Field,
//      values: Some(
//          Values {
//              i64_values: [],
//              f64_values: [],
//              u64_values: [],
//              string_values: [],
//              bool_values: [],
//              bytes_values: [],
//              packed_string_values: None,
//              interned_string_values: None,
//          },
//      ),
//      null_mask: [
//          1,
//      ],
//  },
// ```
//
// In a column that is not entirely NULL, one of the "Values" fields would be
// non-empty, and the decoder would use this to infer the type of the column.
//
// Because we have chosen to not differentiate between "NULL" and "empty" in our
// proto encoding, the decoder cannot infer which field within the "Values"
// struct the column belongs to - all are valid, but empty. This causes
// [`Error::EmptyColumn`] to be returned during deserialisation.
//
// This is fixed by skipping entirely-null columns when encoding the batch.
#[test]
fn test_encode_decode_null_columns_issue_4272() {
    // Regression test for issue #4272: builds a two-row batch, partitions it
    // by day, and round-trips each partition through the protobuf encoding.
    // The expected tables below show each partition decoding to only one of
    // the two value columns ("A" or "B") plus "time".
    let mut batch = MutableBatch::new();
    let mut writer = Writer::new(&mut batch, 2);

    writer
        // Yielding partition keys: ["1970-01-01", "1970-07-05"]
        .write_time("time", [160, 16007561568756160].into_iter())
        .unwrap();

    // The two columns are written with different masks and one value each;
    // per the expected output, "A" ends up only in the first row's partition
    // and "B" only in the second row's partition.
    writer
        .write_i64("A", Some(&[0b00000001]), [1].into_iter())
        .unwrap();
    writer
        .write_i64("B", Some(&[0b00000010]), [1].into_iter())
        .unwrap();
    writer.commit();

    let mut partitions = PartitionWrite::partition(
        "test",
        &batch,
        &PartitionTemplate {
            parts: vec![TemplatePart::TimeFormat("%Y-%m-%d".to_owned())],
        },
    );

    // There should be two partitions, one for the timestamp 160, and one for
    // the other timestamp.
    assert_eq!(partitions.len(), 2);

    // Removes the partition stored under `partition_key`, materialises it as a
    // batch, encodes it and decodes the result into a fresh batch.
    let mut round_trip = |partition_key: &str| -> MutableBatch {
        let mut partitioned = MutableBatch::default();
        partitions
            .remove(partition_key)
            .expect("partition not found")
            .write_to_batch(&mut partitioned)
            .expect("should write");

        let encoded = encode_batch("bananas", &partitioned);

        let mut decoded = MutableBatch::new();
        // Without the fix for #4272 this deserialisation call would fail.
        write_table_batch(&mut decoded, &encoded).unwrap();
        decoded
    };

    // Round-trip the "1970-01-01" partition
    let decoded = round_trip("1970-01-01");
    let expected = &[
        "+---+--------------------------------+",
        "| A | time                           |",
        "+---+--------------------------------+",
        "| 1 | 1970-01-01T00:00:00.000000160Z |",
        "+---+--------------------------------+",
    ];
    assert_batches_eq!(expected, &[decoded.to_arrow(Selection::All).unwrap()]);

    // And finally assert the "1970-07-05" round-trip
    let decoded = round_trip("1970-07-05");
    let expected = &[
        "+---+--------------------------------+",
        "| B | time                           |",
        "+---+--------------------------------+",
        "| 1 | 1970-07-05T06:32:41.568756160Z |",
        "+---+--------------------------------+",
    ];
    assert_batches_eq!(expected, &[decoded.to_arrow(Selection::All).unwrap()]);
}