influxdb/mutable_batch_pb/Cargo.toml

[package]
name = "mutable_batch_pb"
description = "Conversion logic for binary write protocol <-> MutableBatch"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
arrow_util = { path = "../arrow_util" }
dml = { path = "../dml" }
generated_types = { path = "../generated_types" }
hashbrown = { workspace = true }
mutable_batch = { path = "../mutable_batch" }
schema = { path = "../schema" }
snafu = "0.7"
workspace-hack = { path = "../workspace-hack" }

[dev-dependencies]
mutable_batch_lp = { path = "../mutable_batch_lp" }
data_types = { path = "../data_types" }