2021-10-14 16:46:54 +00:00
|
|
|
# Package metadata for the `mutable_batch` crate.
[package]
|
|
|
|
name = "mutable_batch"
|
|
|
|
description = "A mutable arrow RecordBatch"
|
2022-09-26 14:43:00 +00:00
|
|
|
# version, authors, edition and license are not defined in this manifest:
# each `<key>.workspace = true` entry takes its value from the workspace
# root manifest.
version.workspace = true
|
|
|
|
authors.workspace = true
|
|
|
|
edition.workspace = true
|
|
|
|
license.workspace = true
|
2021-10-14 16:46:54 +00:00
|
|
|
|
|
|
|
# Runtime dependencies. Entries with `workspace = true` take their version
# from the workspace root manifest; entries with `path` point at other
# crates in the parent directory.
[dependencies]
|
2022-10-24 17:58:38 +00:00
|
|
|
# Workspace-defined dependency, with the "prettyprint" feature enabled here.
arrow = { workspace = true, features = ["prettyprint"] }
|
2021-10-14 16:46:54 +00:00
|
|
|
arrow_util = { path = "../arrow_util" }
|
2021-11-22 22:06:04 +00:00
|
|
|
# Default features are switched off; no features are requested here.
chrono = { version = "0.4", default-features = false }
|
2022-05-05 19:29:24 +00:00
|
|
|
data_types = { path = "../data_types" }
|
2022-05-05 15:50:06 +00:00
|
|
|
iox_time = { path = "../iox_time" }
|
2021-10-14 16:46:54 +00:00
|
|
|
schema = { path = "../schema" }
|
2022-01-11 19:22:36 +00:00
|
|
|
snafu = "0.7"
|
2022-11-11 17:12:30 +00:00
|
|
|
hashbrown = { workspace = true }
|
2023-06-23 08:11:56 +00:00
|
|
|
itertools = "0.11"
|
2023-02-24 18:02:23 +00:00
|
|
|
# NOTE(review): presumably the generated feature-unification crate
# (cargo-hakari style) - confirm before editing by hand.
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
feat: unambiguously reversible partition keys
This commit changes the format of partition keys when generated with
non-default partition key templates ONLY. A prior fixture test is
unchanged by this commit, ensuring the default partition keys remain
the same.
When a custom partition key template is provided, it may specify one or
more parts, with the TagValue template causing values extracted from tag
columns to appear in the derived partition key.
This commit changes the generated partition key in the following ways:
* The delimiter of multi-part partition keys; the character used to
delimit partition key parts is changed from "/" to "|" (the pipe
character) as it is less likely to occur in user-provided input,
reducing the encoding overhead.
* The format of the extracted TagValue values (see below).
Building on the work of custom partition key overrides, where an
immutable partition template is resolved and set at table creation time,
the changes in this PR enable the derived partition key to be
unambiguously reversed into the set of tag (column_name, column_value)
tuples it was generated from for use in query pruning logic. This is
implemented by the build_column_values() method in this commit, which
requires both the template, and the derived partition key.
Prior to this commit, a partition key value extracted from a tag column
was in the form "tagname_x" where "x" is the value and "tagname" is the
name of the tag column it was extracted from. After this commit, the
partition key value is in the form "x"; the column name is removed from
the derived string to reduce the catalog storage overhead (a key driver
of COGS). In the case of a NULL tag value, the sentinel value "!" is
inserted instead of the prior "tagname_" marker. In the case of an empty
string tag value (""), the sentinel "^" value is inserted instead of the
"tagname_-" marker, ensuring the distinction between an empty value and
a not-present tag is preserved.
Additionally tag values utilise percent encoding to encode reserved
characters (part delimiter, empty sentinel character, % itself) to
eliminate deserialisation ambiguity.
Examples of how this has changed derived partition keys, for a template
of [Time(YYYY-MM-DD), TagValue(region), TagValue(bananas)]:
Write: time=1970-01-01,region=west,other=ignored
Old: "1970-01-01-region_west-bananas"
New: "1970-01-01|west|!"
Write: time=1970-01-01,other=ignored
Old: "1970-01-01-region-bananas"
New: "1970-01-01|!|!"
2023-05-29 12:47:25 +00:00
|
|
|
# Three-part versions below are Cargo caret requirements (compatible
# updates allowed), not exact pins.
# NOTE(review): percent-encoding is presumably what encodes reserved
# characters in partition key tag values - confirm against the crate source.
percent-encoding = "2.2.0"
|
2023-07-24 10:18:44 +00:00
|
|
|
thiserror = "1.0.44"
|
2023-06-05 10:29:47 +00:00
|
|
|
unicode-segmentation = "1.10.1"
|
2021-10-14 16:46:54 +00:00
|
|
|
|
|
|
|
# Dependencies compiled only for tests, examples and benchmarks; they are
# not part of the crate's normal build.
[dev-dependencies]
|
2023-05-30 09:06:38 +00:00
|
|
|
assert_matches = "1.5.0"
|
2022-12-16 17:20:16 +00:00
|
|
|
# Local crate from the parent directory, used here as a dev-dependency only.
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
2023-07-17 16:10:02 +00:00
|
|
|
paste = "1.0.14"
|
2023-06-14 12:28:18 +00:00
|
|
|
# Default features are switched off; no features are requested here.
proptest = { version = "1.2.0", default-features = false }
|
2021-10-21 12:06:26 +00:00
|
|
|
rand = "0.8"
|