From 405a6a691b2edb5e6e6a44c0b690c8e70cd43d69 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Fri, 2 Jul 2021 17:57:48 -0400 Subject: [PATCH] feat: intial implementation of #1886: avoid resort if appropriate --- Cargo.lock | 1 + data_types/Cargo.toml | 1 + data_types/src/chunk_metadata.rs | 41 +++++++++++++++++++++++++++++--- server/src/db/chunk.rs | 6 ++++- 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65d69cab8e..3f58c83b62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -818,6 +818,7 @@ name = "data_types" version = "0.1.0" dependencies = [ "chrono", + "indexmap", "influxdb_line_protocol", "observability_deps", "percent-encoding", diff --git a/data_types/Cargo.toml b/data_types/Cargo.toml index bfd1331a30..1bb7d0d88e 100644 --- a/data_types/Cargo.toml +++ b/data_types/Cargo.toml @@ -8,6 +8,7 @@ readme = "README.md" [dependencies] # In alphabetical order chrono = { version = "0.4", features = ["serde"] } +indexmap = "1.6" influxdb_line_protocol = { path = "../influxdb_line_protocol" } percent-encoding = "2.1.0" regex = "1.4" diff --git a/data_types/src/chunk_metadata.rs b/data_types/src/chunk_metadata.rs index 63e2b0bb82..e713c372c0 100644 --- a/data_types/src/chunk_metadata.rs +++ b/data_types/src/chunk_metadata.rs @@ -1,5 +1,7 @@ //! 
Module contains a representation of chunk metadata -use std::sync::Arc; +use std::{fmt, sync::Arc}; + +use indexmap::IndexMap; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; @@ -18,14 +20,47 @@ pub struct ChunkAddr { /// The ID of the chunk pub chunk_id: u32, + + // Sort key of this chunk + pub sort_key: Arc, +} + +/// Temporary - https://github.com/apache/arrow-rs/pull/425 +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] +pub struct SortOptions { + /// Whether to sort in descending order + pub descending: bool, + /// Whether to sort nulls first + pub nulls_first: bool, +} +impl fmt::Display for SortOptions { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Descending: {}, NUll first: {}", self.descending, self.nulls_first) + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +pub struct SortKey { + columns: Vec<(String, SortOptions)>, +} + +impl fmt::Display for SortKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut sort_key = "Sort Key:\n".to_string(); + for col in &self.columns { + let s = format!("{}, {}", col.0, col.1); + sort_key = sort_key + &s; + } + write!(f, "{}", sort_key) + } } impl std::fmt::Display for ChunkAddr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "Chunk('{}':'{}':'{}':{})", - self.db_name, self.table_name, self.partition_key, self.chunk_id + "Chunk('{}':'{}':'{}':{}:'{}')", + self.db_name, self.table_name, self.partition_key, self.chunk_id, self.sort_key ) } } diff --git a/server/src/db/chunk.rs b/server/src/db/chunk.rs index 012417ba40..88ebdf0376 100644 --- a/server/src/db/chunk.rs +++ b/server/src/db/chunk.rs @@ -432,7 +432,11 @@ impl QueryChunk for DbChunk { } } - // TODOs: return the right value. 
For now the chunk is assumed to be not sorted + /// Returns true if the chunk is sorted on its pk + /// Since data is compacted prior to being moved to RUBs, data in RUBs and OBs + /// should be sorted on their PK as a result of compaction. + /// However, since we currently sort data based on their cardinality (see compute_sort_key), + /// 2 different chunks may be sorted on a different order of key columns. fn is_sorted_on_pk(&self) -> bool { match &self.state { State::MutableBuffer { .. } => false,