feat: initial implementation of #1886: avoid resort if appropriate

pull/24376/head
Nga Tran 2021-07-02 17:57:48 -04:00
parent b4534883fe
commit 405a6a691b
4 changed files with 45 additions and 4 deletions

1
Cargo.lock generated
View File

@ -818,6 +818,7 @@ name = "data_types"
version = "0.1.0"
dependencies = [
"chrono",
"indexmap",
"influxdb_line_protocol",
"observability_deps",
"percent-encoding",

View File

@ -8,6 +8,7 @@ readme = "README.md"
[dependencies] # In alphabetical order
chrono = { version = "0.4", features = ["serde"] }
indexmap = "1.6"
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
percent-encoding = "2.1.0"
regex = "1.4"

View File

@ -1,5 +1,7 @@
//! Module contains a representation of chunk metadata
use std::sync::Arc;
use std::{fmt, sync::Arc};
use indexmap::IndexMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
@ -18,14 +20,47 @@ pub struct ChunkAddr {
/// The ID of the chunk
pub chunk_id: u32,
/// The sort key of this chunk
pub sort_key: Arc<SortKey>,
}
/// Per-column ordering flags, stored alongside each column entry of a `SortKey`.
///
/// Temporary - https://github.com/apache/arrow-rs/pull/425
// NOTE(review): presumably a local stand-in for an equivalent arrow type until the
// linked PR is usable here — confirm before relying on or removing it.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
pub struct SortOptions {
/// Whether to sort in descending order
pub descending: bool,
/// Whether to sort nulls first
pub nulls_first: bool,
}
impl fmt::Display for SortOptions {
    /// Writes both flags on a single line: `descending` first, then `nulls_first`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            descending,
            nulls_first,
        } = self;
        // NOTE(review): the "NUll" spelling in the label is reproduced as-is so the
        // rendered text does not change; it looks like a typo worth confirming.
        f.write_fmt(format_args!(
            "Descending: {}, NUll first: {}",
            descending, nulls_first
        ))
    }
}
/// An ordered list of `(String, SortOptions)` entries describing a sort key.
// NOTE(review): the `String` is presumably the column name — confirm against callers.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct SortKey {
/// Sort-key entries in `Vec` order, each paired with its `SortOptions`.
columns: Vec<(String, SortOptions)>,
}
impl fmt::Display for SortKey {
    /// Writes the `Sort Key:` header line followed by every column entry.
    ///
    /// Each entry is rendered as `<name>, <options>` using the `Display`
    /// implementation of its `SortOptions`. Output goes straight to the
    /// formatter, so no intermediate `String` is built or re-allocated per
    /// column.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Sort Key:\n")?;
        // NOTE(review): entries are emitted back-to-back with no separator between
        // them; this matches the existing output and is kept so the rendered text
        // does not change. Consider adding a delimiter in a follow-up.
        for (name, options) in &self.columns {
            write!(f, "{}, {}", name, options)?;
        }
        Ok(())
    }
}
// NOTE(review): this hunk shows two format-string/argument pairs inside one `write!`.
// The four-placeholder pair looks like the removed side of the diff and the
// five-placeholder pair (which appends `sort_key`) the added side — confirm that only
// one pair exists in the actual file.
impl std::fmt::Display for ChunkAddr {
/// Formats the address as `Chunk(...)` built from `db_name`, `table_name`,
/// `partition_key` and `chunk_id`; the five-placeholder variant also appends `sort_key`.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Chunk('{}':'{}':'{}':{})",
self.db_name, self.table_name, self.partition_key, self.chunk_id
"Chunk('{}':'{}':'{}':{}:'{}')",
self.db_name, self.table_name, self.partition_key, self.chunk_id, self.sort_key
)
}
}

View File

@ -432,7 +432,11 @@ impl QueryChunk for DbChunk {
}
}
// TODOs: return the right value. For now the chunk is assumed to be not sorted
/// Returns true if the chunk is sorted on its PK.
/// Since data is compacted prior to being moved to RUBs, data in RUBs and OBs
/// should be sorted on their PK as a result of compaction.
/// However, since we currently sort data based on column cardinality (see compute_sort_key),
/// two different chunks may be sorted on different orderings of the key columns.
fn is_sorted_on_pk(&self) -> bool {
match &self.state {
State::MutableBuffer { .. } => false,