feat: initial implementation of #1886: avoid resort if appropriate
parent
b4534883fe
commit
405a6a691b
|
@ -818,6 +818,7 @@ name = "data_types"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"indexmap",
|
||||
"influxdb_line_protocol",
|
||||
"observability_deps",
|
||||
"percent-encoding",
|
||||
|
|
|
@ -8,6 +8,7 @@ readme = "README.md"
|
|||
|
||||
[dependencies] # In alphabetical order
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
indexmap = "1.6"
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
percent-encoding = "2.1.0"
|
||||
regex = "1.4"
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
//! Module contains a representation of chunk metadata
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, sync::Arc};
|
||||
|
||||
use indexmap::IndexMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -18,14 +20,47 @@ pub struct ChunkAddr {
|
|||
|
||||
/// The ID of the chunk
|
||||
pub chunk_id: u32,
|
||||
|
||||
/// Sort key of this chunk
|
||||
pub sort_key: Arc<SortKey>,
|
||||
}
|
||||
|
||||
/// Per-column sort options: the sort direction and where nulls are placed.
///
/// Temporary - https://github.com/apache/arrow-rs/pull/425
/// NOTE(review): presumably a local stand-in for arrow's `SortOptions` until
/// the linked PR is available upstream — confirm before relying on this.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SortOptions {
|
||||
/// Whether to sort in descending order
|
||||
pub descending: bool,
|
||||
/// Whether to sort nulls first
|
||||
pub nulls_first: bool,
|
||||
}
|
||||
impl fmt::Display for SortOptions {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Descending: {}, NUll first: {}", self.descending, self.nulls_first)
|
||||
}
|
||||
}
|
||||
|
||||
/// An ordered list of sort columns.
///
/// Each entry pairs a `String` (presumably the column name — confirm against
/// callers) with the [`SortOptions`] to apply to it.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SortKey {
|
||||
columns: Vec<(String, SortOptions)>,
|
||||
}
|
||||
|
||||
impl fmt::Display for SortKey {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let mut sort_key = "Sort Key:\n".to_string();
|
||||
for col in &self.columns {
|
||||
let s = format!("{}, {}", col.0, col.1);
|
||||
sort_key = sort_key + &s;
|
||||
}
|
||||
write!(f, "{}", sort_key)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ChunkAddr {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Chunk('{}':'{}':'{}':{})",
|
||||
self.db_name, self.table_name, self.partition_key, self.chunk_id
|
||||
"Chunk('{}':'{}':'{}':{}:'{}')",
|
||||
self.db_name, self.table_name, self.partition_key, self.chunk_id, self.sort_key
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -432,7 +432,11 @@ impl QueryChunk for DbChunk {
|
|||
}
|
||||
}
|
||||
|
||||
// TODOs: return the right value. For now the chunk is assumed to be not sorted
|
||||
/// Returns true if the chunk is sorted on its pk
|
||||
/// Since data is compacted prior to being moved to RUBs, data in RUBs and OBs
|
||||
/// should be sorted on their PK as a result of compaction.
|
||||
/// However, since data is currently sorted based on cardinality (see compute_sort_key),
|
||||
/// two different chunks may be sorted on different orders of key columns.
|
||||
fn is_sorted_on_pk(&self) -> bool {
|
||||
match &self.state {
|
||||
State::MutableBuffer { .. } => false,
|
||||
|
|
Loading…
Reference in New Issue