feat: correctly sort data and store in catalog metadata (#3864)

* feat: respect sort order in ChunkTableProvider (#3214)

feat: persist sort order in catalog (#3845)

refactor: owned SortKey (#3845)

* fix: size tests

* refactor: immutable SortKey

* test: test sort order restart (#3845)

* chore: explicit None for sort key

* chore: test cleanup

* fix: handling of sort keys containing fields

* chore: remove unused selected_sort_key

* chore: more docs

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Raphael Taylor-Davies 2022-02-25 17:56:27 +00:00 committed by GitHub
parent 8edc462c37
commit 2a842fbb1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 619 additions and 1338 deletions

View File

@ -14,7 +14,7 @@ use observability_deps::tracing::debug;
use parking_lot::Mutex;
use parquet_file::chunk::ParquetChunk;
use read_buffer::RBChunk;
use schema::{selection::Selection, Schema, TIME_COLUMN_NAME};
use schema::{sort::SortKey, Schema, TIME_COLUMN_NAME};
use snafu::Snafu;
use std::sync::Arc;
use time::{Time, TimeProvider};
@ -88,6 +88,9 @@ pub struct ChunkMetadata {
/// into IOx. Note due to the compaction, etc... this may not be the chunk
/// that data was originally written into
pub time_of_last_write: Time,
/// Sort key for this chunk
pub sort_key: Option<SortKey>,
}
/// Different memory representations of a frozen chunk.
@ -458,11 +461,11 @@ impl CatalogChunk {
self.order
}
pub fn schema(&self) -> Arc<Schema> {
pub fn sort_key(&self) -> Option<&SortKey> {
match &self.stage {
ChunkStage::Open { mb_chunk, .. } => Arc::new(mb_chunk.schema(Selection::All).unwrap()),
ChunkStage::Open { .. } => None,
ChunkStage::Frozen { meta, .. } | ChunkStage::Persisted { meta, .. } => {
Arc::clone(&meta.schema)
meta.sort_key.as_ref()
}
}
}
@ -532,6 +535,7 @@ impl CatalogChunk {
delete_predicates: del_preds,
time_of_first_write: meta.time_of_first_write,
time_of_last_write: meta.time_of_last_write,
sort_key: meta.sort_key.clone(),
});
}
}
@ -762,6 +766,7 @@ impl CatalogChunk {
delete_predicates,
time_of_first_write: *time_of_first_write,
time_of_last_write: *time_of_last_write,
sort_key: None,
};
self.stage = ChunkStage::Frozen {
@ -1293,6 +1298,7 @@ mod tests {
delete_predicates: vec![],
time_of_first_write: now,
time_of_last_write: now,
sort_key: metadata.sort_key,
};
CatalogChunk::new_object_store_only(

View File

@ -121,6 +121,7 @@ impl DbChunk {
delete_predicates: vec![], // open chunk does not have delete predicate
time_of_first_write: *time_of_first_write,
time_of_last_write: *time_of_last_write,
sort_key: None,
};
(state, Arc::new(meta))
}
@ -535,20 +536,6 @@ impl QueryChunk for DbChunk {
}
}
/// Returns true if the chunk is sorted on its pk
/// Since data is compacted prior being moved to RUBs, data in RUBs and OBs
/// should be sorted on their PK as the results of compacting.
/// However, since we current sorted data based on their cardinality (see compute_sort_key),
/// 2 different chunks may be sorted on different order of key columns.
fn is_sorted_on_pk(&self) -> bool {
self.schema().is_sorted_on_pk()
}
/// Returns the sort key of the chunk if any
fn sort_key(&self) -> Option<SortKey<'_>> {
self.meta.schema.sort_key()
}
fn chunk_type(&self) -> &str {
match &self.state {
State::MutableBuffer { .. } => "MUB",
@ -571,6 +558,10 @@ impl QueryChunkMeta for DbChunk {
Arc::clone(&self.meta.schema)
}
fn sort_key(&self) -> Option<&SortKey> {
self.meta.sort_key.as_ref()
}
// return a reference to delete predicates of the chunk
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
let pred = &self.meta.delete_predicates;

View File

@ -1697,7 +1697,7 @@ mod tests {
.id();
// A chunk is now in the object store and still in read buffer
let expected_parquet_size = 1247;
let expected_parquet_size = 1257;
catalog_chunk_size_bytes_metric_eq(registry, "read_buffer", expected_read_buffer_size);
// now also in OS
catalog_chunk_size_bytes_metric_eq(registry, "object_store", expected_parquet_size);
@ -2128,7 +2128,7 @@ mod tests {
// Read buffer + Parquet chunk size
catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 0);
catalog_chunk_size_bytes_metric_eq(registry, "read_buffer", 1700);
catalog_chunk_size_bytes_metric_eq(registry, "object_store", 1248);
catalog_chunk_size_bytes_metric_eq(registry, "object_store", 1259);
// All the chunks should have different IDs
assert_ne!(mb_chunk.id(), rb_chunk.id());
@ -2245,7 +2245,7 @@ mod tests {
let registry = test_db.metric_registry.as_ref();
// Read buffer + Parquet chunk size
let object_store_bytes = 1248;
let object_store_bytes = 1259;
catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 0);
catalog_chunk_size_bytes_metric_eq(registry, "read_buffer", 1700);
catalog_chunk_size_bytes_metric_eq(registry, "object_store", object_store_bytes);
@ -2725,8 +2725,8 @@ mod tests {
id: chunk_summaries[0].id,
storage: ChunkStorage::ReadBufferAndObjectStore,
lifecycle_action,
memory_bytes: 4088, // size of RB and OS chunks
object_store_bytes: 1557, // size of parquet file
memory_bytes: 4102, // size of RB and OS chunks
object_store_bytes: 1573, // size of parquet file
row_count: 2,
time_of_last_access: None,
time_of_first_write: Time::from_timestamp_nanos(1),
@ -2776,7 +2776,7 @@ mod tests {
assert_eq!(db.catalog.metrics().memory().mutable_buffer(), 2486 + 1463);
assert_eq!(db.catalog.metrics().memory().read_buffer(), 2550);
assert_eq!(db.catalog.metrics().memory().object_store(), 1538);
assert_eq!(db.catalog.metrics().memory().object_store(), 1552);
}
#[tokio::test]

View File

@ -91,8 +91,8 @@ pub(crate) fn compact_chunks(
let summaries = query_chunks
.iter()
.map(|x| x.summary().expect("Chunk should have summary"));
let key = compute_sort_key(summaries);
let key_str = format!("\"{}\"", key); // for logging
let sort_key = compute_sort_key(summaries);
// build schema
//
@ -102,9 +102,11 @@ pub(crate) fn compact_chunks(
// partitions).
let schema = merge_schemas(&query_chunks);
// Cannot move query_chunks as the sort key borrows the column names
let (schema, plan) =
ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?;
let plan = ReorgPlanner::new().compact_plan(
Arc::clone(&schema),
query_chunks,
sort_key.clone(),
)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
let stream = ctx.execute_stream(physical_plan).await?;
@ -148,6 +150,7 @@ pub(crate) fn compact_chunks(
delete_predicates,
time_of_first_write,
time_of_last_write,
sort_key: Some(sort_key.clone()),
};
let (_, chunk) = partition.create_rub_chunk(None, max_order, metadata, rb_chunk);
@ -158,7 +161,7 @@ pub(crate) fn compact_chunks(
info!(input_chunks=chunk_ids.len(), %rub_row_groups,
%input_rows, %output_rows,
sort_key=%key_str, compaction_took = ?elapsed, fut_execution_duration= ?fut_now.elapsed(),
%sort_key, compaction_took = ?elapsed, fut_execution_duration= ?fut_now.elapsed(),
rows_per_sec=?throughput, "chunk(s) compacted");
let snapshot = DbChunk::snapshot(&chunk.read());

View File

@ -33,6 +33,7 @@ use parquet_file::{
};
use persistence_windows::checkpoint::{DatabaseCheckpoint, PartitionCheckpoint};
use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta};
use schema::sort::SortKey;
use schema::Schema;
use snafu::{OptionExt, ResultExt};
use std::{
@ -85,6 +86,7 @@ pub(crate) fn compact_object_store_chunks(
// The partition will be unlocked after the chunks are marked and snapshotted
let compacting_os_chunks =
mark_chunks_to_compact(partition, chunks, &registration, compacted_chunk_id)?;
let delete_predicates_before = compacting_os_chunks.delete_predicates;
let fut = async move {
@ -116,7 +118,9 @@ pub(crate) fn compact_object_store_chunks(
time_of_first_write: compacting_os_chunks.time_of_first_write,
time_of_last_write: compacting_os_chunks.time_of_last_write,
chunk_order: compacting_os_chunks.max_order,
sort_key: Some(sort_key.clone()),
};
let compacted_and_persisted_chunk = persist_stream_to_chunk(
&db,
&partition_addr,
@ -145,6 +149,7 @@ pub(crate) fn compact_object_store_chunks(
compacted_and_persisted_chunk.clone(),
compacting_os_chunks.partition,
delete_predicates_before,
sort_key.clone(),
)
.await;
@ -339,16 +344,15 @@ async fn compact_chunks(db: &Db, query_chunks: &[Arc<DbChunk>]) -> Result<Compac
.iter()
.map(|x| x.summary().expect("Chunk should have summary"));
let sort_key = compute_sort_key(summaries);
let sort_key_str = format!("\"{}\"", sort_key); // for logging
// Merge schema of the compacting chunks
let merged_schema = merge_schemas(query_chunks);
// Build compact query plan
let (plan_schema, plan) = ReorgPlanner::new().compact_plan(
let plan = ReorgPlanner::new().compact_plan(
Arc::clone(&merged_schema),
query_chunks.iter().map(Arc::clone),
sort_key,
sort_key.clone(),
)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
@ -357,8 +361,8 @@ async fn compact_chunks(db: &Db, query_chunks: &[Arc<DbChunk>]) -> Result<Compac
Ok(CompactedStream {
stream,
schema: plan_schema,
sort_key: sort_key_str,
schema: merged_schema,
sort_key,
})
}
@ -366,7 +370,7 @@ async fn compact_chunks(db: &Db, query_chunks: &[Arc<DbChunk>]) -> Result<Compac
struct CompactedStream {
stream: SendableRecordBatchStream,
schema: Arc<Schema>,
sort_key: String,
sort_key: SortKey,
}
/// Persist a provided stream to a new OS chunk
@ -443,6 +447,7 @@ async fn update_in_memory_catalog(
parquet_chunk: Option<Arc<ParquetChunk>>,
partition: Arc<RwLock<Partition>>,
delete_predicates_before: HashSet<Arc<DeletePredicate>>,
sort_key: SortKey,
) -> Option<Arc<DbChunk>> {
// Acquire write lock to drop the old chunks while also getting delete predicates added during compaction
let mut partition = partition.write();
@ -474,6 +479,7 @@ async fn update_in_memory_catalog(
table_summary: Arc::clone(parquet_chunk.table_summary()),
schema: parquet_chunk.schema(),
delete_predicates,
sort_key: Some(sort_key),
time_of_first_write: iox_metadata.time_of_first_write,
time_of_last_write: iox_metadata.time_of_last_write,
};

View File

@ -95,6 +95,8 @@ pub fn persist_chunks(
return Ok(None);
}
let query_chunk_len = query_chunks.len();
let time_of_first_write =
time_of_first_write.expect("Should have had a first write somewhere");
@ -104,17 +106,16 @@ pub fn persist_chunks(
let summaries = query_chunks
.iter()
.map(|x| x.summary().expect("Chunk should have summary"));
let key = compute_sort_key(summaries);
let key_str = format!("\"{}\"", key); // for logging
let sort_key = compute_sort_key(summaries);
// build schema
let schema = merge_schemas(&query_chunks);
// Cannot move query_chunks as the sort key borrows the column names
let (schema, plan) = ReorgPlanner::new().split_plan(
schema,
query_chunks.iter().map(Arc::clone),
key,
let plan = ReorgPlanner::new().split_plan(
Arc::clone(&schema),
query_chunks,
sort_key.clone(),
flush_timestamp,
)?;
@ -170,6 +171,7 @@ pub fn persist_chunks(
delete_predicates: delete_predicates.clone(),
time_of_first_write,
time_of_last_write,
sort_key: Some(sort_key.clone()),
};
partition_write.create_rub_chunk(None, max_order, metadata, remainder);
@ -193,6 +195,7 @@ pub fn persist_chunks(
delete_predicates,
time_of_first_write,
time_of_last_write,
sort_key: Some(sort_key.clone()),
};
let (new_chunk_id, new_chunk) = partition_write.create_rub_chunk(
@ -220,9 +223,9 @@ pub fn persist_chunks(
// input rows per second
let throughput = (input_rows as u128 * 1_000_000_000) / elapsed.as_nanos();
info!(input_chunks=query_chunks.len(),
info!(input_chunks=query_chunk_len,
input_rows, persisted_rows, remainder_rows,
sort_key=%key_str, compaction_took = ?elapsed,
sort_key=%sort_key, compaction_took = ?elapsed,
?max_persistable_timestamp,
rows_per_sec=?throughput, "chunk(s) persisted");

View File

@ -60,6 +60,7 @@ pub(super) fn write_chunk_to_object_store(
let table_name = Arc::clone(&addr.table_name);
let partition_key = Arc::clone(&addr.partition_key);
let chunk_order = chunk.order();
let sort_key = chunk.sort_key().cloned();
let delete_predicates = chunk.delete_predicates().to_vec();
let (tracker, registration) = db.jobs.register(Job::WriteChunk {
@ -134,6 +135,7 @@ pub(super) fn write_chunk_to_object_store(
time_of_first_write,
time_of_last_write,
chunk_order,
sort_key,
};
let written_result = timeout(

View File

@ -255,6 +255,7 @@ impl CatalogState for Loader {
delete_predicates,
time_of_first_write: iox_md.time_of_first_write,
time_of_last_write: iox_md.time_of_last_write,
sort_key: iox_md.sort_key,
};
partition.insert_object_store_only_chunk(

View File

@ -12,6 +12,7 @@ use data_types::{
error::ErrorLogger,
partition_metadata::{ColumnSummary, PartitionSummary, TableSummary},
};
use schema::sort::SortKey;
use crate::{
catalog::Catalog,
@ -130,7 +131,7 @@ impl IoxSystemTable for ChunkColumnsTable {
(
chunk.table_summary(),
chunk.detailed_summary(),
chunk.schema(),
chunk.sort_key().cloned(),
)
});
@ -159,7 +160,7 @@ fn chunk_columns_schema() -> SchemaRef {
fn assemble_chunk_columns(
schema: SchemaRef,
chunk_summaries: Vec<(Arc<TableSummary>, DetailedChunkSummary, Arc<schema::Schema>)>,
chunk_summaries: Vec<(Arc<TableSummary>, DetailedChunkSummary, Option<SortKey>)>,
) -> Result<RecordBatch> {
// Create an iterator over each column in each table in each chunk
// so we can build `chunk_columns` column by column
@ -171,16 +172,14 @@ fn assemble_chunk_columns(
let rows = chunk_summaries
.iter()
.map(|(table_summary, chunk_summary, schema)| {
let sort_key = schema.sort_key().unwrap_or_default();
.map(|(table_summary, chunk_summary, sort_key)| {
table_summary
.columns
.iter()
.map(move |column_summary| EachColumn {
chunk_summary,
column_summary,
column_sort: sort_key.get(&column_summary.name),
column_sort: sort_key.as_ref().and_then(|x| x.get(&column_summary.name)),
})
})
.flatten()
@ -300,7 +299,6 @@ mod tests {
chunk_metadata::{ChunkColumnSummary, ChunkId, ChunkOrder, ChunkStorage, ChunkSummary},
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics},
};
use schema::builder::SchemaBuilder;
use schema::sort::SortKey;
use time::Time;
@ -367,25 +365,7 @@ mod tests {
fn test_assemble_chunk_columns() {
let lifecycle_action = None;
let mut sort1 = SortKey::default();
sort1.push("c2", Default::default());
sort1.push("c1", Default::default());
let schema1 = SchemaBuilder::new()
.field("c1", DataType::Utf8)
.field("c2", DataType::Float64)
.build_with_sort_key(&sort1)
.unwrap();
let schema2 = SchemaBuilder::new()
.field("c1", DataType::Float64)
.build()
.unwrap();
let schema3 = SchemaBuilder::new()
.field("c3", DataType::Float64)
.build()
.unwrap();
let sort = SortKey::from_columns(vec!["c2", "c1"]);
let summaries = vec![
(
@ -435,7 +415,7 @@ mod tests {
},
],
},
Arc::new(schema1),
Some(sort),
),
(
Arc::new(TableSummary {
@ -466,7 +446,7 @@ mod tests {
memory_bytes: 100,
}],
},
Arc::new(schema2),
None,
),
(
Arc::new(TableSummary {
@ -497,7 +477,7 @@ mod tests {
memory_bytes: 200,
}],
},
Arc::new(schema3),
None,
),
];

View File

@ -39,6 +39,9 @@ message IoxMetadata {
// Order of this chunk relative to other overlapping chunks.
uint32 chunk_order = 10;
// The sort key of this chunk
SortKey sort_key = 12;
}
// Partition checkpoint.
@ -81,3 +84,21 @@ message OptionalMinMaxSequence {
OptionalUint64 min = 1;
uint64 max = 2;
}
message SortKey {
// A sort expression
message Expr {
// The name of the column
string column = 1;
// Whether the data is sorted in descending order
bool descending = 2;
// Whether the data is sorted with nulls first
bool nulls_first = 3;
}
repeated Expr expressions = 1;
}

View File

@ -259,7 +259,11 @@ async fn create_readbuffer_chunk(fixture: &ServerFixture, db_name: &str) -> Chun
let partition_key = "cpu";
let table_name = "cpu";
let lp_lines = vec!["cpu,region=west user=23.2 100"];
let lp_lines = vec![
"cpu,region=west,host=a user=23.2 100",
"cpu,region=west,host=b user=34.2 100",
"cpu,region=east,host=c user=54.2 100",
];
write_client
.write_lp(db_name, lp_lines.join("\n"), 0)
@ -310,7 +314,7 @@ async fn create_readbuffer_chunk(fixture: &ServerFixture, db_name: &str) -> Chun
async fn assert_chunk_query_works(fixture: &ServerFixture, db_name: &str) {
let mut client = fixture.flight_client();
let sql_query = "select region, user, time from cpu";
let sql_query = "select region, host, user, time from cpu";
let batches = client
.perform_query(db_name, sql_query)
@ -321,12 +325,35 @@ async fn assert_chunk_query_works(fixture: &ServerFixture, db_name: &str) {
.unwrap();
let expected_read_data = vec![
"+--------+------+--------------------------------+",
"| region | user | time |",
"+--------+------+--------------------------------+",
"| west | 23.2 | 1970-01-01T00:00:00.000000100Z |",
"+--------+------+--------------------------------+",
"+--------+------+------+--------------------------------+",
"| region | host | user | time |",
"+--------+------+------+--------------------------------+",
"| east | c | 54.2 | 1970-01-01T00:00:00.000000100Z |",
"| west | a | 23.2 | 1970-01-01T00:00:00.000000100Z |",
"| west | b | 34.2 | 1970-01-01T00:00:00.000000100Z |",
"+--------+------+------+--------------------------------+",
];
assert_batches_eq!(expected_read_data, &batches);
let batches = client
.perform_query(db_name, "select column_name, row_count, null_count, min_value, max_value, sort_ordinal from system.chunk_columns")
.await
.unwrap()
.collect()
.await
.unwrap();
let expected_columns = vec![
"+-------------+-----------+------------+-----------+-----------+--------------+",
"| column_name | row_count | null_count | min_value | max_value | sort_ordinal |",
"+-------------+-----------+------------+-----------+-----------+--------------+",
"| host | 3 | 0 | a | c | 1 |",
"| region | 3 | 0 | east | west | 0 |",
"| time | 3 | 0 | 100 | 100 | 2 |",
"| user | 3 | 0 | 23.2 | 54.2 | |",
"+-------------+-----------+------------+-----------+-----------+--------------+",
];
assert_batches_eq!(expected_columns, &batches);
}

View File

@ -27,7 +27,7 @@ async fn test_operations() {
write_client
.write_lp(&db_name1, lp_lines.join("\n"), 0)
.await
.expect("write succeded");
.expect("write succeeded");
let chunks = list_chunks(&fixture, &db_name1).await;
let chunk_id = chunks[0].id;

View File

@ -129,6 +129,10 @@ impl QueryChunkMeta for QueryableBatch {
merge_record_batch_schemas(&batches)
}
fn sort_key(&self) -> Option<&SortKey> {
None
}
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
self.delete_predicates.as_ref()
}
@ -270,16 +274,6 @@ impl QueryChunk for QueryableBatch {
Ok(Box::pin(stream))
}
/// Returns true if data of this chunk is sorted
fn is_sorted_on_pk(&self) -> bool {
false
}
/// Returns the sort key of the chunk if any
fn sort_key(&self) -> Option<SortKey<'_>> {
None
}
/// Returns chunk type
fn chunk_type(&self) -> &str {
"PersistingBatch"

View File

@ -490,6 +490,7 @@ File {
chunk_order: ChunkOrder(
1,
),
sort_key: None,
},
),
schema: Ok(

View File

@ -380,6 +380,7 @@ mod tests {
time_of_first_write: Time::from_timestamp_nanos(0),
time_of_last_write: Time::from_timestamp_nanos(0),
chunk_order: ChunkOrder::new(5).unwrap(),
sort_key: None,
};
let stream: SendableRecordBatchStream = Box::pin(MemoryStream::new(record_batches));
let (path, file_size_bytes, metadata) = storage

View File

@ -113,6 +113,7 @@ use persistence_windows::{
min_max_sequence::OptionalMinMaxSequence,
};
use prost::Message;
use schema::sort::{SortKey, SortKeyBuilder};
use schema::{InfluxColumnType, InfluxFieldType, Schema};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::{collections::BTreeMap, convert::TryInto, sync::Arc};
@ -293,6 +294,9 @@ pub struct IoxMetadataOld {
/// Order of this chunk relative to other overlapping chunks.
pub chunk_order: ChunkOrder,
/// Sort key of this chunk
pub sort_key: Option<SortKey>,
}
impl IoxMetadataOld {
@ -332,6 +336,7 @@ impl IoxMetadataOld {
.context(IoxMetadataFieldMissingSnafu {
field: "partition_checkpoint",
})?;
let sequencer_numbers = proto_partition_checkpoint
.sequencer_numbers
.into_iter()
@ -345,6 +350,7 @@ impl IoxMetadataOld {
}
})
.collect::<Result<BTreeMap<u32, OptionalMinMaxSequence>>>()?;
let flush_timestamp = decode_timestamp_from_field(
proto_partition_checkpoint.flush_timestamp,
"partition_checkpoint.flush_timestamp",
@ -363,6 +369,7 @@ impl IoxMetadataOld {
.context(IoxMetadataFieldMissingSnafu {
field: "database_checkpoint",
})?;
let sequencer_numbers = proto_database_checkpoint
.sequencer_numbers
.into_iter()
@ -376,8 +383,17 @@ impl IoxMetadataOld {
}
})
.collect::<Result<BTreeMap<u32, OptionalMinMaxSequence>>>()?;
let database_checkpoint = DatabaseCheckpoint::new(sequencer_numbers);
let sort_key = proto_msg.sort_key.map(|proto_key| {
let mut builder = SortKeyBuilder::with_capacity(proto_key.expressions.len());
for expr in proto_key.expressions {
builder = builder.with_col_opts(expr.column, expr.descending, expr.nulls_first)
}
builder.build()
});
Ok(Self {
creation_timestamp,
time_of_first_write,
@ -395,6 +411,7 @@ impl IoxMetadataOld {
field: "chunk_order".to_string(),
}
})?,
sort_key,
})
}
@ -442,6 +459,20 @@ impl IoxMetadataOld {
.collect(),
};
let sort_key = self
.sort_key
.as_ref()
.map(|key| preserved_catalog::SortKey {
expressions: key
.iter()
.map(|(name, options)| preserved_catalog::sort_key::Expr {
column: name.to_string(),
descending: options.descending,
nulls_first: options.nulls_first,
})
.collect(),
});
let proto_msg = preserved_catalog::IoxMetadata {
version: METADATA_VERSION,
creation_timestamp: Some(self.creation_timestamp.date_time().into()),
@ -453,6 +484,7 @@ impl IoxMetadataOld {
partition_checkpoint: Some(proto_partition_checkpoint),
database_checkpoint: Some(proto_database_checkpoint),
chunk_order: self.chunk_order.get(),
sort_key,
};
let mut buf = Vec::new();
@ -1203,6 +1235,7 @@ mod tests {
time_of_first_write: Time::from_timestamp(3234, 0),
time_of_last_write: Time::from_timestamp(3234, 3456),
chunk_order: ChunkOrder::new(5).unwrap(),
sort_key: None,
};
let proto_bytes = metadata.to_protobuf().unwrap();

View File

@ -420,6 +420,7 @@ mod tests {
time_of_first_write: Time::from_timestamp_nanos(456),
time_of_last_write: Time::from_timestamp_nanos(43069346),
chunk_order: ChunkOrder::new(5).unwrap(),
sort_key: None,
};
// create parquet file
@ -495,6 +496,7 @@ mod tests {
time_of_first_write: Time::from_timestamp_nanos(234),
time_of_last_write: Time::from_timestamp_nanos(4784),
chunk_order: ChunkOrder::new(5).unwrap(),
sort_key: None,
};
let (path, _file_size_bytes, _metadata) = storage

View File

@ -92,6 +92,7 @@ impl ChunkGenerator {
database_checkpoint,
time_of_first_write: Time::from_timestamp(30, 40),
time_of_last_write: Time::from_timestamp(50, 60),
sort_key: None,
};
let (record_batches, schema, column_summaries, rows) = match self.config {

View File

@ -6,7 +6,6 @@ pub mod sql;
mod test {
use std::sync::Arc;
use arrow::compute::SortOptions;
use datafusion::physical_plan::{
metrics::{self, MetricValue},
ExecutionPlan, ExecutionPlanVisitor,
@ -60,18 +59,10 @@ mod test {
#[tokio::test]
async fn test_metrics() {
let (schema, chunks) = get_test_chunks();
let mut sort_key = SortKey::with_capacity(1);
sort_key.push(
"time",
SortOptions {
descending: false,
nulls_first: false,
},
);
let sort_key = SortKey::from_columns(vec!["time", "tag1"]);
// Use a split plan as it has StreamSplitExec, DeduplicateExec and IOxReadFilternode
let (_, split_plan) = ReorgPlanner::new()
let split_plan = ReorgPlanner::new()
.split_plan(schema, chunks, sort_key, 1000)
.expect("created compact plan");

View File

@ -3,7 +3,7 @@
use std::sync::Arc;
use datafusion::logical_plan::{col, lit_timestamp_nano, Expr, LogicalPlan, LogicalPlanBuilder};
use observability_deps::tracing::{debug, trace};
use observability_deps::tracing::debug;
use schema::{sort::SortKey, Schema, TIME_COLUMN_NAME};
use crate::{
@ -112,8 +112,8 @@ impl ReorgPlanner {
&self,
schema: Arc<Schema>,
chunks: I,
output_sort: SortKey<'_>,
) -> Result<(Arc<Schema>, LogicalPlan)>
sort_key: SortKey,
) -> Result<LogicalPlan>
where
C: QueryChunk + 'static,
I: IntoIterator<Item = Arc<C>>,
@ -121,29 +121,13 @@ impl ReorgPlanner {
let ScanPlan {
plan_builder,
provider,
} = self.sorted_scan_plan(schema, chunks)?;
let mut schema = provider.iox_schema();
// Set the sort_key of the schema to the compacted chunk's sort key
// Try to do this only if the sort key changes so we avoid unnecessary schema copies.
trace!(input_schema=?schema, "Setting sort key on schema for compact plan");
if schema
.sort_key()
.map_or(true, |existing_key| existing_key != output_sort)
{
let mut schema_cloned = schema.as_ref().clone();
schema_cloned.set_sort_key(&output_sort);
schema = Arc::new(schema_cloned);
}
trace!(output_schema=?schema, "Setting sort key on schema for compact plan");
} = self.sorted_scan_plan(schema, chunks, sort_key)?;
let plan = plan_builder.build().context(BuildingPlanSnafu)?;
debug!(table_name=provider.table_name(), plan=%plan.display_indent_schema(),
"created compact plan for table");
Ok((schema, plan))
Ok(plan)
}
/// Creates an execution plan for the SPLIT operations which does the following:
@ -194,9 +178,9 @@ impl ReorgPlanner {
&self,
schema: Arc<Schema>,
chunks: I,
output_sort: SortKey<'_>,
sort_key: SortKey,
split_time: i64,
) -> Result<(Arc<Schema>, LogicalPlan)>
) -> Result<LogicalPlan>
where
C: QueryChunk + 'static,
I: IntoIterator<Item = Arc<C>>,
@ -204,46 +188,33 @@ impl ReorgPlanner {
let ScanPlan {
plan_builder,
provider,
} = self.sorted_scan_plan(schema, chunks)?;
let mut schema = provider.iox_schema();
// Set output_sort as the sort_key of the schema
// Try to do this only if the sort key changes so we avoid unnecessary schema copies.
trace!(input_schema=?schema, "Setting sort key on schema for split plan");
if schema
.sort_key()
.map_or(true, |existing_key| existing_key != output_sort)
{
let mut schema_cloned = schema.as_ref().clone();
schema_cloned.set_sort_key(&output_sort);
schema = Arc::new(schema_cloned);
}
trace!(output_schema=?schema, "Setting sort key on schema for split plan");
} = self.sorted_scan_plan(schema, chunks, sort_key)?;
// time <= split_time
let split_expr = col(TIME_COLUMN_NAME).lt_eq(lit_timestamp_nano(split_time));
let plan = plan_builder.build().context(BuildingPlanSnafu)?;
let plan = make_stream_split(plan, split_expr);
debug!(table_name=provider.table_name(), plan=%plan.display_indent_schema(),
"created split plan for table");
Ok((schema, plan))
Ok(plan)
}
/// Creates a scan plan for the given set of chunks.
///
/// Output data of the scan will be deduplicated sorted if `sort=true` on
/// the optimal sort order of the chunks' PK columns (tags and time).
///
/// The optimal sort order is computed based on the PK columns cardinality
/// that will be best for RLE encoding.
///
/// Refer to query::provider::build_scan_plan for the detail of the plan
///
fn sorted_scan_plan<C, I>(&self, schema: Arc<Schema>, chunks: I) -> Result<ScanPlan<C>>
fn sorted_scan_plan<C, I>(
&self,
schema: Arc<Schema>,
chunks: I,
sort_key: SortKey,
) -> Result<ScanPlan<C>>
where
C: QueryChunk + 'static,
I: IntoIterator<Item = Arc<C>>,
@ -256,12 +227,11 @@ impl ReorgPlanner {
let table_name = &table_name;
// Prepare the plan for the table
let mut builder = ProviderBuilder::new(table_name, schema);
// Tell the scan of this provider to sort its output on the chunks' PK
builder.ensure_pk_sort();
// There are no predicates in these plans, so no need to prune them
builder = builder.add_no_op_pruner();
let mut builder = ProviderBuilder::new(table_name, schema)
// There are no predicates in these plans, so no need to prune them
.add_no_op_pruner()
// Tell the scan of this provider to sort its output on the chunks' PK
.with_sort_key(sort_key);
for chunk in chunks {
// check that it is consistent with this table_name
@ -278,6 +248,7 @@ impl ReorgPlanner {
let provider = builder
.build()
.context(CreatingProviderSnafu { table_name })?;
let provider = Arc::new(provider);
// Scan all columns
@ -301,10 +272,10 @@ struct ScanPlan<C: QueryChunk + 'static> {
#[cfg(test)]
mod test {
use arrow::compute::SortOptions;
use arrow_util::assert_batches_eq;
use datafusion_util::{test_collect, test_collect_partition};
use schema::merge::SchemaMerger;
use schema::sort::SortKeyBuilder;
use crate::{
exec::{Executor, ExecutorType},
@ -413,23 +384,12 @@ mod test {
let (schema, chunks) = get_test_chunks().await;
let mut sort_key = SortKey::with_capacity(2);
sort_key.push(
"tag1",
SortOptions {
descending: true,
nulls_first: true,
},
);
sort_key.push(
"time",
SortOptions {
descending: false,
nulls_first: false,
},
);
let sort_key = SortKeyBuilder::with_capacity(2)
.with_col_opts("tag1", true, true)
.with_col_opts(TIME_COLUMN_NAME, false, false)
.build();
let (_, compact_plan) = ReorgPlanner::new()
let compact_plan = ReorgPlanner::new()
.compact_plan(schema, chunks, sort_key)
.expect("created compact plan");
@ -453,14 +413,14 @@ mod test {
"+-----------+------------+------+--------------------------------+",
"| field_int | field_int2 | tag1 | time |",
"+-----------+------------+------+--------------------------------+",
"| 100 | | AL | 1970-01-01T00:00:00.000000050Z |",
"| 70 | | CT | 1970-01-01T00:00:00.000000100Z |",
"| 1000 | 1000 | WA | 1970-01-01T00:00:00.000028Z |",
"| 50 | 50 | VT | 1970-01-01T00:00:00.000210Z |",
"| 70 | 70 | UT | 1970-01-01T00:00:00.000220Z |",
"| 1000 | | MT | 1970-01-01T00:00:00.000001Z |",
"| 5 | | MT | 1970-01-01T00:00:00.000005Z |",
"| 10 | | MT | 1970-01-01T00:00:00.000007Z |",
"| 70 | 70 | UT | 1970-01-01T00:00:00.000220Z |",
"| 50 | 50 | VT | 1970-01-01T00:00:00.000210Z |",
"| 1000 | 1000 | WA | 1970-01-01T00:00:00.000028Z |",
"| 70 | | CT | 1970-01-01T00:00:00.000000100Z |",
"| 100 | | AL | 1970-01-01T00:00:00.000000050Z |",
"+-----------+------------+------+--------------------------------+",
];
@ -474,17 +434,13 @@ mod test {
// the operator is tested in its own module.
let (schema, chunks) = get_test_chunks().await;
let mut sort_key = SortKey::with_capacity(1);
sort_key.push(
"time",
SortOptions {
descending: false,
nulls_first: false,
},
);
let sort_key = SortKeyBuilder::with_capacity(2)
.with_col_opts("time", false, false)
.with_col_opts("tag1", false, true)
.build();
// split on 1000 should have timestamps 1000, 5000, and 7000
let (_, split_plan) = ReorgPlanner::new()
let split_plan = ReorgPlanner::new()
.split_plan(schema, chunks, sort_key, 1000)
.expect("created compact plan");
@ -519,16 +475,16 @@ mod test {
let batches1 = test_collect_partition(physical_plan, 1).await;
// Sorted on state (tag1) ASC and time
// Sorted on time
let expected = vec![
"+-----------+------------+------+-----------------------------+",
"| field_int | field_int2 | tag1 | time |",
"+-----------+------------+------+-----------------------------+",
"| 5 | | MT | 1970-01-01T00:00:00.000005Z |",
"| 10 | | MT | 1970-01-01T00:00:00.000007Z |",
"| 70 | 70 | UT | 1970-01-01T00:00:00.000220Z |",
"| 50 | 50 | VT | 1970-01-01T00:00:00.000210Z |",
"| 1000 | 1000 | WA | 1970-01-01T00:00:00.000028Z |",
"| 50 | 50 | VT | 1970-01-01T00:00:00.000210Z |",
"| 70 | 70 | UT | 1970-01-01T00:00:00.000220Z |",
"+-----------+------------+------+-----------------------------+",
];

View File

@ -34,6 +34,7 @@ pub mod statistics;
pub mod util;
pub use exec::context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
use schema::sort::SortKeyBuilder;
/// Trait for an object (designed to be a Chunk) which can provide
/// metadata
@ -44,6 +45,9 @@ pub trait QueryChunkMeta: Sized {
/// return a reference to the summary of the data held in this chunk
fn schema(&self) -> Arc<Schema>;
/// return a reference to the sort key if any
fn sort_key(&self) -> Option<&SortKey>;
/// return a reference to delete predicates of the chunk
fn delete_predicates(&self) -> &[Arc<DeletePredicate>];
@ -207,12 +211,6 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync {
selection: Selection<'_>,
) -> Result<SendableRecordBatchStream, Self::Error>;
/// Returns true if data of this chunk is sorted
fn is_sorted_on_pk(&self) -> bool;
/// Returns the sort key of the chunk if any
fn sort_key(&self) -> Option<SortKey<'_>>;
/// Returns chunk type which is either MUB, RUB, OS
fn chunk_type(&self) -> &str;
@ -233,6 +231,10 @@ where
self.as_ref().schema()
}
fn sort_key(&self) -> Option<&SortKey> {
self.as_ref().sort_key()
}
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
let pred = self.as_ref().delete_predicates();
debug!(?pred, "Delete predicate in QueryChunkMeta");
@ -251,19 +253,14 @@ where
chunks.iter().all(|c| c.summary().is_some())
}
pub fn compute_sort_key_for_chunks<'a, C>(schema: &'a Schema, chunks: &'a [C]) -> SortKey<'a>
pub fn compute_sort_key_for_chunks<C>(schema: &Schema, chunks: &[C]) -> SortKey
where
C: QueryChunkMeta,
{
if !chunks_have_stats(chunks) {
// chunks have not enough stats, return its pk that is
// sorted lexicographically but time column always last
let pk = schema.primary_key();
let mut sort_key = SortKey::with_capacity(pk.len());
for col in pk {
sort_key.push(col, Default::default())
}
sort_key
SortKey::from_columns(schema.primary_key())
} else {
let summaries = chunks
.iter()
@ -276,7 +273,7 @@ where
///
/// In the absence of more precise information, this should yield a
/// good ordering for RLE compression
pub fn compute_sort_key<'a>(summaries: impl Iterator<Item = &'a TableSummary>) -> SortKey<'a> {
pub fn compute_sort_key<'a>(summaries: impl Iterator<Item = &'a TableSummary>) -> SortKey {
let mut cardinalities: HashMap<&str, u64> = Default::default();
for summary in summaries {
for column in &summary.columns {
@ -298,11 +295,13 @@ pub fn compute_sort_key<'a>(summaries: impl Iterator<Item = &'a TableSummary>) -
// Sort by (cardinality, column_name) to have deterministic order if same cardinality
cardinalities.sort_by_key(|x| (x.1, x.0));
let mut key = SortKey::with_capacity(cardinalities.len() + 1);
let mut builder = SortKeyBuilder::with_capacity(cardinalities.len() + 1);
for (col, _) in cardinalities {
key.push(col, Default::default())
builder = builder.with_col(col)
}
key.push(TIME_COLUMN_NAME, Default::default());
builder = builder.with_col(TIME_COLUMN_NAME);
let key = builder.build();
trace!(computed_sort_key=?key, "Value of sort key from compute_sort_key");

File diff suppressed because it is too large Load Diff

View File

@ -235,6 +235,9 @@ pub struct TestChunk {
/// Order of this chunk relative to other overlapping chunks.
order: ChunkOrder,
/// The sort key of this chunk
sort_key: Option<SortKey>,
}
/// Implements a method for adding a column with default stats
@ -310,6 +313,7 @@ impl TestChunk {
predicate_match: Default::default(),
delete_predicates: Default::default(),
order: ChunkOrder::MIN,
sort_key: None,
}
}
@ -858,13 +862,11 @@ impl TestChunk {
}
/// Set the sort key for this chunk
pub fn with_sort_key(mut self, sort_key: &SortKey<'_>) -> Self {
let mut merger = SchemaMerger::new();
merger = merger
.merge(self.schema.as_ref())
.expect("merging was successful");
self.schema = Arc::new(merger.build_with_sort_key(sort_key));
self
pub fn with_sort_key(self, sort_key: SortKey) -> Self {
Self {
sort_key: Some(sort_key),
..self
}
}
/// Returns all columns of the table
@ -929,16 +931,6 @@ impl QueryChunk for TestChunk {
Ok(stream_from_batches(batches))
}
/// Returns true if data of this chunk is sorted
fn is_sorted_on_pk(&self) -> bool {
false
}
/// Returns the sort key of the chunk if any
fn sort_key(&self) -> Option<SortKey<'_>> {
None
}
fn chunk_type(&self) -> &str {
"Test Chunk"
}
@ -999,6 +991,10 @@ impl QueryChunkMeta for TestChunk {
Arc::clone(&self.schema)
}
fn sort_key(&self) -> Option<&SortKey> {
self.sort_key.as_ref()
}
// return a reference to delete predicates of the chunk
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
let pred = &self.delete_predicates;

View File

@ -55,22 +55,23 @@ pub fn arrow_pk_sort_exprs(
}
pub fn arrow_sort_key_exprs(
sort_key: &SortKey<'_>,
sort_key: &SortKey,
input_schema: &ArrowSchema,
) -> Vec<PhysicalSortExpr> {
let mut sort_exprs = vec![];
for (key, options) in sort_key.iter() {
let expr = physical_col(key, input_schema).expect("sort key column in schema");
sort_exprs.push(PhysicalSortExpr {
expr,
options: SortOptions {
descending: options.descending,
nulls_first: options.nulls_first,
},
});
}
sort_exprs
sort_key
.iter()
.flat_map(|(key, options)| {
// Skip over missing columns
let expr = physical_col(key, input_schema).ok()?;
Some(PhysicalSortExpr {
expr,
options: SortOptions {
descending: options.descending,
nulls_first: options.nulls_first,
},
})
})
.collect()
}
/// Build a datafusion physical expression from its logical one

View File

@ -17,6 +17,7 @@ async fn run_table_schema_test_case<D>(
selection: Selection<'_>,
table_name: &str,
expected_schema: Schema,
expected_sort_key: Option<&SortKey>,
) where
D: DbSetup,
{
@ -48,6 +49,8 @@ async fn run_table_schema_test_case<D>(
expected_schema,
actual_schema
);
assert_eq!(chunk.sort_key(), expected_sort_key);
}
}
assert!(
@ -59,7 +62,7 @@ async fn run_table_schema_test_case<D>(
#[tokio::test]
async fn list_schema_cpu_all_mub() {
// we expect columns to come out in lexographic order by name
// we expect columns to come out in lexicographic order by name
let expected_schema = SchemaBuilder::new()
.tag("region")
.timestamp()
@ -72,23 +75,22 @@ async fn list_schema_cpu_all_mub() {
Selection::All,
"cpu",
expected_schema,
None,
)
.await;
}
#[tokio::test]
async fn list_schema_cpu_all_rub() {
// we expect columns to come out in lexographic order by name
// we expect columns to come out in lexicographic order by name
// The schema of RUB includes sort key
let mut sort_key = SortKey::with_capacity(2);
sort_key.push("region", Default::default());
sort_key.push(TIME_COLUMN_NAME, Default::default());
let sort_key = SortKey::from_columns(vec!["region", TIME_COLUMN_NAME]);
let expected_schema = SchemaBuilder::new()
.tag("region")
.timestamp()
.field("user", DataType::Float64)
.build_with_sort_key(&sort_key)
.build()
.unwrap();
run_table_schema_test_case(
@ -96,6 +98,7 @@ async fn list_schema_cpu_all_rub() {
Selection::All,
"cpu",
expected_schema,
Some(&sort_key),
)
.await;
}
@ -103,15 +106,13 @@ async fn list_schema_cpu_all_rub() {
#[tokio::test]
async fn list_schema_cpu_all_rub_set_sort_key() {
// The schema of RUB includes sort key
let mut sort_key = SortKey::with_capacity(2);
sort_key.push("region", Default::default());
sort_key.push(TIME_COLUMN_NAME, Default::default());
let sort_key = SortKey::from_columns(vec!["region", TIME_COLUMN_NAME]);
let expected_schema = SchemaBuilder::new()
.tag("region")
.timestamp()
.field("user", DataType::Float64)
.build_with_sort_key(&sort_key)
.build()
.unwrap();
run_table_schema_test_case(
@ -119,6 +120,7 @@ async fn list_schema_cpu_all_rub_set_sort_key() {
Selection::All,
"cpu",
expected_schema,
Some(&sort_key),
)
.await;
@ -127,7 +129,7 @@ async fn list_schema_cpu_all_rub_set_sort_key() {
#[tokio::test]
async fn list_schema_disk_all() {
// we expect columns to come out in lexographic order by name
// we expect columns to come out in lexicographic order by name
let expected_schema = SchemaBuilder::new()
.field("bytes", DataType::Int64)
.tag("region")
@ -140,6 +142,7 @@ async fn list_schema_disk_all() {
Selection::All,
"disk",
expected_schema,
None,
)
.await;
}
@ -160,20 +163,21 @@ async fn list_schema_cpu_selection() {
selection,
"cpu",
expected_schema,
None,
)
.await;
}
#[tokio::test]
async fn list_schema_disk_selection() {
// we expect columns to come out in lexographic order by name
// we expect columns to come out in lexicographic order by name
let expected_schema = SchemaBuilder::new()
.timestamp()
.field("bytes", DataType::Int64)
.build()
.unwrap();
// Pick an order that is not lexographic
// Pick an order that is not lexicographic
let selection = Selection::Some(&["time", "bytes"]);
run_table_schema_test_case(
@ -181,13 +185,14 @@ async fn list_schema_disk_selection() {
selection,
"disk",
expected_schema,
None,
)
.await;
}
#[tokio::test]
async fn list_schema_location_all() {
// we expect columns to come out in lexographic order by name
// we expect columns to come out in lexicographic order by name
let expected_schema = SchemaBuilder::new()
.field("count", DataType::UInt64)
.timestamp()
@ -200,197 +205,7 @@ async fn list_schema_location_all() {
Selection::All,
"restaurant",
expected_schema,
None,
)
.await;
}
#[tokio::test]
async fn test_set_sort_key_valid_same_order() {
// Build the expected schema with sort key
let mut sort_key = SortKey::with_capacity(3);
sort_key.push("tag1", Default::default());
sort_key.push("time", Default::default());
sort_key.push("tag2", Default::default());
let expected_schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build_with_sort_key(&sort_key)
.unwrap();
// The same schema without sort key
let mut schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build()
.unwrap();
schema.set_sort_key(&sort_key);
assert_eq!(
expected_schema, schema,
"Schema mismatch \nExpected:\n{:#?}\nActual:\n{:#?}\n",
expected_schema, schema
);
}
#[tokio::test]
async fn test_set_sort_key_valid_different_order() {
// Build the expected schema with sort key "time, tag2, tag1"
let mut sort_key = SortKey::with_capacity(3);
sort_key.push("time", Default::default());
sort_key.push("tag2", Default::default());
sort_key.push("tag1", Default::default());
let expected_schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build_with_sort_key(&sort_key)
.unwrap();
// The same schema without sort key
let mut schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build()
.unwrap();
schema.set_sort_key(&sort_key);
assert_eq!(
expected_schema, schema,
"Schema mismatch \nExpected:\n{:#?}\nActual:\n{:#?}\n",
expected_schema, schema
);
}
#[tokio::test]
async fn test_set_sort_key_valid_subset() {
// Build the expected schema with sort key "time, tag1"
let mut sort_key = SortKey::with_capacity(2);
sort_key.push("time", Default::default());
sort_key.push("tag1", Default::default());
let expected_schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build_with_sort_key(&sort_key)
.unwrap();
// The same schema without sort key
let mut schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build()
.unwrap();
// set sort key for it
schema.set_sort_key(&sort_key);
assert_eq!(
expected_schema, schema,
"Schema mismatch \nExpected:\n{:#?}\nActual:\n{:#?}\n",
expected_schema, schema
);
}
#[tokio::test]
async fn test_set_sort_key_valid_subset_of_fully_set() {
// Build sort key "tag1, time, tag2"
let mut sort_key = SortKey::with_capacity(3);
sort_key.push("tag1", Default::default());
sort_key.push("time", Default::default());
sort_key.push("tag2", Default::default());
// The schema with sort key
let mut schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build_with_sort_key(&sort_key)
.unwrap();
// reset sort key to "tag2, time"
let mut sort_key = SortKey::with_capacity(2);
sort_key.push("tag2", Default::default());
sort_key.push("time", Default::default());
schema.set_sort_key(&sort_key);
// Expected schema with "tag2, time" as sort key
let expected_schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build_with_sort_key(&sort_key)
.unwrap();
assert_eq!(
expected_schema, schema,
"Schema mismatch \nExpected:\n{:#?}\nActual:\n{:#?}\n",
expected_schema, schema
);
}
#[tokio::test]
async fn test_set_sort_key_invalid_not_exist() {
// Build the expected schema with sort key "time"
let mut sort_key = SortKey::with_capacity(1);
sort_key.push("time", Default::default());
let expected_schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build_with_sort_key(&sort_key)
.unwrap();
// The same schema without sort key
let mut schema = SchemaBuilder::new()
.tag("tag1")
.timestamp()
.tag("tag2")
.field("field_int", DataType::Int64)
.field("field_float", DataType::Float64)
.build()
.unwrap();
// Nuild sort key that include valid "time" and invalid "no_tag"
let mut sort_key = SortKey::with_capacity(2);
sort_key.push("time", Default::default());
// invalid column
sort_key.push("not_tag", Default::default());
// The invalid key will be ignored in this function
schema.set_sort_key(&sort_key);
assert_eq!(
expected_schema, schema,
"Schema mismatch \nExpected:\n{:#?}\nActual:\n{:#?}\n",
expected_schema, schema
);
}

View File

@ -3,8 +3,6 @@ use std::convert::TryInto;
use arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField};
use snafu::{ResultExt, Snafu};
use crate::sort::SortKey;
use super::{InfluxColumnType, InfluxFieldType, Schema, TIME_COLUMN_NAME};
/// Database schema creation / validation errors.
@ -145,20 +143,11 @@ impl SchemaBuilder {
/// assert_eq!(influxdb_column_type, Some(InfluxColumnType::Timestamp));
/// ```
pub fn build(&mut self) -> Result<Schema> {
self.build_with_sort_key(&Default::default())
}
pub fn build_with_sort_key(&mut self, sort_key: &SortKey<'_>) -> Result<Schema> {
assert!(!self.finished, "build called multiple times");
self.finished = true;
Schema::new_from_parts(
self.measurement.take(),
self.fields.drain(..),
sort_key,
false,
)
.context(ValidatingSchemaSnafu)
Schema::new_from_parts(self.measurement.take(), self.fields.drain(..), false)
.context(ValidatingSchemaSnafu)
}
/// Internal helper method to add a column definition

View File

@ -13,11 +13,10 @@ use arrow::datatypes::{
};
use hashbrown::HashSet;
use crate::sort::SortKey;
use selection::Selection;
use snafu::{OptionExt, Snafu};
use crate::sort::{ColumnSort, SortKey};
/// The name of the timestamp column in the InfluxDB datamodel
pub const TIME_COLUMN_NAME: &str = "time";
@ -111,7 +110,6 @@ impl TryFrom<ArrowSchemaRef> for Schema {
const MEASUREMENT_METADATA_KEY: &str = "iox::measurement::name";
const COLUMN_METADATA_KEY: &str = "iox::column::type";
const COLUMN_SORT_METADATA_KEY: &str = "iox::column::sort";
impl Schema {
/// Create a new Schema wrapper over the schema
@ -161,7 +159,6 @@ impl Schema {
pub(crate) fn new_from_parts(
measurement: Option<String>,
fields: impl Iterator<Item = (ArrowField, Option<InfluxColumnType>)>,
sort_key: &SortKey<'_>,
sort_columns: bool,
) -> Result<Self> {
let mut metadata = HashMap::new();
@ -170,17 +167,9 @@ impl Schema {
metadata.insert(MEASUREMENT_METADATA_KEY.to_string(), measurement);
}
let mut sort_ordinals = Vec::with_capacity(sort_key.len());
let mut fields: Vec<ArrowField> = fields
.map(|(mut field, column_type)| {
match sort_key.get(field.name()) {
Some(sort) => {
sort_ordinals.push(sort.sort_ordinal);
set_field_metadata(&mut field, column_type, Some(sort))
}
None => set_field_metadata(&mut field, column_type, None),
}
set_field_metadata(&mut field, column_type);
field
})
.collect();
@ -194,77 +183,14 @@ impl Schema {
let record =
ArrowSchemaRef::new(ArrowSchema::new_with_metadata(fields, metadata)).try_into()?;
// This must be after validation in case of duplicate columns
sort_ordinals.sort_unstable();
for (idx, ordinal) in sort_ordinals.iter().enumerate() {
if idx != *ordinal {
return Err(Error::SortColumnNotFound {
column_name: sort_key.get_index(idx).unwrap().0.to_string(),
});
}
}
if sort_ordinals.len() != sort_key.len() {
return Err(Error::SortColumnNotFound {
column_name: sort_key
.get_index(sort_ordinals.len())
.unwrap()
.0
.to_string(),
});
}
Ok(record)
}
/// Set the order of sort columns to the specified `sort_key`
pub fn set_sort_key(&mut self, sort_key: &SortKey<'_>) {
let fields = self.inner.fields();
// create a new_fields that are the fields with their sort keys set
let new_fields = fields
/// Returns true if the sort_key includes all primary key cols
pub fn is_sorted_on_pk(&self, sort_key: &SortKey) -> bool {
self.primary_key()
.iter()
.map(|field| {
let mut new_field = field.clone();
let mut meta = std::collections::BTreeMap::new();
if let Some(sort) = sort_key.get(field.name()) {
// New sort key
meta.insert(COLUMN_SORT_METADATA_KEY.to_string(), sort.to_string());
}
// Keep other meta data
if let Some(metadata) = field.metadata() {
for (key, value) in metadata {
if key.ne(&COLUMN_SORT_METADATA_KEY.to_string()) {
meta.insert(key.clone(), value.clone());
}
}
}
new_field.set_metadata(Some(meta));
new_field
})
.collect();
let new_meta = self.inner.metadata().clone();
let new_schema = ArrowSchema::new_with_metadata(new_fields, new_meta);
self.inner = Arc::new(new_schema);
}
/// Returns true of the sort_key include all primary key cols
pub fn is_sorted_on_pk(&self) -> bool {
if let Some(sort_key) = self.sort_key() {
let key_columns = self.primary_key();
for key_col in key_columns {
if sort_key.get(key_col).is_none() {
return false; // pk col is not part of the sort key
}
}
true
} else {
// not sorted yet
false
}
.all(|col| sort_key.get(*col).is_some())
}
/// Provide a reference to the underlying Arrow Schema object
@ -425,36 +351,6 @@ impl Schema {
self.select(Selection::Some(selection))
}
/// Returns the sort key if any
pub fn sort_key(&self) -> Option<SortKey<'_>> {
// Find all the sorted columns
let mut columns: Vec<_> = self
.inner
.fields()
.iter()
.enumerate()
.flat_map(|(idx, field)| Some((idx, get_sort(field)?)))
.collect();
columns.sort_unstable_by_key(|(_, sort)| sort.sort_ordinal);
let mut sort_key = SortKey::with_capacity(columns.len());
for (idx, (column_idx, sort)) in columns.into_iter().enumerate() {
// If the schema has been projected with only some of the columns
// the sort key may be truncated
if sort.sort_ordinal != idx {
break;
}
sort_key.push(self.inner.field(column_idx).name().as_str(), sort.options)
}
if !sort_key.is_empty() {
return Some(sort_key);
}
None
}
/// Return columns used for the "primary key" in this table.
///
/// Currently this relies on the InfluxDB data model annotations
@ -501,32 +397,14 @@ pub(crate) fn get_influx_type(field: &ArrowField) -> Option<InfluxColumnType> {
.ok()
}
/// Gets the column sort for a field
pub(crate) fn get_sort(field: &ArrowField) -> Option<ColumnSort> {
field
.metadata()
.as_ref()?
.get(COLUMN_SORT_METADATA_KEY)?
.parse()
.ok()
}
/// Sets the metadata for a field - replacing any existing metadata
pub(crate) fn set_field_metadata(
field: &mut ArrowField,
column_type: Option<InfluxColumnType>,
sort: Option<ColumnSort>,
) {
pub(crate) fn set_field_metadata(field: &mut ArrowField, column_type: Option<InfluxColumnType>) {
let mut metadata = std::collections::BTreeMap::new();
if let Some(column_type) = column_type {
metadata.insert(COLUMN_METADATA_KEY.to_string(), column_type.to_string());
}
if let Some(sort) = sort {
metadata.insert(COLUMN_SORT_METADATA_KEY.to_string(), sort.to_string());
}
field.set_metadata(Some(metadata))
}
@ -732,13 +610,9 @@ macro_rules! assert_column_eq {
#[cfg(test)]
mod test {
use arrow::compute::SortOptions;
use InfluxColumnType::*;
use InfluxFieldType::*;
use crate::merge::SchemaMerger;
use super::{builder::SchemaBuilder, *};
fn make_field(
@ -1158,115 +1032,11 @@ mod test {
);
}
#[test]
fn test_sort() {
let mut sort_key = SortKey::with_capacity(3);
sort_key.push("tag4", Default::default());
sort_key.push("tag3", Default::default());
sort_key.push("tag2", Default::default());
sort_key.push("tag1", Default::default());
sort_key.push(TIME_COLUMN_NAME, Default::default());
let schema1 = SchemaBuilder::new()
.influx_field("the_field", String)
.tag("tag1")
.tag("tag2")
.tag("tag3")
.tag("tag4")
.timestamp()
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.unwrap();
let projected = schema1
.select_by_names(&["tag4", "tag2", "tag3", "time"])
.unwrap();
let projected_key: Vec<_> = projected.sort_key().unwrap().iter().map(|x| *x.0).collect();
let m1 = SchemaMerger::new().merge(&schema1).unwrap().build();
let m2 = SchemaMerger::new()
.merge(&schema1)
.unwrap()
.build_with_sort_key(&sort_key);
assert_eq!(schema1.sort_key().unwrap(), sort_key);
assert_eq!(m1.sort_key(), None);
assert_eq!(m2.sort_key().unwrap(), sort_key);
assert_eq!(projected_key, vec!["tag4", "tag3", "tag2"])
}
#[test]
fn test_sort_missing_column() {
let mut sort_key = SortKey::with_capacity(3);
sort_key.push(
"the_field",
SortOptions {
descending: true,
nulls_first: false,
},
);
sort_key.push("a", Default::default());
sort_key.push(TIME_COLUMN_NAME, Default::default());
// Verify missing columns are detected
let err = SchemaBuilder::new()
.influx_field("the_field", String)
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.unwrap_err();
assert!(matches!(
err,
builder::Error::ValidatingSchema {
source: Error::SortColumnNotFound {
column_name
}
} if &column_name == "a"
));
// Verify duplicate columns don't break truncation
let err = SchemaBuilder::new()
.influx_field("the_field", String)
.influx_field("a", String)
.timestamp()
.timestamp()
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.unwrap_err();
assert!(matches!(
err,
builder::Error::ValidatingSchema {
source: Error::DuplicateColumnName { .. }
}
));
// Verify sort key gaps are detected
let err = SchemaBuilder::new()
.influx_field("a", String)
.influx_field("the_field", String)
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.unwrap_err();
assert!(matches!(err, builder::Error::ValidatingSchema {
source: Error::SortColumnNotFound {
column_name
}
} if &column_name == "time" ));
}
#[test]
fn test_is_sort_on_pk() {
// Sort key the same as pk
let mut sort_key = SortKey::with_capacity(3);
sort_key.with_col("tag4");
sort_key.with_col("tag3");
sort_key.with_col("tag2");
sort_key.with_col("tag1");
sort_key.with_col(TIME_COLUMN_NAME);
let sort_key =
SortKey::from_columns(vec!["tag4", "tag3", "tag2", "tag1", TIME_COLUMN_NAME]);
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
@ -1276,29 +1046,13 @@ mod test {
.tag("tag4")
.timestamp()
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.build()
.unwrap();
assert!(schema.is_sorted_on_pk());
assert!(schema.is_sorted_on_pk(&sort_key));
// Sort key does not include all pk cols
let mut sort_key = SortKey::with_capacity(3);
sort_key.with_col("tag3");
sort_key.with_col("tag1");
sort_key.with_col(TIME_COLUMN_NAME);
let sort_key = SortKey::from_columns(vec!["tag3", "tag1", TIME_COLUMN_NAME]);
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
.tag("tag1")
.tag("tag2")
.tag("tag3")
.tag("tag4")
.timestamp()
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.unwrap();
assert!(!schema.is_sorted_on_pk());
// No sort key
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
.tag("tag1")
@ -1309,19 +1063,10 @@ mod test {
.measurement("the_measurement")
.build()
.unwrap();
assert!(!schema.is_sorted_on_pk());
// No PK, no sort key
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
.measurement("the_measurement")
.build()
.unwrap();
assert!(!schema.is_sorted_on_pk());
assert!(!schema.is_sorted_on_pk(&sort_key));
// No PK, sort key on non pk
let mut sort_key = SortKey::with_capacity(3);
sort_key.with_col("the_field");
let sort_key = SortKey::from_columns(vec!["the_field"]);
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
@ -1331,8 +1076,8 @@ mod test {
.tag("tag4")
.timestamp()
.measurement("the_measurement")
.build_with_sort_key(&sort_key)
.build()
.unwrap();
assert!(!schema.is_sorted_on_pk());
assert!(!schema.is_sorted_on_pk(&sort_key));
}
}

View File

@ -8,8 +8,6 @@ use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
use snafu::Snafu;
use crate::sort::SortKey;
use super::{InfluxColumnType, Schema};
/// Database schema creation / validation errors.
@ -137,7 +135,7 @@ impl SchemaMerger {
Ok(self)
}
fn merge_field(
pub fn merge_field(
&mut self,
field: &Field,
column_type: Option<InfluxColumnType>,
@ -187,18 +185,10 @@ impl SchemaMerger {
}
/// Returns the schema that was built, the columns are always sorted in lexicographic order
pub fn build(self) -> Schema {
self.build_with_sort_key(&Default::default())
}
/// Returns the schema that was built, the columns are always sorted in lexicographic order
///
/// Additionally specifies a sort key for the data
pub fn build_with_sort_key(mut self, sort_key: &SortKey<'_>) -> Schema {
pub fn build(mut self) -> Schema {
Schema::new_from_parts(
self.measurement.take(),
self.fields.drain().map(|x| x.1),
sort_key,
true,
)
.expect("failed to build merged schema")

View File

@ -1,3 +1,4 @@
use std::sync::Arc;
use std::{fmt::Display, str::FromStr};
use arrow::compute::SortOptions;
@ -68,22 +69,79 @@ impl std::fmt::Display for ColumnSort {
}
}
#[derive(Debug, Default, Eq, PartialEq, Clone)]
pub struct SortKey<'a> {
columns: IndexMap<&'a str, SortOptions>,
#[derive(Debug, Default)]
pub struct SortKeyBuilder {
columns: IndexMap<Arc<str>, SortOptions>,
}
impl<'a> SortKey<'a> {
/// Create a new empty sort key that can store `capacity` columns without allocating
impl SortKeyBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn with_capacity(capacity: usize) -> Self {
Self {
columns: IndexMap::with_capacity(capacity),
}
}
/// Adds a new column to the end of this sort key
pub fn push(&mut self, column: &'a str, options: SortOptions) {
self.columns.insert(column, options);
pub fn with_col(self, column: impl Into<Arc<str>>) -> Self {
self.with_col_sort_opts(column, Default::default())
}
/// Helper to insert col with specified sort options into sort key
pub fn with_col_opts(
self,
col: impl Into<Arc<str>>,
descending: bool,
nulls_first: bool,
) -> Self {
self.with_col_sort_opts(
col,
SortOptions {
descending,
nulls_first,
},
)
}
pub fn with_col_sort_opts(mut self, col: impl Into<Arc<str>>, options: SortOptions) -> Self {
self.columns.insert(col.into(), options);
self
}
pub fn build(self) -> SortKey {
SortKey {
columns: Arc::new(self.columns),
}
}
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct SortKey {
columns: Arc<IndexMap<Arc<str>, SortOptions>>,
}
impl SortKey {
/// Create a new empty sort key
pub fn empty() -> Self {
SortKey {
columns: Default::default(),
}
}
/// Create a new sort key from the provided columns
pub fn from_columns<C, I>(columns: C) -> Self
where
C: IntoIterator<Item = I>,
I: Into<Arc<str>>,
{
let iter = columns.into_iter();
let mut builder = SortKeyBuilder::with_capacity(iter.size_hint().0);
for c in iter {
builder = builder.with_col(c);
}
builder.build()
}
/// Gets the ColumnSort for a given column name
@ -96,10 +154,8 @@ impl<'a> SortKey<'a> {
}
/// Gets the column for a given index
pub fn get_index(&self, idx: usize) -> Option<(&'a str, SortOptions)> {
self.columns
.get_index(idx)
.map(|(col, options)| (*col, *options))
pub fn get_index(&self, idx: usize) -> Option<(&Arc<str>, &SortOptions)> {
self.columns.get_index(idx)
}
/// Return the index of the given column and its sort option. Return None otherwise.
@ -116,7 +172,7 @@ impl<'a> SortKey<'a> {
}
/// Returns an iterator over the columns in this key
pub fn iter(&self) -> Iter<'_, &'a str, SortOptions> {
pub fn iter(&self) -> Iter<'_, Arc<str>, SortOptions> {
self.columns.iter()
}
@ -130,23 +186,6 @@ impl<'a> SortKey<'a> {
self.columns.is_empty()
}
/// Returns a subset of the sort key that includes only the given columns
pub fn selected_sort_key(&self, select_keys: Vec<&str>) -> SortKey<'a> {
let keys: IndexMap<&'a str, SortOptions> = self
.columns
.iter()
.filter_map(|(col, options)| {
if select_keys.iter().any(|key| key == col) {
Some((*col, *options))
} else {
None
}
})
.collect();
SortKey { columns: keys }
}
/// Returns merge key of the 2 given keys if one covers the other. Returns None otherwise.
/// Key1 is said to cover key2 if key2 is a subset and in the same order of key1.
/// Examples:
@ -165,28 +204,19 @@ impl<'a> SortKey<'a> {
/// super key of (a, b, c) and any of { b, a), (c, a), (c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a) } is None
///
/// Note that the last column in the sort key must be time
pub fn try_merge_key(key1: &SortKey<'a>, key2: &SortKey<'a>) -> Option<SortKey<'a>> {
pub fn try_merge_key<'a>(key1: &'a SortKey, key2: &'a SortKey) -> Option<&'a SortKey> {
if key1.is_empty() || key2.is_empty() {
panic!("Sort key cannot be empty");
}
let key1 = key1.clone();
let key2 = key2.clone();
// Verify if time column in the sort key
match key1.columns.get_index_of(TIME_COLUMN_NAME) {
None => panic!("Time column is not included in the sort key {:#?}", key1),
Some(idx) => {
if idx < key1.len() - 1 {
panic!("Time column is not last in the sort key {:#?}", key1)
}
}
}
match key2.columns.get_index_of(TIME_COLUMN_NAME) {
None => panic!("Time column is not included in the sort key {:#?}", key2),
Some(idx) => {
if idx < key2.len() - 1 {
panic!("Time column is not last in the sort key {:#?}", key2)
for key in [&key1, &key2] {
match key.columns.get_index_of(TIME_COLUMN_NAME) {
None => panic!("Time column is not included in the sort key {:#?}", key),
Some(idx) => {
if idx < key.len() - 1 {
panic!("Time column is not last in the sort key {:#?}", key)
}
}
}
}
@ -199,7 +229,7 @@ impl<'a> SortKey<'a> {
// Go over short key and check its right-order availability in the long key
let mut prev_long_idx: Option<usize> = None;
for (col, sort_options) in &short_key.columns {
for (col, sort_options) in short_key.columns.iter() {
if let Some(long_idx) = long_key.find_index(col, sort_options) {
match prev_long_idx {
None => prev_long_idx = Some(long_idx),
@ -222,29 +252,13 @@ impl<'a> SortKey<'a> {
// Reach here means the long key is the super key of the sort one
Some(long_key)
}
/// Helper to insert col with default sort options into sort key
pub fn with_col(&mut self, col: &'a str) {
self.push(col, Default::default());
}
/// Helper to insert col with specified sort options into sort key
pub fn with_col_opts(&mut self, col: &'a str, descending: bool, nulls_first: bool) {
self.push(
col,
SortOptions {
descending,
nulls_first,
},
);
}
}
// Produces a human-readable representation of a sort key that looks like:
//
// "host, region DESC, env NULLS FIRST, time"
//
impl<'a> Display for SortKey<'a> {
impl Display for SortKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
for (i, (name, options)) in self.columns.iter().enumerate() {
write!(f, "{}", name)?;
@ -305,10 +319,7 @@ mod tests {
#[test]
fn test_basic() {
let mut key = SortKey::with_capacity(3);
key.push("a", Default::default());
key.push("c", Default::default());
key.push("b", Default::default());
let key = SortKey::from_columns(vec!["a", "c", "b"]);
assert_eq!(key.len(), 3);
assert!(!key.is_empty());
@ -339,48 +350,50 @@ mod tests {
#[test]
fn test_sort_key_eq() {
let mut key1 = SortKey::with_capacity(1);
key1.with_col("a");
let key1 = SortKey::from_columns(vec!["a"]);
let mut key1_2 = SortKey::with_capacity(2);
key1_2.with_col("a");
key1_2.with_col_opts("b", true, false);
let key1_2 = SortKeyBuilder::with_capacity(2)
.with_col("a")
.with_col_opts("b", true, false)
.build();
let key2 = SortKey::with_capacity(2);
let key2 = SortKey::empty();
// different keys
assert_ne!(key1, key2);
assert_ne!(key1_2, key2);
assert_ne!(key1, key1_2);
let mut key3 = SortKey::with_capacity(1);
key3.with_col("a");
let key3 = SortKey::from_columns(vec!["a"]);
let mut key3_2 = SortKey::with_capacity(2);
key3_2.with_col("a");
key3_2.with_col_opts("b", true, false);
let key3_2 = SortKeyBuilder::with_capacity(2)
.with_col("a")
.with_col_opts("b", true, false)
.build();
// same
assert_eq!(key1, key3);
assert_eq!(key1_2, key3_2);
let mut key4 = SortKey::with_capacity(1);
key4.with_col("aa");
let key4 = SortKey::from_columns(vec!["aa"]);
let mut key4_2 = SortKey::with_capacity(2);
key4_2.with_col("aa");
key4_2.with_col_opts("bb", true, false);
let key4_2 = SortKeyBuilder::with_capacity(2)
.with_col("aa")
.with_col_opts("bb", true, false)
.build();
// different key, same value
assert_ne!(key1, key4);
assert_ne!(key1_2, key4_2);
let mut key5 = SortKey::with_capacity(1);
key5.with_col_opts("a", true, true);
let key5 = SortKeyBuilder::with_capacity(1)
.with_col_opts("a", true, true)
.build();
let mut key5_2 = SortKey::with_capacity(2);
key5_2.with_col_opts("a", true, true);
key5_2.with_col_opts("b", false, true);
let key5_2 = SortKeyBuilder::with_capacity(2)
.with_col_opts("a", true, true)
.with_col_opts("b", false, true)
.build();
// same key, different value
assert_ne!(key1, key5);
@ -390,39 +403,41 @@ mod tests {
// Note that the last column must be TIME_COLUMN_NAME to avoid panicking
#[test]
fn test_super_sort_key() {
// key (a) with default sort options (false, true)
let mut key_a = SortKey::with_capacity(1);
let a = TIME_COLUMN_NAME;
key_a.with_col(a);
// key (a) with default sort options (false, true)
let key_a = SortKey::from_columns(vec![a]);
// key (a) with explicitly defined sort options
let mut key_a_2 = SortKey::with_capacity(1);
key_a_2.with_col_opts(a, true, false);
let key_a_2 = SortKeyBuilder::with_capacity(1)
.with_col_opts(TIME_COLUMN_NAME, true, false)
.build();
// super key of (a) and (a) is (a)
let merge_key = SortKey::try_merge_key(&key_a, &key_a).unwrap();
assert_eq!(merge_key, key_a);
assert_eq!(merge_key, &key_a);
let merge_key = SortKey::try_merge_key(&key_a_2, &key_a_2).unwrap();
assert_eq!(merge_key, key_a_2);
assert_eq!(merge_key, &key_a_2);
// (a,b)
let b = TIME_COLUMN_NAME;
let mut key_ab = SortKey::with_capacity(2);
key_ab.with_col("a");
key_ab.with_col(b);
let mut key_ab_2 = SortKey::with_capacity(2);
key_ab_2.with_col_opts("a", true, false);
key_ab_2.with_col_opts(b, false, false);
let key_ab = SortKey::from_columns(vec!["a", TIME_COLUMN_NAME]);
let key_ab_2 = SortKeyBuilder::with_capacity(2)
.with_col_opts("a", true, false)
.with_col_opts(b, false, false)
.build();
//(b)
let mut key_b = SortKey::with_capacity(1);
key_b.with_col(b);
let mut key_b_2 = SortKey::with_capacity(1);
key_b_2.with_col_opts(b, false, false);
let key_b = SortKey::from_columns(vec![b]);
let key_b_2 = SortKeyBuilder::with_capacity(1)
.with_col_opts(b, false, false)
.build();
// super key of (a, b) and (b) is (a, b)
let merge_key = SortKey::try_merge_key(&key_ab, &key_b).unwrap();
assert_eq!(merge_key, key_ab);
assert_eq!(merge_key, &key_ab);
let merge_key = SortKey::try_merge_key(&key_ab_2, &key_b_2).unwrap();
assert_eq!(merge_key, key_ab_2);
assert_eq!(merge_key, &key_ab_2);
// super key of (a, b) and (b') is None
let merge_key = SortKey::try_merge_key(&key_ab, &key_b_2);
assert_eq!(merge_key, None);
@ -431,9 +446,9 @@ mod tests {
// super key of (a, b) and (a, b) is (a, b)
let merge_key = SortKey::try_merge_key(&key_ab, &key_ab).unwrap();
assert_eq!(merge_key, key_ab);
assert_eq!(merge_key, &key_ab);
let merge_key = SortKey::try_merge_key(&key_ab_2, &key_ab_2).unwrap();
assert_eq!(merge_key, key_ab_2);
assert_eq!(merge_key, &key_ab_2);
// super key of (a, b) and (a',b') is None
let merge_key = SortKey::try_merge_key(&key_ab, &key_ab_2);
assert_eq!(merge_key, None);
@ -442,103 +457,46 @@ mod tests {
// (a, b, c)
let c = TIME_COLUMN_NAME;
let mut key_abc_2 = SortKey::with_capacity(3);
key_abc_2.with_col_opts("a", true, false);
key_abc_2.with_col_opts("b", false, false);
key_abc_2.with_col_opts(c, true, true);
let key_abc_2 = SortKeyBuilder::with_capacity(3)
.with_col_opts("a", true, false)
.with_col_opts("b", false, false)
.with_col_opts(c, true, true)
.build();
// (c)
let mut key_c_2 = SortKey::with_capacity(1);
key_c_2.with_col_opts(c, true, true);
let key_c_2 = SortKeyBuilder::with_capacity(1)
.with_col_opts(c, true, true)
.build();
// (a, c)
let mut key_ac_2 = SortKey::with_capacity(2);
key_ac_2.with_col_opts("a", true, false);
key_ac_2.with_col_opts(c, true, true);
let key_ac_2 = SortKeyBuilder::with_capacity(2)
.with_col_opts("a", true, false)
.with_col_opts(c, true, true)
.build();
// (b,c)
let mut key_bc_2 = SortKey::with_capacity(2);
key_bc_2.with_col_opts("b", false, false);
key_bc_2.with_col_opts(c, true, true);
let key_bc_2 = SortKeyBuilder::with_capacity(2)
.with_col_opts("b", false, false)
.with_col_opts(c, true, true)
.build();
// (b,a,c)
let mut key_bac_2 = SortKey::with_capacity(3);
key_bac_2.with_col_opts("b", false, false);
key_bac_2.with_col_opts("a", true, false);
key_bac_2.with_col_opts(c, true, true);
let key_bac_2 = SortKeyBuilder::with_capacity(3)
.with_col_opts("b", false, false)
.with_col_opts("a", true, false)
.with_col_opts(c, true, true)
.build();
// super key of (a, b, c) and any of { (a, c), (b, c), (a), (b), (c) } is (a, b, c)
let merge_key = SortKey::try_merge_key(&key_abc_2, &key_c_2).unwrap();
assert_eq!(merge_key, key_abc_2);
assert_eq!(merge_key, &key_abc_2);
let merge_key = SortKey::try_merge_key(&key_abc_2, &key_ac_2).unwrap();
assert_eq!(merge_key, key_abc_2);
assert_eq!(merge_key, &key_abc_2);
let merge_key = SortKey::try_merge_key(&key_abc_2, &key_bc_2).unwrap();
assert_eq!(merge_key, key_abc_2);
assert_eq!(merge_key, &key_abc_2);
// super key of (a, b, c) and any of (b, a, c) } is None
let merge_key = SortKey::try_merge_key(&key_abc_2, &key_bac_2);
assert_eq!(merge_key, None);
}
// Verifies `selected_sort_key`: projecting a sort key onto a set of input
// columns keeps only the columns present in the key, preserving the key's
// own column order and per-column sort options.
#[test]
fn test_selected_sort_key() {
    let mut key = SortKey::with_capacity(4);
    key.with_col("a"); // default sort option
    key.with_col_opts("b", true, false);
    key.with_col_opts("c", false, false);
    key.with_col(TIME_COLUMN_NAME);

    // Empty input selects nothing
    let selected = key.selected_sort_key(vec![]);
    assert!(selected.is_empty());

    // Columns absent from the key select nothing
    let selected = key.selected_sort_key(vec!["d", "e"]);
    assert!(selected.is_empty());

    // All key columns in key order -> the full key is selected
    let selected = key.selected_sort_key(vec!["a", "b", "c", TIME_COLUMN_NAME]);
    assert_eq!(selected, key);

    // All key columns in a different order -> still the full key
    let selected = key.selected_sort_key(vec!["c", TIME_COLUMN_NAME, "b", "a"]);
    assert_eq!(selected, key);

    // Subset in key order -> that subset, options preserved
    let selected = key.selected_sort_key(vec!["a", "b"]);
    let mut expected = SortKey::with_capacity(2);
    expected.with_col("a"); // default sort option
    expected.with_col_opts("b", true, false);
    assert_eq!(selected, expected);

    // Single column -> a one-column key
    let selected = key.selected_sort_key(vec![TIME_COLUMN_NAME]);
    let mut expected = SortKey::with_capacity(1);
    expected.with_col(TIME_COLUMN_NAME);
    assert_eq!(selected, expected);

    // Subset in key order with a gap -> that subset
    let selected = key.selected_sort_key(vec!["a", "c", TIME_COLUMN_NAME]);
    let mut expected = SortKey::with_capacity(3);
    expected.with_col("a"); // default sort option
    expected.with_col_opts("c", false, false);
    expected.with_col(TIME_COLUMN_NAME);
    assert_eq!(selected, expected);

    // Subset out of key order -> reordered to follow the key's order
    let selected = key.selected_sort_key(vec![TIME_COLUMN_NAME, "b", "c"]);
    let mut expected = SortKey::with_capacity(3);
    expected.with_col_opts("b", true, false);
    expected.with_col_opts("c", false, false);
    expected.with_col(TIME_COLUMN_NAME);
    assert_eq!(selected, expected);
}
}