feat: Get the sort key from the schema and data in the QueryableBatch
Connects to #4194.
parent 9043966443
commit 9d83554f20
@@ -1,6 +1,9 @@
 //! This module is responsible for compacting Ingester's data
 
-use crate::data::{PersistingBatch, QueryableBatch};
+use crate::{
+    data::{PersistingBatch, QueryableBatch},
+    sort_key::compute_sort_key,
+};
 use arrow::record_batch::RecordBatch;
 use data_types2::NamespaceId;
 use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
@@ -10,8 +13,9 @@ use query::{
     exec::{Executor, ExecutorType},
     frontend::reorg::ReorgPlanner,
     util::compute_timenanosecond_min_max,
-    QueryChunkMeta,
+    QueryChunk, QueryChunkMeta,
 };
+use schema::sort::SortKey;
 use snafu::{ResultExt, Snafu};
 use std::sync::Arc;
 use time::{Time, TimeProvider};
@@ -67,8 +71,11 @@ pub async fn compact_persisting_batch(
         return Ok(None);
     }
 
+    // Get sort key based on cardinality
+    let sort_key = compute_sort_key(&batch.data);
+
     // Compact
-    let stream = compact(executor, Arc::clone(&batch.data)).await?;
+    let stream = compact(executor, Arc::clone(&batch.data), sort_key.clone()).await?;
     // Collect compacted data into record batches for computing statistics
     let output_batches = datafusion::physical_plan::common::collect(stream)
         .await
@@ -106,7 +113,7 @@ pub async fn compact_persisting_batch(
         max_sequence_number: max_seq,
         row_count,
         compaction_level: INITIAL_COMPACTION_LEVEL,
-        sort_key: None,
+        sort_key: Some(sort_key),
     };
 
     Ok(Some((output_batches, meta)))
@@ -116,11 +123,12 @@ pub async fn compact_persisting_batch(
 pub async fn compact(
     executor: &Executor,
     data: Arc<QueryableBatch>,
+    sort_key: SortKey,
 ) -> Result<SendableRecordBatchStream> {
     // Build logical plan for compaction
     let ctx = executor.new_context(ExecutorType::Reorg);
     let logical_plan = ReorgPlanner::new()
-        .scan_single_chunk_plan(data.schema(), data)
+        .compact_plan(data.schema(), [data as Arc<dyn QueryChunk>], sort_key)
         .context(LogicalPlanSnafu {})?;
 
     // Build physical plan
@@ -322,9 +330,11 @@ mod tests {
         let expected_pk = vec!["tag1", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -360,9 +370,11 @@ mod tests {
         let expected_pk = vec!["tag1", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
        let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -398,9 +410,11 @@ mod tests {
         let expected_pk = vec!["tag1", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -441,9 +455,11 @@ mod tests {
         let expected_pk = vec!["tag1", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -481,9 +497,11 @@ mod tests {
         let expected_pk = vec!["tag1", "tag2", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -535,9 +553,11 @@ mod tests {
         let expected_pk = vec!["tag1", "tag2", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -597,9 +617,11 @@ mod tests {
         let expected_pk = vec!["tag1", "tag2", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();
@@ -652,9 +674,11 @@ mod tests {
         let expected_pk = vec!["tag1", "tag2", "time"];
         assert_eq!(expected_pk, pk);
 
+        let sort_key = compute_sort_key(&compact_batch);
+
         // compact
         let exc = Executor::new(1);
-        let stream = compact(&exc, compact_batch).await.unwrap();
+        let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
         let output_batches = datafusion::physical_plan::common::collect(stream)
             .await
             .unwrap();

@@ -24,6 +24,7 @@ mod poison;
 pub mod querier_handler;
 pub mod query;
 pub mod server;
+pub mod sort_key;
 
 #[cfg(test)]
 pub mod test_util;

@@ -0,0 +1,13 @@
+//! Functions for computing a sort key based on cardinality of primary key columns.
+
+use crate::data::QueryableBatch;
+use schema::sort::SortKey;
+
+/// Given a `QueryableBatch`, compute a sort key based on:
+///
+/// - The columns that make up the primary key of the schema of this batch
+/// - Order those columns from low cardinality to high cardinality based on the data
+/// - Always have the time column last
+pub fn compute_sort_key(_queryable_batch: &QueryableBatch) -> SortKey {
+    unimplemented!()
+}
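
Note: `compute_sort_key` lands in this commit as an `unimplemented!()` stub; only its doc comment pins down the intended behavior. The following is a rough, self-contained sketch of that cardinality rule, not this crate's actual implementation: the `Column` struct, the string-valued data, and the `sort_key_column_order` helper are hypothetical stand-ins for reading the primary-key columns out of the `QueryableBatch`'s schema and record batches.

use std::collections::HashSet;

/// Hypothetical stand-in for one primary-key column and its data.
struct Column<'a> {
    name: &'a str,
    values: Vec<&'a str>,
}

/// Order primary-key columns from low to high cardinality, with the time
/// column always last, returning the resulting column-name order.
fn sort_key_column_order<'a>(mut columns: Vec<Column<'a>>) -> Vec<&'a str> {
    // Pull the time column out so it can be appended last.
    let time_idx = columns.iter().position(|c| c.name == "time");
    let time = time_idx.map(|i| columns.remove(i));

    // Cardinality = number of distinct values observed in the data.
    // `sort_by_key` is stable, so ties keep their original relative order.
    columns.sort_by_key(|c| c.values.iter().collect::<HashSet<_>>().len());

    let mut order: Vec<&'a str> = columns.iter().map(|c| c.name).collect();
    order.extend(time.map(|t| t.name));
    order
}

fn main() {
    let batch = vec![
        Column { name: "tag2", values: vec!["x", "y", "z"] },
        Column { name: "tag1", values: vec!["a", "a", "b"] },
        Column { name: "time", values: vec!["1", "2", "3"] },
    ];
    // tag1 has 2 distinct values and tag2 has 3, so tag1 sorts first.
    assert_eq!(sort_key_column_order(batch), vec!["tag1", "tag2", "time"]);
}

Putting low-cardinality columns first produces long runs of equal values at the front of the sort, which tends to benefit both sorting and downstream compression; presumably that is why the doc comment specifies this order.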