refactor: Move sharding query from namespace to table
Better supports future work. Fixes #5003.pull/24376/head
parent
be53716e4d
commit
380166f4c0
|
@ -19,10 +19,13 @@ mod test_util;
|
||||||
/// Maps a catalog namespace to all the in-memory resources and sync-state that the querier needs.
|
/// Maps a catalog namespace to all the in-memory resources and sync-state that the querier needs.
|
||||||
///
|
///
|
||||||
/// # Data Structures & Sync
|
/// # Data Structures & Sync
|
||||||
/// Tables and schemas are created when [`QuerierNamespace`] is created because DataFusion does not implement async
|
|
||||||
/// schema inspection. The actual payload (chunks and tombstones) are only queried on demand.
|
|
||||||
///
|
///
|
||||||
/// Most access to the [IOx Catalog](iox_catalog::interface::Catalog) are cached via [`CatalogCache`].
|
/// Tables and schemas are created when [`QuerierNamespace`] is created because DataFusion does not
|
||||||
|
/// implement async schema inspection. The actual payload (chunks and tombstones) are only queried
|
||||||
|
/// on demand.
|
||||||
|
///
|
||||||
|
/// Most accesses to the [IOx Catalog](iox_catalog::interface::Catalog) are cached via
|
||||||
|
/// [`CatalogCache`].
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct QuerierNamespace {
|
pub struct QuerierNamespace {
|
||||||
/// ID of this namespace.
|
/// ID of this namespace.
|
||||||
|
@ -37,7 +40,7 @@ pub struct QuerierNamespace {
|
||||||
/// Executor for queries.
|
/// Executor for queries.
|
||||||
exec: Arc<Executor>,
|
exec: Arc<Executor>,
|
||||||
|
|
||||||
/// Catalog cache
|
/// Catalog cache.
|
||||||
catalog_cache: Arc<CatalogCache>,
|
catalog_cache: Arc<CatalogCache>,
|
||||||
|
|
||||||
/// Query log.
|
/// Query log.
|
||||||
|
@ -63,12 +66,8 @@ impl QuerierNamespace {
|
||||||
let id = table_schema.id;
|
let id = table_schema.id;
|
||||||
let schema = Schema::try_from(table_schema.clone()).expect("cannot build schema");
|
let schema = Schema::try_from(table_schema.clone()).expect("cannot build schema");
|
||||||
|
|
||||||
// Currently, the sharder will only return one sequencer ID per table, but in the
|
|
||||||
// near future, the sharder might return more than one sequencer ID for one table.
|
|
||||||
let sequencer_ids = vec![**sharder.shard_for_query(&table_name, &name)];
|
|
||||||
|
|
||||||
let table = Arc::new(QuerierTable::new(
|
let table = Arc::new(QuerierTable::new(
|
||||||
sequencer_ids,
|
Arc::clone(&sharder),
|
||||||
Arc::clone(&name),
|
Arc::clone(&name),
|
||||||
id,
|
id,
|
||||||
Arc::clone(&table_name),
|
Arc::clone(&table_name),
|
||||||
|
|
|
@ -11,6 +11,7 @@ use iox_query::{provider::ChunkPruner, QueryChunk};
|
||||||
use observability_deps::tracing::debug;
|
use observability_deps::tracing::debug;
|
||||||
use predicate::Predicate;
|
use predicate::Predicate;
|
||||||
use schema::Schema;
|
use schema::Schema;
|
||||||
|
use sharder::JumpHash;
|
||||||
use snafu::{ResultExt, Snafu};
|
use snafu::{ResultExt, Snafu};
|
||||||
use std::{
|
use std::{
|
||||||
collections::{hash_map::Entry, HashMap},
|
collections::{hash_map::Entry, HashMap},
|
||||||
|
@ -52,8 +53,8 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||||
/// Table representation for the querier.
|
/// Table representation for the querier.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct QuerierTable {
|
pub struct QuerierTable {
|
||||||
/// Sequencers responsible for the table's data that need to be queried for unpersisted data
|
/// Sharder to query for which sequencers are responsible for the table's data
|
||||||
sequencer_ids: Vec<KafkaPartition>,
|
sharder: Arc<JumpHash<Arc<KafkaPartition>>>,
|
||||||
|
|
||||||
/// Namespace the table is in
|
/// Namespace the table is in
|
||||||
namespace_name: Arc<str>,
|
namespace_name: Arc<str>,
|
||||||
|
@ -80,7 +81,7 @@ pub struct QuerierTable {
|
||||||
impl QuerierTable {
|
impl QuerierTable {
|
||||||
/// Create new table.
|
/// Create new table.
|
||||||
pub fn new(
|
pub fn new(
|
||||||
sequencer_ids: Vec<KafkaPartition>,
|
sharder: Arc<JumpHash<Arc<KafkaPartition>>>,
|
||||||
namespace_name: Arc<str>,
|
namespace_name: Arc<str>,
|
||||||
id: TableId,
|
id: TableId,
|
||||||
table_name: Arc<str>,
|
table_name: Arc<str>,
|
||||||
|
@ -95,7 +96,7 @@ impl QuerierTable {
|
||||||
);
|
);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
sequencer_ids,
|
sharder,
|
||||||
namespace_name,
|
namespace_name,
|
||||||
table_name,
|
table_name,
|
||||||
id,
|
id,
|
||||||
|
@ -189,10 +190,18 @@ impl QuerierTable {
|
||||||
.map(|(_, f)| f.name().to_string())
|
.map(|(_, f)| f.name().to_string())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// get any chunks from the ingester
|
// Get the sequencer IDs responsible for this table's data from the sharder to
|
||||||
|
// determine which ingester(s) to query.
|
||||||
|
// Currently, the sharder will only return one sequencer ID per table, but in the
|
||||||
|
// near future, the sharder might return more than one sequencer ID for one table.
|
||||||
|
let sequencer_ids = vec![**self
|
||||||
|
.sharder
|
||||||
|
.shard_for_query(&self.table_name, &self.namespace_name)];
|
||||||
|
|
||||||
|
// get any chunks from the ingester(s)
|
||||||
let partitions_result = ingester_connection
|
let partitions_result = ingester_connection
|
||||||
.partitions(
|
.partitions(
|
||||||
&self.sequencer_ids,
|
&sequencer_ids,
|
||||||
Arc::clone(&self.namespace_name),
|
Arc::clone(&self.namespace_name),
|
||||||
Arc::clone(&self.table_name),
|
Arc::clone(&self.table_name),
|
||||||
columns,
|
columns,
|
||||||
|
|
|
@ -10,6 +10,7 @@ use iox_tests::util::{TestCatalog, TestPartition, TestSequencer, TestTable};
|
||||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||||
use parquet_file::storage::ParquetStorage;
|
use parquet_file::storage::ParquetStorage;
|
||||||
use schema::{selection::Selection, sort::SortKey, Schema};
|
use schema::{selection::Selection, sort::SortKey, Schema};
|
||||||
|
use sharder::JumpHash;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
/// Create a [`QuerierTable`] for testing.
|
/// Create a [`QuerierTable`] for testing.
|
||||||
|
@ -37,7 +38,7 @@ pub async fn querier_table(catalog: &Arc<TestCatalog>, table: &Arc<TestTable>) -
|
||||||
let namespace_name = Arc::from(table.namespace.namespace.name.as_str());
|
let namespace_name = Arc::from(table.namespace.namespace.name.as_str());
|
||||||
|
|
||||||
QuerierTable::new(
|
QuerierTable::new(
|
||||||
vec![KafkaPartition::new(0)],
|
Arc::new(JumpHash::new((0..1).map(KafkaPartition::new).map(Arc::new)).unwrap()),
|
||||||
namespace_name,
|
namespace_name,
|
||||||
table.table.id,
|
table.table.id,
|
||||||
table.table.name.clone().into(),
|
table.table.name.clone().into(),
|
||||||
|
|
Loading…
Reference in New Issue