refactor: Move sharding query from namespace to table

Better supports future work. Fixes #5003.
pull/24376/head
Carol (Nichols || Goulding) 2022-06-30 10:35:27 -04:00
parent be53716e4d
commit 380166f4c0
No known key found for this signature in database
GPG Key ID: E907EE5A736F87D4
3 changed files with 25 additions and 16 deletions

View File

@ -19,10 +19,13 @@ mod test_util;
/// Maps a catalog namespace to all the in-memory resources and sync-state that the querier needs. /// Maps a catalog namespace to all the in-memory resources and sync-state that the querier needs.
/// ///
/// # Data Structures & Sync /// # Data Structures & Sync
/// Tables and schemas are created when [`QuerierNamespace`] is created because DataFusion does not implement async
/// schema inspection. The actual payload (chunks and tombstones) are only queried on demand.
/// ///
/// Most access to the [IOx Catalog](iox_catalog::interface::Catalog) are cached via [`CatalogCache`]. /// Tables and schemas are created when [`QuerierNamespace`] is created because DataFusion does not
/// implement async schema inspection. The actual payload (chunks and tombstones) are only queried
/// on demand.
///
/// Most accesses to the [IOx Catalog](iox_catalog::interface::Catalog) are cached via
/// [`CatalogCache`].
#[derive(Debug)] #[derive(Debug)]
pub struct QuerierNamespace { pub struct QuerierNamespace {
/// ID of this namespace. /// ID of this namespace.
@ -37,7 +40,7 @@ pub struct QuerierNamespace {
/// Executor for queries. /// Executor for queries.
exec: Arc<Executor>, exec: Arc<Executor>,
/// Catalog cache /// Catalog cache.
catalog_cache: Arc<CatalogCache>, catalog_cache: Arc<CatalogCache>,
/// Query log. /// Query log.
@ -63,12 +66,8 @@ impl QuerierNamespace {
let id = table_schema.id; let id = table_schema.id;
let schema = Schema::try_from(table_schema.clone()).expect("cannot build schema"); let schema = Schema::try_from(table_schema.clone()).expect("cannot build schema");
// Currently, the sharder will only return one sequencer ID per table, but in the
// near future, the sharder might return more than one sequencer ID for one table.
let sequencer_ids = vec![**sharder.shard_for_query(&table_name, &name)];
let table = Arc::new(QuerierTable::new( let table = Arc::new(QuerierTable::new(
sequencer_ids, Arc::clone(&sharder),
Arc::clone(&name), Arc::clone(&name),
id, id,
Arc::clone(&table_name), Arc::clone(&table_name),

View File

@ -11,6 +11,7 @@ use iox_query::{provider::ChunkPruner, QueryChunk};
use observability_deps::tracing::debug; use observability_deps::tracing::debug;
use predicate::Predicate; use predicate::Predicate;
use schema::Schema; use schema::Schema;
use sharder::JumpHash;
use snafu::{ResultExt, Snafu}; use snafu::{ResultExt, Snafu};
use std::{ use std::{
collections::{hash_map::Entry, HashMap}, collections::{hash_map::Entry, HashMap},
@ -52,8 +53,8 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Table representation for the querier. /// Table representation for the querier.
#[derive(Debug)] #[derive(Debug)]
pub struct QuerierTable { pub struct QuerierTable {
/// Sequencers responsible for the table's data that need to be queried for unpersisted data /// Sharder to query for which sequencers are responsible for the table's data
sequencer_ids: Vec<KafkaPartition>, sharder: Arc<JumpHash<Arc<KafkaPartition>>>,
/// Namespace the table is in /// Namespace the table is in
namespace_name: Arc<str>, namespace_name: Arc<str>,
@ -80,7 +81,7 @@ pub struct QuerierTable {
impl QuerierTable { impl QuerierTable {
/// Create new table. /// Create new table.
pub fn new( pub fn new(
sequencer_ids: Vec<KafkaPartition>, sharder: Arc<JumpHash<Arc<KafkaPartition>>>,
namespace_name: Arc<str>, namespace_name: Arc<str>,
id: TableId, id: TableId,
table_name: Arc<str>, table_name: Arc<str>,
@ -95,7 +96,7 @@ impl QuerierTable {
); );
Self { Self {
sequencer_ids, sharder,
namespace_name, namespace_name,
table_name, table_name,
id, id,
@ -189,10 +190,18 @@ impl QuerierTable {
.map(|(_, f)| f.name().to_string()) .map(|(_, f)| f.name().to_string())
.collect(); .collect();
// get any chunks from the ingester // Get the sequencer IDs responsible for this table's data from the sharder to
// determine which ingester(s) to query.
// Currently, the sharder will only return one sequencer ID per table, but in the
// near future, the sharder might return more than one sequencer ID for one table.
let sequencer_ids = vec![**self
.sharder
.shard_for_query(&self.table_name, &self.namespace_name)];
// get any chunks from the ingester(s)
let partitions_result = ingester_connection let partitions_result = ingester_connection
.partitions( .partitions(
&self.sequencer_ids, &sequencer_ids,
Arc::clone(&self.namespace_name), Arc::clone(&self.namespace_name),
Arc::clone(&self.table_name), Arc::clone(&self.table_name),
columns, columns,

View File

@ -10,6 +10,7 @@ use iox_tests::util::{TestCatalog, TestPartition, TestSequencer, TestTable};
use mutable_batch_lp::test_helpers::lp_to_mutable_batch; use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use parquet_file::storage::ParquetStorage; use parquet_file::storage::ParquetStorage;
use schema::{selection::Selection, sort::SortKey, Schema}; use schema::{selection::Selection, sort::SortKey, Schema};
use sharder::JumpHash;
use std::sync::Arc; use std::sync::Arc;
/// Create a [`QuerierTable`] for testing. /// Create a [`QuerierTable`] for testing.
@ -37,7 +38,7 @@ pub async fn querier_table(catalog: &Arc<TestCatalog>, table: &Arc<TestTable>) -
let namespace_name = Arc::from(table.namespace.namespace.name.as_str()); let namespace_name = Arc::from(table.namespace.namespace.name.as_str());
QuerierTable::new( QuerierTable::new(
vec![KafkaPartition::new(0)], Arc::new(JumpHash::new((0..1).map(KafkaPartition::new).map(Arc::new)).unwrap()),
namespace_name, namespace_name,
table.table.id, table.table.id,
table.table.name.clone().into(), table.table.name.clone().into(),