// influxdb/db/src/access.rs

//! This module contains the interface to the Catalog / Chunks used by
//! the query engine
use super::{
catalog::{Catalog, TableNameFilter},
chunk::DbChunk,
query_log::QueryLog,
};
use crate::system_tables;
use async_trait::async_trait;
use data_types::{chunk_metadata::ChunkSummary, partition_metadata::PartitionAddr};
use datafusion::{
catalog::{catalog::CatalogProvider, schema::SchemaProvider},
datasource::TableProvider,
};
use hashbrown::HashMap;
use job_registry::JobRegistry;
use metric::{Attributes, Metric, U64Counter};
use observability_deps::tracing::debug;
use parking_lot::{MappedMutexGuard, Mutex, MutexGuard};
use predicate::predicate::{Predicate, PredicateBuilder};
use query::{
provider::{ChunkPruner, ProviderBuilder},
pruning::{prune_chunks, PruningObserver},
QueryChunk, QueryChunkMeta, QueryDatabase, DEFAULT_SCHEMA,
};
use schema::Schema;
use std::{any::Any, sync::Arc};
use system_tables::{SystemSchemaProvider, SYSTEM_SCHEMA};
use time::TimeProvider;
/// The number of entries to store in the circular query buffer log;
/// once full, the oldest recorded queries are overwritten.
const QUERY_LOG_SIZE: usize = 100;
/// Metrics related to chunk access (pruning specifically).
///
/// Holds the registered metrics plus a lazily-populated per-table
/// cache of recorders (see `table_metrics`).
#[derive(Debug)]
struct AccessMetrics {
    /// The database name, used as the `db_name` metric attribute
    db_name: Arc<str>,
    /// Total number of chunks pruned via statistics
    pruned_chunks: Metric<U64Counter>,
    /// Total number of rows pruned using statistics
    pruned_rows: Metric<U64Counter>,
    /// Per-table counter recorders, keyed by table name
    tables: Mutex<HashMap<Arc<str>, TableAccessMetrics>>,
}
impl AccessMetrics {
    /// Register the chunk/row pruning metrics for database `db_name`
    /// in `registry`.
    fn new(registry: &metric::Registry, db_name: Arc<str>) -> Self {
        let pruned_chunks = registry.register_metric(
            "query_access_pruned_chunks",
            "Number of chunks pruned using metadata",
        );
        let pruned_rows = registry.register_metric(
            "query_access_pruned_rows",
            "Number of rows pruned using metadata",
        );
        Self {
            db_name,
            pruned_chunks,
            pruned_rows,
            // Per-table recorders are created lazily in `table_metrics`
            tables: Default::default(),
        }
    }

    /// Returns the counter recorders for `table`, creating and caching
    /// them (attributed with this database's name and the table name)
    /// on first use.
    ///
    /// The returned guard holds the `tables` mutex until dropped.
    fn table_metrics(&self, table: &Arc<str>) -> MappedMutexGuard<'_, TableAccessMetrics> {
        MutexGuard::map(self.tables.lock(), |tables| {
            // raw_entry_mut lets us look up by borrowed key and only
            // clone the Arc'd table name when inserting a new entry
            let (_, metrics) = tables.raw_entry_mut().from_key(table).or_insert_with(|| {
                let attributes = Attributes::from([
                    ("db_name", self.db_name.to_string().into()),
                    ("table_name", table.to_string().into()),
                ]);
                let metrics = TableAccessMetrics {
                    pruned_chunks: self.pruned_chunks.recorder(attributes.clone()),
                    pruned_rows: self.pruned_rows.recorder(attributes),
                };
                (Arc::clone(table), metrics)
            });
            metrics
        })
    }
}
/// Metrics related to chunk access for a specific table.
///
/// These recorders are pre-attributed with the database and table
/// name (see `AccessMetrics::table_metrics`).
#[derive(Debug)]
struct TableAccessMetrics {
    /// Total number of chunks pruned via statistics
    pruned_chunks: U64Counter,
    /// Total number of rows pruned using statistics
    pruned_rows: U64Counter,
}
/// `QueryCatalogAccess` implements traits that allow the query engine
/// (and DataFusion) to access the contents of the IOx catalog.
///
/// See the `QueryDatabase` and `CatalogProvider` impls below.
#[derive(Debug)]
pub(crate) struct QueryCatalogAccess {
    /// The catalog to have access to
    catalog: Arc<Catalog>,
    /// Handles finding / pruning chunks based on predicates
    chunk_access: Arc<ChunkAccess>,
    /// Stores queries which have been executed
    query_log: Arc<QueryLog>,
    /// Provides access to system tables (the `system` schema)
    system_tables: Arc<SystemSchemaProvider>,
    /// Provides access to "normal" user tables (the default schema)
    user_tables: Arc<DbSchemaProvider>,
}
impl QueryCatalogAccess {
    /// Create query access for the database named `db_name`, backed by
    /// `catalog`.
    ///
    /// Registers pruning metrics in `metric_registry` and sets up a
    /// bounded in-memory query log plus the system/user schema
    /// providers.
    pub fn new(
        db_name: impl Into<String>,
        catalog: Arc<Catalog>,
        jobs: Arc<JobRegistry>,
        time_provider: Arc<dyn TimeProvider>,
        metric_registry: &metric::Registry,
    ) -> Self {
        let db_name: Arc<str> = Arc::from(db_name.into());

        // Chunk lookup/pruning is shared between the query interface
        // and the user-table schema provider below.
        let metrics = AccessMetrics::new(metric_registry, Arc::clone(&db_name));
        let chunk_access = Arc::new(ChunkAccess::new(Arc::clone(&catalog), metrics));

        let query_log = Arc::new(QueryLog::new(QUERY_LOG_SIZE, time_provider));

        let system_tables = Arc::new(SystemSchemaProvider::new(
            db_name.as_ref(),
            Arc::clone(&catalog),
            jobs,
            Arc::clone(&query_log),
        ));
        let user_tables = Arc::new(DbSchemaProvider::new(
            Arc::clone(&catalog),
            Arc::clone(&chunk_access),
        ));

        Self {
            catalog,
            chunk_access,
            query_log,
            system_tables,
            user_tables,
        }
    }
}
/// Encapsulates everything needed to find candidate chunks for
/// queries (including pruning based on metadata).
///
/// Also records pruning metrics via its `PruningObserver` impl.
#[derive(Debug)]
struct ChunkAccess {
    /// The catalog to have access to
    catalog: Arc<Catalog>,
    /// Metrics about query processing (chunks/rows pruned)
    access_metrics: AccessMetrics,
}
impl ChunkAccess {
    /// Create a `ChunkAccess` over `catalog` that reports pruning
    /// activity to `access_metrics`.
    fn new(catalog: Arc<Catalog>, access_metrics: AccessMetrics) -> Self {
        Self {
            catalog,
            access_metrics,
        }
    }

    /// Returns all chunks that may have data that passes the
    /// specified predicates. The chunks are pruned as aggressively as
    /// possible based on metadata.
    fn candidate_chunks(&self, predicate: &Predicate) -> Vec<Arc<DbChunk>> {
        let table_filter: TableNameFilter<'_> = predicate.table_names.as_ref().into();

        // Cheap first pass: restrict by partition key / table name,
        // snapshotting each surviving chunk.
        let candidates = self.catalog.filtered_chunks(
            table_filter,
            predicate.partition_key.as_deref(),
            DbChunk::snapshot,
        );

        // Second pass: prune further using chunk statistics / metadata.
        self.prune_chunks(candidates, predicate)
    }
}
impl ChunkPruner<DbChunk> for ChunkAccess {
    /// Prune `chunks` down to those that may satisfy `predicate`;
    /// outcomes are reported to `self` via the `PruningObserver`
    /// impl below.
    fn prune_chunks(&self, chunks: Vec<Arc<DbChunk>>, predicate: &Predicate) -> Vec<Arc<DbChunk>> {
        // TODO: call "apply_predicate" here too for additional
        // metadata based pruning
        debug!(num_chunks=chunks.len(), %predicate, "Attempting to prune chunks");
        prune_chunks(self, chunks, predicate)
    }
}
impl PruningObserver for ChunkAccess {
    type Observed = DbChunk;

    /// Record that `chunk` was pruned: bump the per-table pruned-chunk
    /// counter and add the chunk's total row count to the pruned-rows
    /// counter.
    fn was_pruned(&self, chunk: &Self::Observed) {
        let metrics = self.access_metrics.table_metrics(chunk.table_name());
        metrics.pruned_chunks.inc(1);
        metrics.pruned_rows.inc(chunk.summary().total_count())
    }

    /// Emit a debug log when a chunk could not be evaluated for
    /// pruning; `reason` is supplied by the pruning logic. No metrics
    /// are recorded for this case.
    fn could_not_prune_chunk(&self, chunk: &Self::Observed, reason: &str) {
        debug!(
            chunk_id=%chunk.id().get(),
            reason,
            "could not prune chunk from query",
        )
    }
}
#[async_trait]
impl QueryDatabase for QueryCatalogAccess {
    type Chunk = DbChunk;

    /// Return a covering set of chunks for a particular partition
    /// (pruned against `predicate` as far as metadata allows).
    fn chunks(&self, predicate: &Predicate) -> Vec<Arc<Self::Chunk>> {
        self.chunk_access.candidate_chunks(predicate)
    }

    /// Addresses of all partitions known to the catalog.
    fn partition_addrs(&self) -> Vec<PartitionAddr> {
        self.catalog.partition_addrs()
    }

    /// Summaries of all chunks known to the catalog.
    fn chunk_summaries(&self) -> Vec<ChunkSummary> {
        self.catalog.chunk_summaries()
    }

    /// Look up the schema for `table_name`; `None` if the catalog
    /// lookup fails (e.g. no such table).
    fn table_schema(&self, table_name: &str) -> Option<Arc<Schema>> {
        self.catalog
            .table(table_name)
            .ok()
            .map(|table| Arc::clone(&table.schema().read()))
    }

    /// Append the query to the in-memory circular query log.
    fn record_query(&self, query_type: impl Into<String>, query_text: impl Into<String>) {
        self.query_log.push(query_type, query_text)
    }
}
// Datafusion catalog provider interface
impl CatalogProvider for QueryCatalogAccess {
fn as_any(&self) -> &dyn Any {
self as &dyn Any
}
fn schema_names(&self) -> Vec<String> {
vec![
DEFAULT_SCHEMA.to_string(),
system_tables::SYSTEM_SCHEMA.to_string(),
]
}
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
match name {
DEFAULT_SCHEMA => Some(Arc::clone(&self.user_tables) as Arc<dyn SchemaProvider>),
SYSTEM_SCHEMA => Some(Arc::clone(&self.system_tables) as Arc<dyn SchemaProvider>),
_ => None,
}
}
}
/// Implement the DataFusion schema provider API.
///
/// Exposes the catalog's user tables to DataFusion (this is the
/// provider returned for `DEFAULT_SCHEMA`).
#[derive(Debug)]
struct DbSchemaProvider {
    /// The catalog to have access to
    catalog: Arc<Catalog>,
    /// Handles finding / pruning chunks based on predicates
    chunk_access: Arc<ChunkAccess>,
}
impl DbSchemaProvider {
fn new(catalog: Arc<Catalog>, chunk_access: Arc<ChunkAccess>) -> Self {
Self {
catalog,
chunk_access,
}
}
}
impl SchemaProvider for DbSchemaProvider {
    fn as_any(&self) -> &dyn Any {
        self as &dyn Any
    }

    /// Names of all user tables in the catalog.
    fn table_names(&self) -> Vec<String> {
        self.catalog.table_names()
    }

    /// Create a table provider for the named table, or `None` if the
    /// catalog lookup fails.
    fn table(&self, table_name: &str) -> Option<Arc<dyn TableProvider>> {
        // Snapshot the table's schema; the catalog lock/guard held by
        // the temporaries is released at the end of this statement.
        let schema = Arc::clone(&self.catalog.table(table_name).ok()?.schema().read());

        let mut builder = ProviderBuilder::new(table_name, schema)
            .add_pruner(Arc::clone(&self.chunk_access) as Arc<dyn ChunkPruner<DbChunk>>);

        // Register every chunk that may hold data for this table.
        let predicate = PredicateBuilder::new().table(table_name).build();
        for chunk in self.chunk_access.candidate_chunks(&predicate) {
            builder = builder.add_chunk(chunk);
        }

        match builder.build() {
            Ok(provider) => Some(Arc::new(provider)),
            Err(e) => panic!("unexpected error: {:?}", e),
        }
    }

    /// True if the catalog knows a table called `name`.
    fn table_exist(&self, name: &str) -> bool {
        self.catalog.table(name).is_ok()
    }
}