fix: ensure ingester state tracked in querier cache is always in-sync (#6512)

Fixes #6510.

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
Marco Neumann 2023-01-10 13:00:05 +01:00 committed by GitHub
parent d0f52c89ae
commit 2bb6db3f37
1 changed file with 61 additions and 68 deletions


@@ -15,7 +15,6 @@ use data_types::{ParquetFile, SequenceNumber, TableId};
use iox_catalog::interface::Catalog;
use iox_time::TimeProvider;
use observability_deps::tracing::debug;
use parking_lot::RwLock;
use snafu::{ResultExt, Snafu};
use std::{collections::HashMap, mem, sync::Arc};
use trace::span::Span;
@@ -34,19 +33,31 @@ pub enum Error {
},
}
type IngesterCounts = Option<Arc<HashMap<Uuid, u64>>>;
/// Holds catalog information about a parquet file
#[derive(Debug)]
pub struct CachedParquetFiles {
/// Parquet catalog information
pub files: Arc<Vec<Arc<ParquetFile>>>,
/// Number of persisted Parquet files per table ID per ingester UUID that ingesters have told
/// us about. When a call to `get` includes a number of persisted Parquet files for this table
/// and a particular ingester UUID that doesn't match what we've previously seen, the cache
/// needs to be expired.
persisted_file_counts_from_ingesters: IngesterCounts,
}
impl CachedParquetFiles {
fn new(parquet_files: Vec<ParquetFile>) -> Self {
fn new(
parquet_files: Vec<ParquetFile>,
persisted_file_counts_from_ingesters: IngesterCounts,
) -> Self {
let files: Vec<_> = parquet_files.into_iter().map(Arc::new).collect();
Self {
files: Arc::new(files),
persisted_file_counts_from_ingesters,
}
}
@@ -64,12 +75,14 @@ impl CachedParquetFiles {
// Note size_of_val is the size of the Arc
// https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=ae8fee8b4f7f5f013dc01ea1fda165da
// size of the Arc itself
// size of the Arc+(Option+HashMap) itself
mem::size_of_val(self) +
// Vec overhead
mem::size_of_val(self.files.as_ref()) +
// size of the underlying parquet files
self.files.iter().map(|f| f.size()).sum::<usize>()
self.files.iter().map(|f| f.size()).sum::<usize>() +
// hashmap data
self.persisted_file_counts_from_ingesters.as_ref().map(|map| std::mem::size_of_val(map.as_ref()) + map.capacity() * mem::size_of::<(Uuid, u64)>()).unwrap_or_default()
}
/// Returns the greatest parquet sequence number stored in this cache entry
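
The extra term accounts for the RAM held by the optional counts map. A standalone sketch of the same estimation, outside the diff, assuming only the standard library plus the uuid crate (the real IOx types are not reproduced here):

use std::{collections::HashMap, mem, sync::Arc};
use uuid::Uuid;

/// Rough RAM estimate for an optional, shared counts map, mirroring the
/// size accounting shown above.
fn counts_map_size(map: &Option<Arc<HashMap<Uuid, u64>>>) -> usize {
    map.as_ref()
        .map(|m| {
            // size of the HashMap header behind the Arc ...
            mem::size_of_val(m.as_ref())
                // ... plus the allocated buckets, approximated as capacity
                // times the size of one (key, value) pair
                + m.capacity() * mem::size_of::<(Uuid, u64)>()
        })
        .unwrap_or_default()
}

The estimate is deliberately coarse (it ignores the hash table's control bytes), which is presumably acceptable for RAM-pool accounting.
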
@@ -82,7 +95,7 @@ type CacheT = Box<
dyn Cache<
K = TableId,
V = Arc<CachedParquetFiles>,
GetExtra = ((), Option<Span>),
GetExtra = (IngesterCounts, Option<Span>),
PeekExtra = ((), Option<Span>),
>,
>;
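
Changing GetExtra from () to IngesterCounts is what lets the counts observed at get() time travel into the loader and end up stored on the cached value itself. A simplified illustration of that pattern (a toy cache, not the cache_system API):

use std::collections::HashMap;
use std::hash::Hash;

// The "extra" value supplied at get() time is handed to the loader only when
// the entry actually has to be loaded, so the loaded value can capture the
// ingester state that triggered the load.
struct MiniCache<K, V, Extra> {
    load: Box<dyn Fn(K, Extra) -> V>,
    entries: HashMap<K, V>,
}

impl<K: Hash + Eq + Clone, V: Clone, Extra> MiniCache<K, V, Extra> {
    fn get(&mut self, key: K, extra: Extra) -> V {
        self.entries
            .entry(key.clone())
            .or_insert_with(|| (self.load)(key, extra))
            .clone()
    }
}
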
@@ -96,12 +109,6 @@ pub struct ParquetFileCache {
/// Handle that allows clearing entries for existing cache entries
remove_if_handle: RemoveIfHandle<TableId, Arc<CachedParquetFiles>>,
/// Number of persisted Parquet files per table ID per ingester UUID that ingesters have told
/// us about. When a call to `get` includes a number of persisted Parquet files for this table
/// and a particular ingester UUID that doesn't match what we've previously seen, the cache
/// needs to be expired.
persisted_file_counts_from_ingesters: RwLock<HashMap<TableId, HashMap<Uuid, u64>>>,
}
impl ParquetFileCache {
@@ -114,35 +121,38 @@ impl ParquetFileCache {
ram_pool: Arc<ResourcePool<RamSize>>,
testing: bool,
) -> Self {
let loader = FunctionLoader::new(move |table_id: TableId, _extra: ()| {
let loader = FunctionLoader::new(move |table_id: TableId, extra: IngesterCounts| {
let catalog = Arc::clone(&catalog);
let backoff_config = backoff_config.clone();
async move {
Backoff::new(&backoff_config)
.retry_all_errors("get parquet_files", || async {
// TODO refreshing all parquet files for the
// entire table is likely to be quite wasteful
// for large tables.
//
// Some of this code could be more efficient:
//
// 1. incrementally fetch only NEW parquet
// files that aren't already in the cache
//
// 2. track time ranges needed for queries and
// limit files fetched to what is actually
// needed
let parquet_files: Vec<_> = catalog
.repositories()
.await
.parquet_files()
.list_by_table_not_to_delete(table_id)
.await
.context(CatalogSnafu)?;
.retry_all_errors("get parquet_files", || {
let extra = extra.clone();
async {
// TODO refreshing all parquet files for the
// entire table is likely to be quite wasteful
// for large tables.
//
// Some of this code could be more efficient:
//
// 1. incrementally fetch only NEW parquet
// files that aren't already in the cache
//
// 2. track time ranges needed for queries and
// limit files fetched to what is actually
// needed
let parquet_files: Vec<_> = catalog
.repositories()
.await
.parquet_files()
.list_by_table_not_to_delete(table_id)
.await
.context(CatalogSnafu)?;
Ok(Arc::new(CachedParquetFiles::new(parquet_files)))
as std::result::Result<_, Error>
Ok(Arc::new(CachedParquetFiles::new(parquet_files, extra)))
as std::result::Result<_, Error>
}
})
.await
.expect("retry forever")
@@ -181,7 +191,6 @@ impl ParquetFileCache {
Self {
cache,
remove_if_handle,
persisted_file_counts_from_ingesters: Default::default(),
}
}
@@ -221,22 +230,13 @@ impl ParquetFileCache {
persisted_file_counts_by_ingester_uuid: Option<HashMap<Uuid, u64>>,
span: Option<Span>,
) -> Arc<CachedParquetFiles> {
// Make a copy of the information we've stored about the ingesters and the number of files
// they've persisted, for the closure to use to decide whether to expire the cache.
let stored_counts = self
.persisted_file_counts_from_ingesters
.read()
.get(&table_id)
.cloned();
// Update the stored information with what we've just seen from the ingesters, if anything.
if let Some(ingester_counts) = &persisted_file_counts_by_ingester_uuid {
self.persisted_file_counts_from_ingesters
.write()
.entry(table_id)
.and_modify(|existing| existing.extend(ingester_counts))
.or_insert_with(|| ingester_counts.clone());
}
let persisted_file_counts_by_ingester_uuid =
persisted_file_counts_by_ingester_uuid.map(|mut map| {
map.shrink_to_fit();
Arc::new(map)
});
let persisted_file_counts_by_ingester_uuid_captured =
persisted_file_counts_by_ingester_uuid.clone();
self.remove_if_handle
.remove_if_and_get(
@@ -263,15 +263,20 @@ impl ParquetFileCache {
);
expire
} else if let Some(ingester_counts) = &persisted_file_counts_by_ingester_uuid {
} else if let Some(ingester_counts) =
&persisted_file_counts_by_ingester_uuid_captured
{
// If there's new or different information about the ingesters or the
// number of files they've persisted, we need to refresh.
new_or_different(stored_counts.as_ref(), ingester_counts)
new_or_different(
cached_file.persisted_file_counts_from_ingesters.as_deref(),
ingester_counts,
)
} else {
false
}
},
((), span),
(persisted_file_counts_by_ingester_uuid, span),
)
.await
}
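
new_or_different only appears by name in this hunk; the comparison it implies, reconstructed here as an assumption rather than a copy of the real helper, is roughly:

use std::collections::HashMap;
use uuid::Uuid;

/// True if the ingesters report a UUID this cache entry has never seen, or a
/// persisted-file count that differs from the one recorded when it was loaded.
fn new_or_different(
    cached: Option<&HashMap<Uuid, u64>>,
    reported: &HashMap<Uuid, u64>,
) -> bool {
    match cached {
        // nothing was recorded at load time: any report is new information
        None => !reported.is_empty(),
        Some(cached) => reported
            .iter()
            .any(|(uuid, count)| cached.get(uuid) != Some(count)),
    }
}

Because the previously seen counts now live on the cached value itself instead of in a separate RwLock-protected map, the state used for this comparison can no longer drift out of sync with the entry it guards, which is the point of the fix.
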
@@ -399,8 +404,8 @@ mod tests {
partition.create_parquet_file(builder).await;
let table_id = table.table.id;
let single_file_size = 216;
let two_file_size = 400;
let single_file_size = 224;
let two_file_size = 408;
assert!(single_file_size < two_file_size);
let cache = make_cache(&catalog);
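
The expected entry sizes in this test each grow by 8 bytes because CachedParquetFiles gains an Option<Arc<HashMap<Uuid, u64>>> field, which is pointer-sized thanks to the null-pointer niche. A quick check of that assumption (64-bit target, uuid crate assumed):

use std::{collections::HashMap, mem, sync::Arc};
use uuid::Uuid;

fn main() {
    // Option<Arc<_>> uses the null-pointer niche, so the new field costs
    // exactly one pointer: 216 + 8 = 224 and 400 + 8 = 408.
    assert_eq!(mem::size_of::<Option<Arc<HashMap<Uuid, u64>>>>(), 8);
}
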
@@ -534,18 +539,6 @@ mod tests {
.get(table_id, None, Some(HashMap::from([(new_uuid, 1)])), None)
.await;
assert_histogram_metric_count(&catalog.metric_registry, METRIC_NAME, 4);
// See the "new" UUID *and* the old UUID with the same counts as seen before: still use the
// cache
cache
.get(
table_id,
None,
Some(HashMap::from([(new_uuid, 1), (uuid, 4)])),
None,
)
.await;
assert_histogram_metric_count(&catalog.metric_registry, METRIC_NAME, 4);
}
#[tokio::test]