refactor: remove gen1 duration refs

praveen/avoid-oom-snapshot
Praveen Kumar 2025-03-11 15:23:24 +00:00
parent d6275d8955
commit d493deb9ed
No known key found for this signature in database
GPG Key ID: CB9E05780A79EA5A
3 changed files with 36 additions and 25 deletions


@@ -224,7 +224,6 @@ impl WriteBufferImpl {
distinct_cache_provider: Arc::clone(&distinct_cache),
persisted_files: Arc::clone(&persisted_files),
parquet_cache: parquet_cache.clone(),
- gen1_duration: wal_config.gen1_duration,
max_size_per_parquet_file_bytes: max_memory_for_snapshot_bytes,
}));


@@ -21,8 +21,8 @@ use influxdb3_cache::{distinct_cache::DistinctCacheProvider, last_cache::LastCac
use influxdb3_catalog::catalog::{Catalog, DatabaseSchema, TableDefinition};
use influxdb3_id::{DbId, TableId};
use influxdb3_wal::{
- CatalogOp, Gen1Duration, SnapshotDetails, WalContents, WalFileNotifier, WalFileSequenceNumber,
- WalOp, WriteBatch,
+ CatalogOp, SnapshotDetails, WalContents, WalFileNotifier, WalFileSequenceNumber, WalOp,
+ WriteBatch,
};
use iox_query::QueryChunk;
use iox_query::chunk_statistics::{NoColumnRanges, create_chunk_statistics};
@@ -58,7 +58,7 @@ pub struct QueryableBuffer {
/// Sends a notification to this watch channel whenever a snapshot info is persisted
persisted_snapshot_notify_rx: tokio::sync::watch::Receiver<Option<PersistedSnapshot>>,
persisted_snapshot_notify_tx: tokio::sync::watch::Sender<Option<PersistedSnapshot>>,
- gen1_duration: Gen1Duration,
+ buffer_chunk_interval: Duration,
max_size_per_parquet_file_bytes: u64,
}
@@ -71,7 +71,6 @@ pub struct QueryableBufferArgs {
pub distinct_cache_provider: Arc<DistinctCacheProvider>,
pub persisted_files: Arc<PersistedFiles>,
pub parquet_cache: Option<Arc<dyn ParquetCacheOracle>>,
- pub gen1_duration: Gen1Duration,
pub max_size_per_parquet_file_bytes: u64,
}
@@ -85,11 +84,14 @@ impl QueryableBuffer {
distinct_cache_provider,
persisted_files,
parquet_cache,
- gen1_duration,
max_size_per_parquet_file_bytes,
}: QueryableBufferArgs,
) -> Self {
- let buffer = Arc::new(RwLock::new(BufferState::new(Arc::clone(&catalog))));
+ let buffer_chunk_interval = Duration::from_secs(60);
+ let buffer = Arc::new(RwLock::new(BufferState::new(
+ Arc::clone(&catalog),
+ buffer_chunk_interval,
+ )));
let (persisted_snapshot_notify_tx, persisted_snapshot_notify_rx) =
tokio::sync::watch::channel(None);
Self {
@@ -103,8 +105,8 @@ impl QueryableBuffer {
parquet_cache,
persisted_snapshot_notify_rx,
persisted_snapshot_notify_tx,
- gen1_duration,
max_size_per_parquet_file_bytes,
+ buffer_chunk_interval,
}
}
@@ -268,8 +270,8 @@ impl QueryableBuffer {
let catalog = Arc::clone(&self.catalog);
let notify_snapshot_tx = self.persisted_snapshot_notify_tx.clone();
let parquet_cache = self.parquet_cache.clone();
- let gen1_duration = self.gen1_duration;
let max_size_per_parquet_file = self.max_size_per_parquet_file_bytes;
+ let chunk_interval = self.num_chunk_intervals_in_10m();
tokio::spawn(async move {
// persist the catalog if it has been updated
@@ -321,7 +323,7 @@
Arc::from(persister.node_identifier_prefix()),
wal_file_number,
Arc::clone(&catalog),
- gen1_duration.as_10m() as usize,
+ chunk_interval,
Some(max_size_per_parquet_file),
);
@@ -351,7 +353,7 @@
Arc::from(persister.node_identifier_prefix()),
wal_file_number,
Arc::clone(&catalog),
- gen1_duration.as_10m() as usize,
+ chunk_interval,
None,
);
@@ -453,6 +455,16 @@ impl QueryableBuffer {
let buffer = self.buffer.read();
buffer.find_overall_buffer_size_bytes()
}
+ fn num_chunk_intervals_in_10m(&self) -> usize {
+ let ten_mins_secs = 600;
+ let chunk_interval_secs = self.buffer_chunk_interval.as_secs();
+ if chunk_interval_secs >= ten_mins_secs {
+ return 1;
+ }
+ let num_chunks_in_ten_mins = ten_mins_secs / self.buffer_chunk_interval.as_secs();
+ num_chunks_in_ten_mins as usize
+ }
}
async fn sort_dedupe_parallel<I: Iterator<Item = PersistJob>>(
@@ -619,15 +631,17 @@ impl WalFileNotifier for QueryableBuffer {
pub struct BufferState {
pub db_to_table: HashMap<DbId, TableIdToBufferMap>,
catalog: Arc<Catalog>,
+ chunk_interval: Duration,
}
type TableIdToBufferMap = HashMap<TableId, TableBuffer>;
impl BufferState {
- pub fn new(catalog: Arc<Catalog>) -> Self {
+ pub fn new(catalog: Arc<Catalog>, chunk_interval: Duration) -> Self {
Self {
db_to_table: HashMap::new(),
catalog,
+ chunk_interval,
}
}
@@ -741,14 +755,14 @@ impl BufferState {
let database_buffer = self.db_to_table.entry(write_batch.database_id).or_default();
// keep internal query buffer chunks divided by 1m
- let one_min_ns = Duration::from_secs(60).as_nanos() as i64;
+ let one_min_ns = self.chunk_interval.as_nanos() as i64;
for (table_id, table_chunks) in &write_batch.table_chunks {
let table_buffer = database_buffer.entry(*table_id).or_insert_with(|| {
let table_def = db_schema
.table_definition_by_id(table_id)
.expect("table should exist");
- TableBuffer::new(table_def.sort_key())
+ TableBuffer::new(table_def.sort_key(), self.chunk_interval)
});
let mut one_min_groups = HashMap::new();
@@ -1127,7 +1141,6 @@ mod tests {
.unwrap(),
persisted_files: Arc::new(PersistedFiles::new()),
parquet_cache: None,
- gen1_duration: Gen1Duration::new_1m(),
max_size_per_parquet_file_bytes: 4_000,
};
let queryable_buffer = QueryableBuffer::new(queryable_buffer_args);
@@ -1281,7 +1294,6 @@ mod tests {
.unwrap(),
persisted_files: Arc::new(PersistedFiles::new()),
parquet_cache: None,
- gen1_duration: Gen1Duration::new_1m(),
max_size_per_parquet_file_bytes: 50_000,
};
let queryable_buffer = QueryableBuffer::new(queryable_buffer_args);
@@ -1410,7 +1422,6 @@ mod tests {
.unwrap(),
persisted_files: Arc::new(PersistedFiles::new()),
parquet_cache: None,
- gen1_duration: Gen1Duration::new_1m(),
max_size_per_parquet_file_bytes: 2_000,
};
let queryable_buffer = QueryableBuffer::new(queryable_buffer_args);
@@ -1544,7 +1555,6 @@ mod tests {
.unwrap(),
persisted_files: Arc::new(PersistedFiles::new()),
parquet_cache: None,
- gen1_duration: Gen1Duration::new_1m(),
max_size_per_parquet_file_bytes: 150_000,
};
let queryable_buffer = QueryableBuffer::new(queryable_buffer_args);


@@ -14,9 +14,9 @@ use influxdb3_wal::{FieldData, Row};
use observability_deps::tracing::error;
use schema::sort::SortKey;
use schema::{InfluxColumnType, InfluxFieldType, Schema, SchemaBuilder};
- use std::collections::BTreeMap;
use std::mem::size_of;
use std::sync::Arc;
+ use std::{collections::BTreeMap, time::Duration};
use std::{collections::btree_map::Entry, slice::Iter};
use thiserror::Error;
@@ -37,14 +37,16 @@ pub struct TableBuffer {
pub(crate) chunk_time_to_chunks: BTreeMap<i64, MutableTableChunk>,
pub(crate) snapshotting_chunks: Vec<SnapshotChunk>,
pub(crate) sort_key: SortKey,
+ pub(crate) _chunk_interval: Duration,
}
impl TableBuffer {
- pub fn new(sort_key: SortKey) -> Self {
+ pub fn new(sort_key: SortKey, chunk_interval: Duration) -> Self {
Self {
chunk_time_to_chunks: BTreeMap::default(),
snapshotting_chunks: Vec::new(),
sort_key,
+ _chunk_interval: chunk_interval,
}
}
@@ -692,7 +694,7 @@ mod tests {
let table_def = writer.db_schema().table_definition("tbl").unwrap();
- let mut table_buffer = TableBuffer::new(SortKey::empty());
+ let mut table_buffer = TableBuffer::new(SortKey::empty(), Duration::from_secs(60));
for (rows, offset) in row_batches {
table_buffer.buffer_chunk(offset, &rows);
}
@@ -742,16 +744,16 @@
0,
);
- let mut table_buffer = TableBuffer::new(SortKey::empty());
+ let mut table_buffer = TableBuffer::new(SortKey::empty(), Duration::from_secs(60));
table_buffer.buffer_chunk(0, &rows);
let size = table_buffer.computed_size();
- assert_eq!(size, 17769);
+ assert_eq!(size, 17785);
}
#[test]
fn timestamp_min_max_works_when_empty() {
- let table_buffer = TableBuffer::new(SortKey::empty());
+ let table_buffer = TableBuffer::new(SortKey::empty(), Duration::from_secs(60));
let timestamp_min_max = table_buffer.timestamp_min_max();
assert_eq!(timestamp_min_max.min, 0);
assert_eq!(timestamp_min_max.max, 0);
@@ -777,7 +779,7 @@
row_batches.push((offset, rows));
}
let table_def = writer.db_schema().table_definition("tbl").unwrap();
- let mut table_buffer = TableBuffer::new(SortKey::empty());
+ let mut table_buffer = TableBuffer::new(SortKey::empty(), Duration::from_secs(60));
for (offset, rows) in row_batches {
table_buffer.buffer_chunk(offset, &rows);