chore: merge main to branch

pull/24376/head
Nga Tran 2021-10-12 15:59:57 -04:00
commit 144ce77e39
76 changed files with 2611 additions and 1893 deletions


@ -288,7 +288,8 @@ jobs:
name: buf lint
command: buf lint
# Check that the generated flatbuffers code is up-to-date with the changes in this PR.
# Check that any generated files are up-to-date with the changes in this PR.
# named "check-flatbuffers" because that name is hardcoded into github checks
check-flatbuffers:
docker:
- image: quay.io/influxdb/rust:ci
@ -296,6 +297,9 @@ jobs:
steps:
- checkout
- rust_components # Regenerating flatbuffers uses rustfmt
- run:
name: Check Query Tests
command: ./query_tests/check-generated.sh
- run:
name: Check Flatbuffers
command: INFLUXDB_IOX_INTEGRATION_LOCAL=1 ./entry/check-flatbuffers.sh

Cargo.lock generated

@ -467,9 +467,9 @@ checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cache_loader_async"
version = "0.1.1"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c372ea90888b43d6899b58a7831a2f3ea916d95c0042d81255ef4960e1376be"
checksum = "606d302374be324dae8264e59d63952b9d39b5180d85edbfc4a533d4046d5e43"
dependencies = [
"futures",
"thiserror",
@ -814,6 +814,7 @@ dependencies = [
"regex",
"snafu",
"test_helpers",
"time 0.1.0",
"uuid",
]
@ -1017,6 +1018,7 @@ dependencies = [
"ouroboros",
"schema",
"snafu",
"time 0.1.0",
]
[[package]]
@ -1269,6 +1271,7 @@ dependencies = [
"regex",
"serde",
"thiserror",
"time 0.1.0",
"tonic",
"tonic-build",
]
@ -1698,6 +1701,7 @@ dependencies = [
"thiserror",
"tikv-jemalloc-ctl",
"tikv-jemalloc-sys",
"time 0.1.0",
"tokio",
"tokio-stream",
"tokio-util",
@ -1799,6 +1803,7 @@ dependencies = [
"futures",
"parking_lot",
"snafu",
"time 0.1.0",
"tokio",
]
@ -1972,6 +1977,7 @@ dependencies = [
"hashbrown 0.11.2",
"internal_types",
"observability_deps",
"time 0.1.0",
"tokio",
"tracker",
]
@ -2713,6 +2719,7 @@ dependencies = [
"tempfile",
"test_helpers",
"thrift",
"time 0.1.0",
"tokio",
"tokio-stream",
"uuid",
@ -2795,12 +2802,12 @@ checksum = "d9978962f8a4b158e97447a6d09d2d75e206d2994eff056c894019f362b27142"
name = "persistence_windows"
version = "0.1.0"
dependencies = [
"chrono",
"data_types",
"internal_types",
"observability_deps",
"snafu",
"test_helpers",
"time 0.1.0",
]
[[package]]
@ -3860,6 +3867,7 @@ dependencies = [
"snafu",
"snap",
"test_helpers",
"time 0.1.0",
"tokio",
"tokio-util",
"trace",
@ -4982,6 +4990,7 @@ dependencies = [
"observability_deps",
"parking_lot",
"rdkafka",
"time 0.1.0",
"tokio",
"uuid",
]


@ -111,6 +111,7 @@ trace_exporters = { path = "trace_exporters" }
trace_http = { path = "trace_http" }
tracker = { path = "tracker" }
trogging = { path = "trogging", default-features = false, features = ["structopt"] }
time = { path = "time" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "5.5", features = ["prettyprint"] }


@ -15,6 +15,7 @@ observability_deps = { path = "../observability_deps" }
percent-encoding = "2.1.0"
regex = "1.4"
snafu = "0.6"
time = { path = "../time" }
uuid = { version = "0.8", features = ["v4"] }
[dev-dependencies] # In alphabetical order


@ -2,8 +2,8 @@
use std::{convert::TryFrom, num::NonZeroU32, sync::Arc};
use bytes::Bytes;
use chrono::{DateTime, Utc};
use snafu::{ResultExt, Snafu};
use time::Time;
use uuid::Uuid;
use crate::partition_metadata::PartitionAddr;
@ -149,17 +149,17 @@ pub struct ChunkSummary {
pub row_count: usize,
/// The time at which the chunk data was accessed, by a query or a write
pub time_of_last_access: Option<DateTime<Utc>>,
pub time_of_last_access: Option<Time>,
/// The earliest time at which data contained within this chunk was written
/// into IOx. Note that, due to compaction etc., this may not be the chunk
/// that the data was originally written into
pub time_of_first_write: DateTime<Utc>,
pub time_of_first_write: Time,
/// The latest time at which data contained within this chunk was written
/// into IOx. Note that, due to compaction etc., this may not be the chunk
/// that the data was originally written into
pub time_of_last_write: DateTime<Utc>,
pub time_of_last_write: Time,
}
/// Represents metadata about the physical storage of a column in a chunk
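Throughout this merge, timestamps on chunk and write metadata move from chrono's `DateTime<Utc>` to the workspace-local `time::Time` type. A minimal sketch of the conversions these hunks rely on; the constructors and the chrono round-trip are taken from call sites in this diff, everything else about the crate is assumed:

```rust
use std::time::Duration;
use time::Time; // the in-repo `time` crate added by this PR, not crates.io `time`

fn main() {
    // Construct from seconds + nanoseconds, or from raw nanoseconds
    let first_write = Time::from_timestamp(20, 0);
    let last_access = Time::from_timestamp_nanos(50_000_000_007);

    // Arithmetic uses std::time::Duration instead of chrono::Duration
    let _later = first_write + Duration::from_secs(10);

    // Round-trip through chrono where protobuf/display code still needs it
    let as_chrono: chrono::DateTime<chrono::Utc> = last_access.date_time();
    assert_eq!(Time::from_date_time(as_chrono), last_access);
}
```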


@ -1,20 +1,20 @@
use crate::partition_metadata::StatValues;
use chrono::{DateTime, Timelike, Utc};
use time::Time;
/// A description of a set of writes
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct WriteSummary {
/// The wall clock timestamp of the first write in this summary
pub time_of_first_write: DateTime<Utc>,
pub time_of_first_write: Time,
/// The wall clock timestamp of the last write in this summary
pub time_of_last_write: DateTime<Utc>,
pub time_of_last_write: Time,
/// The minimum row timestamp for data in this summary
pub min_timestamp: DateTime<Utc>,
pub min_timestamp: Time,
/// The maximum row timestamp value for data in this summary
pub max_timestamp: DateTime<Utc>,
pub max_timestamp: Time,
/// The number of rows in this summary
pub row_count: usize,
@ -62,7 +62,7 @@ impl TimestampSummary {
}
/// Records a timestamp value
pub fn record(&mut self, timestamp: DateTime<Utc>) {
pub fn record(&mut self, timestamp: Time) {
self.counts[timestamp.minute() as usize] += 1;
self.stats.update(&timestamp.timestamp_nanos())
}
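`record` now takes a `Time`, so the type needs the same accessors the chrono value provided. A tiny sketch of those two methods; the concrete values assume `from_timestamp(secs, nanos)` means seconds-plus-nanoseconds since the Unix epoch, which the expected dump output later in this diff supports:

```rust
use time::Time;

fn main() {
    let ts = Time::from_timestamp(3_661, 5); // 1970-01-01T01:01:01.000000005Z
    assert_eq!(ts.minute(), 1);                          // minute-of-hour bucket used by record()
    assert_eq!(ts.timestamp_nanos(), 3_661_000_000_005); // nanosecond value fed to the stats
}
```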


@ -12,6 +12,7 @@ data_types = { path = "../data_types" }
# version of the flatbuffers crate
flatbuffers = "2"
snafu = "0.6"
time = { path = "../time" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
ouroboros = "0.13.0"
schema = { path = "../schema" }


@ -17,6 +17,7 @@ use schema::{
builder::{Error as SchemaBuilderError, SchemaBuilder},
IOxValueType, InfluxColumnType, InfluxFieldType, Schema, TIME_COLUMN_NAME,
};
use time::Time;
use crate::entry_fb;
@ -926,7 +927,7 @@ impl<'a> TableBatch<'a> {
let timestamps = self.timestamps()?;
let mut summary = TimestampSummary::default();
for t in &timestamps {
summary.record(Utc.timestamp_nanos(t))
summary.record(Time::from_timestamp_nanos(t))
}
Ok(summary)
}
@ -1748,13 +1749,13 @@ pub struct SequencedEntry {
///
/// At the time of writing, sequences will not be present when there is no configured mechanism to define the order
/// of all writes.
sequence_and_producer_ts: Option<(Sequence, DateTime<Utc>)>,
sequence_and_producer_ts: Option<(Sequence, Time)>,
}
impl SequencedEntry {
pub fn new_from_sequence(
sequence: Sequence,
producer_wallclock_timestamp: DateTime<Utc>,
producer_wallclock_timestamp: Time,
entry: Entry,
) -> Self {
Self {
@ -1780,7 +1781,7 @@ impl SequencedEntry {
.map(|(sequence, _ts)| sequence)
}
pub fn producer_wallclock_timestamp(&self) -> Option<DateTime<Utc>> {
pub fn producer_wallclock_timestamp(&self) -> Option<Time> {
self.sequence_and_producer_ts
.as_ref()
.map(|(_sequence, ts)| *ts)


@ -15,6 +15,7 @@ regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0.30"
tonic = "0.5"
time = { path = "../time" }
[dev-dependencies]
chrono = { version = "0.4", features = ["serde"] }


@ -9,6 +9,7 @@ use std::{
convert::{TryFrom, TryInto},
sync::Arc,
};
use time::Time;
/// Conversion code to management API chunk structure
impl From<ChunkSummary> for management::Chunk {
@ -37,9 +38,9 @@ impl From<ChunkSummary> for management::Chunk {
memory_bytes: memory_bytes as u64,
object_store_bytes: object_store_bytes as u64,
row_count: row_count as u64,
time_of_last_access: time_of_last_access.map(Into::into),
time_of_first_write: Some(time_of_first_write.into()),
time_of_last_write: Some(time_of_last_write.into()),
time_of_last_access: time_of_last_access.map(|t| t.date_time().into()),
time_of_first_write: Some(time_of_first_write.date_time().into()),
time_of_last_write: Some(time_of_last_write.date_time().into()),
order: order.get(),
}
}
@ -74,10 +75,11 @@ impl TryFrom<management::Chunk> for ChunkSummary {
fn try_from(proto: management::Chunk) -> Result<Self, Self::Error> {
let convert_timestamp = |t: pbjson_types::Timestamp, field: &'static str| {
t.try_into().map_err(|_| FieldViolation {
let date_time = t.try_into().map_err(|_| FieldViolation {
field: field.to_string(),
description: "Timestamp must be positive".to_string(),
})
})?;
Ok(Time::from_date_time(date_time))
};
let timestamp = |t: Option<pbjson_types::Timestamp>, field: &'static str| {
@ -166,12 +168,12 @@ impl TryFrom<management::ChunkLifecycleAction> for Option<ChunkLifecycleAction>
mod test {
use super::*;
use bytes::Bytes;
use chrono::{TimeZone, Utc};
use data_types::chunk_metadata::ChunkOrder;
use time::Time;
#[test]
fn valid_proto_to_summary() {
let now = Utc::now();
let now = Time::from_timestamp(2, 6);
let proto = management::Chunk {
partition_key: "foo".to_string(),
table_name: "bar".to_string(),
@ -182,8 +184,8 @@ mod test {
storage: management::ChunkStorage::ObjectStoreOnly.into(),
lifecycle_action: management::ChunkLifecycleAction::Compacting.into(),
time_of_first_write: Some(now.into()),
time_of_last_write: Some(now.into()),
time_of_first_write: Some(now.date_time().into()),
time_of_last_write: Some(now.date_time().into()),
time_of_last_access: Some(pbjson_types::Timestamp {
seconds: 50,
nanos: 7,
@ -203,7 +205,7 @@ mod test {
lifecycle_action: Some(ChunkLifecycleAction::Compacting),
time_of_first_write: now,
time_of_last_write: now,
time_of_last_access: Some(Utc.timestamp_nanos(50_000_000_007)),
time_of_last_access: Some(Time::from_timestamp_nanos(50_000_000_007)),
order: ChunkOrder::new(5).unwrap(),
};
@ -216,7 +218,7 @@ mod test {
#[test]
fn valid_summary_to_proto() {
let now = Utc::now();
let now = Time::from_timestamp(756, 23);
let summary = ChunkSummary {
partition_key: Arc::from("foo"),
table_name: Arc::from("bar"),
@ -228,7 +230,7 @@ mod test {
lifecycle_action: Some(ChunkLifecycleAction::Persisting),
time_of_first_write: now,
time_of_last_write: now,
time_of_last_access: Some(Utc.timestamp_nanos(12_000_100_007)),
time_of_last_access: Some(Time::from_timestamp_nanos(12_000_100_007)),
order: ChunkOrder::new(5).unwrap(),
};
@ -243,8 +245,8 @@ mod test {
row_count: 321,
storage: management::ChunkStorage::ObjectStoreOnly.into(),
lifecycle_action: management::ChunkLifecycleAction::Persisting.into(),
time_of_first_write: Some(now.into()),
time_of_last_write: Some(now.into()),
time_of_first_write: Some(now.date_time().into()),
time_of_last_write: Some(now.date_time().into()),
time_of_last_access: Some(pbjson_types::Timestamp {
seconds: 12,
nanos: 100_007,
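The conversion pattern above (and in the catalog and metadata code later in this diff) always goes through chrono: `date_time()` on the way out to protobuf, `Time::from_date_time` on the way back in. A condensed sketch of the two directions, assuming `pbjson_types::Timestamp` converts to `DateTime<Utc>` via `TryInto` exactly as the closure above uses it:

```rust
use std::convert::TryInto;
use time::Time;

/// Time -> protobuf timestamp, via chrono
fn to_proto(t: Time) -> pbjson_types::Timestamp {
    t.date_time().into()
}

/// protobuf timestamp -> Time, rejecting values chrono cannot represent
fn from_proto(ts: pbjson_types::Timestamp) -> Result<Time, &'static str> {
    let dt: chrono::DateTime<chrono::Utc> =
        ts.try_into().map_err(|_| "timestamp must be positive")?;
    Ok(Time::from_date_time(dt))
}
```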


@ -6,7 +6,7 @@ edition = "2018"
[dependencies]
bytes = "1.0"
cache_loader_async = {version = "0.1.0", features = ["ttl-cache"] }
cache_loader_async = {version = "0.1.2", features = ["ttl-cache"] }
futures = "0.3"
observability_deps = { path = "../observability_deps" }
paste = "1.0.5"


@ -190,7 +190,7 @@ where
/// Builds a [`CachingConnectionManager`].
pub fn build(self) -> CachingConnectionManager<T> {
let make_client = self.make_client;
let (cache, _) = LoadingCache::with_backing(self.backing, move |connect| async move {
let cache = LoadingCache::with_backing(self.backing, move |connect| async move {
(make_client)(connect)
.await
.map_err(|e| Arc::new(Box::new(e) as _))


@ -10,6 +10,7 @@ readme = "README.md"
chrono = "0.4"
parking_lot = "0.11"
snafu = "0.6"
time = { path = "../time" }
tokio = { version = "1.11", features = ["sync"] }
[dev-dependencies]


@ -1,19 +1,14 @@
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use std::sync::Arc;
use time::{Time, TimeProvider};
/// A struct that allows recording access by a query
#[derive(Debug, Clone)]
pub struct AccessRecorder {
time_provider: Arc<dyn TimeProvider>,
state: Arc<RwLock<AccessMetrics>>,
}
impl Default for AccessRecorder {
fn default() -> Self {
Self::new(Utc::now())
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AccessMetrics {
/// The number of accesses that have been recorded
@ -21,20 +16,22 @@ pub struct AccessMetrics {
/// The time of the last access or if none the
/// time when the `AccessRecorder` was created
pub last_access: DateTime<Utc>,
pub last_access: Time,
}
impl AccessMetrics {
/// Returns the Instant of the last access if any
pub fn last_access(&self) -> Option<DateTime<Utc>> {
/// Returns the time of the last access if any
pub fn last_access(&self) -> Option<Time> {
(self.count > 0).then(|| self.last_access)
}
}
impl AccessRecorder {
/// Creates a new AccessRecorder with the provided creation DateTime
pub fn new(now: DateTime<Utc>) -> Self {
/// Creates a new AccessRecorder
pub fn new(time_provider: Arc<dyn TimeProvider>) -> Self {
let now = time_provider.now();
Self {
time_provider,
state: Arc::new(RwLock::new(AccessMetrics {
count: 0,
last_access: now,
@ -42,18 +39,14 @@ impl AccessRecorder {
}
}
/// Records an access at the given DateTime
pub fn record_access(&self, now: DateTime<Utc>) {
/// Records an access
pub fn record_access(&self) {
let now = self.time_provider.now();
let mut state = self.state.write();
state.last_access = state.last_access.max(now);
state.count += 1;
}
/// Records an access at the current time
pub fn record_access_now(&self) {
self.record_access(Utc::now())
}
/// Gets the access metrics
pub fn get_metrics(&self) -> AccessMetrics {
self.state.read().clone()
@ -63,15 +56,16 @@ impl AccessRecorder {
#[cfg(test)]
mod tests {
use super::*;
use chrono::Duration;
use std::time::Duration;
#[test]
fn test_access() {
let t1 = Utc::now();
let t2 = t1 + Duration::nanoseconds(1);
let t3 = t1 + Duration::nanoseconds(2);
let t1 = Time::from_timestamp(3044, 2);
let t2 = t1 + Duration::from_nanos(1);
let t3 = t1 + Duration::from_nanos(2);
let access_recorder = AccessRecorder::new(t1);
let time = Arc::new(time::MockProvider::new(t1));
let access_recorder = AccessRecorder::new(Arc::<time::MockProvider>::clone(&time));
assert_eq!(
access_recorder.get_metrics(),
@ -81,7 +75,8 @@ mod tests {
}
);
access_recorder.record_access(t3);
time.set(t3);
access_recorder.record_access();
assert_eq!(
access_recorder.get_metrics(),
AccessMetrics {
@ -90,7 +85,8 @@ mod tests {
}
);
access_recorder.record_access(t2);
time.set(t2);
access_recorder.record_access();
assert_eq!(
access_recorder.get_metrics(),
AccessMetrics {
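`AccessRecorder` now pulls "now" from an injected `TimeProvider` instead of being handed a timestamp per call, which is what lets the test above drive it with `time::MockProvider`. A condensed sketch of that pattern; the `internal_types::access` import path is assumed from the `AccessMetrics` import in the lifecycle crate below:

```rust
use std::{sync::Arc, time::Duration};

use internal_types::access::AccessRecorder; // path assumed, same module as AccessMetrics
use time::{MockProvider, Time};

fn main() {
    let t0 = Time::from_timestamp(100, 0);
    let mock = Arc::new(MockProvider::new(t0));
    let recorder = AccessRecorder::new(Arc::<MockProvider>::clone(&mock));

    // Move the mock clock forward, then record; the recorder asks the provider for "now"
    mock.set(t0 + Duration::from_secs(5));
    recorder.record_access();

    let metrics = recorder.get_metrics();
    assert_eq!(metrics.count, 1);
    assert_eq!(metrics.last_access, t0 + Duration::from_secs(5));
}
```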


@ -12,6 +12,7 @@ futures = "0.3"
hashbrown = "0.11"
internal_types = { path = "../internal_types" }
observability_deps = { path = "../observability_deps" }
time = { path = "../time" }
tokio = { version = "1.11", features = ["macros", "time"] }
tracker = { path = "../tracker" }


@ -21,6 +21,7 @@ mod guard;
pub use guard::*;
mod policy;
pub use policy::*;
use time::Time;
/// A trait that encapsulates the database logic that is automated by `LifecyclePolicy`
pub trait LifecycleDb {
@ -81,11 +82,10 @@ pub trait LockablePartition: Sized + std::fmt::Display {
/// Returns None if there is a persistence operation in flight, or
/// if there are no persistable windows.
///
/// `now` is the wall clock time that should be used to compute how long a given
/// write has been present in memory
/// If `force` is `true`, all unpersisted data will be persisted regardless of arrival time
fn prepare_persist(
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
now: DateTime<Utc>,
force: bool,
) -> Option<Self::PersistHandle>;
/// Split and persist chunks.
@ -157,10 +157,10 @@ pub trait LifecyclePartition {
///
/// `now` is the wall clock time that should be used to compute how long a given
/// write has been present in memory
fn persistable_row_count(&self, now: DateTime<Utc>) -> usize;
fn persistable_row_count(&self) -> usize;
/// Returns the age of the oldest unpersisted write
fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>>;
fn minimum_unpersisted_age(&self) -> Option<Time>;
}
/// The lifecycle operates on chunks implementing this trait
@ -175,7 +175,7 @@ pub trait LifecycleChunk {
/// Returns the access metrics for this chunk
fn access_metrics(&self) -> AccessMetrics;
fn time_of_last_write(&self) -> DateTime<Utc>;
fn time_of_last_write(&self) -> Time;
fn addr(&self) -> &ChunkAddr;
@ -188,5 +188,5 @@ pub trait LifecycleChunk {
pub trait PersistHandle {
/// Any unpersisted chunks containing rows with timestamps less than or equal to this
/// must be included in the corresponding `LockablePartition::persist_chunks` call
fn timestamp(&self) -> DateTime<Utc>;
fn timestamp(&self) -> Time;
}
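With the clock owned elsewhere, `LifecyclePartition` implementors no longer receive `now`: they simply report the current persistable row count and the age of their oldest unpersisted write. A toy sketch of the new shape (mirroring the `TestPartition` in the policy tests below, not the real server types):

```rust
use time::Time;

struct ToyPartition {
    persistable_rows: usize,
    oldest_unpersisted: Option<Time>,
}

impl ToyPartition {
    // mirrors LifecyclePartition::persistable_row_count(&self) -> usize
    fn persistable_row_count(&self) -> usize {
        self.persistable_rows
    }

    // mirrors LifecyclePartition::minimum_unpersisted_age(&self) -> Option<Time>
    fn minimum_unpersisted_age(&self) -> Option<Time> {
        self.oldest_unpersisted
    }
}

fn main() {
    let p = ToyPartition {
        persistable_rows: 42,
        oldest_unpersisted: Some(Time::from_timestamp(10, 0)),
    };
    assert_eq!(p.persistable_row_count(), 42);
    assert!(p.minimum_unpersisted_age().is_some());
}
```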


@ -12,6 +12,7 @@ use futures::future::BoxFuture;
use internal_types::access::AccessMetrics;
use observability_deps::tracing::{debug, info, trace, warn};
use std::{convert::TryInto, fmt::Debug};
use time::Time;
use tracker::TaskTracker;
/// Number of seconds to wait before retrying a failed lifecycle action
@ -350,14 +351,14 @@ where
let persistable_age_seconds: u32 = partition
.minimum_unpersisted_age()
.and_then(|minimum_unpersisted_age| {
(now - minimum_unpersisted_age)
(now - minimum_unpersisted_age.date_time())
.num_seconds()
.try_into()
.ok()
})
.unwrap_or_default();
let persistable_row_count = partition.persistable_row_count(now);
let persistable_row_count = partition.persistable_row_count();
debug!(%db_name, %partition,
partition_persist_row_count=persistable_row_count,
rules_persist_row_count=%rules.persist_row_threshold.get(),
@ -379,7 +380,7 @@ where
// Upgrade partition to be able to rotate persistence windows
let mut partition = partition.upgrade();
let persist_handle = match LockablePartition::prepare_persist(&mut partition, now) {
let persist_handle = match LockablePartition::prepare_persist(&mut partition, false) {
Some(x) => x,
None => {
debug!(%db_name, %partition, "no persistable windows or previous outstanding persist");
@ -438,7 +439,7 @@ where
/// The core policy logic
///
/// Returns a future that resolves when this method should be called next
pub fn check_for_work(&mut self, now: DateTime<Utc>) -> BoxFuture<'_, ()> {
pub fn check_for_work(&mut self, now: DateTime<Utc>) -> BoxFuture<'static, ()> {
// Any time-consuming work should be spawned as tokio tasks and not
// run directly within this loop
@ -581,7 +582,8 @@ fn can_move<C: LifecycleChunk>(rules: &LifecycleRules, chunk: &C, now: DateTime<
return true;
}
elapsed_seconds(now, chunk.time_of_last_write()) >= rules.late_arrive_window_seconds.get()
elapsed_seconds(now, chunk.time_of_last_write().date_time())
>= rules.late_arrive_window_seconds.get()
}
/// An action to free up memory
@ -624,7 +626,7 @@ fn sort_free_candidates<P>(candidates: &mut Vec<FreeCandidate<'_, P>>) {
/// job that is already running).
pub fn select_persistable_chunks<P, D>(
chunks: &[D],
flush_ts: DateTime<Utc>,
flush_ts: Time,
) -> Result<Vec<LifecycleWriteGuard<'_, P, D>>, bool>
where
D: LockableChunk<Chunk = P>,
@ -655,7 +657,7 @@ where
// Chunk's data is entirely after the time we are flushing
// up to, and thus there is no reason to include it in the
// plan
if chunk.min_timestamp() > flush_ts {
if chunk.min_timestamp() > flush_ts.date_time() {
// Ignore chunk for now, but we might need it later to close chunk order gaps
debug!(
chunk=%chunk.addr(),
@ -725,8 +727,8 @@ mod tests {
struct TestPartition {
chunks: BTreeMap<ChunkId, (ChunkOrder, Arc<RwLock<TestChunk>>)>,
persistable_row_count: usize,
minimum_unpersisted_age: Option<DateTime<Utc>>,
max_persistable_timestamp: Option<DateTime<Utc>>,
minimum_unpersisted_age: Option<Time>,
max_persistable_timestamp: Option<Time>,
next_id: u128,
}
@ -734,8 +736,8 @@ mod tests {
fn with_persistence(
self,
persistable_row_count: usize,
minimum_unpersisted_age: DateTime<Utc>,
max_persistable_timestamp: DateTime<Utc>,
minimum_unpersisted_age: Time,
max_persistable_timestamp: Time,
) -> Self {
Self {
chunks: self.chunks,
@ -753,7 +755,7 @@ mod tests {
row_count: usize,
min_timestamp: Option<DateTime<Utc>>,
access_metrics: AccessMetrics,
time_of_last_write: DateTime<Utc>,
time_of_last_write: Time,
lifecycle_action: Option<TaskTracker<ChunkLifecycleAction>>,
storage: ChunkStorage,
order: ChunkOrder,
@ -774,9 +776,9 @@ mod tests {
min_timestamp: None,
access_metrics: AccessMetrics {
count: 0,
last_access: Utc::now(),
last_access: Time::from_timestamp(0, 0),
},
time_of_last_write: from_secs(time_of_last_write),
time_of_last_write: Time::from_timestamp(time_of_last_write, 0),
lifecycle_action: None,
storage,
order: ChunkOrder::MIN,
@ -831,11 +833,11 @@ mod tests {
#[derive(Debug)]
struct TestPersistHandle {
timestamp: DateTime<Utc>,
timestamp: Time,
}
impl PersistHandle for TestPersistHandle {
fn timestamp(&self) -> DateTime<Utc> {
fn timestamp(&self) -> Time {
self.timestamp
}
}
@ -920,7 +922,7 @@ mod tests {
fn prepare_persist(
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
_now: DateTime<Utc>,
_force: bool,
) -> Option<Self::PersistHandle> {
Some(TestPersistHandle {
timestamp: partition.max_persistable_timestamp.unwrap(),
@ -942,8 +944,9 @@ mod tests {
partition.next_id += 1;
// The remainder left behind after the split
let new_chunk = TestChunk::new(id, 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(handle.timestamp + chrono::Duration::nanoseconds(1));
let new_chunk = TestChunk::new(id, 0, ChunkStorage::ReadBuffer).with_min_timestamp(
handle.timestamp.date_time() + chrono::Duration::nanoseconds(1),
);
partition
.chunks
@ -1013,11 +1016,11 @@ mod tests {
false
}
fn persistable_row_count(&self, _now: DateTime<Utc>) -> usize {
fn persistable_row_count(&self) -> usize {
self.persistable_row_count
}
fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>> {
fn minimum_unpersisted_age(&self) -> Option<Time> {
self.minimum_unpersisted_age
}
}
@ -1039,7 +1042,7 @@ mod tests {
self.access_metrics.clone()
}
fn time_of_last_write(&self) -> DateTime<Utc> {
fn time_of_last_write(&self) -> Time {
self.time_of_last_write
}
@ -1182,10 +1185,10 @@ mod tests {
#[test]
fn test_sort_free_candidates() {
let now = Utc::now();
let access_metrics = |secs: i64| AccessMetrics {
let now = Time::from_timestamp_nanos(0);
let access_metrics = |secs: u64| AccessMetrics {
count: 1,
last_access: now + chrono::Duration::seconds(secs),
last_access: now + Duration::from_secs(secs),
};
let mut candidates = vec![
@ -1375,8 +1378,6 @@ mod tests {
lifecycle.check_for_work(from_secs(10));
assert_eq!(*db.events.read(), vec![]);
let now = Utc::now();
let chunks = vec![
// two "open" chunks => they must not be dropped (yet)
TestChunk::new(ChunkId::new_test(0), 0, ChunkStorage::OpenMutableBuffer),
@ -1394,7 +1395,7 @@ mod tests {
)
.with_access_metrics(AccessMetrics {
count: 1,
last_access: now,
last_access: Time::from_timestamp(5, 0),
}),
// "written" chunk => can be unloaded
TestChunk::new(
@ -1404,7 +1405,7 @@ mod tests {
)
.with_access_metrics(AccessMetrics {
count: 12,
last_access: now - chrono::Duration::seconds(1),
last_access: Time::from_timestamp(4, 0),
}),
];
@ -1697,6 +1698,7 @@ mod tests {
..Default::default()
};
let now = from_secs(0);
let time_now = Time::from_date_time(now);
let partitions = vec![
// Insufficient rows and not old enough => don't persist but can compact
@ -1706,7 +1708,7 @@ mod tests {
TestChunk::new(ChunkId::new_test(1), 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(10, now, from_secs(20)),
.with_persistence(10, time_now, Time::from_timestamp(20, 0)),
// Sufficient rows => persist
TestPartition::new(vec![
TestChunk::new(ChunkId::new_test(2), 0, ChunkStorage::ClosedMutableBuffer)
@ -1714,7 +1716,7 @@ mod tests {
TestChunk::new(ChunkId::new_test(3), 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(1_000, now, from_secs(20)),
.with_persistence(1_000, time_now, Time::from_timestamp(20, 0)),
// Writes too old => persist
TestPartition::new(vec![
// Should split open chunks
@ -1725,7 +1727,11 @@ mod tests {
TestChunk::new(ChunkId::new_test(6), 0, ChunkStorage::ObjectStoreOnly)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(10, now - chrono::Duration::seconds(10), from_secs(20)),
.with_persistence(
10,
time_now - Duration::from_secs(10),
Time::from_timestamp(20, 0),
),
// Sufficient rows but conflicting compaction => prevent compaction
TestPartition::new(vec![
TestChunk::new(ChunkId::new_test(7), 0, ChunkStorage::ClosedMutableBuffer)
@ -1737,7 +1743,7 @@ mod tests {
TestChunk::new(ChunkId::new_test(9), 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(1_000, now, from_secs(20)),
.with_persistence(1_000, time_now, Time::from_timestamp(20, 0)),
// Sufficient rows and non-conflicting compaction => persist
TestPartition::new(vec![
TestChunk::new(ChunkId::new_test(10), 0, ChunkStorage::ClosedMutableBuffer)
@ -1748,7 +1754,7 @@ mod tests {
TestChunk::new(ChunkId::new_test(12), 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(1_000, now, from_secs(20)),
.with_persistence(1_000, time_now, Time::from_timestamp(20, 0)),
// Sufficient rows, non-conflicting compaction and compact-able chunk => persist + compact
TestPartition::new(vec![
TestChunk::new(ChunkId::new_test(13), 0, ChunkStorage::ClosedMutableBuffer)
@ -1762,7 +1768,7 @@ mod tests {
TestChunk::new(ChunkId::new_test(16), 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(1_000, now, from_secs(20)),
.with_persistence(1_000, time_now, Time::from_timestamp(20, 0)),
// Checks that we include chunks in a closed "order"-based interval.
// Note that the chunks here are ordered in reverse to check if the lifecycle policy really uses the chunk
// order during iteration.
@ -1783,7 +1789,7 @@ mod tests {
.with_min_timestamp(from_secs(25))
.with_order(ChunkOrder::new(1).unwrap()),
])
.with_persistence(1_000, now, from_secs(20)),
.with_persistence(1_000, time_now, Time::from_timestamp(20, 0)),
];
let db = TestDb::from_partitions(rules, partitions);
@ -1823,14 +1829,15 @@ mod tests {
..Default::default()
};
let now = Utc::now();
let time_now = Time::from_date_time(now);
// This could occur if the in-memory contents of a partition are deleted, and
// compaction causes the chunks to be removed. In such a scenario the persistence
// windows will still think there are rows to be persisted
let partitions = vec![TestPartition::new(vec![]).with_persistence(
10,
now - chrono::Duration::seconds(20),
from_secs(20),
time_now - Duration::from_secs(20),
Time::from_timestamp(20, 0),
)];
let db = TestDb::from_partitions(rules, partitions);
@ -1851,6 +1858,7 @@ mod tests {
..Default::default()
};
let now = Utc::now();
let time_now = Time::from_date_time(now);
let partitions = vec![
// Sufficient rows => could persist but should be suppressed
@ -1860,7 +1868,7 @@ mod tests {
TestChunk::new(ChunkId::new_test(3), 0, ChunkStorage::ReadBuffer)
.with_min_timestamp(from_secs(5)),
])
.with_persistence(1_000, now, from_secs(20)),
.with_persistence(1_000, time_now, Time::from_timestamp(20, 0)),
];
let db = TestDb::from_partitions(rules, partitions);
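The policy itself still receives a chrono `now` in `check_for_work`, while partitions now report `Time`, so the persistable-age hunk near the top of this file's diff converts through `date_time()`. That calculation in isolation, with the same saturating fallback the hunk uses:

```rust
use std::convert::TryInto;

use chrono::{DateTime, Utc};
use time::Time;

/// Seconds the oldest unpersisted write has been in memory, or 0 if the
/// difference is negative or does not fit in a u32 (matches `unwrap_or_default` above)
fn persistable_age_seconds(now: DateTime<Utc>, minimum_unpersisted_age: Time) -> u32 {
    (now - minimum_unpersisted_age.date_time())
        .num_seconds()
        .try_into()
        .unwrap_or_default()
}

fn main() {
    let age = Time::from_timestamp(0, 0);
    let now = age.date_time() + chrono::Duration::seconds(90);
    assert_eq!(persistable_age_seconds(now, age), 90);
}
```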


@ -29,6 +29,7 @@ snafu = "0.6"
schema = { path = "../schema" }
tempfile = "3.1.0"
thrift = "0.13"
time = { path = "../time" }
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }


@ -61,14 +61,11 @@ pub async fn get_unreferenced_parquet_files(
let iox_object_store = catalog.iox_object_store();
let all_known = {
// replay catalog transactions to track ALL (even dropped) files that are referenced
let (_catalog, state) = PreservedCatalog::load::<TracerCatalogState>(
db_name,
Arc::clone(&iox_object_store),
(),
)
.await
.context(CatalogLoadError)?
.expect("catalog gone while reading it?");
let (_catalog, state) =
PreservedCatalog::load::<TracerCatalogState>(db_name, catalog.config(), ())
.await
.context(CatalogLoadError)?
.expect("catalog gone while reading it?");
state.files.into_inner()
};
@ -165,16 +162,16 @@ mod tests {
use super::*;
use crate::{
catalog::test_helpers::{new_empty, DB_NAME},
test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
test_utils::{chunk_addr, make_config, make_metadata, TestSize},
};
use std::{collections::HashSet, sync::Arc};
use tokio::sync::RwLock;
#[tokio::test]
async fn test_cleanup_empty() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config).await;
// run clean-up
let files = get_unreferenced_parquet_files(DB_NAME, &catalog, 1_000)
@ -185,9 +182,10 @@ mod tests {
#[tokio::test]
async fn test_cleanup_rules() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config.clone()).await;
// create some data
let mut paths_keep = vec![];
@ -197,7 +195,7 @@ mod tests {
// an ordinary tracked parquet file => keep
let (path, metadata) =
make_metadata(&iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
let metadata = Arc::new(metadata);
let info = CatalogParquetInfo {
path,
@ -211,7 +209,7 @@ mod tests {
// another ordinary tracked parquet file that was added and removed => keep (for time
// travel)
let (path, metadata) =
make_metadata(&iox_object_store, "foo", chunk_addr(2), TestSize::Full).await;
make_metadata(iox_object_store, "foo", chunk_addr(2), TestSize::Full).await;
let metadata = Arc::new(metadata);
let info = CatalogParquetInfo {
path,
@ -224,7 +222,7 @@ mod tests {
// an untracked parquet file => delete
let (path, _md) =
make_metadata(&iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
paths_delete.push(path);
transaction.commit().await.unwrap();
@ -240,7 +238,7 @@ mod tests {
delete_files(&catalog, &files).await.unwrap();
// list all files
let all_files = list_all_files(&iox_object_store).await;
let all_files = list_all_files(iox_object_store).await;
for p in paths_keep {
assert!(dbg!(&all_files).contains(dbg!(&p)));
}
@ -251,10 +249,11 @@ mod tests {
#[tokio::test]
async fn test_cleanup_with_parallel_transaction() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let lock: RwLock<()> = Default::default();
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config.clone()).await;
// try multiple times to provoke a conflict
for i in 0..100 {
@ -262,15 +261,14 @@ mod tests {
// not trick the cleanup logic to remove the actual file because file paths contains a
// UUIDv4 part.
if i % 2 == 0 {
make_metadata(&iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
}
let (path, _) = tokio::join!(
async {
let guard = lock.read().await;
let (path, md) =
make_metadata(&iox_object_store, "foo", chunk_addr(i), TestSize::Full)
.await;
make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
let metadata = Arc::new(md);
let info = CatalogParquetInfo {
@ -298,22 +296,23 @@ mod tests {
},
);
let all_files = list_all_files(&iox_object_store).await;
let all_files = list_all_files(iox_object_store).await;
assert!(dbg!(all_files).contains(dbg!(&path)));
}
}
#[tokio::test]
async fn test_cleanup_max_files() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config.clone()).await;
// create some files
let mut to_remove = HashSet::default();
for chunk_id in 0..3 {
let (path, _md) = make_metadata(
&iox_object_store,
iox_object_store,
"foo",
chunk_addr(chunk_id),
TestSize::Full,
@ -330,7 +329,7 @@ mod tests {
delete_files(&catalog, &files).await.unwrap();
// should only delete 2
let all_files = list_all_files(&iox_object_store).await;
let all_files = list_all_files(iox_object_store).await;
let leftover: HashSet<_> = all_files.intersection(&to_remove).collect();
assert_eq!(leftover.len(), 1);
@ -342,7 +341,7 @@ mod tests {
delete_files(&catalog, &files).await.unwrap();
// should delete remaining file
let all_files = list_all_files(&iox_object_store).await;
let all_files = list_all_files(iox_object_store).await;
let leftover: HashSet<_> = all_files.intersection(&to_remove).collect();
assert_eq!(leftover.len(), 0);
}

File diff suppressed because it is too large.


@ -225,30 +225,31 @@ mod tests {
interface::CatalogParquetInfo,
test_helpers::{TestCatalogState, DB_NAME},
},
test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
test_utils::{chunk_addr, make_config, make_metadata, TestSize},
};
use chrono::{TimeZone, Utc};
use time::Time;
use uuid::Uuid;
#[tokio::test]
async fn test_dump_default_options() {
let iox_object_store = make_iox_object_store().await;
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(10, 20)));
let config = make_config()
.await
.with_fixed_uuid(Uuid::nil())
.with_time_provider(time_provider);
let iox_object_store = &config.iox_object_store;
// build catalog with some data
let (catalog, _state) = PreservedCatalog::new_empty_for_testing::<TestCatalogState>(
DB_NAME,
Arc::clone(&iox_object_store),
(),
Uuid::nil(),
Utc.timestamp(10, 20),
)
.await
.unwrap();
let (catalog, _state) =
PreservedCatalog::new_empty::<TestCatalogState>(DB_NAME, config.clone(), ())
.await
.unwrap();
{
let mut transaction = catalog.open_transaction().await;
let (path, metadata) =
make_metadata(&iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
@ -261,7 +262,7 @@ mod tests {
let mut buf = std::io::Cursor::new(Vec::new());
let options = DumpOptions::default();
dump(&iox_object_store, &mut buf, options).await.unwrap();
dump(iox_object_store, &mut buf, options).await.unwrap();
let actual = String::from_utf8(buf.into_inner()).unwrap();
let actual = actual.trim();
@ -352,23 +353,23 @@ File {
#[tokio::test]
async fn test_dump_show_parsed_data() {
let iox_object_store = make_iox_object_store().await;
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(10, 20)));
let config = make_config()
.await
.with_fixed_uuid(Uuid::nil())
.with_time_provider(time_provider);
let iox_object_store = &config.iox_object_store;
// build catalog with some data
let (catalog, _state) = PreservedCatalog::new_empty_for_testing::<TestCatalogState>(
DB_NAME,
Arc::clone(&iox_object_store),
(),
Uuid::nil(),
Utc.timestamp(10, 20),
)
.await
.unwrap();
let (catalog, _state) =
PreservedCatalog::new_empty::<TestCatalogState>(DB_NAME, config.clone(), ())
.await
.unwrap();
{
let mut transaction = catalog.open_transaction().await;
let (path, metadata) =
make_metadata(&iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
@ -386,7 +387,7 @@ File {
show_statistics: true,
..Default::default()
};
dump(&iox_object_store, &mut buf, options).await.unwrap();
dump(iox_object_store, &mut buf, options).await.unwrap();
let actual = String::from_utf8(buf.into_inner()).unwrap();
let actual = actual.trim();
@ -463,9 +464,9 @@ File {
Metadata {
iox_metadata: Ok(
IoxMetadata {
creation_timestamp: 1970-01-01T00:00:10.000000020Z,
time_of_first_write: 1970-01-01T00:00:30.000000040Z,
time_of_last_write: 1970-01-01T00:00:50.000000060Z,
creation_timestamp: 1970-01-01T00:00:10.000000020+00:00,
time_of_first_write: 1970-01-01T00:00:30.000000040+00:00,
time_of_last_write: 1970-01-01T00:00:50.000000060+00:00,
table_name: "table1",
partition_key: "part1",
chunk_id: ChunkId(
@ -486,7 +487,7 @@ File {
max: 28,
},
},
flush_timestamp: 1970-01-01T00:00:10.000000020Z,
flush_timestamp: 1970-01-01T00:00:10.000000020+00:00,
},
database_checkpoint: DatabaseCheckpoint {
sequencer_numbers: {
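The expected output above only stays stable because the test pins both sources of nondeterminism: the transaction UUID and the clock. That is what the new `PreservedCatalogConfig` builder calls in this test do; condensed here as a sketch, with every name taken from the hunks above (this is test-only setup, not production code):

```rust
use std::sync::Arc;
use time::Time;
use uuid::Uuid;

// inside the async test, before building the catalog
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(10, 20)));
let config = make_config()
    .await
    .with_fixed_uuid(Uuid::nil())       // deterministic transaction UUIDs
    .with_time_provider(time_provider); // deterministic creation/flush timestamps

let (catalog, _state) =
    PreservedCatalog::new_empty::<TestCatalogState>(DB_NAME, config.clone(), ())
        .await
        .unwrap();
```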


@ -1,10 +1,10 @@
use std::{convert::TryInto, num::TryFromIntError};
use chrono::{DateTime, Utc};
use generated_types::influxdata::iox::catalog::v1 as proto;
use iox_object_store::{ParquetFilePath, ParquetFilePathParseError};
use object_store::path::{parsed::DirsAndFileName, parts::PathPart};
use snafu::{OptionExt, ResultExt, Snafu};
use time::Time;
use uuid::Uuid;
#[derive(Debug, Snafu)]
@ -81,13 +81,11 @@ pub fn unparse_dirs_and_filename(path: &ParquetFilePath) -> proto::Path {
}
/// Parse timestamp from protobuf.
pub fn parse_timestamp(
ts: &Option<generated_types::google::protobuf::Timestamp>,
) -> Result<DateTime<Utc>> {
pub fn parse_timestamp(ts: &Option<generated_types::google::protobuf::Timestamp>) -> Result<Time> {
let ts: generated_types::google::protobuf::Timestamp =
ts.as_ref().context(DateTimeRequired)?.clone();
let ts: DateTime<Utc> = ts.try_into().context(DateTimeParseError)?;
Ok(ts)
let ts = ts.try_into().context(DateTimeParseError)?;
Ok(Time::from_date_time(ts))
}
/// Parse encoding from protobuf.


@ -1,11 +1,11 @@
//! Tooling to remove parts of the preserved catalog that are no longer needed.
use std::{collections::BTreeMap, sync::Arc};
use chrono::{DateTime, Utc};
use futures::TryStreamExt;
use iox_object_store::{IoxObjectStore, TransactionFilePath};
use object_store::{ObjectStore, ObjectStoreApi};
use snafu::{ResultExt, Snafu};
use time::Time;
use crate::catalog::{
core::{ProtoIOError, ProtoParseError},
@ -52,10 +52,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
///
/// This will delete the following content: C1, T2, and T3. C3 and T4 cannot be deleted because they are required to
/// recover T5, which is AFTER `before`.
pub async fn prune_history(
iox_object_store: Arc<IoxObjectStore>,
before: DateTime<Utc>,
) -> Result<()> {
pub async fn prune_history(iox_object_store: Arc<IoxObjectStore>, before: Time) -> Result<()> {
// collect all files so we can quickly filter them later for deletion
// Use a btree-map so we can iterate from oldest to newest revision.
let mut files: BTreeMap<u64, Vec<TransactionFilePath>> = Default::default();
@ -122,7 +119,7 @@ fn is_checkpoint_or_zero(path: &TransactionFilePath) -> bool {
#[cfg(test)]
mod tests {
use chrono::Duration;
use std::time::Duration;
use crate::{
catalog::{
@ -130,7 +127,7 @@ mod tests {
interface::CheckpointData,
test_helpers::{load_ok, new_empty},
},
test_utils::make_iox_object_store,
test_utils::{make_config, make_iox_object_store},
};
use super::*;
@ -139,61 +136,83 @@ mod tests {
async fn test_empty_store() {
let iox_object_store = make_iox_object_store().await;
prune_history(iox_object_store, Utc::now()).await.unwrap();
prune_history(iox_object_store, Time::from_timestamp_nanos(0))
.await
.unwrap();
}
#[tokio::test]
async fn test_do_delete_wipe_last_checkpoint() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
new_empty(&iox_object_store).await;
new_empty(config.clone()).await;
prune_history(Arc::clone(&iox_object_store), Utc::now())
.await
.unwrap();
prune_history(
Arc::clone(&config.iox_object_store),
Time::from_timestamp_nanos(0),
)
.await
.unwrap();
load_ok(&iox_object_store).await.unwrap();
load_ok(config).await.unwrap();
}
#[tokio::test]
async fn test_complex_1() {
let iox_object_store = make_iox_object_store().await;
let time = Arc::new(time::MockProvider::new(Time::from_timestamp(0, 32)));
let config = make_config()
.await
.with_time_provider(Arc::<time::MockProvider>::clone(&time));
let (catalog, _state) = new_empty(&iox_object_store).await;
let iox_object_store = &config.iox_object_store;
let (catalog, _state) = new_empty(config.clone()).await;
create_transaction(&catalog).await;
create_transaction_and_checkpoint(&catalog).await;
let before = Utc::now();
let before = time.inc(Duration::from_secs(21));
time.inc(Duration::from_secs(1));
create_transaction(&catalog).await;
prune_history(Arc::clone(&iox_object_store), before)
prune_history(Arc::clone(iox_object_store), before)
.await
.unwrap();
assert_eq!(
known_revisions(&iox_object_store).await,
known_revisions(iox_object_store).await,
vec![(2, true), (3, false)],
);
}
#[tokio::test]
async fn test_complex_2() {
let iox_object_store = make_iox_object_store().await;
let time = Arc::new(time::MockProvider::new(Time::from_timestamp(0, 32)));
let config = make_config()
.await
.with_time_provider(Arc::<time::MockProvider>::clone(&time));
let iox_object_store = &config.iox_object_store;
let (catalog, _state) = new_empty(config.clone()).await;
let (catalog, _state) = new_empty(&iox_object_store).await;
create_transaction(&catalog).await;
create_transaction_and_checkpoint(&catalog).await;
create_transaction(&catalog).await;
let before = Utc::now();
create_transaction(&catalog).await;
create_transaction_and_checkpoint(&catalog).await;
create_transaction(&catalog).await;
prune_history(Arc::clone(&iox_object_store), before)
let before = time.inc(Duration::from_secs(25));
time.inc(Duration::from_secs(1));
create_transaction(&catalog).await;
create_transaction_and_checkpoint(&catalog).await;
create_transaction(&catalog).await;
prune_history(Arc::clone(iox_object_store), before)
.await
.unwrap();
assert_eq!(
known_revisions(&iox_object_store).await,
known_revisions(iox_object_store).await,
vec![
(2, true),
(3, false),
@ -207,20 +226,21 @@ mod tests {
#[tokio::test]
async fn test_keep_all() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config.clone()).await;
create_transaction(&catalog).await;
create_transaction_and_checkpoint(&catalog).await;
create_transaction(&catalog).await;
let before = Utc::now() - Duration::seconds(1_000);
prune_history(Arc::clone(&iox_object_store), before)
let before = config.time_provider.now() - Duration::from_secs(1_000);
prune_history(Arc::clone(iox_object_store), before)
.await
.unwrap();
assert_eq!(
known_revisions(&iox_object_store).await,
known_revisions(iox_object_store).await,
vec![(0, false), (1, false), (2, false), (2, true), (3, false)],
);
}
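`prune_history` now takes its cutoff as a `Time`, and the tests obtain that cutoff from `MockProvider::inc`, which advances the mock clock and returns the new value. A tiny sketch of that helper, assuming `inc` simply adds the duration to the current mock time (which is what the orderings in the tests above imply):

```rust
use std::{sync::Arc, time::Duration};
use time::{MockProvider, Time};

fn main() {
    let clock = Arc::new(MockProvider::new(Time::from_timestamp(0, 32)));

    // inc() bumps the clock and hands back the new "now"; the tests use the
    // returned value directly as the `before` cutoff for prune_history
    let before = clock.inc(Duration::from_secs(21));
    assert_eq!(before, Time::from_timestamp(21, 32));
}
```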


@ -6,6 +6,7 @@ use iox_object_store::{IoxObjectStore, ParquetFilePath};
use observability_deps::tracing::error;
use snafu::{ResultExt, Snafu};
use crate::catalog::core::PreservedCatalogConfig;
use crate::{
catalog::{
core::PreservedCatalog,
@ -69,7 +70,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
/// `ignore_metadata_read_failure` to `true` to ignore these cases.
pub async fn rebuild_catalog<S>(
db_name: &str,
iox_object_store: Arc<IoxObjectStore>,
config: PreservedCatalogConfig,
catalog_empty_input: S::EmptyInput,
ignore_metadata_read_failure: bool,
) -> Result<(PreservedCatalog, S)>
@ -77,23 +78,20 @@ where
S: CatalogState + Debug + Send + Sync,
{
// collect all revisions from parquet files
let files = collect_files(&iox_object_store, ignore_metadata_read_failure).await?;
let files = collect_files(&config.iox_object_store, ignore_metadata_read_failure).await?;
// create new empty catalog
let (catalog, mut state) = PreservedCatalog::new_empty::<S>(
db_name,
Arc::clone(&iox_object_store),
catalog_empty_input,
)
.await
.context(NewEmptyFailure)?;
let (catalog, mut state) =
PreservedCatalog::new_empty::<S>(db_name, config.clone(), catalog_empty_input)
.await
.context(NewEmptyFailure)?;
// create single transaction with all files
if !files.is_empty() {
let mut transaction = catalog.open_transaction().await;
for info in files {
state
.add(Arc::clone(&iox_object_store), info.clone())
.add(Arc::clone(&config.iox_object_store), info.clone())
.context(FileRecordFailure)?;
transaction.add_parquet(&info);
}
@ -181,32 +179,32 @@ mod tests {
metadata::IoxMetadata,
storage::{MemWriter, Storage},
test_utils::{
create_partition_and_database_checkpoint, make_iox_object_store, make_record_batch,
TestSize,
create_partition_and_database_checkpoint, make_config, make_record_batch, TestSize,
},
};
use chrono::Utc;
use data_types::chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder};
use datafusion::physical_plan::SendableRecordBatchStream;
use datafusion_util::MemoryStream;
use parquet::arrow::ArrowWriter;
use time::Time;
use tokio_stream::StreamExt;
#[tokio::test]
async fn test_rebuild_successfull() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let db_name = Arc::from("db1");
// build catalog with some data
let (catalog, mut state) = new_empty(&iox_object_store).await;
let (catalog, mut state) = new_empty(config.clone()).await;
{
let mut transaction = catalog.open_transaction().await;
let info = create_parquet_file(&db_name, &iox_object_store, ChunkId::new_test(0)).await;
let info = create_parquet_file(&db_name, iox_object_store, ChunkId::new_test(0)).await;
state.insert(info.clone()).unwrap();
transaction.add_parquet(&info);
let info = create_parquet_file(&db_name, &iox_object_store, ChunkId::new_test(1)).await;
let info = create_parquet_file(&db_name, iox_object_store, ChunkId::new_test(1)).await;
state.insert(info.clone()).unwrap();
transaction.add_parquet(&info);
@ -220,7 +218,7 @@ mod tests {
{
let mut transaction = catalog.open_transaction().await;
let info = create_parquet_file(&db_name, &iox_object_store, ChunkId::new_test(2)).await;
let info = create_parquet_file(&db_name, iox_object_store, ChunkId::new_test(2)).await;
state.insert(info.clone()).unwrap();
transaction.add_parquet(&info);
@ -236,13 +234,12 @@ mod tests {
// wipe catalog
drop(catalog);
PreservedCatalog::wipe(&iox_object_store).await.unwrap();
PreservedCatalog::wipe(iox_object_store).await.unwrap();
// rebuild
let (catalog, state) =
rebuild_catalog::<TestCatalogState>(DB_NAME, iox_object_store, (), false)
.await
.unwrap();
let (catalog, state) = rebuild_catalog::<TestCatalogState>(DB_NAME, config, (), false)
.await
.unwrap();
// check match
let paths_actual = {
@ -256,20 +253,21 @@ mod tests {
#[tokio::test]
async fn test_rebuild_empty() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
// build empty catalog
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config.clone()).await;
// wipe catalog
drop(catalog);
PreservedCatalog::wipe(&iox_object_store).await.unwrap();
PreservedCatalog::wipe(&config.iox_object_store)
.await
.unwrap();
// rebuild
let (catalog, state) =
rebuild_catalog::<TestCatalogState>(DB_NAME, iox_object_store, (), false)
.await
.unwrap();
let (catalog, state) = rebuild_catalog::<TestCatalogState>(DB_NAME, config, (), false)
.await
.unwrap();
// check match
assert!(state.files().next().is_none());
@ -278,30 +276,30 @@ mod tests {
#[tokio::test]
async fn test_rebuild_no_metadata() {
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let db_name = Arc::from("db1");
// build catalog with same data
let catalog = new_empty(&iox_object_store).await;
let catalog = new_empty(config.clone()).await;
// file w/o metadata
create_parquet_file_without_metadata(&db_name, &iox_object_store, ChunkId::new_test(0))
create_parquet_file_without_metadata(&db_name, iox_object_store, ChunkId::new_test(0))
.await;
// wipe catalog
drop(catalog);
PreservedCatalog::wipe(&iox_object_store).await.unwrap();
PreservedCatalog::wipe(iox_object_store).await.unwrap();
// rebuild (do not ignore errors)
let res =
rebuild_catalog::<TestCatalogState>(DB_NAME, Arc::clone(&iox_object_store), (), false)
.await;
let res = rebuild_catalog::<TestCatalogState>(DB_NAME, config.clone(), (), false).await;
assert!(dbg!(res.unwrap_err().to_string())
.starts_with("Cannot read IOx metadata from parquet file"));
// rebuild (ignore errors)
let (catalog, state) =
rebuild_catalog::<TestCatalogState>(DB_NAME, iox_object_store, (), true)
rebuild_catalog::<TestCatalogState>(DB_NAME, config.clone(), (), true)
.await
.unwrap();
assert!(state.files().next().is_none());
@ -318,21 +316,21 @@ mod tests {
// transaction files and then check that the rebuilt catalog will be gone afterwards. Note the
// difference from the `test_rebuild_empty` case, where we can indeed prove the existence of a
// catalog (even though it is empty, aka has no files).
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
// build catalog with some data (2 transactions + initial empty one)
let (catalog, _state) = new_empty(&iox_object_store).await;
let (catalog, _state) = new_empty(config.clone()).await;
assert_eq!(catalog.revision_counter(), 0);
// wipe catalog
drop(catalog);
PreservedCatalog::wipe(&iox_object_store).await.unwrap();
PreservedCatalog::wipe(iox_object_store).await.unwrap();
// rebuild
let catalog =
rebuild_catalog::<TestCatalogState>(DB_NAME, Arc::clone(&iox_object_store), (), false)
.await
.unwrap();
let catalog = rebuild_catalog::<TestCatalogState>(DB_NAME, config.clone(), (), false)
.await
.unwrap();
drop(catalog);
// delete transaction files
@ -356,7 +354,7 @@ mod tests {
assert!(deleted);
// the catalog should be gone because there should have been no checkpoint files remaining
assert!(!exists(&iox_object_store).await);
assert!(!exists(iox_object_store).await);
}
pub async fn create_parquet_file(
@ -375,14 +373,14 @@ mod tests {
Arc::clone(&partition_key),
);
let metadata = IoxMetadata {
creation_timestamp: Utc::now(),
creation_timestamp: Time::from_timestamp_nanos(0),
table_name: Arc::clone(&table_name),
partition_key: Arc::clone(&partition_key),
chunk_id,
partition_checkpoint,
database_checkpoint,
time_of_first_write: Utc::now(),
time_of_last_write: Utc::now(),
time_of_first_write: Time::from_timestamp_nanos(0),
time_of_last_write: Time::from_timestamp_nanos(0),
chunk_order: ChunkOrder::new(5).unwrap(),
};
let stream: SendableRecordBatchStream = Box::pin(MemoryStream::new(record_batches));


@ -1,3 +1,4 @@
use crate::catalog::core::PreservedCatalogConfig;
use crate::{
catalog::{
core::PreservedCatalog,
@ -11,7 +12,7 @@ use crate::{
},
},
metadata::IoxParquetMetaData,
test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
test_utils::{chunk_addr, make_config, make_metadata, TestSize},
};
use data_types::{chunk_metadata::ChunkId, timestamp::TimestampRange};
use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath};
@ -219,25 +220,21 @@ pub async fn exists(iox_object_store: &Arc<IoxObjectStore>) -> bool {
/// Load a `PreservedCatalog` and unwrap, expecting the operation to succeed
pub async fn load_ok(
iox_object_store: &Arc<IoxObjectStore>,
config: PreservedCatalogConfig,
) -> Option<(PreservedCatalog, TestCatalogState)> {
PreservedCatalog::load(DB_NAME, Arc::clone(iox_object_store), ())
.await
.unwrap()
PreservedCatalog::load(DB_NAME, config, ()).await.unwrap()
}
/// Load a `PreservedCatalog` and unwrap the error, expecting the operation to fail
pub async fn load_err(iox_object_store: &Arc<IoxObjectStore>) -> crate::catalog::core::Error {
PreservedCatalog::load::<TestCatalogState>(DB_NAME, Arc::clone(iox_object_store), ())
pub async fn load_err(config: PreservedCatalogConfig) -> crate::catalog::core::Error {
PreservedCatalog::load::<TestCatalogState>(DB_NAME, config, ())
.await
.unwrap_err()
}
/// Create a new empty catalog with the TestCatalogState, expecting the operation to succeed
pub async fn new_empty(
iox_object_store: &Arc<IoxObjectStore>,
) -> (PreservedCatalog, TestCatalogState) {
PreservedCatalog::new_empty(DB_NAME, Arc::clone(iox_object_store), ())
pub async fn new_empty(config: PreservedCatalogConfig) -> (PreservedCatalog, TestCatalogState) {
PreservedCatalog::new_empty(DB_NAME, config, ())
.await
.unwrap()
}
@ -274,9 +271,9 @@ where
F: Fn(&S) -> CheckpointData + Send,
{
// empty state
let iox_object_store = make_iox_object_store().await;
let config = make_config().await;
let (_catalog, mut state) =
PreservedCatalog::new_empty::<S>(DB_NAME, Arc::clone(&iox_object_store), state_data)
PreservedCatalog::new_empty::<S>(DB_NAME, config.clone(), state_data)
.await
.unwrap();
@ -291,7 +288,7 @@ where
{
for chunk_id in 0..5 {
let (path, metadata) = make_metadata(
&iox_object_store,
&config.iox_object_store,
"ok",
chunk_addr(chunk_id),
TestSize::Full,
@ -299,7 +296,7 @@ where
.await;
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -321,11 +318,16 @@ where
// add and remove in the same transaction
{
let (path, metadata) =
make_metadata(&iox_object_store, "ok", chunk_addr(5), TestSize::Full).await;
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(5),
TestSize::Full,
)
.await;
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -343,7 +345,7 @@ where
state.remove(path).unwrap();
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -356,11 +358,16 @@ where
// add, remove, add in the same transaction
{
let (path, metadata) =
make_metadata(&iox_object_store, "ok", chunk_addr(6), TestSize::Full).await;
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(6),
TestSize::Full,
)
.await;
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -371,7 +378,7 @@ where
state.remove(&path).unwrap();
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -389,7 +396,7 @@ where
state.remove(&path).unwrap();
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -406,11 +413,16 @@ where
// TODO: Error handling should disambiguate between chunk collision and filename collision
// chunk with same ID already exists (should also not change the metadata)
let (path, metadata) =
make_metadata(&iox_object_store, "fail", chunk_addr(0), TestSize::Full).await;
let (path, metadata) = make_metadata(
&config.iox_object_store,
"fail",
chunk_addr(0),
TestSize::Full,
)
.await;
let err = state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path,
file_size_bytes: 33,
@ -431,7 +443,7 @@ where
let (_, metadata) = expected_files.get(&ChunkId::new_test(0)).unwrap();
let err = state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
// Intentionally "incorrect" path
path: ParquetFilePath::new(&chunk_addr(10)),
@ -446,12 +458,17 @@ where
));
// this transaction will still work
let (path, metadata) =
make_metadata(&iox_object_store, "ok", chunk_addr(7), TestSize::Full).await;
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(7),
TestSize::Full,
)
.await;
let metadata = Arc::new(metadata);
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -464,7 +481,7 @@ where
// recently added
let err = state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path,
file_size_bytes: 33,
@ -495,7 +512,7 @@ where
// create two chunks that we can use for delete predicate
let chunk_addr_1 = chunk_addr(8);
let (path, metadata) = make_metadata(
&iox_object_store,
&config.iox_object_store,
"ok",
chunk_addr_1.clone(),
TestSize::Full,
@ -503,7 +520,7 @@ where
.await;
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -515,7 +532,7 @@ where
let chunk_addr_2 = chunk_addr(9);
let (path, metadata) = make_metadata(
&iox_object_store,
&config.iox_object_store,
"ok",
chunk_addr_2.clone(),
TestSize::Full,
@ -523,7 +540,7 @@ where
.await;
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
@ -548,7 +565,7 @@ where
// chunks created afterwards are unaffected
let chunk_addr_3 = chunk_addr(10);
let (path, metadata) = make_metadata(
&iox_object_store,
&config.iox_object_store,
"ok",
chunk_addr_3.clone(),
TestSize::Full,
@ -556,7 +573,7 @@ where
.await;
state
.add(
Arc::clone(&iox_object_store),
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,


@ -86,7 +86,6 @@
//! [Apache Parquet]: https://parquet.apache.org/
//! [Apache Thrift]: https://thrift.apache.org/
//! [Thrift Compact Protocol]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md
use chrono::{DateTime, Utc};
use data_types::{
chunk_metadata::{ChunkId, ChunkOrder},
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics},
@ -114,6 +113,7 @@ use schema::{InfluxColumnType, InfluxFieldType, Schema};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::{collections::BTreeMap, convert::TryInto, sync::Arc};
use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TOutputProtocol};
use time::Time;
/// Current version for serialized metadata.
///
@ -262,10 +262,11 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct IoxMetadata {
/// Timestamp when this file was created.
pub creation_timestamp: DateTime<Utc>,
pub creation_timestamp: Time,
pub time_of_first_write: DateTime<Utc>,
pub time_of_last_write: DateTime<Utc>,
pub time_of_first_write: Time,
pub time_of_last_write: Time,
/// Table that holds this parquet file.
pub table_name: Arc<str>,
@ -404,7 +405,12 @@ impl IoxMetadata {
)
})
.collect(),
flush_timestamp: Some(self.partition_checkpoint.flush_timestamp().into()),
flush_timestamp: Some(
self.partition_checkpoint
.flush_timestamp()
.date_time()
.into(),
),
};
let proto_database_checkpoint = proto::DatabaseCheckpoint {
@ -427,9 +433,9 @@ impl IoxMetadata {
let proto_msg = proto::IoxMetadata {
version: METADATA_VERSION,
creation_timestamp: Some(self.creation_timestamp.into()),
time_of_first_write: Some(self.time_of_first_write.into()),
time_of_last_write: Some(self.time_of_last_write.into()),
creation_timestamp: Some(self.creation_timestamp.date_time().into()),
time_of_first_write: Some(self.time_of_first_write.date_time().into()),
time_of_last_write: Some(self.time_of_last_write.date_time().into()),
table_name: self.table_name.to_string(),
partition_key: self.partition_key.to_string(),
chunk_id: self.chunk_id.into(),
@ -448,12 +454,14 @@ impl IoxMetadata {
fn decode_timestamp_from_field(
value: Option<pbjson_types::Timestamp>,
field: &'static str,
) -> Result<DateTime<Utc>> {
value
) -> Result<Time> {
let date_time = value
.context(IoxMetadataFieldMissing { field })?
.try_into()
.map_err(|e| Box::new(e) as _)
.context(IoxMetadataBroken)
.context(IoxMetadataBroken)?;
Ok(Time::from_date_time(date_time))
}
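For reference, the conversion round trip in isolation; a minimal sketch that assumes only the APIs visible in this file (`Time::date_time`, `Time::from_date_time`, and the `pbjson_types`/`chrono` conversions used above):

use std::convert::TryInto;

use time::Time;

// Encoding: the wrapped chrono DateTime converts straight into the protobuf type.
fn encode_timestamp(ts: Time) -> pbjson_types::Timestamp {
    ts.date_time().into()
}

// Decoding: mirrors `decode_timestamp_from_field` above, minus the snafu context.
fn decode_timestamp(ts: pbjson_types::Timestamp) -> Result<Time, String> {
    let date_time: chrono::DateTime<chrono::Utc> = ts
        .try_into()
        .map_err(|e| format!("invalid timestamp: {}", e))?;
    Ok(Time::from_date_time(date_time))
}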
/// Parquet metadata with IOx-specific wrapper.
@ -1071,14 +1079,14 @@ mod tests {
Arc::clone(&partition_key),
);
let metadata = IoxMetadata {
creation_timestamp: Utc::now(),
creation_timestamp: Time::from_timestamp(3234, 0),
table_name,
partition_key,
chunk_id: ChunkId::new_test(1337),
partition_checkpoint,
database_checkpoint,
time_of_first_write: Utc::now(),
time_of_last_write: Utc::now(),
time_of_first_write: Time::from_timestamp(3234, 0),
time_of_last_write: Time::from_timestamp(3234, 3456),
chunk_order: ChunkOrder::new(5).unwrap(),
};

View File

@ -408,7 +408,6 @@ mod tests {
};
use arrow::array::{ArrayRef, StringArray};
use arrow_util::assert_batches_eq;
use chrono::Utc;
use data_types::{
chunk_metadata::{ChunkId, ChunkOrder},
partition_metadata::TableSummary,
@ -416,6 +415,7 @@ mod tests {
use datafusion::physical_plan::common::SizedRecordBatchStream;
use datafusion_util::MemoryStream;
use parquet::schema::types::ColumnPath;
use time::Time;
#[tokio::test]
async fn test_parquet_contains_key_value_metadata() {
@ -426,14 +426,14 @@ mod tests {
Arc::clone(&partition_key),
);
let metadata = IoxMetadata {
creation_timestamp: Utc::now(),
creation_timestamp: Time::from_timestamp_nanos(3453),
table_name,
partition_key,
chunk_id: ChunkId::new_test(1337),
partition_checkpoint,
database_checkpoint,
time_of_first_write: Utc::now(),
time_of_last_write: Utc::now(),
time_of_first_write: Time::from_timestamp_nanos(456),
time_of_last_write: Time::from_timestamp_nanos(43069346),
chunk_order: ChunkOrder::new(5).unwrap(),
};
@ -502,14 +502,14 @@ mod tests {
Arc::clone(&partition_key),
);
let metadata = IoxMetadata {
creation_timestamp: Utc::now(),
creation_timestamp: Time::from_timestamp_nanos(43069346),
table_name: Arc::clone(&table_name),
partition_key: Arc::clone(&partition_key),
chunk_id,
partition_checkpoint,
database_checkpoint,
time_of_first_write: Utc::now(),
time_of_last_write: Utc::now(),
time_of_first_write: Time::from_timestamp_nanos(234),
time_of_last_write: Time::from_timestamp_nanos(4784),
chunk_order: ChunkOrder::new(5).unwrap(),
};

View File

@ -1,3 +1,4 @@
use crate::catalog::core::PreservedCatalogConfig;
use crate::{
chunk::{self, ChunkMetrics, ParquetChunk},
metadata::{IoxMetadata, IoxParquetMetaData},
@ -11,7 +12,6 @@ use arrow::{
datatypes::{Int32Type, SchemaRef},
record_batch::RecordBatch,
};
use chrono::{TimeZone, Utc};
use data_types::{
chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder},
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary},
@ -35,6 +35,7 @@ use schema::selection::Selection;
use schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME};
use snafu::{ResultExt, Snafu};
use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc};
use time::Time;
#[derive(Debug, Snafu)]
pub enum Error {
@ -172,14 +173,14 @@ pub async fn make_chunk_given_record_batch(
Arc::clone(&addr.partition_key),
);
let metadata = IoxMetadata {
creation_timestamp: Utc.timestamp(10, 20),
creation_timestamp: Time::from_timestamp(10, 20),
table_name: Arc::clone(&addr.table_name),
partition_key: Arc::clone(&addr.partition_key),
chunk_id: addr.chunk_id,
partition_checkpoint,
database_checkpoint,
time_of_first_write: Utc.timestamp(30, 40),
time_of_last_write: Utc.timestamp(50, 60),
time_of_first_write: Time::from_timestamp(30, 40),
time_of_last_write: Time::from_timestamp(50, 60),
chunk_order: ChunkOrder::new(5).unwrap(),
};
let (path, file_size_bytes, parquet_metadata) = storage
@ -867,6 +868,13 @@ pub async fn make_iox_object_store() -> Arc<IoxObjectStore> {
)
}
/// Creates a new [`PreservedCatalogConfig`] with an in-memory object store
pub async fn make_config() -> PreservedCatalogConfig {
let iox_object_store = make_iox_object_store().await;
let time_provider = Arc::new(time::SystemProvider::new());
PreservedCatalogConfig::new(iox_object_store, time_provider)
}
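A hypothetical caller of this helper (everything past `make_config` is illustrative only):

async fn example_catalog_setup() {
    // Build the in-memory config once, then hand clones of it (or of its
    // object store) to the catalog code under test.
    let config = make_config().await;
    let _iox_object_store = Arc::clone(&config.iox_object_store);
}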
pub fn read_data_from_parquet_data(schema: SchemaRef, parquet_data: Vec<u8>) -> Vec<RecordBatch> {
let mut record_batches = vec![];
@ -927,7 +935,7 @@ pub fn create_partition_and_database_checkpoint(
let mut sequencer_numbers_1 = BTreeMap::new();
sequencer_numbers_1.insert(1, OptionalMinMaxSequence::new(None, 18));
sequencer_numbers_1.insert(2, OptionalMinMaxSequence::new(Some(25), 28));
let flush_timestamp = Utc.timestamp(10, 20);
let flush_timestamp = Time::from_timestamp(10, 20);
let partition_checkpoint_1 = PartitionCheckpoint::new(
Arc::clone(&table_name),
Arc::clone(&partition_key),

View File

@ -4,11 +4,11 @@ version = "0.1.0"
edition = "2018"
[dependencies]
chrono = "0.4"
data_types = { path = "../data_types" }
internal_types = { path = "../internal_types" }
observability_deps = { path = "../observability_deps" }
snafu = "0.6.2"
time = { path = "../time" }
[dev-dependencies]
test_helpers = { path = "../test_helpers" }

View File

@ -76,7 +76,7 @@
//!
//! # // mocking for the example below
//! # use std::collections::BTreeMap;
//! # use chrono::Utc;
//! # use time::Time;
//! # use persistence_windows::min_max_sequence::OptionalMinMaxSequence;
//! #
//! # struct Partition {
@ -105,7 +105,7 @@
//! # Arc::from("table"),
//! # Arc::from("part"),
//! # Default::default(),
//! # Utc::now(),
//! # Time::from_timestamp_nanos(3963),
//! # )
//! # }
//! # }
@ -176,7 +176,7 @@
//!
//! # // mocking for the example below
//! # use std::sync::Arc;
//! # use chrono::Utc;
//! # use time::Time;
//! # use persistence_windows::checkpoint::{DatabaseCheckpoint, PartitionCheckpoint, PersistCheckpointBuilder};
//! #
//! # struct File {}
@ -187,7 +187,7 @@
//! # Arc::from("table"),
//! # Arc::from("part"),
//! # Default::default(),
//! # Utc::now(),
//! # Time::from_timestamp_nanos(0),
//! # )
//! # }
//! #
@ -265,9 +265,9 @@ use std::{
sync::Arc,
};
use chrono::{DateTime, Utc};
use observability_deps::tracing::warn;
use snafu::{OptionExt, Snafu};
use time::Time;
use crate::min_max_sequence::OptionalMinMaxSequence;
@ -373,7 +373,7 @@ pub struct PartitionCheckpoint {
sequencer_numbers: BTreeMap<u32, OptionalMinMaxSequence>,
/// Flush timestamp
flush_timestamp: DateTime<Utc>,
flush_timestamp: Time,
}
impl PartitionCheckpoint {
@ -382,7 +382,7 @@ impl PartitionCheckpoint {
table_name: Arc<str>,
partition_key: Arc<str>,
sequencer_numbers: BTreeMap<u32, OptionalMinMaxSequence>,
flush_timestamp: DateTime<Utc>,
flush_timestamp: Time,
) -> Self {
Self {
table_name,
@ -424,8 +424,8 @@ impl PartitionCheckpoint {
.map(|(sequencer_id, min_max)| (*sequencer_id, *min_max))
}
/// Maximum persisted timestamp.
pub fn flush_timestamp(&self) -> DateTime<Utc> {
/// Flush timestamp
pub fn flush_timestamp(&self) -> Time {
self.flush_timestamp
}
}
@ -908,7 +908,7 @@ mod tests {
($table_name:expr, $partition_key:expr, {$($sequencer_number:expr => ($min:expr, $max:expr)),*}) => {
{
let sequencer_numbers = sequencer_numbers!{$($sequencer_number => ($min, $max)),*};
let flush_timestamp = DateTime::from_utc(chrono::NaiveDateTime::from_timestamp(0, 0), Utc);
let flush_timestamp = Time::from_timestamp_nanos(0);
PartitionCheckpoint::new(Arc::from($table_name), Arc::from($partition_key), sequencer_numbers, flush_timestamp)
}
};
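The same construction written out longhand, as an external caller would see it; a sketch that uses only items shown elsewhere in this file:

use std::{collections::BTreeMap, sync::Arc};

use persistence_windows::{
    checkpoint::PartitionCheckpoint, min_max_sequence::OptionalMinMaxSequence,
};
use time::Time;

fn example_checkpoint() -> PartitionCheckpoint {
    let mut sequencer_numbers = BTreeMap::new();
    // Sequencer 1: nothing persisted yet, replay watermark at 18.
    sequencer_numbers.insert(1, OptionalMinMaxSequence::new(None, 18));

    PartitionCheckpoint::new(
        Arc::from("table"),
        Arc::from("part"),
        sequencer_numbers,
        // The flush timestamp is now the workspace `Time` type instead of chrono.
        Time::from_timestamp_nanos(0),
    )
}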

File diff suppressed because it is too large

View File

@ -0,0 +1,39 @@
-- Test Setup: TwoMeasurements
-- SQL: SELECT * from cpu;
+--------+--------------------------------+------+
| region | time | user |
+--------+--------------------------------+------+
| west | 1970-01-01T00:00:00.000000100Z | 23.2 |
| west | 1970-01-01T00:00:00.000000150Z | 21 |
+--------+--------------------------------+------+
-- SQL: SELECT user, region from cpu;
+------+--------+
| user | region |
+------+--------+
| 23.2 | west |
| 21 | west |
+------+--------+
-- SQL: SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
+--------+--------------------------------+------+
| region | time | user |
+--------+--------------------------------+------+
| west | 1970-01-01T00:00:00.000000150Z | 21 |
+--------+--------------------------------+------+
-- SQL: SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
+------+--------+
| user | region |
+------+--------+
| 21 | west |
+------+--------+
-- SQL: SELECT count(*) from cpu group by region;
+-----------------+
| COUNT(UInt8(1)) |
+-----------------+
| 2 |
+-----------------+
-- SQL: SELECT * from disk;
+-------+--------+--------------------------------+
| bytes | region | time |
+-------+--------+--------------------------------+
| 99 | east | 1970-01-01T00:00:00.000000200Z |
+-------+--------+--------------------------------+

View File

@ -0,0 +1,32 @@
-- Basic query tests
-- IOX_SETUP: TwoMeasurements
-- query data
SELECT * from cpu;
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2776
-- "+----------------+",
-- "| MIN(cpu.region |",
-- "+----------------+",
-- "| west |",
-- "+----------------+",
-- SELECT min(region) from cpu;
-- projection
-- expect to get a subset of the columns, in the order specified
SELECT user, region from cpu;
-- predicate on CPU
SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
-- projection and predicate
-- expect to get a subset of the columns, in the order specified
SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
-- basic grouping
SELECT count(*) from cpu group by region;
-- select from a different measurement
SELECT * from disk;

View File

@ -0,0 +1,26 @@
-- Test Setup: OneMeasurementRealisticTimes
-- SQL: SELECT * from cpu;
+--------+----------------------+------+
| region | time | user |
+--------+----------------------+------+
| west | 2021-07-20T19:28:50Z | 23.2 |
| west | 2021-07-20T19:30:30Z | 21 |
+--------+----------------------+------+
-- SQL: SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00');
+--------+----------------------+------+
| region | time | user |
+--------+----------------------+------+
| west | 2021-07-20T19:30:30Z | 21 |
+--------+----------------------+------+
-- SQL: SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z');
+--------+----------------------+------+
| region | time | user |
+--------+----------------------+------+
| west | 2021-07-20T19:30:30Z | 21 |
+--------+----------------------+------+
-- SQL: SELECT * FROM cpu WHERE CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT);
+--------+----------------------+------+
| region | time | user |
+--------+----------------------+------+
| west | 2021-07-20T19:30:30Z | 21 |
+--------+----------------------+------+

View File

@ -0,0 +1,12 @@
-- Timestamp printing / output tests
-- IOX_SETUP: OneMeasurementRealisticTimes
-- Expect the timestamp output to be formatted correctly (with `Z`)
SELECT * from cpu;
-- explicit offset format
SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00');
-- Use RFC3339 format
SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z');
-- use cast workaround
SELECT * FROM cpu WHERE
CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT);

query_tests/check-generated.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash -eu
# Change to the query_tests crate directory, where this script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
pushd "$DIR"
echo "Regenerating query_tests..."
(cd generate && cargo run)
echo "Checking for uncommitted changes..."
if ! git diff --quiet HEAD --; then
echo "git diff found:"
git diff HEAD
echo "************************************************************"
echo "* Found uncommitted changes to generated flatbuffers code! *"
echo "* Please do:"
echo "* cd query_tests/generate"
echo "* cargo run"
echo "* to regenerate the query_tests code and check it in! *"
echo "************************************************************"
exit 1
else
echo "No uncommitted changes; everything is awesome."
fi

query_tests/generate/Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "generate"
version = "0.1.0"

View File

@ -0,0 +1,11 @@
[package]
name = "generate"
description = "Creates Rust tests for the .sql files in cases/in"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
[dependencies] # In alphabetical order
# Note this is a standalone binary and not part of the overall workspace
[workspace]

View File

@ -1,26 +1,41 @@
//! Finds all .sql files in `cases/in/` and creates corresponding entries in src/cases.rs
//! native Rust types.
//! Finds all .sql files in `cases/in/` and creates corresponding
//! entries in src/cases.rs as native Rust test runner tests
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;
fn main() -> Result<()> {
// Ignores all args and finds paths relative to the location of the running executable,
// example: query_tests/generate/target/debug/generate
let current_exe = std::env::current_exe()?;
// walk up parent tree looking for query_tests
let mut query_tests = current_exe.clone();
let needle = OsStr::new("query_tests");
loop {
if query_tests.file_name() == Some(&needle) {
break;
}
if !query_tests.pop() {
panic!("Can not find 'query_tests' in the path: {:?}", current_exe);
}
}
// crate root
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let cases = root.join("cases").join("in");
let cases = query_tests.join("cases").join("in");
let sql_files = find_sql_files(&cases);
// Tell cargo to recompile if anything in the cases directory changes
println!("cargo:rerun-if-changed={}", cases.display());
// Now create the generated sql file
let output_content = make_cases_rs(&sql_files).join("\n");
let output_file = root.join("src").join("cases.rs");
let output_file = query_tests.join("src").join("cases.rs");
write_if_changed(&output_file, &output_content);
println!("Done");
Ok(())
}
@ -94,6 +109,8 @@ fn write_if_changed(path: &Path, content: &str) {
};
if changed {
println!("Writing changes to {}", path.display());
std::fs::write(path, content)
.map_err(|e| format!("Error writing to {:?}: {}", path, e))
.unwrap();

View File

@ -18,6 +18,20 @@ async fn test_cases_all_chunks_dropped_sql() {
.expect("flush worked");
}
#[tokio::test]
// Tests from "basic.sql",
async fn test_cases_basic_sql() {
let input_path = Path::new("cases").join("in").join("basic.sql");
let mut runner = Runner::new();
runner
.run(input_path)
.await
.expect("test failed");
runner
.flush()
.expect("flush worked");
}
#[tokio::test]
// Tests from "chunk_order.sql",
async fn test_cases_chunk_order_sql() {
@ -157,3 +171,17 @@ async fn test_cases_stats_plans_sql() {
.flush()
.expect("flush worked");
}
#[tokio::test]
// Tests from "timestamps.sql",
async fn test_cases_timestamps_sql() {
let input_path = Path::new("cases").join("in").join("timestamps.sql");
let mut runner = Runner::new();
runner
.run(input_path)
.await
.expect("test failed");
runner
.flush()
.expect("flush worked");
}

View File

@ -70,6 +70,7 @@ pub fn get_all_setups() -> &'static HashMap<String, Arc<dyn DbSetup>> {
register_setup!(OneDeleteSimpleExprOneChunk),
register_setup!(OneDeleteMultiExprsOneChunk),
register_setup!(TwoDeletesMultiExprsOneChunk),
register_setup!(OneMeasurementRealisticTimes),
]
.into_iter()
.map(|(name, setup)| (name.to_string(), setup as Arc<dyn DbSetup>))
@ -1103,8 +1104,7 @@ impl DbSetup for ChunkOrder {
let partition = partition.read();
let chunks = LockablePartition::chunks(&partition);
let mut partition = partition.upgrade();
let flush_handle =
LockablePartition::prepare_persist(&mut partition, chrono::MAX_DATETIME).unwrap();
let flush_handle = LockablePartition::prepare_persist(&mut partition, true).unwrap();
(chunks, flush_handle)
};

View File

@ -39,168 +39,6 @@ where
}
}
#[tokio::test]
async fn sql_select_from_cpu() {
let expected = vec![
"+--------+--------------------------------+------+",
"| region | time | user |",
"+--------+--------------------------------+------+",
"| west | 1970-01-01T00:00:00.000000100Z | 23.2 |",
"| west | 1970-01-01T00:00:00.000000150Z | 21 |",
"+--------+--------------------------------+------+",
];
run_sql_test_case(TwoMeasurements {}, "SELECT * from cpu", &expected).await;
}
// BUG: https://github.com/influxdata/influxdb_iox/issues/2776
#[ignore]
#[tokio::test]
async fn sql_select_from_cpu_min_utf8() {
let expected = vec![
"+----------------+",
"| MIN(cpu.region |",
"+----------------+",
"| west |",
"+----------------+",
];
run_sql_test_case(TwoMeasurements {}, "SELECT min(region) from cpu", &expected).await;
}
#[tokio::test]
async fn sql_select_from_cpu_2021() {
let expected = vec![
"+--------+----------------------+------+",
"| region | time | user |",
"+--------+----------------------+------+",
"| west | 2021-07-20T19:28:50Z | 23.2 |",
"| west | 2021-07-20T19:30:30Z | 21 |",
"+--------+----------------------+------+",
];
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * from cpu",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_from_cpu_with_timestamp_predicate_explicit_utc() {
let expected = vec![
"+--------+----------------------+------+",
"| region | time | user |",
"+--------+----------------------+------+",
"| west | 2021-07-20T19:30:30Z | 21 |",
"+--------+----------------------+------+",
];
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00')",
&expected,
)
.await;
// Use RFC3339 format
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z')",
&expected,
)
.await;
// use cast workaround
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * FROM cpu WHERE \
CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT)",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_from_cpu_with_projection() {
// expect to get a subset of the columns, in the order specified
let expected = vec![
"+------+--------+",
"| user | region |",
"+------+--------+",
"| 23.2 | west |",
"| 21 | west |",
"+------+--------+",
];
run_sql_test_case(
TwoMeasurements {},
"SELECT user, region from cpu",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_from_cpu_pred() {
let expected = vec![
"+--------+--------------------------------+------+",
"| region | time | user |",
"+--------+--------------------------------+------+",
"| west | 1970-01-01T00:00:00.000000150Z | 21 |",
"+--------+--------------------------------+------+",
];
run_sql_test_case(
TwoMeasurements {},
"SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00')",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_from_cpu_with_projection_and_pred() {
// expect to get a subset of the columns, in the order specified
let expected = vec![
"+------+--------+",
"| user | region |",
"+------+--------+",
"| 21 | west |",
"+------+--------+",
];
run_sql_test_case(
TwoMeasurements {},
"SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00')",
&expected
).await;
}
#[tokio::test]
async fn sql_select_from_cpu_group() {
let expected = vec![
"+-----------------+",
"| COUNT(UInt8(1)) |",
"+-----------------+",
"| 2 |",
"+-----------------+",
];
run_sql_test_case(
TwoMeasurements {},
"SELECT count(*) from cpu group by region",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_from_disk() {
let expected = vec![
"+-------+--------+--------------------------------+",
"| bytes | region | time |",
"+-------+--------+--------------------------------+",
"| 99 | east | 1970-01-01T00:00:00.000000200Z |",
"+-------+--------+--------------------------------+",
];
run_sql_test_case(TwoMeasurements {}, "SELECT * from disk", &expected).await;
}
#[tokio::test]
async fn sql_select_with_schema_merge() {
let expected = vec![

View File

@ -10,7 +10,7 @@ arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
bytes = "1.0"
chrono = "0.4"
cache_loader_async = { version = "0.1.0", features = ["ttl-cache"] }
cache_loader_async = { version = "0.1.2", features = ["ttl-cache"] }
crc32fast = "1.2.0"
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }
@ -48,6 +48,7 @@ serde = "1.0"
serde_json = "1.0"
snafu = "0.6"
snap = "1.0.0"
time = { path = "../time" }
trace = { path = "../trace" }
tokio = { version = "1.11", features = ["macros", "time"] }
tokio-util = { version = "0.6.3" }

View File

@ -3,6 +3,7 @@ use std::sync::Arc;
use object_store::ObjectStore;
use observability_deps::tracing::info;
use query::exec::Executor;
use time::TimeProvider;
use write_buffer::config::WriteBufferConfigFactory;
use crate::JobRegistry;
@ -16,6 +17,7 @@ pub struct ApplicationState {
executor: Arc<Executor>,
job_registry: Arc<JobRegistry>,
metric_registry: Arc<metric::Registry>,
time_provider: Arc<dyn TimeProvider>,
}
impl ApplicationState {
@ -23,33 +25,34 @@ impl ApplicationState {
///
/// Uses number of CPUs in the system if num_worker_threads is not set
pub fn new(object_store: Arc<ObjectStore>, num_worker_threads: Option<usize>) -> Self {
Self::with_write_buffer_factory(
object_store,
Arc::new(Default::default()),
num_worker_threads,
)
}
/// Same as [`new`](Self::new) but also specifies the write buffer factory.
///
/// This is mostly useful for testing.
pub fn with_write_buffer_factory(
object_store: Arc<ObjectStore>,
write_buffer_factory: Arc<WriteBufferConfigFactory>,
num_worker_threads: Option<usize>,
) -> Self {
let num_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
info!(%num_threads, "using specified number of threads per thread pool");
let metric_registry = Arc::new(metric::Registry::new());
let time_provider: Arc<dyn TimeProvider> = Arc::new(time::SystemProvider::new());
let job_registry = Arc::new(JobRegistry::new(Arc::clone(&metric_registry)));
let write_buffer_factory =
Arc::new(WriteBufferConfigFactory::new(Arc::clone(&time_provider)));
Self {
object_store,
write_buffer_factory,
executor: Arc::new(Executor::new(num_threads)),
job_registry,
metric_registry,
time_provider,
}
}
/// Overrides the write_buffer_factory
pub fn with_write_buffer_factory(
self,
write_buffer_factory: Arc<WriteBufferConfigFactory>,
) -> Self {
Self {
write_buffer_factory,
..self
}
}
@ -69,6 +72,10 @@ impl ApplicationState {
&self.metric_registry
}
pub fn time_provider(&self) -> &Arc<dyn TimeProvider> {
&self.time_provider
}
pub fn executor(&self) -> &Arc<Executor> {
&self.executor
}
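Putting the pieces above together, the construction flow now looks roughly like this (a sketch; `ApplicationState` is this module's type, the other paths are the ones used elsewhere in this diff):

use std::sync::Arc;

use object_store::ObjectStore;
use write_buffer::config::WriteBufferConfigFactory;

fn build_application() -> Arc<ApplicationState> {
    // `new` wires up the system clock and a default write buffer factory.
    let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None);

    // To swap in a custom factory (e.g. one with mocks registered), build it
    // against the application's shared time provider and override it.
    let factory = WriteBufferConfigFactory::new(Arc::clone(application.time_provider()));
    Arc::new(application.with_write_buffer_factory(Arc::new(factory)))
}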

View File

@ -57,7 +57,7 @@ pub enum CacheFillError {
impl ConnectionManagerImpl {
pub fn new() -> Self {
let (cache, _) = LoadingCache::new(Self::cached_remote_server);
let cache = LoadingCache::new(Self::cached_remote_server);
Self { cache }
}

View File

@ -7,7 +7,6 @@ use crate::{
rules::ProvidedDatabaseRules,
ApplicationState, Db,
};
use chrono::{DateTime, Utc};
use data_types::{
database_rules::WriteBufferDirection, detailed_database::GenerationId, server_id::ServerId,
DatabaseName,
@ -213,8 +212,9 @@ impl Database {
create_preserved_catalog(
db_name,
Arc::clone(&iox_object_store),
iox_object_store,
Arc::clone(application.metric_registry()),
Arc::clone(application.time_provider()),
true,
)
.await
@ -542,11 +542,7 @@ impl Database {
/// - write it to a write buffer
/// - write it to a local `Db`
///
pub async fn write_entry(
&self,
entry: entry::Entry,
time_of_write: DateTime<Utc>,
) -> Result<(), WriteError> {
pub async fn write_entry(&self, entry: entry::Entry) -> Result<(), WriteError> {
let recorder = self.shared.metrics.entry_ingest(entry.data().len());
let db = {
@ -565,7 +561,7 @@ impl Database {
}
};
db.store_entry(entry, time_of_write).await.map_err(|e| {
db.store_entry(entry).await.map_err(|e| {
use super::db::Error;
match e {
// TODO: Pull write buffer producer out of Db
@ -1076,6 +1072,7 @@ impl DatabaseStateRulesLoaded {
shared.config.name.as_str(),
Arc::clone(&self.iox_object_store),
Arc::clone(shared.application.metric_registry()),
Arc::clone(shared.application.time_provider()),
shared.config.wipe_catalog_on_error,
shared.config.skip_replay,
)
@ -1104,6 +1101,7 @@ impl DatabaseStateRulesLoaded {
catalog,
write_buffer_producer: producer,
metric_registry: Arc::clone(shared.application.metric_registry()),
time_provider: Arc::clone(shared.application.time_provider()),
};
let db = Db::new(
@ -1135,7 +1133,7 @@ impl DatabaseStateCatalogLoaded {
let db = Arc::clone(&self.db);
// TODO: Pull write buffer and lifecycle out of Db
db.unsuppress_persistence().await;
db.unsuppress_persistence();
let rules = self.provided_rules.rules();
let write_buffer_factory = shared.application.write_buffer_factory();
@ -1191,7 +1189,6 @@ impl DatabaseStateInitialized {
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use data_types::database_rules::{
PartitionTemplate, TemplatePart, WriteBufferConnection, WriteBufferDirection,
};
@ -1203,6 +1200,7 @@ mod tests {
num::NonZeroU32,
time::Instant,
};
use time::Time;
use uuid::Uuid;
use write_buffer::{config::WriteBufferConfigFactory, mock::MockBufferSharedState};
@ -1377,23 +1375,23 @@ mod tests {
.unwrap();
state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 10),
Utc::now(),
Time::from_timestamp_nanos(0),
entry_a,
));
state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 11),
Utc::now(),
Time::from_timestamp_nanos(0),
entry_b,
));
// setup application
let mut factory = WriteBufferConfigFactory::new();
let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None);
let mut factory = WriteBufferConfigFactory::new(Arc::clone(application.time_provider()));
factory.register_mock("my_mock".to_string(), state.clone());
let application = Arc::new(ApplicationState::with_write_buffer_factory(
Arc::new(ObjectStore::new_in_memory()),
Arc::new(factory),
None,
));
let application = Arc::new(application.with_write_buffer_factory(Arc::new(factory)));
let server_id = ServerId::try_from(1).unwrap();
// setup DB
@ -1465,7 +1463,7 @@ mod tests {
.unwrap();
state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 12),
Utc::now(),
Time::from_timestamp_nanos(0),
entry_c,
));
@ -1486,7 +1484,7 @@ mod tests {
.unwrap();
state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 13),
Utc::now(),
Time::from_timestamp_nanos(0),
entry_d,
));
let db = database.initialized_db().unwrap();

View File

@ -14,7 +14,6 @@ use std::{
use ::lifecycle::select_persistable_chunks;
use async_trait::async_trait;
use chrono::{DateTime, TimeZone, Utc};
use parking_lot::{Mutex, RwLock};
use rand_distr::{Distribution, Poisson};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
@ -46,6 +45,7 @@ use query::{
QueryDatabase,
};
use schema::Schema;
use time::{Time, TimeProvider};
use trace::ctx::SpanContext;
use write_buffer::core::{WriteBufferReading, WriteBufferWriting};
@ -298,12 +298,9 @@ pub struct Db {
/// This is stored here for the following reasons:
/// - to control the persistence suppression via a [`Db::unsuppress_persistence`]
/// - to keep the lifecycle state (e.g. the number of running compactions) around
lifecycle_policy: tokio::sync::Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>,
lifecycle_policy: Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>,
/// TESTING ONLY: Mocked `Utc::now()` for the background worker
///
/// TODO: Replace with TimeProvider (#2722)
now_override: Mutex<Option<DateTime<Utc>>>,
time_provider: Arc<dyn TimeProvider>,
/// To-be-written delete predicates.
delete_predicates_mailbox: Mutex<Vec<(Arc<DeletePredicate>, Vec<ChunkAddrWithoutDatabase>)>>,
@ -321,6 +318,7 @@ pub(crate) struct DatabaseToCommit {
pub(crate) preserved_catalog: PreservedCatalog,
pub(crate) catalog: Catalog,
pub(crate) rules: Arc<DatabaseRules>,
pub(crate) time_provider: Arc<dyn TimeProvider>,
/// TODO: Move onto Database
pub(crate) write_buffer_producer: Option<Arc<dyn WriteBufferWriting>>,
@ -362,8 +360,8 @@ impl Db {
worker_iterations_delete_predicate_preservation: AtomicUsize::new(0),
write_buffer_producer: database_to_commit.write_buffer_producer,
cleanup_lock: Default::default(),
lifecycle_policy: tokio::sync::Mutex::new(None),
now_override: Default::default(),
lifecycle_policy: Mutex::new(None),
time_provider: database_to_commit.time_provider,
delete_predicates_mailbox: Default::default(),
persisted_chunk_id_override: Default::default(),
};
@ -380,8 +378,8 @@ impl Db {
}
/// Allow persistence if database rules allow it.
pub async fn unsuppress_persistence(&self) {
let mut guard = self.lifecycle_policy.lock().await;
pub fn unsuppress_persistence(&self) {
let mut guard = self.lifecycle_policy.lock();
let policy = guard
.as_mut()
.expect("lifecycle policy should be initialized");
@ -701,7 +699,7 @@ impl Db {
.persistence_windows_mut()
.map(|window| match force {
true => window.flush_all_handle(),
false => window.flush_handle(self.utc_now()),
false => window.flush_handle(),
})
.flatten()
.context(CannotFlushPartition {
@ -821,13 +819,16 @@ impl Db {
loop {
self.worker_iterations_lifecycle
.fetch_add(1, Ordering::Relaxed);
let mut guard = self.lifecycle_policy.lock().await;
let policy = guard
.as_mut()
.expect("lifecycle policy should be initialized");
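// Build the check_for_work future while briefly holding the synchronous
// policy lock, then drop the guard before awaiting it so the parking_lot
// mutex is never held across an await point.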
let fut = {
let mut guard = self.lifecycle_policy.lock();
let policy = guard
.as_mut()
.expect("lifecycle policy should be initialized");
policy.check_for_work(self.utc_now()).await
policy.check_for_work(self.time_provider.now().date_time())
};
fut.await
}
};
@ -855,20 +856,13 @@ impl Db {
debug!(?duration, "cleanup worker sleeps");
tokio::time::sleep(duration).await;
match chrono::Duration::from_std(catalog_transaction_prune_age) {
Ok(catalog_transaction_prune_age) => {
if let Err(e) = prune_catalog_transaction_history(
self.iox_object_store(),
Utc::now() - catalog_transaction_prune_age,
)
.await
{
error!(%e, "error while pruning catalog transactions");
}
}
Err(e) => {
error!(%e, "cannot convert `catalog_transaction_prune_age`, skipping transaction pruning");
}
if let Err(e) = prune_catalog_transaction_history(
self.iox_object_store(),
self.time_provider.now() - catalog_transaction_prune_age,
)
.await
{
error!(%e, "error while pruning catalog transactions");
}
if let Err(e) = self.cleanup_unreferenced_parquet_files().await {
@ -917,13 +911,6 @@ impl Db {
info!("finished db background worker");
}
/// `Utc::now()` that is used by `Db`. Can be mocked for testing.
///
/// TODO: Remove (#2722)
fn utc_now(&self) -> DateTime<Utc> {
self.now_override.lock().unwrap_or_else(Utc::now)
}
async fn cleanup_unreferenced_parquet_files(
self: &Arc<Self>,
) -> std::result::Result<(), parquet_file::catalog::cleanup::Error> {
@ -974,7 +961,7 @@ impl Db {
}
/// Stores an entry based on the configuration.
pub async fn store_entry(&self, entry: Entry, time_of_write: DateTime<Utc>) -> Result<()> {
pub async fn store_entry(&self, entry: Entry) -> Result<()> {
let immutable = {
let rules = self.rules.read();
rules.lifecycle_rules.immutable
@ -1009,11 +996,7 @@ impl Db {
entry,
));
self.store_sequenced_entry(
sequenced_entry,
filter_table_batch_keep_all,
time_of_write,
)
self.store_sequenced_entry(sequenced_entry, filter_table_batch_keep_all)
}
(_, true) => {
// If not configured to send entries to the write buffer and the database is
@ -1026,11 +1009,7 @@ impl Db {
// sequencing entries so skip doing so here
let sequenced_entry = Arc::new(SequencedEntry::new_unsequenced(entry));
self.store_sequenced_entry(
sequenced_entry,
filter_table_batch_keep_all,
time_of_write,
)
self.store_sequenced_entry(sequenced_entry, filter_table_batch_keep_all)
}
}
}
@ -1051,7 +1030,6 @@ impl Db {
&self,
sequenced_entry: Arc<SequencedEntry>,
filter_table_batch: F,
time_of_write: DateTime<Utc>,
) -> Result<()>
where
F: Fn(Option<&Sequence>, &str, &TableBatch<'_>) -> (bool, Option<Vec<bool>>),
@ -1144,7 +1122,7 @@ impl Db {
let mut partition = partition.write();
let handle_chunk_write = |chunk: &mut CatalogChunk| {
chunk.record_write(time_of_write, &timestamp_summary);
chunk.record_write(&timestamp_summary);
if chunk.storage().0 >= mub_row_threshold.get() {
chunk.freeze().expect("freeze mub chunk");
}
@ -1182,8 +1160,7 @@ impl Db {
match chunk_result {
Ok(mb_chunk) => {
let chunk =
partition.create_open_chunk(mb_chunk, time_of_write);
let chunk = partition.create_open_chunk(mb_chunk);
let mut chunk = chunk
.try_write()
.expect("partition lock should prevent contention");
@ -1204,32 +1181,20 @@ impl Db {
schema_handle.commit();
// TODO: PersistenceWindows use TimestampSummary
let min_time = Utc.timestamp_nanos(timestamp_summary.stats.min.unwrap());
let max_time = Utc.timestamp_nanos(timestamp_summary.stats.max.unwrap());
let min_time = Time::from_timestamp_nanos(timestamp_summary.stats.min.unwrap());
let max_time = Time::from_timestamp_nanos(timestamp_summary.stats.max.unwrap());
match partition.persistence_windows_mut() {
Some(windows) => {
windows.add_range(
sequence,
row_count,
min_time,
max_time,
self.utc_now(),
);
windows.add_range(sequence, row_count, min_time, max_time);
}
None => {
let mut windows = PersistenceWindows::new(
partition.addr().clone(),
late_arrival_window,
self.utc_now(),
);
windows.add_range(
sequence,
row_count,
min_time,
max_time,
self.utc_now(),
Arc::clone(&self.time_provider),
);
windows.add_range(sequence, row_count, min_time, max_time);
partition.set_persistence_windows(windows);
}
}
@ -1358,12 +1323,8 @@ pub mod test_helpers {
use super::*;
/// Try to write line protocol data w/ a specific `time_of_write` and return all tables that were written.
pub async fn try_write_lp_with_time(
db: &Db,
lp: &str,
time_of_write: DateTime<Utc>,
) -> Result<Vec<String>> {
/// Try to write line protocol data and return all tables that were written.
pub async fn try_write_lp(db: &Db, lp: &str) -> Result<Vec<String>> {
let entries = {
let partitioner = &db.rules.read().partition_template;
lp_to_entries(lp, partitioner)
@ -1377,7 +1338,7 @@ pub mod test_helpers {
tables.insert(batch.name().to_string());
}
}
db.store_entry(entry, time_of_write).await?;
db.store_entry(entry).await?;
}
}
@ -1387,20 +1348,6 @@ pub mod test_helpers {
Ok(tables)
}
/// Try to write lineprotocol data and return all tables that where written.
pub async fn try_write_lp(db: &Db, lp: &str) -> Result<Vec<String>> {
try_write_lp_with_time(db, lp, Utc::now()).await
}
/// Same as [`try_write_lp_with_time`](try_write_lp_with_time) but will panic on failure.
pub async fn write_lp_with_time(
db: &Db,
lp: &str,
time_of_write: DateTime<Utc>,
) -> Vec<String> {
try_write_lp_with_time(db, lp, time_of_write).await.unwrap()
}
/// Same as [`try_write_lp`](try_write_lp) but will panic on failure.
pub async fn write_lp(db: &Db, lp: &str) -> Vec<String> {
try_write_lp(db, lp).await.unwrap()
@ -1447,7 +1394,6 @@ mod tests {
use arrow::record_batch::RecordBatch;
use bytes::Bytes;
use chrono::{DateTime, TimeZone};
use futures::{stream, StreamExt, TryStreamExt};
use predicate::delete_expr::DeleteExpr;
use tokio_util::sync::CancellationToken;
@ -1474,15 +1420,17 @@ mod tests {
use query::{QueryChunk, QueryDatabase};
use schema::selection::Selection;
use schema::Schema;
use time::Time;
use write_buffer::mock::{
MockBufferForWriting, MockBufferForWritingThatAlwaysErrors, MockBufferSharedState,
};
use crate::utils::make_db_time;
use crate::{
assert_store_sequenced_entry_failures,
db::{
catalog::chunk::ChunkStage,
test_helpers::{run_query, try_write_lp, write_lp, write_lp_with_time},
test_helpers::{run_query, try_write_lp, write_lp},
},
utils::{make_db, TestDb},
};
@ -1509,7 +1457,7 @@ mod tests {
let db = immutable_db().await;
let entry = lp_to_entry("cpu bar=1 10");
let res = db.store_entry(entry, Utc::now()).await;
let res = db.store_entry(entry).await;
assert_contains!(
res.unwrap_err().to_string(),
"Cannot write to this database: no mutable buffer configured"
@ -1522,8 +1470,10 @@ mod tests {
// configured and the mutable buffer isn't
let write_buffer_state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
let write_buffer =
Arc::new(MockBufferForWriting::new(write_buffer_state.clone(), None).unwrap());
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0)));
let write_buffer = Arc::new(
MockBufferForWriting::new(write_buffer_state.clone(), None, time_provider).unwrap(),
);
let test_db = TestDb::builder()
.write_buffer_producer(write_buffer)
.lifecycle_rules(LifecycleRules {
@ -1535,7 +1485,7 @@ mod tests {
.db;
let entry = lp_to_entry("cpu bar=1 10");
test_db.store_entry(entry, Utc::now()).await.unwrap();
test_db.store_entry(entry).await.unwrap();
assert_eq!(write_buffer_state.get_messages(0).len(), 1);
}
@ -1546,8 +1496,10 @@ mod tests {
// configured.
let write_buffer_state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
let write_buffer =
Arc::new(MockBufferForWriting::new(write_buffer_state.clone(), None).unwrap());
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0)));
let write_buffer = Arc::new(
MockBufferForWriting::new(write_buffer_state.clone(), None, time_provider).unwrap(),
);
let db = TestDb::builder()
.write_buffer_producer(write_buffer)
.build()
@ -1555,7 +1507,7 @@ mod tests {
.db;
let entry = lp_to_entry("cpu bar=1 10");
db.store_entry(entry, Utc::now()).await.unwrap();
db.store_entry(entry).await.unwrap();
assert_eq!(write_buffer_state.get_messages(0).len(), 1);
@ -1583,7 +1535,7 @@ mod tests {
let entry = lp_to_entry("cpu bar=1 10");
let res = db.store_entry(entry, Utc::now()).await;
let res = db.store_entry(entry).await;
assert!(
matches!(res, Err(Error::WriteBufferWritingError { .. })),
@ -1597,7 +1549,7 @@ mod tests {
// Validate that writes are rejected if this database is reading from the write buffer
let db = immutable_db().await;
let entry = lp_to_entry("cpu bar=1 10");
let res = db.store_entry(entry, Utc::now()).await;
let res = db.store_entry(entry).await;
assert_contains!(
res.unwrap_err().to_string(),
"Cannot write to this database: no mutable buffer configured"
@ -1641,7 +1593,7 @@ mod tests {
let entry = lp_to_entry(&lp);
// This should succeed and start chunks in the MUB
db.store_entry(entry, Utc::now()).await.unwrap();
db.store_entry(entry).await.unwrap();
// 3 more lines that should go in the 3 partitions/chunks.
// Line 1 has the same schema and should end up in the MUB.
@ -1659,7 +1611,7 @@ mod tests {
let entry = lp_to_entry(&lp);
// This should return an error because there was at least one error in the loop
let result = db.store_entry(entry, Utc::now()).await;
let result = db.store_entry(entry).await;
assert_contains!(
result.unwrap_err().to_string(),
"Storing sequenced entry failed with the following error(s), and possibly more:"
@ -1723,12 +1675,15 @@ mod tests {
#[tokio::test]
async fn metrics_during_rollover() {
let test_db = make_db().await;
let time = Arc::new(time::MockProvider::new(Time::from_timestamp(11, 22)));
let test_db = TestDb::builder()
.time_provider(Arc::<time::MockProvider>::clone(&time))
.build()
.await;
let db = Arc::clone(&test_db.db);
let t1_write = Utc.timestamp(11, 22);
write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await;
write_lp(db.as_ref(), "cpu bar=1 10").await;
let registry = test_db.metric_registry.as_ref();
@ -1744,14 +1699,17 @@ mod tests {
catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 700);
// write into same chunk again.
let t2_write = t1_write + chrono::Duration::seconds(1);
write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await;
let t3_write = t2_write + chrono::Duration::seconds(1);
write_lp_with_time(db.as_ref(), "cpu bar=3 30", t3_write).await;
let t4_write = t3_write + chrono::Duration::seconds(1);
write_lp_with_time(db.as_ref(), "cpu bar=4 40", t4_write).await;
let t5_write = t4_write + chrono::Duration::seconds(1);
write_lp_with_time(db.as_ref(), "cpu bar=5 50", t5_write).await;
time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=2 20").await;
time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=3 30").await;
time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=4 40").await;
time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=5 50").await;
// verify chunk size updated
catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 764);
@ -1791,8 +1749,7 @@ mod tests {
let expected_read_buffer_size = 1706;
catalog_chunk_size_bytes_metric_eq(registry, "read_buffer", expected_read_buffer_size);
let t6_write = t5_write + chrono::Duration::seconds(1);
*db.now_override.lock() = Some(t6_write);
time.inc(Duration::from_secs(1));
*db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337));
let chunk_id = db
.persist_partition("cpu", "1970-01-01T00", true)
@ -1843,10 +1800,10 @@ mod tests {
write_lp(db.as_ref(), "write_metrics_test foo=3 650000000010").await;
let mut summary = TimestampSummary::default();
summary.record(Utc.timestamp_nanos(100000000000));
summary.record(Utc.timestamp_nanos(180000000000));
summary.record(Utc.timestamp_nanos(650000000000));
summary.record(Utc.timestamp_nanos(650000000010));
summary.record(Time::from_timestamp_nanos(100000000000));
summary.record(Time::from_timestamp_nanos(180000000000));
summary.record(Time::from_timestamp_nanos(650000000000));
summary.record(Time::from_timestamp_nanos(650000000010));
let mut reporter = metric::RawReporter::default();
test_db.metric_registry.report(&mut reporter);
@ -2028,11 +1985,10 @@ mod tests {
#[tokio::test]
async fn compact() {
// Test that data can be read after it is compacted
let test_db = make_db().await;
let db = Arc::new(test_db.db);
let (db, time) = make_db_time().await;
let t_write1 = Utc::now();
write_lp_with_time(db.as_ref(), "cpu bar=1 10", t_write1).await;
let t_write1 = time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=1 10").await;
let partition_key = "1970-01-01T00";
db.rollover_partition("cpu", partition_key)
@ -2052,8 +2008,8 @@ mod tests {
assert_eq!(first_old_rb_write, t_write1);
// Put new data into the mutable buffer
let t_write2 = Utc::now();
write_lp_with_time(db.as_ref(), "cpu bar=2 20", t_write2).await;
let t_write2 = time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=2 20").await;
// now, compact it
let compacted_rb_chunk = db
@ -2188,6 +2144,7 @@ mod tests {
async fn write_one_chunk_to_parquet_file() {
// Test that data can be written into parquet files
let object_store = Arc::new(ObjectStore::new_in_memory());
let time = Arc::new(time::MockProvider::new(Time::from_timestamp(11, 22)));
// Create a DB given a server id, an object store and a db name
let test_db = TestDb::builder()
@ -2196,16 +2153,16 @@ mod tests {
..Default::default()
})
.object_store(Arc::clone(&object_store))
.time_provider(Arc::<time::MockProvider>::clone(&time))
.build()
.await;
let db = test_db.db;
// Write some line protocols in Mutable buffer of the DB
let t1_write = Utc.timestamp(11, 22);
write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await;
let t2_write = t1_write + chrono::Duration::seconds(1);
write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await;
write_lp(db.as_ref(), "cpu bar=1 10").await;
time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=2 20").await;
// Now mark the MB chunk closed
let partition_key = "1970-01-01T00";
@ -2220,9 +2177,9 @@ mod tests {
.await
.unwrap()
.unwrap();
// Write the RB chunk to Object Store but keep it in RB
let t3_persist = t2_write + chrono::Duration::seconds(1);
*db.now_override.lock() = Some(t3_persist);
time.inc(Duration::from_secs(1));
*db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337));
let pq_chunk = db
.persist_partition("cpu", partition_key, true)
@ -2261,6 +2218,7 @@ mod tests {
load_parquet_from_store_for_path(&path_list[0], Arc::clone(&db.iox_object_store))
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data.clone()).unwrap();
// Read metadata at file level
let schema = parquet_metadata.decode().unwrap().read_schema().unwrap();
@ -2287,6 +2245,7 @@ mod tests {
// Create an object store in memory
let object_store = Arc::new(ObjectStore::new_in_memory());
let time = Arc::new(time::MockProvider::new(Time::from_timestamp(11, 22)));
// Create a DB given a server id, an object store and a db name
let test_db = TestDb::builder()
@ -2295,16 +2254,17 @@ mod tests {
..Default::default()
})
.object_store(Arc::clone(&object_store))
.time_provider(Arc::<time::MockProvider>::clone(&time))
.build()
.await;
let db = test_db.db;
// Write some line protocols in Mutable buffer of the DB
let t1_write = Utc.timestamp(11, 22);
write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await;
let t2_write = t1_write + chrono::Duration::seconds(1);
write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await;
write_lp(db.as_ref(), "cpu bar=1 10").await;
time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu bar=2 20").await;
// Now mark the MB chunk closed
let partition_key = "1970-01-01T00";
@ -2319,9 +2279,9 @@ mod tests {
.await
.unwrap()
.unwrap();
// Write the RB chunk to Object Store but keep it in RB
let t3_persist = t2_write + chrono::Duration::seconds(1);
*db.now_override.lock() = Some(t3_persist);
time.inc(Duration::from_secs(1));
*db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337));
let pq_chunk = db
.persist_partition("cpu", partition_key, true)
@ -2407,63 +2367,64 @@ mod tests {
#[tokio::test]
async fn write_updates_last_write_at() {
let db = Arc::new(make_db().await.db);
let before_create = Utc::now();
let (db, time) = make_db_time().await;
let w0 = time.inc(Duration::from_secs(23));
let partition_key = "1970-01-01T00";
write_lp(&db, "cpu bar=1 10").await;
let after_write = Utc::now();
let last_write_prev = {
{
let partition = db.catalog.partition("cpu", partition_key).unwrap();
let partition = partition.read();
assert_ne!(partition.created_at(), partition.last_write_at());
assert!(before_create < partition.last_write_at());
assert!(after_write > partition.last_write_at());
partition.last_write_at()
};
assert_eq!(partition.created_at(), w0);
assert_eq!(partition.last_write_at(), w0);
}
let w1 = time.inc(Duration::from_secs(1));
write_lp(&db, "cpu bar=1 20").await;
{
let partition = db.catalog.partition("cpu", partition_key).unwrap();
let partition = partition.read();
assert!(last_write_prev < partition.last_write_at());
assert_eq!(partition.created_at(), w0);
assert_eq!(partition.last_write_at(), w1);
}
}
#[tokio::test]
async fn failed_write_doesnt_update_last_write_at() {
let db = Arc::new(make_db().await.db);
let before_create = Utc::now();
let (db, time) = make_db_time().await;
let t0 = time.inc(Duration::from_secs(2));
let partition_key = "1970-01-01T00";
write_lp(&db, "cpu bar=1 10").await;
let after_write = Utc::now();
let (last_write_prev, chunk_last_write_prev) = {
{
let partition = db.catalog.partition("cpu", partition_key).unwrap();
let partition = partition.read();
assert_ne!(partition.created_at(), partition.last_write_at());
assert!(before_create < partition.last_write_at());
assert!(after_write > partition.last_write_at());
assert_eq!(partition.created_at(), t0);
assert_eq!(partition.last_write_at(), t0);
let chunk = partition.open_chunk().unwrap();
let chunk = chunk.read();
assert_eq!(chunk.time_of_last_write(), t0);
}
(partition.last_write_at(), chunk.time_of_last_write())
};
time.inc(Duration::from_secs(1));
let entry = lp_to_entry("cpu bar=true 10");
let result = db.store_entry(entry, Utc::now()).await;
let result = db.store_entry(entry).await;
assert!(result.is_err());
{
let partition = db.catalog.partition("cpu", partition_key).unwrap();
let partition = partition.read();
assert_eq!(last_write_prev, partition.last_write_at());
assert_eq!(partition.created_at(), t0);
assert_eq!(partition.last_write_at(), t0);
let chunk = partition.open_chunk().unwrap();
let chunk = chunk.read();
assert_eq!(chunk_last_write_prev, chunk.time_of_last_write());
assert_eq!(chunk.time_of_last_write(), t0);
}
}
@ -2473,7 +2434,10 @@ mod tests {
// is a write buffer configured.
let write_buffer_state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
let write_buffer = Arc::new(MockBufferForWriting::new(write_buffer_state, None).unwrap());
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0)));
let write_buffer = Arc::new(
MockBufferForWriting::new(write_buffer_state.clone(), None, time_provider).unwrap(),
);
let db = TestDb::builder()
.write_buffer_producer(write_buffer)
.build()
@ -2517,12 +2481,15 @@ mod tests {
#[tokio::test]
async fn test_chunk_timestamps() {
let start = Utc::now();
let db = Arc::new(make_db().await.db);
let (db, time) = make_db_time().await;
let w0 = time.inc(Duration::from_secs(95));
// Given data loaded into two chunks
write_lp(&db, "cpu bar=1 10").await;
let after_data_load = Utc::now();
let w1 = time.inc(Duration::from_secs(2));
write_lp(&db, "cpu bar=1 20").await;
// When the chunk is rolled over
let partition_key = "1970-01-01T00";
@ -2532,23 +2499,15 @@ mod tests {
.unwrap()
.unwrap()
.id();
let after_rollover = Utc::now();
let partition = db.catalog.partition("cpu", partition_key).unwrap();
let partition = partition.read();
let (chunk, _order) = partition.chunk(chunk_id).unwrap();
let chunk = chunk.read();
println!(
"start: {:?}, after_data_load: {:?}, after_rollover: {:?}",
start, after_data_load, after_rollover
);
println!("Chunk: {:#?}", chunk);
// then the chunk creation and rollover times are as expected
assert!(start < chunk.time_of_first_write());
assert!(chunk.time_of_first_write() < after_data_load);
assert!(chunk.time_of_first_write() == chunk.time_of_last_write());
assert_eq!(chunk.time_of_first_write(), w0);
assert_eq!(chunk.time_of_last_write(), w1);
}
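The mocked-clock pattern these tests rely on, shown in isolation; a sketch limited to the `time` crate calls exercised above (`MockProvider::new`, `inc`, `set`, `now`):

use std::{sync::Arc, time::Duration};

use time::{MockProvider, Time, TimeProvider};

fn mock_clock_walkthrough() {
    // Pin the clock to an absolute starting point.
    let time = Arc::new(MockProvider::new(Time::from_timestamp(11, 22)));

    // `inc` advances the clock and returns the new value...
    let t1 = time.inc(Duration::from_secs(1));
    assert_eq!(time.now(), t1);

    // ...and `set` jumps it to an arbitrary instant.
    time.set(Time::from_timestamp_nanos(0));
    assert_eq!(time.now(), Time::from_timestamp_nanos(0));
}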
#[tokio::test]
@ -2608,8 +2567,8 @@ mod tests {
object_store_bytes: 0, // os_size
row_count: 1,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(1),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(1),
order: ChunkOrder::new(5).unwrap(),
}];
@ -2634,15 +2593,13 @@ mod tests {
#[tokio::test]
async fn partition_chunk_summaries_timestamp() {
let db = Arc::new(make_db().await.db);
let (db, time) = make_db_time().await;
let t_first_write = Utc::now();
write_lp_with_time(&db, "cpu bar=1 1", t_first_write).await;
let t_first_write = time.inc(Duration::from_secs(2));
write_lp(&db, "cpu bar=1 1").await;
let t_second_write = Utc::now();
write_lp_with_time(&db, "cpu bar=2 2", t_second_write).await;
db.rollover_partition("cpu", "1970-01-01T00").await.unwrap();
let t_second_write = time.inc(Duration::from_secs(2));
write_lp(&db, "cpu bar=2 2").await;
let mut chunk_summaries = db.chunk_summaries().unwrap();
@ -2653,7 +2610,7 @@ mod tests {
assert_eq!(summary.time_of_last_write, t_second_write);
}
fn assert_first_last_times_eq(chunk_summary: &ChunkSummary, expected: DateTime<Utc>) {
fn assert_first_last_times_eq(chunk_summary: &ChunkSummary, expected: Time) {
let first_write = chunk_summary.time_of_first_write;
let last_write = chunk_summary.time_of_last_write;
@ -2688,20 +2645,21 @@ mod tests {
#[tokio::test]
async fn chunk_summaries() {
// Test that chunk id listing is hooked up
let db = make_db().await.db;
let (db, time) = make_db_time().await;
// get three chunks: one open, one closed in mb and one close in rb
// In open chunk, will end up in rb/os
let t1_write = Utc.timestamp(11, 22);
write_lp_with_time(&db, "cpu bar=1 1", t1_write).await;
let t1_write = Time::from_timestamp(11, 22);
time.set(t1_write);
write_lp(&db, "cpu bar=1 1").await;
// Move open chunk to closed
db.rollover_partition("cpu", "1970-01-01T00").await.unwrap();
// New open chunk in mb
// This point will end up in rb/os
let t2_write = t1_write + chrono::Duration::seconds(1);
write_lp_with_time(&db, "cpu bar=1,baz=2 2", t2_write).await;
let t2_write = time.inc(Duration::from_secs(1));
write_lp(&db, "cpu bar=1,baz=2 2").await;
// Check first/last write times on the chunks at this point
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
@ -2717,8 +2675,8 @@ mod tests {
assert_chunks_times_ordered(&closed_mb_t3, &open_mb_t3);
// This point makes a new open mb chunk and will end up in the closed mb chunk
let t3_write = t2_write + chrono::Duration::seconds(1);
write_lp_with_time(&db, "cpu bar=1,baz=2,frob=3 400000000000000", t3_write).await;
time.inc(Duration::from_secs(1));
write_lp(&db, "cpu bar=1,baz=2,frob=3 400000000000000").await;
// Check first/last write times on the chunks at this point
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
@ -2762,8 +2720,7 @@ mod tests {
assert_chunks_times_eq(&other_open_mb_t5, &other_open_mb_t4);
// Persist rb to parquet os
let t4_persist = t3_write + chrono::Duration::seconds(1);
*db.now_override.lock() = Some(t4_persist);
time.inc(Duration::from_secs(1));
*db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337));
db.persist_partition("cpu", "1970-01-01T00", true)
.await
@ -2806,8 +2763,8 @@ mod tests {
// New open chunk in mb
// This point will stay in this open mb chunk
let t5_write = t4_persist + chrono::Duration::seconds(1);
write_lp_with_time(&db, "cpu bar=1,baz=3,blargh=3 400000000000000", t5_write).await;
let t5_write = time.inc(Duration::from_secs(1));
write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000").await;
// Check first/last write times on the chunks at this point
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
@ -2841,8 +2798,8 @@ mod tests {
object_store_bytes: 1557, // size of parquet file
row_count: 2,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(1),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(1),
},
ChunkSummary {
partition_key: Arc::from("1970-01-05T15"),
@ -2855,8 +2812,8 @@ mod tests {
object_store_bytes: 0, // no OS chunks
row_count: 1,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(1),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(1),
},
ChunkSummary {
partition_key: Arc::from("1970-01-05T15"),
@ -2869,8 +2826,8 @@ mod tests {
object_store_bytes: 0, // no OS chunks
row_count: 1,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(1),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(1),
},
];
@ -3279,11 +3236,12 @@ mod tests {
.db_name(db_name)
.build()
.await;
let db = Arc::new(test_db.db);
let db = test_db.db;
// ==================== check: empty catalog created ====================
// at this point, an empty preserved catalog exists
let maybe_preserved_catalog = load_ok(&db.iox_object_store).await;
let config = db.preserved_catalog.config();
let maybe_preserved_catalog = load_ok(config.clone()).await;
assert!(maybe_preserved_catalog.is_some());
// ==================== do: write data to parquet ====================
@ -3313,7 +3271,7 @@ mod tests {
}
}
paths_expected.sort();
let (_preserved_catalog, catalog) = load_ok(&db.iox_object_store).await.unwrap();
let (_preserved_catalog, catalog) = load_ok(config).await.unwrap();
let paths_actual = {
let mut tmp: Vec<_> = catalog.files().map(|info| info.path.clone()).collect();
tmp.sort();


@ -18,6 +18,7 @@ use self::metrics::CatalogMetrics;
use self::partition::Partition;
use self::table::Table;
use data_types::write_summary::WriteSummary;
use time::TimeProvider;
pub mod chunk;
mod metrics;
@ -92,21 +93,32 @@ pub struct Catalog {
tables: RwLock<HashMap<Arc<str>, Table>>,
metrics: Arc<CatalogMetrics>,
time_provider: Arc<dyn TimeProvider>,
}
impl Catalog {
#[cfg(test)]
fn test() -> Self {
Self::new(Arc::from("test"), Default::default())
Self::new(
Arc::from("test"),
Default::default(),
Arc::new(time::SystemProvider::new()),
)
}
pub fn new(db_name: Arc<str>, metric_registry: Arc<::metric::Registry>) -> Self {
pub fn new(
db_name: Arc<str>,
metric_registry: Arc<::metric::Registry>,
time_provider: Arc<dyn TimeProvider>,
) -> Self {
let metrics = Arc::new(CatalogMetrics::new(Arc::clone(&db_name), metric_registry));
Self {
db_name,
tables: Default::default(),
metrics,
time_provider,
}
}
@ -194,6 +206,7 @@ impl Catalog {
Arc::clone(&self.db_name),
Arc::clone(&table_name),
self.metrics.new_table_metrics(table_name.as_ref()),
Arc::clone(&self.time_provider),
);
(table_name, table)
@ -317,13 +330,11 @@ mod tests {
use entry::test_helpers::lp_to_entry;
use super::*;
use chrono::Utc;
fn create_open_chunk(partition: &Arc<RwLock<Partition>>) -> ChunkAddr {
let mut partition = partition.write();
let table = partition.table_name();
let entry = lp_to_entry(&format!("{} bar=1 10", table));
let time_of_write = Utc::now();
let write = entry.partition_writes().unwrap().remove(0);
let batch = write.table_batches().remove(0);
@ -334,7 +345,7 @@ mod tests {
)
.unwrap();
let chunk = partition.create_open_chunk(mb_chunk, time_of_write);
let chunk = partition.create_open_chunk(mb_chunk);
let chunk = chunk.read();
chunk.addr().clone()
}
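For orientation, a pared-down sketch of the dependency-injection pattern this commit threads through `Catalog`, `Table`, `Partition` and `CatalogChunk`: components hold an `Arc<dyn TimeProvider>` and never call `Utc::now()` directly. The struct below is hypothetical; only the `TimeProvider`/`Time` API is taken from the hunks above:

    use std::sync::Arc;
    use time::{Time, TimeProvider};

    // Hypothetical component mirroring the pattern used in this commit.
    struct WriteTracker {
        time_provider: Arc<dyn TimeProvider>,
        created_at: Time,
        last_write_at: Time,
    }

    impl WriteTracker {
        fn new(time_provider: Arc<dyn TimeProvider>) -> Self {
            // same idea as Partition::new: capture "now" from the injected provider
            let now = time_provider.now();
            Self {
                time_provider,
                created_at: now,
                last_write_at: now,
            }
        }

        fn update_last_write_at(&mut self) {
            // same idea as Partition::update_last_write_at
            self.last_write_at = self.time_provider.now();
        }
    }

    fn main() {
        // production code wires the real clock; tests pass Arc::new(time::MockProvider::new(..))
        let mut tracker = WriteTracker::new(Arc::new(time::SystemProvider::new()));
        tracker.update_last_write_at();
        assert!(tracker.created_at <= tracker.last_write_at);
    }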


@ -1,6 +1,5 @@
use std::sync::Arc;
use chrono::{DateTime, Utc};
use snafu::Snafu;
use data_types::{
@ -22,6 +21,7 @@ use tracker::{TaskRegistration, TaskTracker};
use crate::db::catalog::metrics::{StorageRecorder, TimestampHistogram};
use parking_lot::Mutex;
use time::{Time, TimeProvider};
#[derive(Debug, Snafu)]
pub enum Error {
@ -210,15 +210,18 @@ pub struct CatalogChunk {
/// Record access to this chunk's data by queries and writes
access_recorder: AccessRecorder,
/// Time provider
time_provider: Arc<dyn TimeProvider>,
/// The earliest time at which data contained within this chunk was written
/// into IOx. Note that due to compaction, etc., this may not be the chunk
/// that the data was originally written into
time_of_first_write: DateTime<Utc>,
time_of_first_write: Time,
/// The latest time at which data contained within this chunk was written
/// into IOx. Note that due to compaction, etc., this may not be the chunk
/// that the data was originally written into
time_of_last_write: DateTime<Utc>,
time_of_last_write: Time,
/// Order of this chunk relative to other overlapping chunks.
order: ChunkOrder,
@ -273,22 +276,25 @@ impl CatalogChunk {
pub(super) fn new_open(
addr: ChunkAddr,
chunk: mutable_buffer::chunk::MBChunk,
time_of_write: DateTime<Utc>,
metrics: ChunkMetrics,
order: ChunkOrder,
time_provider: Arc<dyn TimeProvider>,
) -> Self {
assert_eq!(chunk.table_name(), &addr.table_name);
let stage = ChunkStage::Open { mb_chunk: chunk };
let now = time_provider.now();
let chunk = Self {
addr,
stage,
lifecycle_action: None,
metrics: Mutex::new(metrics),
access_recorder: Default::default(),
time_of_first_write: time_of_write,
time_of_last_write: time_of_write,
access_recorder: AccessRecorder::new(Arc::clone(&time_provider)),
time_provider,
time_of_first_write: now,
time_of_last_write: now,
order,
};
chunk.update_metrics();
@ -302,12 +308,13 @@ impl CatalogChunk {
pub(super) fn new_rub_chunk(
addr: ChunkAddr,
chunk: read_buffer::RBChunk,
time_of_first_write: DateTime<Utc>,
time_of_last_write: DateTime<Utc>,
time_of_first_write: Time,
time_of_last_write: Time,
schema: Arc<Schema>,
metrics: ChunkMetrics,
delete_predicates: Vec<Arc<DeletePredicate>>,
order: ChunkOrder,
time_provider: Arc<dyn TimeProvider>,
) -> Self {
let stage = ChunkStage::Frozen {
meta: Arc::new(ChunkMetadata {
@ -323,7 +330,8 @@ impl CatalogChunk {
stage,
lifecycle_action: None,
metrics: Mutex::new(metrics),
access_recorder: Default::default(),
access_recorder: AccessRecorder::new(Arc::clone(&time_provider)),
time_provider,
time_of_first_write,
time_of_last_write,
order,
@ -334,14 +342,16 @@ impl CatalogChunk {
/// Creates a new chunk that is only registered via an object store reference (= only exists in
/// parquet).
#[allow(clippy::too_many_arguments)]
pub(super) fn new_object_store_only(
addr: ChunkAddr,
chunk: Arc<parquet_file::chunk::ParquetChunk>,
time_of_first_write: DateTime<Utc>,
time_of_last_write: DateTime<Utc>,
time_of_first_write: Time,
time_of_last_write: Time,
metrics: ChunkMetrics,
delete_predicates: Vec<Arc<DeletePredicate>>,
order: ChunkOrder,
time_provider: Arc<dyn TimeProvider>,
) -> Self {
assert_eq!(chunk.table_name(), addr.table_name.as_ref());
@ -363,7 +373,8 @@ impl CatalogChunk {
stage,
lifecycle_action: None,
metrics: Mutex::new(metrics),
access_recorder: Default::default(),
access_recorder: AccessRecorder::new(Arc::clone(&time_provider)),
time_provider,
time_of_first_write,
time_of_last_write,
order,
@ -407,11 +418,11 @@ impl CatalogChunk {
.map_or(false, |action| action.metadata() == &lifecycle_action)
}
pub fn time_of_first_write(&self) -> DateTime<Utc> {
pub fn time_of_first_write(&self) -> Time {
self.time_of_first_write
}
pub fn time_of_last_write(&self) -> DateTime<Utc> {
pub fn time_of_last_write(&self) -> Time {
self.time_of_last_write
}
@ -511,19 +522,18 @@ impl CatalogChunk {
///
/// `timestamps` is a summary of the row timestamps contained in the write;
/// the wall clock time of the write is taken from the chunk's time provider
pub fn record_write(&mut self, time_of_write: DateTime<Utc>, timestamps: &TimestampSummary) {
pub fn record_write(&mut self, timestamps: &TimestampSummary) {
{
let metrics = self.metrics.lock();
if let Some(timestamp_histogram) = metrics.timestamp_histogram.as_ref() {
timestamp_histogram.add(timestamps)
}
}
self.access_recorder.record_access_now();
self.access_recorder.record_access();
self.time_of_first_write = self.time_of_first_write.min(time_of_write);
// DateTime<Utc> isn't necessarily monotonic
self.time_of_last_write = self.time_of_last_write.max(time_of_write);
let now = self.time_provider.now();
self.time_of_first_write = self.time_of_first_write.min(now);
self.time_of_last_write = self.time_of_last_write.max(now);
self.update_metrics();
}
@ -1128,20 +1138,19 @@ mod tests {
fn make_open_chunk() -> CatalogChunk {
let addr = chunk_addr();
let mb_chunk = make_mb_chunk(&addr.table_name);
let time_of_write = Utc::now();
CatalogChunk::new_open(
addr,
mb_chunk,
time_of_write,
ChunkMetrics::new_unregistered(),
ChunkOrder::new(5).unwrap(),
Arc::new(time::SystemProvider::new()),
)
}
async fn make_persisted_chunk() -> CatalogChunk {
let addr = chunk_addr();
let now = Utc::now();
let now = Time::from_timestamp_nanos(43564);
// assemble ParquetChunk
let parquet_chunk = make_parquet_chunk(addr.clone()).await;
@ -1154,6 +1163,7 @@ mod tests {
ChunkMetrics::new_unregistered(),
vec![],
ChunkOrder::new(6).unwrap(),
Arc::new(time::SystemProvider::new()),
)
}
}
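The `min`/`max` clamping in `record_write` is easiest to see with a mock clock; a small illustrative test, assuming the `MockProvider` semantics shown elsewhere in this commit (`inc` advances the clock, `set` may move it backwards):

    use std::time::Duration;
    use time::{MockProvider, Time, TimeProvider};

    #[test]
    fn first_and_last_write_are_clamped() {
        let clock = MockProvider::new(Time::from_timestamp(100, 0));

        let mut first = clock.now();
        let mut last = clock.now();

        // a later write only moves time_of_last_write forward
        clock.inc(Duration::from_secs(5));
        first = first.min(clock.now());
        last = last.max(clock.now());

        // a mock (or stepped) clock can move backwards; min() keeps
        // time_of_first_write pinned to the earliest observed write
        clock.set(Time::from_timestamp(50, 0));
        first = first.min(clock.now());
        last = last.max(clock.now());

        assert_eq!(first, Time::from_timestamp(50, 0));
        assert_eq!(last, Time::from_timestamp(105, 0));
    }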


@ -2,7 +2,6 @@
use super::chunk::{CatalogChunk, Error as ChunkError};
use crate::db::catalog::metrics::PartitionMetrics;
use chrono::{DateTime, Utc};
use data_types::{
chunk_metadata::{ChunkAddr, ChunkId, ChunkLifecycleAction, ChunkOrder, ChunkSummary},
partition_metadata::{PartitionAddr, PartitionSummary},
@ -16,6 +15,7 @@ use predicate::delete_predicate::DeletePredicate;
use schema::Schema;
use snafu::{OptionExt, Snafu};
use std::{collections::BTreeMap, fmt::Display, sync::Arc};
use time::{Time, TimeProvider};
use tracker::RwLock;
#[derive(Debug, Snafu)]
@ -120,11 +120,11 @@ pub struct Partition {
chunks: ChunkCollection,
/// When this partition was created
created_at: DateTime<Utc>,
created_at: Time,
/// the last time at which a write was made to this
/// partition. Partition::new initializes this to now.
last_write_at: DateTime<Utc>,
last_write_at: Time,
/// Partition metrics
metrics: Arc<PartitionMetrics>,
@ -134,6 +134,9 @@ pub struct Partition {
/// Tracks next chunk order in this partition.
next_chunk_order: ChunkOrder,
/// The time provider
time_provider: Arc<dyn TimeProvider>,
}
impl Partition {
@ -141,8 +144,12 @@ impl Partition {
///
/// This function is not pub because `Partition`s should be created using the interfaces on
/// [`Catalog`](crate::db::catalog::Catalog) and not instantiated directly.
pub(super) fn new(addr: PartitionAddr, metrics: PartitionMetrics) -> Self {
let now = Utc::now();
pub(super) fn new(
addr: PartitionAddr,
metrics: PartitionMetrics,
time_provider: Arc<dyn TimeProvider>,
) -> Self {
let now = time_provider.now();
Self {
addr,
chunks: Default::default(),
@ -151,6 +158,7 @@ impl Partition {
metrics: Arc::new(metrics),
persistence_windows: None,
next_chunk_order: ChunkOrder::MIN,
time_provider,
}
}
@ -176,16 +184,16 @@ impl Partition {
/// Update the last write time to now
pub fn update_last_write_at(&mut self) {
self.last_write_at = Utc::now();
self.last_write_at = self.time_provider.now();
}
/// Return the time at which this partition was created
pub fn created_at(&self) -> DateTime<Utc> {
pub fn created_at(&self) -> Time {
self.created_at
}
/// Return the time at which the last write was written to this partition
pub fn last_write_at(&self) -> DateTime<Utc> {
pub fn last_write_at(&self) -> Time {
self.last_write_at
}
@ -198,7 +206,6 @@ impl Partition {
pub fn create_open_chunk(
&mut self,
chunk: mutable_buffer::chunk::MBChunk,
time_of_write: DateTime<Utc>,
) -> &Arc<RwLock<CatalogChunk>> {
assert_eq!(chunk.table_name().as_ref(), self.table_name());
@ -210,9 +217,9 @@ impl Partition {
let chunk = CatalogChunk::new_open(
addr,
chunk,
time_of_write,
self.metrics.new_chunk_metrics(),
chunk_order,
Arc::clone(&self.time_provider),
);
let chunk = Arc::new(self.metrics.new_chunk_lock(chunk));
self.chunks.insert(chunk_id, chunk_order, chunk)
@ -225,8 +232,8 @@ impl Partition {
pub fn create_rub_chunk(
&mut self,
chunk: read_buffer::RBChunk,
time_of_first_write: DateTime<Utc>,
time_of_last_write: DateTime<Utc>,
time_of_first_write: Time,
time_of_last_write: Time,
schema: Arc<Schema>,
delete_predicates: Vec<Arc<DeletePredicate>>,
chunk_order: ChunkOrder,
@ -252,6 +259,7 @@ impl Partition {
self.metrics.new_chunk_metrics(),
delete_predicates,
chunk_order,
Arc::clone(&self.time_provider),
)));
let chunk = self.chunks.insert(chunk_id, chunk_order, chunk);
@ -269,8 +277,8 @@ impl Partition {
&mut self,
chunk_id: ChunkId,
chunk: Arc<parquet_file::chunk::ParquetChunk>,
time_of_first_write: DateTime<Utc>,
time_of_last_write: DateTime<Utc>,
time_of_first_write: Time,
time_of_last_write: Time,
delete_predicates: Vec<Arc<DeletePredicate>>,
chunk_order: ChunkOrder,
) -> &Arc<RwLock<CatalogChunk>> {
@ -288,6 +296,7 @@ impl Partition {
self.metrics.new_chunk_metrics(),
delete_predicates,
chunk_order,
Arc::clone(&self.time_provider),
)),
);
@ -440,18 +449,17 @@ mod tests {
Arc::clone(&addr.db_name),
Arc::clone(&registry),
));
let time_provider = Arc::new(time::SystemProvider::new());
let table_metrics = Arc::new(catalog_metrics.new_table_metrics("t"));
let partition_metrics = table_metrics.new_partition_metrics();
let t = Utc::now();
// should be in ascending order
let mut expected_ids = vec![];
// Make three chunks
let mut partition = Partition::new(addr, partition_metrics);
let mut partition = Partition::new(addr, partition_metrics, time_provider);
for _ in 0..3 {
let chunk = partition.create_open_chunk(make_mb_chunk("t"), t);
let chunk = partition.create_open_chunk(make_mb_chunk("t"));
expected_ids.push(chunk.read().addr().chunk_id)
}


@ -8,6 +8,7 @@ use schema::{
Schema,
};
use std::{ops::Deref, result::Result, sync::Arc};
use time::TimeProvider;
use tracker::{RwLock, RwLockReadGuard, RwLockWriteGuard};
/// A `Table` is a collection of `Partition` each of which is a collection of `Chunk`
@ -31,6 +32,8 @@ pub struct Table {
/// - the outer `Arc<RwLock<...>>` so that we can reference the locked schema w/o a lifetime to the table
/// - the inner `Arc<Schema>` is a schema that we don't need to copy when moving it around the query stack
schema: Arc<RwLock<Arc<Schema>>>,
time_provider: Arc<dyn TimeProvider>,
}
impl Table {
@ -39,7 +42,12 @@ impl Table {
/// This function is not pub because `Table`s should be
/// created using the interfaces on [`Catalog`](crate::db::catalog::Catalog) and not
/// instantiated directly.
pub(super) fn new(db_name: Arc<str>, table_name: Arc<str>, metrics: TableMetrics) -> Self {
pub(super) fn new(
db_name: Arc<str>,
table_name: Arc<str>,
metrics: TableMetrics,
time_provider: Arc<dyn TimeProvider>,
) -> Self {
// build empty schema for this table
let mut builder = SchemaBuilder::new();
builder.measurement(table_name.as_ref());
@ -52,6 +60,7 @@ impl Table {
partitions: Default::default(),
metrics: Arc::new(metrics),
schema,
time_provider,
}
}
@ -70,6 +79,7 @@ impl Table {
let metrics = &self.metrics;
let db_name = &self.db_name;
let table_name = &self.table_name;
let time_provider = &self.time_provider;
let (_, partition) = self
.partitions
.raw_entry_mut()
@ -84,6 +94,7 @@ impl Table {
partition_key: Arc::clone(&partition_key),
},
partition_metrics,
Arc::clone(time_provider),
);
let partition = Arc::new(metrics.new_partition_lock(partition));
(partition_key, partition)


@ -1,7 +1,6 @@
use super::{
catalog::chunk::ChunkMetadata, pred::to_read_buffer_predicate, streams::ReadFilterResultsStream,
};
use chrono::{DateTime, Utc};
use data_types::{
chunk_metadata::{ChunkId, ChunkOrder},
partition_metadata,
@ -26,6 +25,7 @@ use std::{
collections::{BTreeMap, BTreeSet},
sync::Arc,
};
use time::Time;
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Snafu)]
@ -81,8 +81,8 @@ pub struct DbChunk {
access_recorder: AccessRecorder,
state: State,
meta: Arc<ChunkMetadata>,
time_of_first_write: DateTime<Utc>,
time_of_last_write: DateTime<Utc>,
time_of_first_write: Time,
time_of_last_write: Time,
order: ChunkOrder,
}
@ -218,11 +218,11 @@ impl DbChunk {
&self.table_name
}
pub fn time_of_first_write(&self) -> DateTime<Utc> {
pub fn time_of_first_write(&self) -> Time {
self.time_of_first_write
}
pub fn time_of_last_write(&self) -> DateTime<Utc> {
pub fn time_of_last_write(&self) -> Time {
self.time_of_last_write
}
@ -343,7 +343,7 @@ impl QueryChunk for DbChunk {
// when possible for performance gain
debug!(?predicate, "Input Predicate to read_filter");
self.access_recorder.record_access_now();
self.access_recorder.record_access();
debug!(?delete_predicates, "Input Delete Predicates to read_filter");
@ -419,7 +419,7 @@ impl QueryChunk for DbChunk {
// TODO: Support predicates
return Ok(None);
}
self.access_recorder.record_access_now();
self.access_recorder.record_access();
Ok(chunk.column_names(columns))
}
State::ReadBuffer { chunk, .. } => {
@ -431,7 +431,7 @@ impl QueryChunk for DbChunk {
}
};
self.access_recorder.record_access_now();
self.access_recorder.record_access();
Ok(Some(
chunk
.column_names(rb_predicate, columns, BTreeSet::new())
@ -445,7 +445,7 @@ impl QueryChunk for DbChunk {
// TODO: Support predicates when MB supports it
return Ok(None);
}
self.access_recorder.record_access_now();
self.access_recorder.record_access();
Ok(chunk.column_names(columns))
}
}
@ -471,7 +471,7 @@ impl QueryChunk for DbChunk {
}
};
self.access_recorder.record_access_now();
self.access_recorder.record_access();
let mut values = chunk
.column_values(
rb_predicate,
@ -555,70 +555,74 @@ impl QueryChunkMeta for DbChunk {
#[cfg(test)]
mod tests {
use super::*;
use crate::{
db::{
catalog::chunk::{CatalogChunk, ChunkStage},
test_helpers::{write_lp, write_lp_with_time},
},
utils::make_db,
use crate::db::{
catalog::chunk::{CatalogChunk, ChunkStage},
test_helpers::write_lp,
};
use crate::utils::make_db_time;
use data_types::chunk_metadata::ChunkStorage;
use std::time::Duration;
async fn test_chunk_access(chunk: &CatalogChunk) {
let t1 = chunk.access_recorder().get_metrics();
async fn test_chunk_access(chunk: &CatalogChunk, time: Arc<time::MockProvider>) {
let m1 = chunk.access_recorder().get_metrics();
let snapshot = DbChunk::snapshot(chunk);
let t2 = chunk.access_recorder().get_metrics();
let m2 = chunk.access_recorder().get_metrics();
let t1 = time.inc(Duration::from_secs(1));
snapshot
.read_filter(&Default::default(), Selection::All, &[])
.unwrap();
let t3 = chunk.access_recorder().get_metrics();
let m3 = chunk.access_recorder().get_metrics();
let t2 = time.inc(Duration::from_secs(1));
let column_names = snapshot
.column_names(&Default::default(), Selection::All)
.unwrap()
.is_some();
let t4 = chunk.access_recorder().get_metrics();
let m4 = chunk.access_recorder().get_metrics();
let t3 = time.inc(Duration::from_secs(1));
let column_values = snapshot
.column_values("tag", &Default::default())
.unwrap()
.is_some();
let t5 = chunk.access_recorder().get_metrics();
let m5 = chunk.access_recorder().get_metrics();
// Snapshot shouldn't count as an access
assert_eq!(t1, t2);
assert_eq!(m1, m2);
// Query should count as an access
assert_eq!(t2.count + 1, t3.count);
assert!(t2.last_access < t3.last_access);
assert_eq!(m2.count + 1, m3.count);
assert!(m2.last_access < m3.last_access);
assert_eq!(m3.last_access, t1);
// If column_names succeeds it should record an access
match column_names {
true => {
assert_eq!(t3.count + 1, t4.count);
assert!(t3.last_access < t4.last_access);
assert_eq!(m3.count + 1, m4.count);
assert_eq!(m4.last_access, t2);
}
false => {
assert_eq!(t3, t4);
assert_eq!(m3, m4);
}
}
// If column_values succeeds it should record an access
match column_values {
true => {
assert_eq!(t4.count + 1, t5.count);
assert!(t4.last_access < t5.last_access);
assert_eq!(m4.count + 1, m5.count);
assert!(m4.last_access < m5.last_access);
assert_eq!(m5.last_access, t3);
}
false => {
assert_eq!(t4, t5);
assert_eq!(m4, m5);
}
}
}
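A condensed sketch of what the assertions above verify: `AccessRecorder` stamps each access with the injected clock's current value rather than wall-clock time. `AccessRecorder`'s import path and the exact shape of its metrics struct are taken on faith from the surrounding code:

    use std::{sync::Arc, time::Duration};
    use time::{MockProvider, Time, TimeProvider};
    // AccessRecorder is the type used above; its import lives outside these hunks.

    #[test]
    fn access_recorder_reads_injected_clock() {
        let clock = Arc::new(MockProvider::new(Time::from_timestamp(0, 0)));
        let provider: Arc<dyn TimeProvider> = Arc::clone(&clock);

        let recorder = AccessRecorder::new(provider);
        let before = recorder.get_metrics();

        let t = clock.inc(Duration::from_secs(1));
        recorder.record_access();
        let after = recorder.get_metrics();

        // one more access, stamped with the exact mocked instant
        assert_eq!(after.count, before.count + 1);
        assert_eq!(after.last_access, t);
    }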
#[tokio::test]
async fn mub_records_access() {
let db = make_db().await.db;
let (db, time) = make_db_time().await;
write_lp(&db, "cpu,tag=1 bar=1 1").await;
@ -628,12 +632,12 @@ mod tests {
let chunk = chunk.read();
assert_eq!(chunk.storage().1, ChunkStorage::OpenMutableBuffer);
test_chunk_access(&chunk).await;
test_chunk_access(&chunk, time).await;
}
#[tokio::test]
async fn rub_records_access() {
let db = make_db().await.db;
let (db, time) = make_db_time().await;
write_lp(&db, "cpu,tag=1 bar=1 1").await;
db.compact_partition("cpu", "1970-01-01T00").await.unwrap();
@ -644,15 +648,15 @@ mod tests {
let chunk = chunk.read();
assert_eq!(chunk.storage().1, ChunkStorage::ReadBuffer);
test_chunk_access(&chunk).await
test_chunk_access(&chunk, time).await
}
#[tokio::test]
async fn parquet_records_access() {
let db = make_db().await.db;
let (db, time) = make_db_time().await;
let creation_time = Utc::now();
write_lp_with_time(&db, "cpu,tag=1 bar=1 1", creation_time).await;
let t0 = time.inc(Duration::from_secs(324));
write_lp(&db, "cpu,tag=1 bar=1 1").await;
let id = db
.persist_partition("cpu", "1970-01-01T00", true)
@ -668,22 +672,24 @@ mod tests {
let chunk = chunks.into_iter().next().unwrap();
let chunk = chunk.read();
assert_eq!(chunk.storage().1, ChunkStorage::ObjectStoreOnly);
let first_write = chunk.time_of_first_write();
let last_write = chunk.time_of_last_write();
assert_eq!(first_write, last_write);
assert_eq!(first_write, creation_time);
assert_eq!(first_write, t0);
assert_eq!(last_write, t0);
test_chunk_access(&chunk).await
test_chunk_access(&chunk, time).await
}
#[tokio::test]
async fn parquet_snapshot() {
let db = make_db().await.db;
let (db, time) = make_db_time().await;
let w0 = Utc::now();
write_lp_with_time(&db, "cpu,tag=1 bar=1 1", w0).await;
let w1 = w0 + chrono::Duration::seconds(4);
write_lp_with_time(&db, "cpu,tag=2 bar=2 2", w1).await;
let w0 = time.inc(Duration::from_secs(10));
write_lp(&db, "cpu,tag=1 bar=1 1").await;
let w1 = time.inc(Duration::from_secs(10));
write_lp(&db, "cpu,tag=2 bar=2 2").await;
db.persist_partition("cpu", "1970-01-01T00", true)
.await


@ -28,6 +28,7 @@ use std::{
fmt::Display,
sync::{Arc, Weak},
};
use time::Time;
use tracker::{RwLock, TaskTracker};
pub(crate) use compact::compact_chunks;
@ -103,7 +104,7 @@ impl LockableChunk for LockableCatalogChunk {
pub struct CatalogPersistHandle(FlushHandle);
impl lifecycle::PersistHandle for CatalogPersistHandle {
fn timestamp(&self) -> DateTime<Utc> {
fn timestamp(&self) -> Time {
self.0.timestamp()
}
}
@ -200,10 +201,13 @@ impl LockablePartition for LockableCatalogPartition {
fn prepare_persist(
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
now: DateTime<Utc>,
force: bool,
) -> Option<Self::PersistHandle> {
let window = partition.persistence_windows_mut().unwrap();
let handle = window.flush_handle(now);
let handle = match force {
true => window.flush_all_handle(),
false => window.flush_handle(),
};
trace!(?handle, "preparing for persist");
Some(CatalogPersistHandle(handle?))
}
@ -285,13 +289,13 @@ impl LifecyclePartition for Partition {
.unwrap_or(true)
}
fn persistable_row_count(&self, now: DateTime<Utc>) -> usize {
fn persistable_row_count(&self) -> usize {
self.persistence_windows()
.map(|w| w.persistable_row_count(now))
.map(|w| w.persistable_row_count())
.unwrap_or(0)
}
fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>> {
fn minimum_unpersisted_age(&self) -> Option<Time> {
self.persistence_windows()
.and_then(|w| w.minimum_unpersisted_age())
}
@ -327,7 +331,7 @@ impl LifecycleChunk for CatalogChunk {
self.access_recorder().get_metrics()
}
fn time_of_last_write(&self) -> DateTime<Utc> {
fn time_of_last_write(&self) -> Time {
self.time_of_last_write()
}


@ -6,13 +6,13 @@ use crate::db::{
lifecycle::collect_rub,
DbChunk,
};
use chrono::{DateTime, Utc};
use data_types::{chunk_metadata::ChunkOrder, job::Job};
use lifecycle::LifecycleWriteGuard;
use observability_deps::tracing::info;
use predicate::delete_predicate::DeletePredicate;
use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta};
use std::{collections::HashSet, future::Future, sync::Arc};
use time::Time;
use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt};
/// Compact the provided chunks into a single chunk,
@ -45,8 +45,8 @@ pub(crate) fn compact_chunks(
// Mark and snapshot chunks, then drop locks
let mut input_rows = 0;
let mut time_of_first_write: Option<DateTime<Utc>> = None;
let mut time_of_last_write: Option<DateTime<Utc>> = None;
let mut time_of_first_write: Option<Time> = None;
let mut time_of_last_write: Option<Time> = None;
let mut delete_predicates_before: HashSet<Arc<DeletePredicate>> = HashSet::new();
let mut min_order = ChunkOrder::MAX;
let query_chunks = chunks
@ -168,24 +168,26 @@ pub(crate) fn compact_chunks(
mod tests {
use super::*;
use crate::db::test_helpers::write_lp;
use crate::{db::test_helpers::write_lp_with_time, utils::make_db};
use crate::utils::{make_db, make_db_time};
use data_types::chunk_metadata::ChunkStorage;
use data_types::timestamp::TimestampRange;
use lifecycle::{LockableChunk, LockablePartition};
use predicate::delete_expr::{DeleteExpr, Op, Scalar};
use query::QueryDatabase;
use std::time::Duration;
#[tokio::test]
async fn test_compact_freeze() {
let db = make_db().await.db;
let (db, time) = make_db_time().await;
let t_first_write = Utc::now();
write_lp_with_time(db.as_ref(), "cpu,tag1=cupcakes bar=1 10", t_first_write).await;
write_lp_with_time(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20", Utc::now()).await;
write_lp_with_time(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10", Utc::now()).await;
write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20", Utc::now()).await;
let t_last_write = Utc::now();
write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10", t_last_write).await;
let t_first_write = time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await;
write_lp(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20").await;
write_lp(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10").await;
write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20").await;
let t_last_write = time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10").await;
let partition_keys = db.partition_keys().unwrap();
assert_eq!(partition_keys.len(), 1);
@ -201,8 +203,8 @@ mod tests {
let (_, fut) = compact_chunks(partition.upgrade(), vec![chunk.upgrade()]).unwrap();
// NB: perform the write before spawning the background task that performs the compaction
let t_later_write = Utc::now();
write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40", t_later_write).await;
let t_later_write = time.inc(Duration::from_secs(1));
write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40").await;
tokio::spawn(fut).await.unwrap().unwrap().unwrap();
let mut chunk_summaries: Vec<_> = db_partition.read().chunk_summaries().collect();


@ -6,7 +6,6 @@ use crate::db::{
lifecycle::{collect_rub, merge_schemas, write::write_chunk_to_object_store},
DbChunk,
};
use chrono::{DateTime, Utc};
use data_types::{chunk_metadata::ChunkOrder, job::Job};
use lifecycle::{LifecycleWriteGuard, LockableChunk, LockablePartition};
use observability_deps::tracing::info;
@ -14,6 +13,7 @@ use persistence_windows::persistence_windows::FlushHandle;
use predicate::delete_predicate::DeletePredicate;
use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta};
use std::{collections::HashSet, future::Future, sync::Arc};
use time::Time;
use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt};
/// Split and then persist the provided chunks
@ -47,8 +47,8 @@ pub fn persist_chunks(
// Mark and snapshot chunks, then drop locks
let mut input_rows = 0;
let mut time_of_first_write: Option<DateTime<Utc>> = None;
let mut time_of_last_write: Option<DateTime<Utc>> = None;
let mut time_of_first_write: Option<Time> = None;
let mut time_of_last_write: Option<Time> = None;
let mut query_chunks = vec![];
let mut delete_predicates_before: HashSet<Arc<DeletePredicate>> = HashSet::new();
let mut min_order = ChunkOrder::MAX;
@ -232,7 +232,6 @@ mod tests {
Db,
};
use chrono::{TimeZone, Utc};
use data_types::{
chunk_metadata::ChunkStorage, database_rules::LifecycleRules, server_id::ServerId,
timestamp::TimestampRange,
@ -246,8 +245,10 @@ mod tests {
num::{NonZeroU32, NonZeroU64},
time::Duration,
};
use time::Time;
async fn test_db() -> Arc<Db> {
async fn test_db() -> (Arc<Db>, Arc<time::MockProvider>) {
let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(3409, 45)));
let test_db = TestDb::builder()
.lifecycle_rules(LifecycleRules {
late_arrive_window_seconds: NonZeroU32::new(1).unwrap(),
@ -255,23 +256,24 @@ mod tests {
worker_backoff_millis: NonZeroU64::new(u64::MAX).unwrap(),
..Default::default()
})
.time_provider(Arc::<time::MockProvider>::clone(&time_provider))
.build()
.await;
test_db.db
(test_db.db, time_provider)
}
#[tokio::test]
async fn test_flush_overlapping() {
let db = test_db().await;
let (db, time) = test_db().await;
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await;
let partition_keys = db.partition_keys().unwrap();
assert_eq!(partition_keys.len(), 1);
let db_partition = db.partition("cpu", &partition_keys[0]).unwrap();
// Wait for the persistence window to be closed
tokio::time::sleep(Duration::from_secs(2)).await;
// Close window
time.inc(Duration::from_secs(2));
write_lp(db.as_ref(), "cpu,tag1=lagged bar=1 10").await;
@ -283,11 +285,11 @@ mod tests {
let mut partition = partition.upgrade();
let handle = LockablePartition::prepare_persist(&mut partition, Utc::now())
let handle = LockablePartition::prepare_persist(&mut partition, false)
.unwrap()
.0;
assert_eq!(handle.timestamp(), Utc.timestamp_nanos(10));
assert_eq!(handle.timestamp(), Time::from_timestamp_nanos(10));
let chunks: Vec<_> = chunks.map(|x| x.upgrade()).collect();
persist_chunks(partition, chunks, handle)
@ -307,18 +309,14 @@ mod tests {
#[tokio::test]
async fn test_persist_delete_all() {
let db = test_db().await;
let (db, time) = test_db().await;
let late_arrival = chrono::Duration::seconds(1);
let late_arrival = Duration::from_secs(1);
let t0 = Utc::now();
let t1 = t0 + late_arrival * 10;
let t2 = t1 + late_arrival * 10;
*db.now_override.lock() = Some(t0);
time.inc(Duration::from_secs(32));
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await;
*db.now_override.lock() = Some(t1);
time.inc(late_arrival);
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=3 23").await;
let partition_keys = db.partition_keys().unwrap();
@ -335,7 +333,6 @@ mod tests {
db.delete("cpu", predicate).await.unwrap();
// Try to persist first write but it has been deleted
*db.now_override.lock() = Some(t0 + late_arrival);
let maybe_chunk = db
.persist_partition("cpu", partition_key.as_str(), false)
.await
@ -355,11 +352,11 @@ mod tests {
.unwrap()
.minimum_unpersisted_timestamp()
.unwrap(),
Utc.timestamp_nanos(23)
Time::from_timestamp_nanos(23)
);
// Add a second set of writes one of which overlaps the above chunk
*db.now_override.lock() = Some(t2);
time.inc(late_arrival * 10);
write_lp(db.as_ref(), "cpu,tag1=foo bar=2 23").await;
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=2 26").await;
@ -380,7 +377,7 @@ mod tests {
// The persistence windows only know that all rows <= 23 have been persisted
// They do not know that the remaining row has timestamp 26, only that
// it is in the range 24..=26
Utc.timestamp_nanos(24)
Time::from_timestamp_nanos(24)
);
let mut chunks: Vec<_> = partition.read().chunk_summaries().collect();
@ -403,7 +400,7 @@ mod tests {
db.delete("cpu", predicate).await.unwrap();
// Try to persist third set of writes
*db.now_override.lock() = Some(t2 + late_arrival);
time.inc(late_arrival);
let maybe_chunk = db
.persist_partition("cpu", partition_key.as_str(), false)
.await
@ -422,12 +419,9 @@ mod tests {
#[tokio::test]
async fn persist_compacted_deletes() {
let db = test_db().await;
let (db, time) = test_db().await;
let late_arrival = chrono::Duration::seconds(1);
let t0 = Utc::now();
*db.now_override.lock() = Some(t0);
let late_arrival = Duration::from_secs(1);
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await;
let partition_keys = db.partition_keys().unwrap();
@ -459,7 +453,7 @@ mod tests {
// Persistence windows are unaware that rows have been deleted
assert!(!partition.read().persistence_windows().unwrap().is_empty());
*db.now_override.lock() = Some(t0 + late_arrival);
time.inc(late_arrival);
let maybe_chunk = db
.persist_partition("cpu", partition_key.as_str(), false)
.await
@ -489,7 +483,7 @@ mod tests {
})
.build()
.await;
let db = Arc::new(test_db.db);
let db = test_db.db;
// | foo | delete before persist | delete during persist |
// | --- | --------------------- | --------------------- |
@ -527,7 +521,6 @@ mod tests {
let db_partition = db.partition("cpu", &partition_keys[0]).unwrap();
// Wait for the persistence window to be closed
tokio::time::sleep(Duration::from_secs(2)).await;
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
let partition = partition.read();
@ -536,11 +529,11 @@ mod tests {
let mut partition = partition.upgrade();
let handle = LockablePartition::prepare_persist(&mut partition, Utc::now())
let handle = LockablePartition::prepare_persist(&mut partition, true)
.unwrap()
.0;
assert_eq!(handle.timestamp(), Utc.timestamp_nanos(20));
assert_eq!(handle.timestamp(), Time::from_timestamp_nanos(20));
let chunks: Vec<_> = chunks.map(|x| x.upgrade()).collect();
let (_, fut) = persist_chunks(partition, chunks, handle).unwrap();
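The same mock-clock idea drives these persistence tests; a rough sketch of the pattern, reusing this module's `test_db`/`write_lp` helpers (the 2-second `inc` assumes the 1-second `late_arrive_window_seconds` configured in `test_db`, and the returned chunk is expected as in the tests above):

    use std::time::Duration;

    #[tokio::test]
    async fn persist_after_late_arrival_window() {
        let (db, time) = test_db().await; // Db backed by a MockProvider

        write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await;

        // advancing the mock clock past the late arrival window closes the open
        // persistence window without sleeping in real time
        time.inc(Duration::from_secs(2));

        let maybe_chunk = db
            .persist_partition("cpu", "1970-01-01T00", false)
            .await
            .unwrap();
        assert!(maybe_chunk.is_some());
    }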
@ -568,8 +561,9 @@ mod tests {
let metric_registry = Arc::new(metric::Registry::new());
let (_preserved_catalog, catalog, _replay_plan) = load_or_create_preserved_catalog(
db_name,
db.iox_object_store(),
Arc::clone(&db.iox_object_store),
metric_registry,
Arc::clone(&db.time_provider),
false,
false,
)


@ -125,7 +125,7 @@ pub(super) fn write_chunk_to_object_store(
// IMPORTANT: Writing must take place while holding the cleanup lock, otherwise the file might be deleted
// between creation and the transaction commit.
let metadata = IoxMetadata {
creation_timestamp: db.utc_now(),
creation_timestamp: db.time_provider.now(),
table_name: Arc::clone(&table_name),
partition_key: Arc::clone(&partition_key),
chunk_id: addr.chunk_id,


@ -6,7 +6,7 @@ use iox_object_store::{IoxObjectStore, ParquetFilePath};
use observability_deps::tracing::{error, info};
use parquet_file::{
catalog::{
core::PreservedCatalog,
core::{PreservedCatalog, PreservedCatalogConfig},
interface::{
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
ChunkAddrWithoutDatabase, ChunkCreationFailed,
@ -18,6 +18,7 @@ use persistence_windows::checkpoint::{ReplayPlan, ReplayPlanner};
use predicate::delete_predicate::DeletePredicate;
use snafu::{ResultExt, Snafu};
use std::sync::Arc;
use time::TimeProvider;
#[derive(Debug, Snafu)]
pub enum Error {
@ -53,14 +54,19 @@ pub async fn load_or_create_preserved_catalog(
db_name: &str,
iox_object_store: Arc<IoxObjectStore>,
metric_registry: Arc<::metric::Registry>,
time_provider: Arc<dyn TimeProvider>,
wipe_on_error: bool,
skip_replay: bool,
) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> {
// first try to load existing catalogs
match PreservedCatalog::load(
db_name,
Arc::clone(&iox_object_store),
LoaderEmptyInput::new(Arc::clone(&metric_registry), skip_replay),
PreservedCatalogConfig::new(Arc::clone(&iox_object_store), Arc::clone(&time_provider)),
LoaderEmptyInput::new(
Arc::clone(&metric_registry),
Arc::clone(&time_provider),
skip_replay,
),
)
.await
{
@ -85,8 +91,9 @@ pub async fn load_or_create_preserved_catalog(
create_preserved_catalog(
db_name,
Arc::clone(&iox_object_store),
iox_object_store,
metric_registry,
time_provider,
skip_replay,
)
.await
@ -103,8 +110,9 @@ pub async fn load_or_create_preserved_catalog(
create_preserved_catalog(
db_name,
Arc::clone(&iox_object_store),
iox_object_store,
metric_registry,
time_provider,
skip_replay,
)
.await
@ -122,12 +130,15 @@ pub async fn create_preserved_catalog(
db_name: &str,
iox_object_store: Arc<IoxObjectStore>,
metric_registry: Arc<metric::Registry>,
time_provider: Arc<dyn TimeProvider>,
skip_replay: bool,
) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> {
let config = PreservedCatalogConfig::new(iox_object_store, Arc::clone(&time_provider));
let (preserved_catalog, loader) = PreservedCatalog::new_empty(
db_name,
Arc::clone(&iox_object_store),
LoaderEmptyInput::new(metric_registry, skip_replay),
config,
LoaderEmptyInput::new(metric_registry, time_provider, skip_replay),
)
.await
.context(CannotCreateCatalog)?;
@ -146,13 +157,19 @@ pub async fn create_preserved_catalog(
#[derive(Debug)]
struct LoaderEmptyInput {
metric_registry: Arc<::metric::Registry>,
time_provider: Arc<dyn TimeProvider>,
skip_replay: bool,
}
impl LoaderEmptyInput {
fn new(metric_registry: Arc<metric::Registry>, skip_replay: bool) -> Self {
fn new(
metric_registry: Arc<metric::Registry>,
time_provider: Arc<dyn TimeProvider>,
skip_replay: bool,
) -> Self {
Self {
metric_registry,
time_provider,
skip_replay,
}
}
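A sketch of the call shape after this change, assuming the imports already present at the top of this file (`load_or_create_preserved_catalog`, `PreservedCatalog`, `Catalog`, `ReplayPlan` and the module's `Result` alias): a single `TimeProvider` handle is passed in and ends up in both the `PreservedCatalogConfig` and the in-memory `Catalog`:

    use std::sync::Arc;
    use iox_object_store::IoxObjectStore;
    use time::TimeProvider;

    async fn open_catalog(
        db_name: &str,
        iox_object_store: Arc<IoxObjectStore>,
        time_provider: Arc<dyn TimeProvider>,
    ) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> {
        load_or_create_preserved_catalog(
            db_name,
            iox_object_store,
            Default::default(), // metric registry
            time_provider,
            false, // wipe_on_error
            false, // skip_replay
        )
        .await
    }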
@ -170,8 +187,14 @@ impl CatalogState for Loader {
type EmptyInput = LoaderEmptyInput;
fn new_empty(db_name: &str, data: Self::EmptyInput) -> Self {
let catalog = Catalog::new(
Arc::from(db_name),
Arc::clone(&data.metric_registry),
Arc::clone(&data.time_provider),
);
Self {
catalog: Catalog::new(Arc::from(db_name), Arc::clone(&data.metric_registry)),
catalog,
planner: (!data.skip_replay).then(ReplayPlanner::new),
metric_registry: Arc::new(Default::default()),
}
@ -317,6 +340,7 @@ mod tests {
#[tokio::test]
async fn load_or_create_preserved_catalog_recovers_from_error() {
let object_store = Arc::new(ObjectStore::new_in_memory());
let time_provider: Arc<dyn TimeProvider> = Arc::new(time::SystemProvider::new());
let server_id = ServerId::try_from(1).unwrap();
let db_name = DatabaseName::new("preserved_catalog_test").unwrap();
let iox_object_store = Arc::new(
@ -324,8 +348,10 @@ mod tests {
.await
.unwrap(),
);
let config =
PreservedCatalogConfig::new(Arc::clone(&iox_object_store), Arc::clone(&time_provider));
let (preserved_catalog, _catalog) = new_empty(&iox_object_store).await;
let (preserved_catalog, _catalog) = new_empty(config).await;
parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog)
.await;
@ -333,6 +359,7 @@ mod tests {
&db_name,
iox_object_store,
Default::default(),
time_provider,
true,
false,
)
@ -348,6 +375,7 @@ mod tests {
async fn test_catalog_state() {
let empty_input = LoaderEmptyInput {
metric_registry: Default::default(),
time_provider: Arc::new(time::SystemProvider::new()),
skip_replay: false,
};
assert_catalog_state_implementation::<Loader, _>(empty_input, checkpoint_data_from_loader)


@ -4,7 +4,6 @@ use std::{
time::Duration,
};
use chrono::Utc;
use data_types::sequence::Sequence;
use entry::TableBatch;
use futures::TryStreamExt;
@ -15,6 +14,7 @@ use persistence_windows::{
persistence_windows::PersistenceWindows,
};
use snafu::{ResultExt, Snafu};
use time::Time;
use write_buffer::core::WriteBufferReading;
use crate::Db;
@ -120,7 +120,7 @@ pub async fn seek_to_end(db: &Db, write_buffer: &mut dyn WriteBufferReading) ->
Arc::from(partition.table_name()),
Arc::from(partition.key()),
sequencer_numbers.clone(),
Utc::now(),
Time::from_timestamp_nanos(0),
);
match partition.persistence_windows_mut() {
@ -131,7 +131,7 @@ pub async fn seek_to_end(db: &Db, write_buffer: &mut dyn WriteBufferReading) ->
let mut windows = PersistenceWindows::new(
partition.addr().clone(),
late_arrival_window,
db.utc_now(),
Arc::clone(&db.time_provider),
);
windows.mark_seen_and_persisted(&dummy_checkpoint);
partition.set_persistence_windows(windows);
@ -237,7 +237,6 @@ pub async fn perform_replay(
|sequence, partition_key, table_batch| {
filter_entry(sequence, partition_key, table_batch, replay_plan)
},
db.utc_now(),
) {
Ok(_) => {
break;
@ -290,7 +289,7 @@ pub async fn perform_replay(
let mut windows = PersistenceWindows::new(
partition.addr().clone(),
late_arrival_window,
db.utc_now(),
Arc::clone(&db.time_provider),
);
windows.mark_seen_and_persisted(partition_checkpoint);
partition.set_persistence_windows(windows);
@ -418,7 +417,6 @@ mod tests {
};
use arrow_util::assert_batches_eq;
use chrono::{DateTime, Utc};
use data_types::{
database_rules::{PartitionTemplate, Partitioner, TemplatePart},
sequence::Sequence,
@ -435,6 +433,7 @@ mod tests {
};
use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner};
use test_helpers::{assert_contains, assert_not_contains, tracing::TracingCapture};
use time::{Time, TimeProvider};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use write_buffer::mock::{MockBufferForReading, MockBufferSharedState};
@ -462,7 +461,7 @@ mod tests {
SequencedEntry::new_from_sequence(
Sequence::new(sequencer_id, sequence_number),
Utc::now(),
Time::from_timestamp_nanos(0),
entries.pop().unwrap(),
)
}
@ -561,6 +560,7 @@ mod tests {
// ==================== setup ====================
let object_store = Arc::new(ObjectStore::new_in_memory());
let time = Arc::new(time::MockProvider::new(Time::from_timestamp(12, 0)));
let server_id = ServerId::try_from(1).unwrap();
let db_name = "replay_test";
let partition_template = PartitionTemplate {
@ -577,7 +577,7 @@ mod tests {
db_name,
partition_template.clone(),
self.catalog_transactions_until_checkpoint,
Utc::now(),
Arc::<time::MockProvider>::clone(&time),
)
.await;
@ -610,9 +610,6 @@ mod tests {
shutdown.cancel();
join_handle.await.unwrap();
// remember time
let now = test_db.db.now_override.lock().unwrap();
// drop old DB
drop(test_db);
@ -623,7 +620,7 @@ mod tests {
db_name,
partition_template.clone(),
self.catalog_transactions_until_checkpoint,
now,
Arc::<time::MockProvider>::clone(&time),
)
.await;
test_db = test_db_tmp;
@ -694,8 +691,7 @@ mod tests {
}
}
Step::MakeWritesPersistable => {
let mut guard = test_db.db.now_override.lock();
*guard = Some(guard.unwrap() + chrono::Duration::seconds(60));
time.inc(Duration::from_secs(60));
}
Step::Assert(checks) => {
Self::eval_checks(&checks, true, &test_db).await;
@ -715,7 +711,7 @@ mod tests {
}
let db = &test_db.db;
db.unsuppress_persistence().await;
db.unsuppress_persistence();
// wait until checks pass
let t_0 = Instant::now();
@ -762,7 +758,7 @@ mod tests {
db_name: &'static str,
partition_template: PartitionTemplate,
catalog_transactions_until_checkpoint: NonZeroU64,
now: DateTime<Utc>,
time_provider: Arc<dyn TimeProvider>,
) -> (TestDb, CancellationToken, JoinHandle<()>) {
let test_db = TestDb::builder()
.object_store(object_store)
@ -775,13 +771,11 @@ mod tests {
..Default::default()
})
.partition_template(partition_template)
.time_provider(time_provider)
.db_name(db_name)
.build()
.await;
// Mock time
*test_db.db.now_override.lock() = Some(now);
// start background worker
let shutdown: CancellationToken = Default::default();
let shutdown_captured = shutdown.clone();
@ -2595,7 +2589,7 @@ mod tests {
Arc::from("table"),
Arc::from("partition"),
sequencer_numbers,
Utc::now(),
Time::from_timestamp_nanos(236),
);
let builder = PersistCheckpointBuilder::new(partition_checkpoint);
let (partition_checkpoint, database_checkpoint) = builder.build();
@ -2622,12 +2616,12 @@ mod tests {
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 0),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=1 0"),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 2),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=1 10"),
));
let mut write_buffer = MockBufferForReading::new(write_buffer_state, None).unwrap();
@ -2642,7 +2636,7 @@ mod tests {
Arc::from("table"),
Arc::from("partition"),
sequencer_numbers,
Utc::now(),
Time::from_timestamp_nanos(0),
);
let builder = PersistCheckpointBuilder::new(partition_checkpoint);
let (partition_checkpoint, database_checkpoint) = builder.build();
@ -2672,17 +2666,17 @@ mod tests {
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(3).unwrap());
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 0),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=0 0"),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 3),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=3 3"),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(1, 1),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=11 11"),
));
let mut write_buffer = MockBufferForReading::new(write_buffer_state.clone(), None).unwrap();
@ -2697,17 +2691,17 @@ mod tests {
// add more data
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 4),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=4 4"),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(1, 9),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=19 19"),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(2, 0),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("cpu bar=20 20"),
));
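Entries pushed into the mock write buffer now carry an explicit producer timestamp, which is what makes these replay tests reproducible; a condensed sketch, assuming the test imports (`Sequence`, `SequencedEntry`, `lp_to_entry`, `MockBufferSharedState`) already used in this module:

    use std::num::NonZeroU32;
    use time::Time;

    fn seeded_write_buffer() -> MockBufferSharedState {
        let state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::new(1).unwrap());

        // a fixed producer timestamp instead of Utc::now() keeps replays reproducible
        state.push_entry(SequencedEntry::new_from_sequence(
            Sequence::new(0, 0),
            Time::from_timestamp_nanos(0),
            lp_to_entry("cpu bar=1 0"),
        ));

        state
    }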


@ -5,9 +5,9 @@ use arrow::{
error::Result,
record_batch::RecordBatch,
};
use chrono::{DateTime, Utc};
use data_types::{chunk_metadata::ChunkSummary, error::ErrorLogger};
use std::sync::Arc;
use time::Time;
/// Implementation of system.chunks table
#[derive(Debug)]
@ -55,11 +55,11 @@ fn chunk_summaries_schema() -> SchemaRef {
}
// TODO: Use a custom proc macro or serde to reduce the boilerplate
fn optional_time_to_ts(time: Option<DateTime<Utc>>) -> Option<i64> {
fn optional_time_to_ts(time: Option<Time>) -> Option<i64> {
time.and_then(time_to_ts)
}
fn time_to_ts(ts: DateTime<Utc>) -> Option<i64> {
fn time_to_ts(ts: Time) -> Option<i64> {
Some(ts.timestamp_nanos())
}
@ -139,7 +139,6 @@ fn from_chunk_summaries(schema: SchemaRef, chunks: Vec<ChunkSummary>) -> Result<
mod tests {
use super::*;
use arrow_util::assert_batches_eq;
use chrono::{TimeZone, Utc};
use data_types::chunk_metadata::{ChunkId, ChunkLifecycleAction, ChunkOrder, ChunkStorage};
#[test]
@ -155,8 +154,8 @@ mod tests {
object_store_bytes: 0,
row_count: 11,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(10_000_000_000),
time_of_last_write: Utc.timestamp_nanos(10_000_000_000),
time_of_first_write: Time::from_timestamp_nanos(10_000_000_000),
time_of_last_write: Time::from_timestamp_nanos(10_000_000_000),
order: ChunkOrder::new(5).unwrap(),
},
ChunkSummary {
@ -168,9 +167,9 @@ mod tests {
memory_bytes: 23455,
object_store_bytes: 0,
row_count: 22,
time_of_last_access: Some(Utc.timestamp_nanos(754_000_000_000)),
time_of_first_write: Utc.timestamp_nanos(80_000_000_000),
time_of_last_write: Utc.timestamp_nanos(80_000_000_000),
time_of_last_access: Some(Time::from_timestamp_nanos(754_000_000_000)),
time_of_first_write: Time::from_timestamp_nanos(80_000_000_000),
time_of_last_write: Time::from_timestamp_nanos(80_000_000_000),
order: ChunkOrder::new(6).unwrap(),
},
ChunkSummary {
@ -182,9 +181,9 @@ mod tests {
memory_bytes: 1234,
object_store_bytes: 5678,
row_count: 33,
time_of_last_access: Some(Utc.timestamp_nanos(5_000_000_000)),
time_of_first_write: Utc.timestamp_nanos(100_000_000_000),
time_of_last_write: Utc.timestamp_nanos(200_000_000_000),
time_of_last_access: Some(Time::from_timestamp_nanos(5_000_000_000)),
time_of_first_write: Time::from_timestamp_nanos(100_000_000_000),
time_of_last_write: Time::from_timestamp_nanos(200_000_000_000),
order: ChunkOrder::new(7).unwrap(),
},
];
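A tiny illustrative check of the conversion `time_to_ts`/`optional_time_to_ts` performs for the system tables, assuming `Time::from_timestamp_nanos` and `Time::timestamp_nanos` round-trip as used above:

    use time::Time;

    #[test]
    fn time_round_trips_through_nanoseconds() {
        let t = Time::from_timestamp_nanos(10_000_000_000);

        // what time_to_ts / optional_time_to_ts feed into the i64 columns
        assert_eq!(t.timestamp_nanos(), 10_000_000_000);
        assert_eq!(Some(t).map(|t| t.timestamp_nanos()), Some(10_000_000_000));
    }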


@ -218,11 +218,11 @@ fn assemble_chunk_columns(
mod tests {
use super::*;
use arrow_util::assert_batches_eq;
use chrono::{TimeZone, Utc};
use data_types::{
chunk_metadata::{ChunkColumnSummary, ChunkId, ChunkOrder, ChunkStorage, ChunkSummary},
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics},
};
use time::Time;
#[test]
fn test_from_partition_summaries() {
@ -318,8 +318,8 @@ mod tests {
object_store_bytes: 0,
row_count: 11,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(2),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(2),
order: ChunkOrder::new(5).unwrap(),
},
columns: vec![
@ -354,8 +354,8 @@ mod tests {
object_store_bytes: 0,
row_count: 11,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(2),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(2),
order: ChunkOrder::new(6).unwrap(),
},
columns: vec![ChunkColumnSummary {
@ -384,8 +384,8 @@ mod tests {
object_store_bytes: 0,
row_count: 11,
time_of_last_access: None,
time_of_first_write: Utc.timestamp_nanos(1),
time_of_last_write: Utc.timestamp_nanos(2),
time_of_first_write: Time::from_timestamp_nanos(1),
time_of_last_write: Time::from_timestamp_nanos(2),
order: ChunkOrder::new(5).unwrap(),
},
columns: vec![ChunkColumnSummary {


@ -101,9 +101,9 @@ fn from_write_summaries(
#[cfg(test)]
mod tests {
use chrono::{TimeZone, Utc};
use arrow_util::assert_batches_eq;
use time::Time;
use super::*;
@ -119,20 +119,20 @@ mod tests {
(
addr.clone(),
WriteSummary {
time_of_first_write: Utc.timestamp_nanos(0),
time_of_last_write: Utc.timestamp_nanos(20),
min_timestamp: Utc.timestamp_nanos(50),
max_timestamp: Utc.timestamp_nanos(60),
time_of_first_write: Time::from_timestamp_nanos(0),
time_of_last_write: Time::from_timestamp_nanos(20),
min_timestamp: Time::from_timestamp_nanos(50),
max_timestamp: Time::from_timestamp_nanos(60),
row_count: 320,
},
),
(
addr,
WriteSummary {
time_of_first_write: Utc.timestamp_nanos(6),
time_of_last_write: Utc.timestamp_nanos(21),
min_timestamp: Utc.timestamp_nanos(1),
max_timestamp: Utc.timestamp_nanos(2),
time_of_first_write: Time::from_timestamp_nanos(6),
time_of_last_write: Time::from_timestamp_nanos(21),
min_timestamp: Time::from_timestamp_nanos(1),
max_timestamp: Time::from_timestamp_nanos(2),
row_count: 2,
},
),


@ -69,7 +69,6 @@
)]
use async_trait::async_trait;
use chrono::Utc;
use data_types::{
chunk_metadata::ChunkId,
database_rules::{NodeGroup, RoutingRules, ShardId, Sink},
@ -918,7 +917,7 @@ where
use database::WriteError;
self.active_database(db_name)?
.write_entry(entry, Utc::now())
.write_entry(entry)
.await
.map_err(|e| match e {
WriteError::NotInitialized { .. } => Error::DatabaseNotInitialized {
@ -1251,6 +1250,7 @@ mod tests {
path::{parsed::DirsAndFileName, ObjectStorePath},
ObjectStore, ObjectStoreApi,
};
use parquet_file::catalog::core::PreservedCatalogConfig;
use parquet_file::catalog::{
core::PreservedCatalog,
test_helpers::{load_ok, new_empty},
@ -2206,9 +2206,12 @@ mod tests {
.await
.unwrap();
let (preserved_catalog, _catalog) = load_ok(&catalog_broken.iox_object_store().unwrap())
.await
.unwrap();
let config = PreservedCatalogConfig::new(
catalog_broken.iox_object_store().unwrap(),
Arc::clone(application.time_provider()),
);
let (preserved_catalog, _catalog) = load_ok(config).await.unwrap();
parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog)
.await;
@ -2287,7 +2290,13 @@ mod tests {
.await
.unwrap(),
);
new_empty(&non_existing_iox_object_store).await;
let config = PreservedCatalogConfig::new(
non_existing_iox_object_store,
Arc::clone(application.time_provider()),
);
new_empty(config).await;
assert_eq!(
server
.wipe_preserved_catalog(&db_name_non_existing)
@ -2382,8 +2391,11 @@ mod tests {
.unwrap(),
);
let config =
PreservedCatalogConfig::new(iox_object_store, Arc::clone(application.time_provider()));
// create catalog
new_empty(&iox_object_store).await;
new_empty(config).await;
// creating database will now result in an error
let err = create_simple_database(&server, db_name).await.unwrap_err();
@ -2396,13 +2408,13 @@ mod tests {
#[tokio::test]
async fn write_buffer_errors_propagate() {
let mut factory = WriteBufferConfigFactory::new();
let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None);
let mut factory = WriteBufferConfigFactory::new(Arc::clone(application.time_provider()));
factory.register_always_fail_mock("my_mock".to_string());
let application = Arc::new(ApplicationState::with_write_buffer_factory(
Arc::new(ObjectStore::new_in_memory()),
Arc::new(factory),
None,
));
let application = Arc::new(application.with_write_buffer_factory(Arc::new(factory)));
let server = make_server(application);
server.set_id(ServerId::try_from(1).unwrap()).unwrap();
server.wait_for_init().await.unwrap();


@ -14,6 +14,7 @@ use persistence_windows::checkpoint::ReplayPlan;
use query::exec::ExecutorConfig;
use query::{exec::Executor, QueryDatabase};
use std::{borrow::Cow, convert::TryFrom, num::NonZeroU32, sync::Arc, time::Duration};
use time::{Time, TimeProvider};
use write_buffer::core::WriteBufferWriting;
// A wrapper around a Db and a metric registry allowing for isolated testing
@ -40,6 +41,7 @@ pub struct TestDbBuilder {
write_buffer_producer: Option<Arc<dyn WriteBufferWriting>>,
lifecycle_rules: Option<LifecycleRules>,
partition_template: Option<PartitionTemplate>,
time_provider: Option<Arc<dyn TimeProvider>>,
}
impl TestDbBuilder {
@ -51,10 +53,17 @@ impl TestDbBuilder {
let server_id = self
.server_id
.unwrap_or_else(|| ServerId::try_from(1).unwrap());
let db_name = self
.db_name
.unwrap_or_else(|| DatabaseName::new("placeholder").unwrap());
let time_provider = self
.time_provider
.clone()
.take()
.unwrap_or_else(|| Arc::new(time::SystemProvider::new()));
let object_store = self
.object_store
.unwrap_or_else(|| Arc::new(ObjectStore::new_in_memory()));
@ -85,6 +94,7 @@ impl TestDbBuilder {
db_name.as_str(),
Arc::clone(&iox_object_store),
Arc::clone(&metric_registry),
Arc::clone(&time_provider),
false,
false,
)
@ -123,6 +133,7 @@ impl TestDbBuilder {
write_buffer_producer: self.write_buffer_producer,
exec,
metric_registry: Arc::clone(&metric_registry),
time_provider,
};
TestDb {
@ -172,6 +183,11 @@ impl TestDbBuilder {
self.partition_template = Some(template);
self
}
pub fn time_provider(mut self, time_provider: Arc<dyn TimeProvider>) -> Self {
self.time_provider = Some(time_provider);
self
}
}
/// Used for testing: create a Database with a local store
@ -179,6 +195,16 @@ pub async fn make_db() -> TestDb {
TestDb::builder().build().await
}
pub async fn make_db_time() -> (Arc<Db>, Arc<time::MockProvider>) {
let provider = Arc::new(time::MockProvider::new(Time::from_timestamp(295293, 3)));
let db = TestDb::builder()
.time_provider(Arc::<time::MockProvider>::clone(&provider))
.build()
.await
.db;
(db, provider)
}
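A sketch of how a test typically consumes `make_db_time`, assuming the existing `write_lp` helper and the `ChunkSummary` fields shown in the summaries tests above: every write is stamped with the mocked instant, so exact equality assertions become possible:

    use std::time::Duration;

    #[tokio::test]
    async fn writes_are_stamped_with_mock_time() {
        let (db, time) = make_db_time().await;

        let t0 = time.inc(Duration::from_secs(1));
        write_lp(&db, "cpu bar=1 1").await;

        // the chunk's first/last write times are the mocked instant, not wall-clock time
        let summary = db.chunk_summaries().unwrap().remove(0);
        assert_eq!(summary.time_of_first_write, t0);
        assert_eq!(summary.time_of_last_write, t0);
    }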
fn chunk_summary_iter(db: &Db) -> impl Iterator<Item = ChunkSummary> + '_ {
db.partition_keys()
.unwrap()


@ -2,7 +2,6 @@ use std::future::Future;
use std::sync::Arc;
use std::time::{Duration, Instant};
use chrono::Utc;
use futures::future::{BoxFuture, Shared};
use futures::stream::{BoxStream, FuturesUnordered};
use futures::{FutureExt, StreamExt, TryFutureExt};
@ -155,7 +154,6 @@ async fn stream_in_sequenced_entries<'a>(
match db.store_sequenced_entry(
Arc::clone(&sequenced_entry),
crate::db::filter_table_batch_keep_all,
Utc::now(),
) {
Ok(_) => {
metrics.success();
@ -196,8 +194,6 @@ mod tests {
use std::convert::TryFrom;
use std::num::{NonZeroU32, NonZeroUsize};
use chrono::{TimeZone, Utc};
use ::test_helpers::assert_contains;
use arrow_util::assert_batches_eq;
use data_types::database_rules::{PartitionTemplate, TemplatePart};
@ -215,6 +211,7 @@ mod tests {
use super::*;
use metric::{Attributes, Metric, U64Counter, U64Gauge};
use time::Time;
#[tokio::test]
async fn read_from_write_buffer_updates_persistence_windows() {
@ -225,22 +222,22 @@ mod tests {
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(2).unwrap());
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 0),
Utc::now(),
Time::from_timestamp_nanos(0),
entry.clone(),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(1, 0),
Utc::now(),
Time::from_timestamp_nanos(0),
entry.clone(),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(1, 2),
Utc::now(),
Time::from_timestamp_nanos(0),
entry.clone(),
));
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 1),
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
let db = TestDb::builder().build().await.db;
@ -293,8 +290,8 @@ mod tests {
async fn read_from_write_buffer_write_to_mutable_buffer() {
let write_buffer_state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
let ingest_ts1 = Utc.timestamp_millis(42);
let ingest_ts2 = Utc.timestamp_millis(1337);
let ingest_ts1 = Time::from_timestamp_millis(42);
let ingest_ts2 = Time::from_timestamp_millis(1337);
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 0),
ingest_ts1,
@ -454,13 +451,13 @@ mod tests {
);
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, sequence_number),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry(&lp),
));
}
write_buffer_state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, n_entries),
Utc::now(),
Time::from_timestamp_nanos(0),
lp_to_entry("table_2,partition_by=a foo=1 0"),
));


@ -1,16 +1,18 @@
//! This module contains code to translate from InfluxDB IOx data
//! formats into the formats needed by gRPC
use std::{collections::BTreeSet, sync::Arc};
use std::{collections::BTreeSet, fmt, sync::Arc};
use arrow::{
array::{
ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, TimestampNanosecondArray,
UInt64Array,
},
bitmap::Bitmap,
datatypes::DataType as ArrowDataType,
};
use observability_deps::tracing::trace;
use query::exec::{
field::FieldIndex,
fieldlist::FieldList,
@ -31,11 +33,11 @@ use snafu::Snafu;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Unsupported data type in gRPC data translation: {}", type_name))]
UnsupportedDataType { type_name: String },
#[snafu(display("Unsupported data type in gRPC data translation: {}", data_type))]
UnsupportedDataType { data_type: ArrowDataType },
#[snafu(display("Unsupported field data type in gRPC data translation: {}", type_name))]
UnsupportedFieldType { type_name: String },
#[snafu(display("Unsupported field data type in gRPC data translation: {}", data_type))]
UnsupportedFieldType { data_type: ArrowDataType },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -101,6 +103,7 @@ pub fn series_set_item_to_read_response(series_set_item: SeriesSetItem) -> Resul
}
SeriesSetItem::Data(series_set) => series_set_to_frames(series_set)?,
};
trace!(frames=%DisplayableFrames::new(&frames), "Response gRPC frames");
Ok(ReadResponse { frames })
}
@ -145,7 +148,7 @@ fn data_type(array: &ArrayRef) -> Result<DataType> {
ArrowDataType::UInt64 => Ok(DataType::Unsigned),
ArrowDataType::Boolean => Ok(DataType::Boolean),
_ => UnsupportedDataType {
type_name: format!("{:?}", array.data_type()),
data_type: array.data_type().clone(),
}
.fail(),
}
@ -189,12 +192,16 @@ fn field_to_data(
};
frames.push(Data::Series(series_frame));
// Only take timestamps (and values) from the rows that have non-null values for this field
let valid = array.data().null_bitmap().as_ref();
let timestamps = batch
.column(indexes.timestamp_index)
.as_any()
.downcast_ref::<TimestampNanosecondArray>()
.unwrap()
.extract_values(start_row, num_rows);
.extract_values(start_row, num_rows, valid);
frames.push(match array.data_type() {
ArrowDataType::Utf8 => {
@ -202,7 +209,7 @@ fn field_to_data(
.as_any()
.downcast_ref::<StringArray>()
.unwrap()
.extract_values(start_row, num_rows);
.extract_values(start_row, num_rows, valid);
Data::StringPoints(StringPointsFrame { timestamps, values })
}
ArrowDataType::Float64 => {
@ -210,7 +217,8 @@ fn field_to_data(
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.extract_values(start_row, num_rows);
.extract_values(start_row, num_rows, valid);
Data::FloatPoints(FloatPointsFrame { timestamps, values })
}
ArrowDataType::Int64 => {
@ -218,7 +226,7 @@ fn field_to_data(
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.extract_values(start_row, num_rows);
.extract_values(start_row, num_rows, valid);
Data::IntegerPoints(IntegerPointsFrame { timestamps, values })
}
ArrowDataType::UInt64 => {
@ -226,7 +234,7 @@ fn field_to_data(
.as_any()
.downcast_ref::<UInt64Array>()
.unwrap()
.extract_values(start_row, num_rows);
.extract_values(start_row, num_rows, valid);
Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })
}
ArrowDataType::Boolean => {
@ -234,12 +242,12 @@ fn field_to_data(
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap()
.extract_values(start_row, num_rows);
.extract_values(start_row, num_rows, valid);
Data::BooleanPoints(BooleanPointsFrame { timestamps, values })
}
_ => {
return UnsupportedDataType {
type_name: format!("{:?}", array.data_type()),
data_type: array.data_type().clone(),
}
.fail();
}
@ -275,52 +283,68 @@ fn convert_tags(table_name: &str, field_name: &str, tags: &[(Arc<str>, Arc<str>)
}
trait ExtractValues<T> {
/// Extracts num_rows of data starting from start_row as a vector
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<T>;
/// Extracts num_rows of data starting from start_row as a vector,
/// for all rows `i` where `valid[i]` is set
fn extract_values(&self, start_row: usize, num_rows: usize, valid: Option<&Bitmap>) -> Vec<T>;
}
/// Implements `extract_values` for a particular array type.
macro_rules! extract_values_impl {
($DATA_TYPE:ty) => {
fn extract_values(
&self,
start_row: usize,
num_rows: usize,
valid: Option<&Bitmap>,
) -> Vec<$DATA_TYPE> {
let end_row = start_row + num_rows;
match valid {
Some(valid) => (start_row..end_row)
.filter_map(|row| valid.is_set(row).then(|| self.value(row)))
.collect(),
None => (start_row..end_row).map(|row| self.value(row)).collect(),
}
}
};
}
impl ExtractValues<String> for StringArray {
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<String> {
fn extract_values(
&self,
start_row: usize,
num_rows: usize,
valid: Option<&Bitmap>,
) -> Vec<String> {
let end_row = start_row + num_rows;
(start_row..end_row)
.map(|row| self.value(row).to_string())
.collect()
match valid {
Some(valid) => (start_row..end_row)
.filter_map(|row| valid.is_set(row).then(|| self.value(row).to_string()))
.collect(),
None => (start_row..end_row)
.map(|row| self.value(row).to_string())
.collect(),
}
}
}
impl ExtractValues<i64> for Int64Array {
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<i64> {
let end_row = start_row + num_rows;
(start_row..end_row).map(|row| self.value(row)).collect()
}
extract_values_impl! {i64}
}
impl ExtractValues<u64> for UInt64Array {
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<u64> {
let end_row = start_row + num_rows;
(start_row..end_row).map(|row| self.value(row)).collect()
}
extract_values_impl! {u64}
}
impl ExtractValues<f64> for Float64Array {
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<f64> {
let end_row = start_row + num_rows;
(start_row..end_row).map(|row| self.value(row)).collect()
}
extract_values_impl! {f64}
}
impl ExtractValues<bool> for BooleanArray {
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<bool> {
let end_row = start_row + num_rows;
(start_row..end_row).map(|row| self.value(row)).collect()
}
extract_values_impl! {bool}
}
impl ExtractValues<i64> for TimestampNanosecondArray {
fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<i64> {
let end_row = start_row + num_rows;
(start_row..end_row).map(|row| self.value(row)).collect()
}
extract_values_impl! {i64}
}
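// A minimal std-only sketch (not the arrow API) of the validity-bitmap
// filtering performed by `extract_values` above: rows whose valid bit is
// unset are skipped entirely, so the extracted timestamps and values stay
// aligned. `Vec<Option<i64>>` stands in for an Arrow array plus its null
// bitmap; the names below are illustrative only.
fn extract_valid_sketch(column: &[Option<i64>], start_row: usize, num_rows: usize) -> Vec<i64> {
    let end_row = start_row + num_rows;
    (start_row..end_row)
        .filter_map(|row| column[row]) // `None` plays the role of an unset valid bit
        .collect()
}

#[test]
fn extract_valid_sketch_skips_nulls() {
    let column = vec![None, Some(10), None, Some(40)];
    assert_eq!(extract_valid_sketch(&column, 0, 4), vec![10, 40]);
}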
/// Translates FieldList into the gRPC format
@ -350,12 +374,116 @@ fn datatype_to_measurement_field_enum(data_type: &ArrowDataType) -> Result<Field
ArrowDataType::Utf8 => Ok(FieldType::String),
ArrowDataType::Boolean => Ok(FieldType::Boolean),
_ => UnsupportedFieldType {
type_name: format!("{:?}", data_type),
data_type: data_type.clone(),
}
.fail(),
}
}
/// Wrapper structure that implements [`std::fmt::Display`] for a slice
/// of `Frame`s
struct DisplayableFrames<'a> {
frames: &'a [Frame],
}
impl<'a> DisplayableFrames<'a> {
fn new(frames: &'a [Frame]) -> Self {
Self { frames }
}
}
impl<'a> fmt::Display for DisplayableFrames<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.frames.iter().try_for_each(|frame| {
format_frame(frame, f)?;
writeln!(f)
})
}
}
fn format_frame(frame: &Frame, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data = &frame.data;
match data {
Some(Data::Series(SeriesFrame { tags, data_type })) => write!(
f,
"SeriesFrame, tags: {}, type: {:?}",
dump_tags(tags),
data_type
),
Some(Data::FloatPoints(FloatPointsFrame { timestamps, values })) => write!(
f,
"FloatPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::IntegerPoints(IntegerPointsFrame { timestamps, values })) => write!(
f,
"IntegerPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })) => write!(
f,
"UnsignedPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::BooleanPoints(BooleanPointsFrame { timestamps, values })) => write!(
f,
"BooleanPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::StringPoints(StringPointsFrame { timestamps, values })) => write!(
f,
"StringPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::Group(GroupFrame {
tag_keys,
partition_key_vals,
})) => write!(
f,
"GroupFrame, tag_keys: {}, partition_key_vals: {}",
dump_u8_vec(tag_keys),
dump_u8_vec(partition_key_vals)
),
None => write!(f, "<NO data field>"),
}
}
fn dump_values<T>(v: &[T]) -> String
where
T: std::fmt::Display,
{
v.iter()
.map(|item| format!("{}", item))
.collect::<Vec<_>>()
.join(",")
}
fn dump_u8_vec(encoded_strings: &[Vec<u8>]) -> String {
encoded_strings
.iter()
.map(|b| String::from_utf8_lossy(b))
.collect::<Vec<_>>()
.join(",")
}
fn dump_tags(tags: &[Tag]) -> String {
tags.iter()
.map(|tag| {
format!(
"{}={}",
String::from_utf8_lossy(&tag.key),
String::from_utf8_lossy(&tag.value),
)
})
.collect::<Vec<_>>()
.join(",")
}
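// Hedged usage sketch of the display wrapper above: render a frame list the
// same way the `trace!` call does. The `Frame`/`Data` field names match the
// destructuring in `format_frame`; the concrete values are illustrative only.
#[test]
fn displayable_frames_sketch() {
    let frames = vec![Frame {
        data: Some(Data::FloatPoints(FloatPointsFrame {
            timestamps: vec![1000, 2000],
            values: vec![10.1, 20.1],
        })),
    }];
    let rendered = DisplayableFrames::new(&frames).to_string();
    assert!(rendered.contains("FloatPointsFrame"));
}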
#[cfg(test)]
mod tests {
use arrow::{datatypes::DataType as ArrowDataType, record_batch::RecordBatch};
@ -409,11 +537,7 @@ mod tests {
let response =
series_set_to_read_response(series_set).expect("Correctly converted series set");
let dumped_frames = response
.frames
.iter()
.map(|f| dump_frame(f))
.collect::<Vec<_>>();
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=string_field,_measurement=the_table,tag1=val1, type: 4",
@ -465,11 +589,7 @@ mod tests {
let response =
series_set_to_read_response(series_set).expect("Correctly converted series set");
let dumped_frames = response
.frames
.iter()
.map(|f| dump_frame(f))
.collect::<Vec<_>>();
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=string_field2,_measurement=the_table,tag1=val1, type: 4",
@ -486,7 +606,7 @@ mod tests {
}
#[test]
fn test_series_set_conversion_with_null_field() {
fn test_series_set_conversion_with_entirely_null_field() {
// single series
let tag_array: ArrayRef = Arc::new(StringArray::from(vec!["MA", "MA", "MA", "MA"]));
let int_array: ArrayRef = Arc::new(Int64Array::from(vec![None, None, None, None]));
@ -525,15 +645,71 @@ mod tests {
let response =
series_set_to_read_response(series_set).expect("Correctly converted series set");
let dumped_frames = response
.frames
.iter()
.map(|f| dump_frame(f))
.collect::<Vec<_>>();
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=float_field,_measurement=the_table,state=MA, type: 0",
"FloatPointsFrame, timestamps: [1000, 2000, 3000, 4000], values: \"10.1,20.1,0,40.1\"",
"FloatPointsFrame, timestamps: [1000, 2000, 4000], values: \"10.1,20.1,40.1\"",
];
assert_eq!(
dumped_frames, expected_frames,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected_frames, dumped_frames
);
}
#[test]
fn test_series_set_conversion_with_some_null_fields() {
// single series
let tag_array = StringArray::from(vec!["MA", "MA"]);
let string_array = StringArray::from(vec![None, Some("foo")]);
let float_array = Float64Array::from(vec![None, Some(1.0)]);
let int_array = Int64Array::from(vec![None, Some(-10)]);
let uint_array = UInt64Array::from(vec![None, Some(100)]);
let bool_array = BooleanArray::from(vec![None, Some(true)]);
let timestamp_array = TimestampNanosecondArray::from_vec(vec![1000, 2000], None);
let batch = RecordBatch::try_from_iter_with_nullable(vec![
("state", Arc::new(tag_array) as ArrayRef, true),
("srting_field", Arc::new(string_array), true),
("float_field", Arc::new(float_array), true),
("int_field", Arc::new(int_array), true),
("uint_field", Arc::new(uint_array), true),
("bool_field", Arc::new(bool_array), true),
("time", Arc::new(timestamp_array), false),
])
.expect("created new record batch");
let series_set = SeriesSet {
table_name: Arc::from("the_table"),
tags: vec![(Arc::from("state"), Arc::from("MA"))],
field_indexes: FieldIndexes::from_timestamp_and_value_indexes(6, &[1, 2, 3, 4, 5]),
start_row: 0,
num_rows: batch.num_rows(),
batch,
};
// Expect one series frame per field; each points frame carries only the
// second row, since the first row is null for every field
let response =
series_set_to_read_response(series_set).expect("Correctly converted series set");
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=srting_field,_measurement=the_table,state=MA, type: 4",
"StringPointsFrame, timestamps: [2000], values: foo",
"SeriesFrame, tags: _field=float_field,_measurement=the_table,state=MA, type: 0",
"FloatPointsFrame, timestamps: [2000], values: \"1\"",
"SeriesFrame, tags: _field=int_field,_measurement=the_table,state=MA, type: 1",
"IntegerPointsFrame, timestamps: [2000], values: \"-10\"",
"SeriesFrame, tags: _field=uint_field,_measurement=the_table,state=MA, type: 2",
"UnsignedPointsFrame, timestamps: [2000], values: \"100\"",
"SeriesFrame, tags: _field=bool_field,_measurement=the_table,state=MA, type: 3",
"BooleanPointsFrame, timestamps: [2000], values: true",
];
assert_eq!(
@ -555,11 +731,7 @@ mod tests {
let response = series_set_item_to_read_response(grouped_series_set_item)
.expect("Correctly converted grouped_series_set_item");
let dumped_frames = response
.frames
.iter()
.map(|f| dump_frame(f))
.collect::<Vec<_>>();
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"GroupFrame, tag_keys: _field,_measurement,tag1,tag2, partition_key_vals: val1,val2",
@ -600,11 +772,7 @@ mod tests {
let response = series_set_item_to_read_response(series_set_item)
.expect("Correctly converted series_set_item");
let dumped_frames = response
.frames
.iter()
.map(|f| dump_frame(f))
.collect::<Vec<_>>();
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=float_field,_measurement=the_table,tag1=val1, type: 0",
@ -713,82 +881,6 @@ mod tests {
}
}
fn dump_frame(frame: &Frame) -> String {
let data = &frame.data;
match data {
Some(Data::Series(SeriesFrame { tags, data_type })) => format!(
"SeriesFrame, tags: {}, type: {:?}",
dump_tags(tags),
data_type
),
Some(Data::FloatPoints(FloatPointsFrame { timestamps, values })) => format!(
"FloatPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::IntegerPoints(IntegerPointsFrame { timestamps, values })) => format!(
"IntegerPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })) => format!(
"UnsignedPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::BooleanPoints(BooleanPointsFrame { timestamps, values })) => format!(
"BooleanPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::StringPoints(StringPointsFrame { timestamps, values })) => format!(
"StringPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::Group(GroupFrame {
tag_keys,
partition_key_vals,
})) => format!(
"GroupFrame, tag_keys: {}, partition_key_vals: {}",
dump_u8_vec(tag_keys),
dump_u8_vec(partition_key_vals),
),
None => "<NO data field>".into(),
}
}
fn dump_values<T>(v: &[T]) -> String
where
T: std::fmt::Display,
{
v.iter()
.map(|item| format!("{}", item))
.collect::<Vec<_>>()
.join(",")
}
fn dump_u8_vec(encoded_strings: &[Vec<u8>]) -> String {
encoded_strings
.iter()
.map(|b| String::from_utf8_lossy(b))
.collect::<Vec<_>>()
.join(",")
}
fn dump_tags(tags: &[Tag]) -> String {
tags.iter()
.map(|tag| {
format!(
"{}={}",
String::from_utf8_lossy(&tag.key),
String::from_utf8_lossy(&tag.value),
)
})
.collect::<Vec<_>>()
.join(",")
}
fn make_record_batch() -> RecordBatch {
let string_array: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "foo"]));
let int_array: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3, 4]));
@ -811,4 +903,13 @@ mod tests {
])
.expect("created new record batch")
}
fn dump_frames(frames: &[Frame]) -> Vec<String> {
DisplayableFrames::new(frames)
.to_string()
.trim()
.split('\n')
.map(|s| s.to_string())
.collect()
}
}

View File

@ -717,6 +717,7 @@ pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> Ser
db_name,
&Default::default(),
creation_config.as_ref(),
Arc::new(time::SystemProvider::new()),
)
.await
.unwrap();

View File

@ -51,6 +51,11 @@ pub struct SpanContext {
pub span_id: SpanId,
/// Link to other spans, can be cross-trace if this span aggregates multiple spans.
///
/// See <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/overview.md#links-between-spans>.
pub links: Vec<(TraceId, SpanId)>,
pub collector: Option<Arc<dyn TraceCollector>>,
}
@ -67,6 +72,7 @@ impl SpanContext {
trace_id: TraceId(NonZeroU128::new(trace_id).unwrap()),
parent_span_id: None,
span_id: SpanId(NonZeroU64::new(span_id).unwrap()),
links: vec![],
collector: Some(collector),
}
}
@ -79,6 +85,7 @@ impl SpanContext {
trace_id: self.trace_id,
span_id: SpanId::gen(),
collector: self.collector.clone(),
links: vec![],
parent_span_id: Some(self.span_id),
},
start: None,

View File

@ -236,9 +236,14 @@ mod tests {
trace_id: TraceId::new(43434).unwrap(),
parent_span_id: None,
span_id: SpanId::new(3495993).unwrap(),
links: vec![],
collector: None,
};
let mut span = ctx.child("foo");
span.ctx.links = vec![
(TraceId::new(12).unwrap(), SpanId::new(123).unwrap()),
(TraceId::new(45).unwrap(), SpanId::new(456).unwrap()),
];
span.status = SpanStatus::Ok;
span.events = vec![SpanEvent {
time: Utc.timestamp_nanos(200000),
@ -283,6 +288,14 @@ mod tests {
span.ctx.parent_span_id.unwrap().get() as i64
);
// test links
let b1_s0_refs = b1_s0.references.as_ref().unwrap();
assert_eq!(b1_s0_refs.len(), 2);
let b1_s0_r0 = &b1_s0_refs[0];
let b1_s0_r1 = &b1_s0_refs[1];
assert_eq!(b1_s0_r0.span_id, span.ctx.links[0].1.get() as i64);
assert_eq!(b1_s0_r1.span_id, span.ctx.links[1].1.get() as i64);
// microseconds not nanoseconds
assert_eq!(b1_s0.start_time, 100);
assert_eq!(b1_s0.duration, 200);

View File

@ -1,12 +1,21 @@
/// Contains the conversion logic from a `trace::span::Span` to `thrift::jaeger::Span`
use crate::thrift::jaeger;
use trace::span::{MetaValue, Span, SpanEvent, SpanStatus};
use crate::thrift::jaeger::{self, SpanRef};
use trace::{
ctx::TraceId,
span::{MetaValue, Span, SpanEvent, SpanStatus},
};
/// Split [`TraceId`] into high and low part.
fn split_trace_id(trace_id: TraceId) -> (i64, i64) {
let trace_id = trace_id.get();
let trace_id_high = (trace_id >> 64) as i64;
let trace_id_low = trace_id as i64;
(trace_id_high, trace_id_low)
}
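// Worked example of the split above (a sketch; the values are made up, not
// taken from the Jaeger spec): a 128-bit trace ID with high half 1 and low
// half 2 splits into (1, 2). Low halves with the top bit set come out
// negative because Jaeger models both halves as i64.
#[test]
fn split_trace_id_sketch() {
    let trace_id = TraceId::new((1u128 << 64) | 2).unwrap();
    assert_eq!(split_trace_id(trace_id), (1, 2));
}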
impl From<Span> for jaeger::Span {
fn from(mut s: Span) -> Self {
let trace_id = s.ctx.trace_id.get();
let trace_id_high = (trace_id >> 64) as i64;
let trace_id_low = trace_id as i64;
let (trace_id_high, trace_id_low) = split_trace_id(s.ctx.trace_id);
// A parent span id of 0 indicates no parent span ID (span IDs are non-zero)
let parent_span_id = s.ctx.parent_span_id.map(|id| id.get()).unwrap_or_default() as i64;
@ -51,13 +60,34 @@ impl From<Span> for jaeger::Span {
false => Some(s.events.into_iter().map(Into::into).collect()),
};
let references = if s.ctx.links.is_empty() {
None
} else {
Some(
s.ctx
.links
.into_iter()
.map(|(trace_id, span_id)| {
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk_exporters/jaeger.md#links
let (trace_id_high, trace_id_low) = split_trace_id(trace_id);
SpanRef {
ref_type: jaeger::SpanRefType::FollowsFrom,
trace_id_high,
trace_id_low,
span_id: span_id.get() as i64,
}
})
.collect(),
)
};
Self {
trace_id_low,
trace_id_high,
span_id: s.ctx.span_id.get() as i64,
parent_span_id,
operation_name: s.name.to_string(),
references: None,
references,
flags: 0,
start_time,
duration,
@ -115,3 +145,18 @@ fn tag_from_meta(key: String, value: MetaValue) -> jaeger::Tag {
};
tag
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_split_trace_id_integer_conversion() {
// test case from
// https://github.com/open-telemetry/opentelemetry-specification/blob/639c7443e78800b085d2c9826d1b300f5e81fded/specification/trace/sdk_exporters/jaeger.md#ids
let trace_id = TraceId::new(0xFF00000000000000).unwrap();
let (high, low) = split_trace_id(trace_id);
assert_eq!(high, 0);
assert_eq!(low, -72057594037927936);
}
}

View File

@ -154,10 +154,14 @@ fn decode_b3(
return Ok(None);
}
// Links cannot be specified via the HTTP header
let links = vec![];
Ok(Some(SpanContext {
trace_id: required_header(headers, B3_TRACE_ID_HEADER, parse_trace)?,
parent_span_id: parsed_header(headers, B3_PARENT_SPAN_ID_HEADER, parse_span)?,
span_id: required_header(headers, B3_SPAN_ID_HEADER, parse_span)?,
links,
collector: Some(Arc::clone(collector)),
}))
}
@ -211,10 +215,14 @@ fn decode_jaeger(
return Ok(None);
}
// Links cannot be specified via the HTTP header
let links = vec![];
Ok(Some(SpanContext {
trace_id: decoded.trace_id,
parent_span_id: decoded.parent_span_id,
span_id: decoded.span_id,
links,
collector: Some(Arc::clone(collector)),
}))
}

View File

@ -116,6 +116,58 @@ struct TrackerState {
notify: Notify,
}
impl TrackerState {
fn get_status(&self) -> TaskStatus {
// The atomic decrement in TrackerRegistration::drop has release semantics
// acquire here ensures that if a thread observes the tracker to have
// no pending_registrations it cannot subsequently observe pending_futures
// to increase. If it could, observing pending_futures==0 would be insufficient
// to conclude there are no outstanding futures
let pending_registrations = self.pending_registrations.load(Ordering::Acquire);
// The atomic decrement in TrackedFuture::drop has release semantics
// acquire therefore ensures that if a thread observes the completion of
// a TrackedFuture, it is guaranteed to see its updates (e.g. wall_nanos)
let pending_futures = self.pending_futures.load(Ordering::Acquire);
match (pending_registrations == 0, pending_futures == 0) {
(false, _) => TaskStatus::Creating,
(true, false) => TaskStatus::Running {
total_count: self.created_futures.load(Ordering::Relaxed),
pending_count: self.pending_futures.load(Ordering::Relaxed),
cpu_nanos: self.cpu_nanos.load(Ordering::Relaxed),
},
(true, true) => {
let total_count = self.created_futures.load(Ordering::Relaxed);
let success_count = self.ok_futures.load(Ordering::Relaxed);
let error_count = self.err_futures.load(Ordering::Relaxed);
let cancelled_count = self.cancelled_futures.load(Ordering::Relaxed);
// Failure of this would imply a future reported its completion status multiple
// times or a future was created without incrementing created_futures.
// Both of these should be impossible
let dropped_count = total_count
.checked_sub(success_count + error_count + cancelled_count)
.expect("invalid tracker state");
TaskStatus::Complete {
total_count,
success_count,
error_count,
cancelled_count,
dropped_count,
cpu_nanos: self.cpu_nanos.load(Ordering::Relaxed),
wall_nanos: self.wall_nanos.load(Ordering::Relaxed),
}
}
}
}
fn is_complete(&self) -> bool {
matches!(self.get_status(), TaskStatus::Complete { .. })
}
}
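// Minimal sketch of the release/acquire pairing the comments above rely on
// (names here are illustrative, not from the tracker code): a Release
// decrement publishes every write sequenced before it, so a thread that
// observes the decremented counter with Acquire is guaranteed to also see
// side data (like wall_nanos) written before the decrement.
#[test]
fn release_acquire_sketch() {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    let pending = Arc::new(AtomicUsize::new(1));
    let data = Arc::new(AtomicUsize::new(0));

    let handle = {
        let (pending, data) = (Arc::clone(&pending), Arc::clone(&data));
        std::thread::spawn(move || {
            data.store(42, Ordering::Relaxed); // side data written first
            pending.fetch_sub(1, Ordering::Release); // then published
        })
    };

    // Spin until the Acquire load observes pending == 0 ...
    while pending.load(Ordering::Acquire) != 0 {
        std::hint::spin_loop();
    }
    // ... at which point the Relaxed write above is guaranteed to be visible.
    assert_eq!(data.load(Ordering::Relaxed), 42);
    handle.join().unwrap();
}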
/// Returns a summary of the task execution
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum TaskResult {
@ -323,54 +375,12 @@ where
/// Returns true if all futures associated with this tracker have
/// been dropped and no more can be created
pub fn is_complete(&self) -> bool {
matches!(self.get_status(), TaskStatus::Complete { .. })
self.state.is_complete()
}
/// Gets the status of the tracker
pub fn get_status(&self) -> TaskStatus {
// The atomic decrement in TrackerRegistration::drop has release semantics
// acquire here ensures that if a thread observes the tracker to have
// no pending_registrations it cannot subsequently observe pending_futures
// to increase. If it could, observing pending_futures==0 would be insufficient
// to conclude there are no outstanding futures
let pending_registrations = self.state.pending_registrations.load(Ordering::Acquire);
// The atomic decrement in TrackedFuture::drop has release semantics
// acquire therefore ensures that if a thread observes the completion of
// a TrackedFuture, it is guaranteed to see its updates (e.g. wall_nanos)
let pending_futures = self.state.pending_futures.load(Ordering::Acquire);
match (pending_registrations == 0, pending_futures == 0) {
(false, _) => TaskStatus::Creating,
(true, false) => TaskStatus::Running {
total_count: self.state.created_futures.load(Ordering::Relaxed),
pending_count: self.state.pending_futures.load(Ordering::Relaxed),
cpu_nanos: self.state.cpu_nanos.load(Ordering::Relaxed),
},
(true, true) => {
let total_count = self.state.created_futures.load(Ordering::Relaxed);
let success_count = self.state.ok_futures.load(Ordering::Relaxed);
let error_count = self.state.err_futures.load(Ordering::Relaxed);
let cancelled_count = self.state.cancelled_futures.load(Ordering::Relaxed);
// Failure of this would imply a future reported its completion status multiple
// times or a future was created without incrementing created_futures.
// Both of these should be impossible
let dropped_count = total_count
.checked_sub(success_count + error_count + cancelled_count)
.expect("invalid tracker state");
TaskStatus::Complete {
total_count,
success_count,
error_count,
cancelled_count,
dropped_count,
cpu_nanos: self.state.cpu_nanos.load(Ordering::Relaxed),
wall_nanos: self.state.wall_nanos.load(Ordering::Relaxed),
}
}
}
self.state.get_status()
}
/// Returns the instant the tracker was created
@ -385,21 +395,24 @@ where
/// Blocks until all futures associated with the tracker have been
/// dropped and no more can be created
pub async fn join(&self) {
// Notify is notified when pending_futures hits 0 AND when pending_registrations
// hits 0. In almost all cases join won't be called before pending_registrations
// has already hit 0, but in the extremely rare case this occurs the loop
// handles the spurious wakeup
loop {
// Request notification before checking if complete
// to avoid a race condition
let notify = self.state.notify.notified();
pub fn join(&self) -> impl std::future::Future<Output = ()> {
let state = Arc::clone(&self.state);
async move {
// Notify is notified when pending_futures hits 0 AND when pending_registrations
// hits 0. In almost all cases join won't be called before pending_registrations
// has already hit 0, but in the extremely rare case this occurs the loop
// handles the spurious wakeup
loop {
// Request notification before checking if complete
// to avoid a race condition
let notify = state.notify.notified();
if self.is_complete() {
return;
if state.is_complete() {
return;
}
notify.await
}
notify.await
}
}
}

View File

@ -13,6 +13,7 @@ futures = "0.3"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11.2"
rdkafka = "0.26.0"
time = { path = "../time" }
tokio = { version = "1.11", features = ["macros", "fs"] }
uuid = { version = "0.8", features = ["serde", "v4"] }

View File

@ -7,6 +7,7 @@ use data_types::{
database_rules::{WriteBufferConnection, WriteBufferDirection},
server_id::ServerId,
};
use time::TimeProvider;
use crate::{
core::{WriteBufferError, WriteBufferReading, WriteBufferWriting},
@ -34,13 +35,15 @@ enum Mock {
#[derive(Debug)]
pub struct WriteBufferConfigFactory {
mocks: BTreeMap<String, Mock>,
time_provider: Arc<dyn TimeProvider>,
}
impl WriteBufferConfigFactory {
/// Create new factory w/o any mocks.
pub fn new() -> Self {
pub fn new(time_provider: Arc<dyn TimeProvider>) -> Self {
Self {
mocks: Default::default(),
time_provider,
}
}
@ -97,14 +100,18 @@ impl WriteBufferConfigFactory {
db_name,
&cfg.connection_config,
cfg.creation_config.as_ref(),
Arc::clone(&self.time_provider),
)
.await?;
Arc::new(kafka_buffer) as _
}
"mock" => match self.get_mock(&cfg.connection)? {
Mock::Normal(state) => {
let mock_buffer =
MockBufferForWriting::new(state, cfg.creation_config.as_ref())?;
let mock_buffer = MockBufferForWriting::new(
state,
cfg.creation_config.as_ref(),
Arc::clone(&self.time_provider),
)?;
Arc::new(mock_buffer) as _
}
Mock::AlwaysFailing => {
@ -164,12 +171,6 @@ impl WriteBufferConfigFactory {
}
}
impl Default for WriteBufferConfigFactory {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use std::{convert::TryFrom, num::NonZeroU32};
@ -186,7 +187,8 @@ mod tests {
#[tokio::test]
async fn test_writing_kafka() {
let conn = maybe_skip_kafka_integration!();
let factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let factory = WriteBufferConfigFactory::new(time);
let db_name = DatabaseName::try_from(random_kafka_topic()).unwrap();
let cfg = WriteBufferConnection {
direction: WriteBufferDirection::Write,
@ -206,7 +208,8 @@ mod tests {
#[tokio::test]
async fn test_reading_kafka() {
let conn = maybe_skip_kafka_integration!();
let factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let factory = WriteBufferConfigFactory::new(time);
let server_id = ServerId::try_from(1).unwrap();
let db_name = DatabaseName::try_from(random_kafka_topic()).unwrap();
@ -227,7 +230,8 @@ mod tests {
#[tokio::test]
async fn test_writing_mock() {
let mut factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let mut factory = WriteBufferConfigFactory::new(time);
let state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
@ -264,7 +268,8 @@ mod tests {
#[tokio::test]
async fn test_reading_mock() {
let mut factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let mut factory = WriteBufferConfigFactory::new(time);
let state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
@ -302,7 +307,8 @@ mod tests {
#[tokio::test]
async fn test_writing_mock_failing() {
let mut factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let mut factory = WriteBufferConfigFactory::new(time);
let mock_name = "some_mock";
factory.register_always_fail_mock(mock_name.to_string());
@ -337,7 +343,8 @@ mod tests {
#[tokio::test]
async fn test_reading_mock_failing() {
let mut factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let mut factory = WriteBufferConfigFactory::new(time);
let mock_name = "some_mock";
factory.register_always_fail_mock(mock_name.to_string());
@ -375,7 +382,8 @@ mod tests {
#[test]
#[should_panic(expected = "Mock with the name 'some_mock' already registered")]
fn test_register_mock_twice_panics() {
let mut factory = WriteBufferConfigFactory::new();
let time = Arc::new(time::SystemProvider::new());
let mut factory = WriteBufferConfigFactory::new(time);
let state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());

View File

@ -1,10 +1,13 @@
use std::fmt::Debug;
use std::{
collections::{BTreeMap, BTreeSet},
fmt::Debug,
};
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use data_types::sequence::Sequence;
use entry::{Entry, SequencedEntry};
use futures::{future::BoxFuture, stream::BoxStream};
use time::Time;
/// Generic boxed error type that is used in this crate.
///
@ -15,6 +18,11 @@ pub type WriteBufferError = Box<dyn std::error::Error + Sync + Send>;
/// entries from the Write Buffer at a later time.
#[async_trait]
pub trait WriteBufferWriting: Sync + Send + Debug + 'static {
/// List all known sequencers.
///
/// This set is never empty.
fn sequencer_ids(&self) -> BTreeSet<u32>;
/// Send an `Entry` to the write buffer using the specified sequencer ID.
///
/// Returns information that can be used to restore entries at a later time.
@ -22,7 +30,7 @@ pub trait WriteBufferWriting: Sync + Send + Debug + 'static {
&self,
entry: &Entry,
sequencer_id: u32,
) -> Result<(Sequence, DateTime<Utc>), WriteBufferError>;
) -> Result<(Sequence, Time), WriteBufferError>;
/// Return type (like `"mock"` or `"kafka"`) of this writer.
fn type_name(&self) -> &'static str;
@ -58,7 +66,7 @@ pub trait WriteBufferReading: Sync + Send + Debug + 'static {
/// [`WriteBufferReading`] instance at the same time. If all streams are dropped and requested again, the last
/// offsets of the old streams will be the start offsets for the new streams. If you want to prevent that either
/// create a new [`WriteBufferReading`] or use [`seek`](Self::seek).
fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)>;
fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>>;
/// Seek given sequencer to given sequence number. The next output of related streams will be an entry with at least
/// the given sequence number (the actual sequence number might be skipped due to "holes" in the stream).
@ -76,12 +84,19 @@ pub trait WriteBufferReading: Sync + Send + Debug + 'static {
pub mod test_utils {
//! Generic tests for all write buffer implementations.
use std::{convert::TryFrom, num::NonZeroU32, time::Duration};
use std::{
collections::{BTreeMap, BTreeSet},
convert::TryFrom,
num::NonZeroU32,
sync::Arc,
time::Duration,
};
use async_trait::async_trait;
use chrono::{DateTime, TimeZone, Utc};
use chrono::{TimeZone, Utc};
use entry::{test_helpers::lp_to_entry, Entry};
use futures::{StreamExt, TryStreamExt};
use time::{Time, TimeProvider};
use super::{WriteBufferError, WriteBufferReading, WriteBufferWriting};
@ -95,7 +110,16 @@ pub mod test_utils {
///
/// This will be called multiple times during the test suite. Each resulting context must represent an isolated
/// environment.
async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context;
async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context {
self.new_context_with_time(n_sequencers, Arc::new(time::SystemProvider::new()))
.await
}
async fn new_context_with_time(
&self,
n_sequencers: NonZeroU32,
time_provider: Arc<dyn TimeProvider>,
) -> Self::Context;
}
/// Context used during testing.
@ -134,6 +158,7 @@ pub mod test_utils {
test_watermark(&adapter).await;
test_timestamp(&adapter).await;
test_sequencer_auto_creation(&adapter).await;
test_sequencer_ids(&adapter).await;
}
/// Test IO with a single writer and single reader stream.
@ -157,7 +182,7 @@ pub mod test_utils {
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (sequencer_id, mut stream) = streams.pop().unwrap();
let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
let waker = futures::task::noop_waker();
let mut cx = futures::task::Context::from_waker(&waker);
@ -218,12 +243,12 @@ pub mod test_utils {
// creating stream, drop stream, re-create it => still starts at first entry
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, stream) = streams.pop().unwrap();
let (_sequencer_id, stream) = map_pop_first(&mut streams).unwrap();
drop(stream);
drop(streams);
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = streams.pop().unwrap();
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
assert_eq!(
stream.stream.next().await.unwrap().unwrap().entry(),
&entry_1
@ -234,7 +259,7 @@ pub mod test_utils {
drop(streams);
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = streams.pop().unwrap();
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
assert_eq!(
stream.stream.next().await.unwrap().unwrap().entry(),
&entry_2
@ -249,7 +274,7 @@ pub mod test_utils {
drop(streams);
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = streams.pop().unwrap();
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
assert!(stream.stream.poll_next_unpin(&mut cx).is_pending());
}
@ -273,8 +298,8 @@ pub mod test_utils {
let mut streams = reader.streams();
assert_eq!(streams.len(), 2);
let (sequencer_id_1, mut stream_1) = streams.pop().unwrap();
let (sequencer_id_2, mut stream_2) = streams.pop().unwrap();
let (sequencer_id_1, mut stream_1) = map_pop_first(&mut streams).unwrap();
let (sequencer_id_2, mut stream_2) = map_pop_first(&mut streams).unwrap();
assert_ne!(sequencer_id_1, sequencer_id_2);
let waker = futures::task::noop_waker();
@ -314,6 +339,7 @@ pub mod test_utils {
/// Test multiple writers and multiple readers on multiple sequencers.
///
/// This tests that:
/// - writers retrieve consistent sequencer IDs
/// - writes go to and reads come from the right sequencer, similar to [`test_multi_sequencer_io`] but less
/// detailed
/// - multiple writers can write to a single sequencer
@ -332,19 +358,40 @@ pub mod test_utils {
let mut reader_1 = context.reading(true).await.unwrap();
let mut reader_2 = context.reading(true).await.unwrap();
// TODO: do not hard-code sequencer IDs here but provide a proper interface
writer_1.store_entry(&entry_east_1, 0).await.unwrap();
writer_1.store_entry(&entry_west_1, 1).await.unwrap();
writer_2.store_entry(&entry_east_2, 0).await.unwrap();
let mut sequencer_ids_1 = writer_1.sequencer_ids();
let sequencer_ids_2 = writer_2.sequencer_ids();
assert_eq!(sequencer_ids_1, sequencer_ids_2);
assert_eq!(sequencer_ids_1.len(), 2);
let sequencer_id_1 = set_pop_first(&mut sequencer_ids_1).unwrap();
let sequencer_id_2 = set_pop_first(&mut sequencer_ids_1).unwrap();
writer_1
.store_entry(&entry_east_1, sequencer_id_1)
.await
.unwrap();
writer_1
.store_entry(&entry_west_1, sequencer_id_2)
.await
.unwrap();
writer_2
.store_entry(&entry_east_2, sequencer_id_1)
.await
.unwrap();
assert_reader_content(
&mut reader_1,
&[(0, &[&entry_east_1, &entry_east_2]), (1, &[&entry_west_1])],
&[
(sequencer_id_1, &[&entry_east_1, &entry_east_2]),
(sequencer_id_2, &[&entry_west_1]),
],
)
.await;
assert_reader_content(
&mut reader_2,
&[(0, &[&entry_east_1, &entry_east_2]), (1, &[&entry_west_1])],
&[
(sequencer_id_1, &[&entry_east_1, &entry_east_2]),
(sequencer_id_2, &[&entry_west_1]),
],
)
.await;
}
@ -404,8 +451,8 @@ pub mod test_utils {
let _sequence_number_east_3 = writer.store_entry(&entry_east_3, 0).await.unwrap().0.number;
let mut streams = reader_1.streams();
assert_eq!(streams.len(), 2);
let (_sequencer_id, mut stream_1) = streams.pop().unwrap();
let (_sequencer_id, mut stream_2) = streams.pop().unwrap();
let (_sequencer_id, mut stream_1) = map_pop_first(&mut streams).unwrap();
let (_sequencer_id, mut stream_2) = map_pop_first(&mut streams).unwrap();
assert!(stream_1.stream.poll_next_unpin(&mut cx).is_pending());
assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending());
drop(stream_1);
@ -436,8 +483,8 @@ pub mod test_utils {
let mut streams = reader.streams();
assert_eq!(streams.len(), 2);
let (sequencer_id_1, stream_1) = streams.pop().unwrap();
let (sequencer_id_2, stream_2) = streams.pop().unwrap();
let (sequencer_id_1, stream_1) = map_pop_first(&mut streams).unwrap();
let (sequencer_id_2, stream_2) = map_pop_first(&mut streams).unwrap();
// start at watermark 0
assert_eq!((stream_1.fetch_high_watermark)().await.unwrap(), 0);
@ -469,7 +516,15 @@ pub mod test_utils {
where
T: TestAdapter,
{
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
// Note: Roundtrips are only guaranteed for millisecond-precision
let t0 = Time::from_date_time(Utc.timestamp_millis(129));
let time = Arc::new(time::MockProvider::new(t0));
let context = adapter
.new_context_with_time(
NonZeroU32::try_from(1).unwrap(),
Arc::<time::MockProvider>::clone(&time),
)
.await;
let entry = lp_to_entry("upc user=1 100");
@ -478,33 +533,18 @@ pub mod test_utils {
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (sequencer_id, mut stream) = streams.pop().unwrap();
let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
// ingest data
//
// We want to capture the time of `store_entry`. However for certain sequencers (like Kafka) the time is
// slightly imprecise in a way that it truncates the time to milliseconds. So the workaround in the test is:
//
// 1. Capture a `ts_pre` from which we know that it is close but less or equal to the store time. We use the
// wallclock for that but truncate to milliseconds.
// 2. Capture a `ts_post` from which we know that it is close but greater or equal to the store time. We use
// the wallclock but if it has a sub-millisecond part we use the next millisecond (it's like a ceil
// operation).
// 3. Wait a bit between step 2 and the restore operation so that we can be really sure that the restore
// operation must know the timestamp of the store operation and cannot just "guess" it.
let ts_pre = timestamp_floor_millis(Utc::now());
let reported_ts = writer.store_entry(&entry, sequencer_id).await.unwrap().1;
let ts_post = timestamp_ceil_millis(Utc::now());
// wait a bit
tokio::time::sleep(Duration::from_millis(100)).await;
// advance time
time.inc(Duration::from_secs(10));
// check that the timestamp records the ingestion time, not the read time
let sequenced_entry = stream.stream.next().await.unwrap().unwrap();
let ts_entry = sequenced_entry.producer_wallclock_timestamp().unwrap();
assert!(ts_entry >= ts_pre, "{} >= {}", ts_entry, ts_pre);
assert!(ts_entry <= ts_post, "{} <= {}", ts_entry, ts_post);
assert_eq!(ts_entry, reported_ts);
assert_eq!(ts_entry, t0);
assert_eq!(reported_ts, t0);
}
/// Test that sequencer auto-creation works.
@ -534,6 +574,28 @@ pub mod test_utils {
context.writing(false).await.unwrap();
}
/// Test sequencer ID reporting of writers.
///
/// This tests that:
/// - all sequencers are reported
async fn test_sequencer_ids<T>(adapter: &T)
where
T: TestAdapter,
{
let n_sequencers = 10;
let context = adapter
.new_context(NonZeroU32::try_from(n_sequencers).unwrap())
.await;
let writer_1 = context.writing(true).await.unwrap();
let writer_2 = context.writing(true).await.unwrap();
let sequencer_ids_1 = writer_1.sequencer_ids();
let sequencer_ids_2 = writer_2.sequencer_ids();
assert_eq!(sequencer_ids_1, sequencer_ids_2);
assert_eq!(sequencer_ids_1.len(), n_sequencers as usize);
}
/// Assert that the content of the reader is as expected.
///
/// This will read `expected.len()` from the reader and then ensures that the stream is pending.
@ -541,10 +603,16 @@ pub mod test_utils {
where
R: WriteBufferReading,
{
// normalize expected values
let expected = {
let mut expected = expected.to_vec();
expected.sort_by_key(|(sequencer_id, _entries)| *sequencer_id);
expected
};
// Ensure content of the streams
let mut streams = reader.streams();
let streams = reader.streams();
assert_eq!(streams.len(), expected.len());
streams.sort_by_key(|(sequencer_id, _stream)| *sequencer_id);
for ((actual_sequencer_id, actual_stream), (expected_sequencer_id, expected_entries)) in
streams.into_iter().zip(expected.iter())
@ -563,9 +631,8 @@ pub mod test_utils {
}
// Ensure that the streams are pending
let mut streams = reader.streams();
let streams = reader.streams();
assert_eq!(streams.len(), expected.len());
streams.sort_by_key(|(sequencer_id, _stream)| *sequencer_id);
let waker = futures::task::noop_waker();
let mut cx = futures::task::Context::from_waker(&waker);
@ -582,25 +649,27 @@ pub mod test_utils {
}
}
/// Return largest "milliseconds only" timestamp less than or equal to the given timestamp.
/// Pops first entry from map.
///
/// The result will not have micro- or nanoseconds attached.
fn timestamp_floor_millis(ts: DateTime<Utc>) -> DateTime<Utc> {
let millis = ts.timestamp_millis();
Utc.timestamp_millis(millis)
/// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable.
pub(crate) fn map_pop_first<K, V>(map: &mut BTreeMap<K, V>) -> Option<(K, V)>
where
K: Clone + Ord,
{
map.keys()
.next()
.cloned()
.map(|k| map.remove_entry(&k))
.flatten()
}
/// Return smallest "milliseconds only" timestamp greater than or equal to the given timestamp.
/// Pops first entry from set.
///
/// The result will not have micro- or nanoseconds attached.
fn timestamp_ceil_millis(ts: DateTime<Utc>) -> DateTime<Utc> {
let millis = ts.timestamp_millis();
let ts2 = Utc.timestamp_millis(millis);
if ts2 != ts {
// ts has sub-milli precision, increase millis by 1 (ceiling)
Utc.timestamp_millis(millis + 1)
} else {
ts2
}
/// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable.
pub(crate) fn set_pop_first<T>(set: &mut BTreeSet<T>) -> Option<T>
where
T: Clone + Ord,
{
set.iter().next().cloned().map(|k| set.take(&k)).flatten()
}
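// Usage sketch for the two helpers above (plain std types, nothing
// IOx-specific): they emulate the still-unstable `pop_first` by cloning the
// smallest key and removing it, so repeated calls drain the collection in
// ascending key order.
#[test]
fn pop_first_helpers_sketch() {
    let mut map: BTreeMap<u32, &str> = vec![(2, "b"), (1, "a")].into_iter().collect();
    assert_eq!(map_pop_first(&mut map), Some((1, "a")));
    assert_eq!(map_pop_first(&mut map), Some((2, "b")));
    assert_eq!(map_pop_first(&mut map), None);

    let mut set: BTreeSet<u32> = vec![2, 1].into_iter().collect();
    assert_eq!(set_pop_first(&mut set), Some(1));
    assert_eq!(set_pop_first(&mut set), Some(2));
    assert_eq!(set_pop_first(&mut set), None);
}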
}

View File

@ -1,5 +1,5 @@
use std::{
collections::{BTreeMap, HashMap},
collections::{BTreeMap, BTreeSet, HashMap},
convert::{TryFrom, TryInto},
num::NonZeroU32,
sync::Arc,
@ -7,7 +7,7 @@ use std::{
};
use async_trait::async_trait;
use chrono::{DateTime, TimeZone, Utc};
use chrono::{TimeZone, Utc};
use data_types::{
database_rules::WriteBufferCreationConfig, sequence::Sequence, server_id::ServerId,
};
@ -19,21 +19,89 @@ use rdkafka::{
client::DefaultClientContext,
consumer::{BaseConsumer, Consumer, StreamConsumer},
error::KafkaError,
message::{Headers, OwnedHeaders},
producer::{FutureProducer, FutureRecord},
types::RDKafkaErrorCode,
util::Timeout,
ClientConfig, Message, Offset, TopicPartitionList,
};
use time::{Time, TimeProvider};
use crate::core::{
EntryStream, FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading,
WriteBufferWriting,
};
/// Message header that determines message content type.
pub const HEADER_CONTENT_TYPE: &str = "content-type";
/// Current flatbuffer-based content type.
///
/// This is a value for [`HEADER_CONTENT_TYPE`].
///
/// Inspired by:
/// - <https://stackoverflow.com/a/56502135>
/// - <https://stackoverflow.com/a/48051331>
pub const CONTENT_TYPE_FLATBUFFER: &str =
r#"application/x-flatbuffers; schema="influxdata.iox.write.v1.Entry""#;
/// IOx-specific headers attached to every Kafka message.
#[derive(Debug, PartialEq)]
struct IoxHeaders {
content_type: Option<String>,
}
impl IoxHeaders {
/// Create new headers with sane default values.
fn new() -> Self {
Self {
content_type: Some(CONTENT_TYPE_FLATBUFFER.to_string()),
}
}
/// Create new headers where all information is missing.
fn empty() -> Self {
Self { content_type: None }
}
}
impl<H> From<&H> for IoxHeaders
where
H: Headers,
{
fn from(headers: &H) -> Self {
let mut res = Self { content_type: None };
for i in 0..headers.count() {
if let Some((name, value)) = headers.get(i) {
if name.eq_ignore_ascii_case(HEADER_CONTENT_TYPE) {
res.content_type = String::from_utf8(value.to_vec()).ok();
}
}
}
res
}
}
impl From<&IoxHeaders> for OwnedHeaders {
fn from(iox_headers: &IoxHeaders) -> Self {
let mut res = Self::new();
if let Some(content_type) = iox_headers.content_type.as_ref() {
res = res.add(HEADER_CONTENT_TYPE, content_type);
}
res
}
}
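// Sketch of the consumer-side content negotiation used in `streams` below
// (illustrative only, mirroring the logic rather than calling it): a missing
// `content-type` header currently falls back to flatbuffers (issue 2805),
// while an unrecognized value is rejected.
#[test]
fn content_type_negotiation_sketch() {
    let negotiate = |headers: &IoxHeaders| -> Result<(), WriteBufferError> {
        let content_type = headers
            .content_type
            .clone()
            .unwrap_or_else(|| CONTENT_TYPE_FLATBUFFER.to_string());
        if content_type != CONTENT_TYPE_FLATBUFFER {
            return Err(format!("Unknown message format: {}", content_type).into());
        }
        Ok(())
    };

    assert!(negotiate(&IoxHeaders::new()).is_ok());
    assert!(negotiate(&IoxHeaders::empty()).is_ok()); // fallback path
    let unknown = IoxHeaders {
        content_type: Some("foo".to_string()),
    };
    assert_eq!(
        negotiate(&unknown).unwrap_err().to_string(),
        "Unknown message format: foo"
    );
}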
pub struct KafkaBufferProducer {
conn: String,
database_name: String,
time_provider: Arc<dyn TimeProvider>,
producer: FutureProducer,
partitions: BTreeSet<u32>,
}
// Needed because rdkafka's FutureProducer doesn't impl Debug
@ -48,24 +116,32 @@ impl std::fmt::Debug for KafkaBufferProducer {
#[async_trait]
impl WriteBufferWriting for KafkaBufferProducer {
fn sequencer_ids(&self) -> BTreeSet<u32> {
self.partitions.clone()
}
/// Send an `Entry` to Kafka using the sequencer ID as a partition.
async fn store_entry(
&self,
entry: &Entry,
sequencer_id: u32,
) -> Result<(Sequence, DateTime<Utc>), WriteBufferError> {
) -> Result<(Sequence, Time), WriteBufferError> {
let partition = i32::try_from(sequencer_id)?;
// truncate milliseconds from timestamps because that's what Kafka supports
let timestamp_millis = Utc::now().timestamp_millis();
let timestamp = Utc.timestamp_millis(timestamp_millis);
let date_time = self.time_provider.now().date_time();
let timestamp_millis = date_time.timestamp_millis();
let timestamp = Time::from_timestamp_millis(timestamp_millis);
let headers = IoxHeaders::new();
// This type annotation is necessary because `FutureRecord` is generic over key type, but
// key is optional and we're not setting a key. `String` is arbitrary.
let record: FutureRecord<'_, String, _> = FutureRecord::to(&self.database_name)
.payload(entry.data())
.partition(partition)
.timestamp(timestamp_millis);
.timestamp(timestamp_millis)
.headers((&headers).into());
debug!(db_name=%self.database_name, partition, size=entry.data().len(), "writing to kafka");
@ -97,6 +173,7 @@ impl KafkaBufferProducer {
database_name: impl Into<String> + Send,
connection_config: &HashMap<String, String>,
creation_config: Option<&WriteBufferCreationConfig>,
time_provider: Arc<dyn TimeProvider>,
) -> Result<Self, WriteBufferError> {
let conn = conn.into();
let database_name = database_name.into();
@ -121,22 +198,17 @@ impl KafkaBufferProducer {
cfg.set("allow.auto.create.topics", "false");
// handle auto-creation
if get_partitions(&database_name, &cfg).await?.is_empty() {
if let Some(cfg) = creation_config {
create_kafka_topic(&conn, &database_name, cfg.n_sequencers, &cfg.options).await?;
} else {
return Err("no partitions found and auto-creation not requested"
.to_string()
.into());
}
}
let partitions =
maybe_auto_create_topics(&conn, &database_name, creation_config, &cfg).await?;
let producer: FutureProducer = cfg.create()?;
Ok(Self {
conn,
database_name,
time_provider,
producer,
partitions,
})
}
}
@ -159,8 +231,8 @@ impl std::fmt::Debug for KafkaBufferConsumer {
#[async_trait]
impl WriteBufferReading for KafkaBufferConsumer {
fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)> {
let mut streams = vec![];
fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>> {
let mut streams = BTreeMap::new();
for (sequencer_id, consumer) in &self.consumers {
let sequencer_id = *sequencer_id;
@ -171,7 +243,19 @@ impl WriteBufferReading for KafkaBufferConsumer {
.stream()
.map(move |message| {
let message = message?;
let entry = Entry::try_from(message.payload().unwrap().to_vec())?;
let headers: IoxHeaders = message.headers().map(|headers| headers.into()).unwrap_or_else(IoxHeaders::empty);
// Fallback for now https://github.com/influxdata/influxdb_iox/issues/2805
let content_type = headers.content_type.unwrap_or_else(|| CONTENT_TYPE_FLATBUFFER.to_string());
if content_type != CONTENT_TYPE_FLATBUFFER {
return Err(format!("Unknown message format: {}", content_type).into());
}
let payload = message.payload().ok_or_else::<WriteBufferError, _>(|| {
"Payload missing".to_string().into()
})?;
let entry = Entry::try_from(payload.to_vec())?;
// Timestamps were added as part of
// [KIP-32](https://cwiki.apache.org/confluence/display/KAFKA/KIP-32+-+Add+timestamps+to+Kafka+message).
@ -194,7 +278,7 @@ impl WriteBufferReading for KafkaBufferConsumer {
number: message.offset().try_into()?,
};
Ok(SequencedEntry::new_from_sequence(sequence, timestamp, entry))
Ok(SequencedEntry::new_from_sequence(sequence, Time::from_date_time(timestamp), entry))
})
.boxed();
@ -223,13 +307,13 @@ impl WriteBufferReading for KafkaBufferConsumer {
};
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;
streams.push((
streams.insert(
sequencer_id,
EntryStream {
stream,
fetch_high_watermark,
},
));
);
}
streams
@ -305,17 +389,8 @@ impl KafkaBufferConsumer {
cfg.set("auto.offset.reset", "smallest");
// figure out which partitions exists
let mut partitions = get_partitions(&database_name, &cfg).await?;
if partitions.is_empty() {
if let Some(cfg2) = creation_config {
create_kafka_topic(&conn, &database_name, cfg2.n_sequencers, &cfg2.options).await?;
partitions = get_partitions(&database_name, &cfg).await?;
} else {
return Err("no partitions found and auto-creation not requested"
.to_string()
.into());
}
}
let partitions =
maybe_auto_create_topics(&conn, &database_name, creation_config, &cfg).await?;
info!(%database_name, ?partitions, "found Kafka partitions");
// setup a single consumer per partition, at least until https://github.com/fede1024/rust-rdkafka/pull/351 is
@ -351,7 +426,10 @@ impl KafkaBufferConsumer {
}
}
async fn get_partitions(database_name: &str, cfg: &ClientConfig) -> Result<Vec<u32>, KafkaError> {
async fn get_partitions(
database_name: &str,
cfg: &ClientConfig,
) -> Result<BTreeSet<u32>, KafkaError> {
let database_name = database_name.to_string();
let cfg = cfg.clone();
@ -365,12 +443,11 @@ async fn get_partitions(database_name: &str, cfg: &ClientConfig) -> Result<Vec<u
let topic_metadata = metadata.topics().get(0).expect("requested a single topic");
let mut partitions: Vec<_> = topic_metadata
let partitions: BTreeSet<_> = topic_metadata
.partitions()
.iter()
.map(|partition_metadata| partition_metadata.id().try_into().unwrap())
.collect();
partitions.sort_unstable();
Ok(partitions)
}
@ -420,6 +497,39 @@ async fn create_kafka_topic(
}
}
async fn maybe_auto_create_topics(
kafka_connection: &str,
database_name: &str,
creation_config: Option<&WriteBufferCreationConfig>,
cfg: &ClientConfig,
) -> Result<BTreeSet<u32>, WriteBufferError> {
let mut partitions = get_partitions(database_name, cfg).await?;
if partitions.is_empty() {
if let Some(creation_config) = creation_config {
create_kafka_topic(
kafka_connection,
database_name,
creation_config.n_sequencers,
&creation_config.options,
)
.await?;
partitions = get_partitions(database_name, cfg).await?;
// while the number of partitions might differ from `creation_config.n_sequencers` due to a
// conflicting, concurrent topic creation, it must not be empty at this point
if partitions.is_empty() {
return Err("Cannot create non-empty topic".to_string().into());
}
} else {
return Err("no partitions found and auto-creation not requested"
.to_string()
.into());
}
}
Ok(partitions)
}
pub mod test_utils {
use std::{collections::HashMap, time::Duration};
@ -518,9 +628,14 @@ mod tests {
num::NonZeroU32,
sync::atomic::{AtomicU32, Ordering},
};
use time::TimeProvider;
use entry::test_helpers::lp_to_entry;
use crate::{
core::test_utils::{perform_generic_tests, TestAdapter, TestContext},
core::test_utils::{
map_pop_first, perform_generic_tests, set_pop_first, TestAdapter, TestContext,
},
kafka::test_utils::random_kafka_topic,
maybe_skip_kafka_integration,
};
@ -541,12 +656,17 @@ mod tests {
impl TestAdapter for KafkaTestAdapter {
type Context = KafkaTestContext;
async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context {
async fn new_context_with_time(
&self,
n_sequencers: NonZeroU32,
time_provider: Arc<dyn TimeProvider>,
) -> Self::Context {
KafkaTestContext {
conn: self.conn.clone(),
database_name: random_kafka_topic(),
server_id_counter: AtomicU32::new(1),
n_sequencers,
time_provider,
}
}
}
@ -556,6 +676,7 @@ mod tests {
database_name: String,
server_id_counter: AtomicU32,
n_sequencers: NonZeroU32,
time_provider: Arc<dyn TimeProvider>,
}
impl KafkaTestContext {
@ -579,6 +700,7 @@ mod tests {
&self.database_name,
&Default::default(),
self.creation_config(creation_config).as_ref(),
Arc::clone(&self.time_provider),
)
.await
}
@ -609,6 +731,7 @@ mod tests {
async fn topic_create_twice() {
let conn = maybe_skip_kafka_integration!();
let database_name = random_kafka_topic();
create_kafka_topic(
&conn,
&database_name,
@ -617,6 +740,7 @@ mod tests {
)
.await
.unwrap();
create_kafka_topic(
&conn,
&database_name,
@ -626,4 +750,90 @@ mod tests {
.await
.unwrap();
}
#[tokio::test]
async fn error_no_payload() {
let conn = maybe_skip_kafka_integration!();
let adapter = KafkaTestAdapter::new(conn);
let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await;
let writer = ctx.writing(true).await.unwrap();
let partition = set_pop_first(&mut writer.sequencer_ids()).unwrap() as i32;
let record: FutureRecord<'_, String, [u8]> =
FutureRecord::to(&writer.database_name).partition(partition);
writer.producer.send(record, Timeout::Never).await.unwrap();
let mut reader = ctx.reading(true).await.unwrap();
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
let err = stream.stream.next().await.unwrap().unwrap_err();
assert_eq!(err.to_string(), "Payload missing");
}
#[tokio::test]
async fn content_type_header_missing() {
// Fallback for now https://github.com/influxdata/influxdb_iox/issues/2805
let conn = maybe_skip_kafka_integration!();
let adapter = KafkaTestAdapter::new(conn);
let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await;
let writer = ctx.writing(true).await.unwrap();
let partition = set_pop_first(&mut writer.sequencer_ids()).unwrap() as i32;
let entry = lp_to_entry("upc,region=east user=1 100");
let record: FutureRecord<'_, String, _> = FutureRecord::to(&writer.database_name)
.payload(entry.data())
.partition(partition);
writer.producer.send(record, Timeout::Never).await.unwrap();
let mut reader = ctx.reading(true).await.unwrap();
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
stream.stream.next().await.unwrap().unwrap();
}
#[tokio::test]
async fn content_type_header_unknown() {
let conn = maybe_skip_kafka_integration!();
let adapter = KafkaTestAdapter::new(conn);
let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await;
let writer = ctx.writing(true).await.unwrap();
let partition = set_pop_first(&mut writer.sequencer_ids()).unwrap() as i32;
let entry = lp_to_entry("upc,region=east user=1 100");
let record: FutureRecord<'_, String, _> = FutureRecord::to(&writer.database_name)
.payload(entry.data())
.partition(partition)
.headers(OwnedHeaders::new().add(HEADER_CONTENT_TYPE, "foo"));
writer.producer.send(record, Timeout::Never).await.unwrap();
let mut reader = ctx.reading(true).await.unwrap();
let mut streams = reader.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
let err = stream.stream.next().await.unwrap().unwrap_err();
assert_eq!(err.to_string(), "Unknown message format: foo");
}
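// Hedged sketch of the decode dispatch that the three tests above pin down
// (illustrative only, not the crate's exact code): a record without a payload
// is rejected, a record without a content-type header currently falls back to
// being decoded as an entry (see the issue linked above), and an unrecognized
// content type is an error. The accepted value below is a placeholder, not the
// crate's real constant.
const SKETCH_CONTENT_TYPE: &str = "application/x-flatbuffers";

fn sketch_decode<'a>(
    payload: Option<&'a [u8]>,
    content_type: Option<&str>,
) -> Result<&'a [u8], String> {
    let payload = payload.ok_or_else(|| "Payload missing".to_string())?;
    match content_type {
        // fallback while older producers still omit the header
        None => Ok(payload),
        Some(ct) if ct == SKETCH_CONTENT_TYPE => Ok(payload),
        Some(other) => Err(format!("Unknown message format: {}", other)),
    }
}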
#[test]
fn headers_roundtrip() {
let iox_headers1 = IoxHeaders::new();
let kafka_headers: OwnedHeaders = (&iox_headers1).into();
let iox_headers2: IoxHeaders = (&kafka_headers).into();
assert_eq!(iox_headers1, iox_headers2);
}
#[test]
fn headers_case_handling() {
let kafka_headers = OwnedHeaders::new()
.add("content-type", "a")
.add("CONTENT-TYPE", "b")
.add("content-TYPE", "c");
let actual: IoxHeaders = (&kafka_headers).into();
let expected = IoxHeaders {
content_type: Some("c".to_string()),
};
assert_eq!(actual, expected);
}
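// Minimal sketch of the case-insensitive, last-one-wins resolution the test
// above asserts (illustrative, not the crate's `From<&OwnedHeaders>` impl):
// walk the header pairs in order and keep overwriting the stored value
// whenever the name matches the content-type header ignoring ASCII case.
// The crate's HEADER_CONTENT_TYPE constant is presumably the literal below.
fn sketch_fold_content_type<'a>(
    headers: impl IntoIterator<Item = (&'a str, &'a str)>,
) -> Option<String> {
    let mut content_type = None;
    for (name, value) in headers {
        if name.eq_ignore_ascii_case("content-type") {
            content_type = Some(value.to_string());
        }
    }
    content_type
}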
}

View File

@@ -1,18 +1,18 @@
use std::{
collections::BTreeMap,
collections::{BTreeMap, BTreeSet},
num::NonZeroU32,
sync::Arc,
task::{Poll, Waker},
};
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use futures::{stream, FutureExt, StreamExt};
use parking_lot::Mutex;
use data_types::database_rules::WriteBufferCreationConfig;
use data_types::sequence::Sequence;
use entry::{Entry, SequencedEntry};
use time::{Time, TimeProvider};
use crate::core::{
EntryStream, FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading,
@@ -199,12 +199,14 @@ impl MockBufferSharedState {
#[derive(Debug)]
pub struct MockBufferForWriting {
state: MockBufferSharedState,
time_provider: Arc<dyn TimeProvider>,
}
impl MockBufferForWriting {
pub fn new(
state: MockBufferSharedState,
creation_config: Option<&WriteBufferCreationConfig>,
time_provider: Arc<dyn TimeProvider>,
) -> Result<Self, WriteBufferError> {
state.maybe_auto_init(creation_config);
@@ -215,17 +217,26 @@ impl MockBufferForWriting {
}
}
Ok(Self { state })
Ok(Self {
state,
time_provider,
})
}
}
#[async_trait]
impl WriteBufferWriting for MockBufferForWriting {
fn sequencer_ids(&self) -> BTreeSet<u32> {
let mut guard = self.state.entries.lock();
let entries = guard.as_mut().unwrap();
entries.keys().copied().collect()
}
async fn store_entry(
&self,
entry: &Entry,
sequencer_id: u32,
) -> Result<(Sequence, DateTime<Utc>), WriteBufferError> {
) -> Result<(Sequence, Time), WriteBufferError> {
let mut guard = self.state.entries.lock();
let entries = guard.as_mut().unwrap();
let sequencer_entries = entries.get_mut(&sequencer_id).unwrap();
@@ -236,7 +247,7 @@ impl WriteBufferWriting for MockBufferForWriting {
id: sequencer_id,
number: sequence_number,
};
let timestamp = Utc::now();
let timestamp = self.time_provider.now();
sequencer_entries.push(Ok(SequencedEntry::new_from_sequence(
sequence,
timestamp,
@@ -257,11 +268,15 @@ pub struct MockBufferForWritingThatAlwaysErrors;
#[async_trait]
impl WriteBufferWriting for MockBufferForWritingThatAlwaysErrors {
fn sequencer_ids(&self) -> BTreeSet<u32> {
IntoIterator::into_iter([0]).collect()
}
async fn store_entry(
&self,
_entry: &Entry,
_sequencer_id: u32,
) -> Result<(Sequence, DateTime<Utc>), WriteBufferError> {
) -> Result<(Sequence, Time), WriteBufferError> {
Err(String::from(
"Something bad happened on the way to writing an entry in the write buffer",
)
@@ -331,13 +346,13 @@ impl std::fmt::Debug for MockBufferForReading {
#[async_trait]
impl WriteBufferReading for MockBufferForReading {
fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)> {
fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>> {
let sequencer_ids: Vec<_> = {
let playback_states = self.playback_states.lock();
playback_states.keys().copied().collect()
};
let mut streams = vec![];
let mut streams = BTreeMap::new();
for sequencer_id in sequencer_ids {
let shared_state = self.shared_state.clone();
let playback_states = Arc::clone(&self.playback_states);
@@ -399,13 +414,13 @@ impl WriteBufferReading for MockBufferForReading {
};
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;
streams.push((
streams.insert(
sequencer_id,
EntryStream {
stream,
fetch_high_watermark,
},
));
);
}
streams
@@ -438,7 +453,7 @@ pub struct MockBufferForReadingThatAlwaysErrors;
#[async_trait]
impl WriteBufferReading for MockBufferForReadingThatAlwaysErrors {
fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)> {
fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>> {
let stream = stream::poll_fn(|_ctx| {
Poll::Ready(Some(Err(String::from(
"Something bad happened while reading from stream",
@@ -453,13 +468,14 @@ impl WriteBufferReading for MockBufferForReadingThatAlwaysErrors {
fut.boxed() as FetchHighWatermarkFut<'_>
};
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;
vec![(
IntoIterator::into_iter([(
0,
EntryStream {
stream,
fetch_high_watermark,
},
)]
)])
.collect()
}
async fn seek(
@@ -481,8 +497,9 @@ mod tests {
use std::time::Duration;
use entry::test_helpers::lp_to_entry;
use time::TimeProvider;
use crate::core::test_utils::{perform_generic_tests, TestAdapter, TestContext};
use crate::core::test_utils::{map_pop_first, perform_generic_tests, TestAdapter, TestContext};
use super::*;
@@ -492,10 +509,15 @@ mod tests {
impl TestAdapter for MockTestAdapter {
type Context = MockTestContext;
async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context {
async fn new_context_with_time(
&self,
n_sequencers: NonZeroU32,
time_provider: Arc<dyn TimeProvider>,
) -> Self::Context {
MockTestContext {
state: MockBufferSharedState::uninitialized(),
n_sequencers,
time_provider,
}
}
}
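// Hedged sketch of the presumed `TestAdapter` shape after this change: the
// concrete adapters now implement only `new_context_with_time`, while
// `new_context` (still called elsewhere in these tests) is assumed to be a
// provided method that plugs in the wall-clock provider. `time::SystemProvider`
// is an assumption about the `time` crate, not taken verbatim from this diff.
#[async_trait]
trait SketchTestAdapter: Send + Sync {
    type Context: Send;

    async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context {
        // assumption: SystemProvider is the default wall-clock TimeProvider
        self.new_context_with_time(n_sequencers, Arc::new(time::SystemProvider::new()))
            .await
    }

    async fn new_context_with_time(
        &self,
        n_sequencers: NonZeroU32,
        time_provider: Arc<dyn TimeProvider>,
    ) -> Self::Context;
}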
@@ -503,6 +525,7 @@ mod tests {
struct MockTestContext {
state: MockBufferSharedState,
n_sequencers: NonZeroU32,
time_provider: Arc<dyn TimeProvider>,
}
impl MockTestContext {
@@ -524,6 +547,7 @@ mod tests {
MockBufferForWriting::new(
self.state.clone(),
self.creation_config(creation_config).as_ref(),
Arc::clone(&self.time_provider),
)
}
@@ -558,7 +582,7 @@ mod tests {
let sequence = Sequence::new(2, 0);
state.push_entry(SequencedEntry::new_from_sequence(
sequence,
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
}
@@ -571,7 +595,7 @@ mod tests {
let sequence = Sequence::new(0, 0);
state.push_entry(SequencedEntry::new_from_sequence(
sequence,
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
}
@@ -587,12 +611,12 @@ mod tests {
let sequence = Sequence::new(1, 13);
state.push_entry(SequencedEntry::new_from_sequence(
sequence,
Utc::now(),
Time::from_timestamp_nanos(0),
entry.clone(),
));
state.push_entry(SequencedEntry::new_from_sequence(
sequence,
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
}
@@ -609,12 +633,12 @@ mod tests {
let sequence_2 = Sequence::new(1, 12);
state.push_entry(SequencedEntry::new_from_sequence(
sequence_1,
Utc::now(),
Time::from_timestamp_nanos(0),
entry.clone(),
));
state.push_entry(SequencedEntry::new_from_sequence(
sequence_2,
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
}
@@ -676,12 +700,12 @@ mod tests {
let sequence_2 = Sequence::new(1, 12);
state.push_entry(SequencedEntry::new_from_sequence(
sequence_1,
Utc::now(),
Time::from_timestamp_nanos(0),
entry.clone(),
));
state.push_entry(SequencedEntry::new_from_sequence(
sequence_2,
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
@@ -704,7 +728,7 @@ mod tests {
);
let mut streams = reader.streams();
let (_id, mut stream) = streams.pop().unwrap();
let (_id, mut stream) = map_pop_first(&mut streams).unwrap();
assert_eq!(
stream.stream.next().await.unwrap().unwrap_err().to_string(),
"Something bad happened while reading from stream"
@@ -738,7 +762,7 @@ mod tests {
let sequence_1 = Sequence::new(0, 11);
state.push_entry(SequencedEntry::new_from_sequence(
sequence_1,
Utc::now(),
Time::from_timestamp_nanos(0),
entry,
));
@@ -775,13 +799,12 @@ mod tests {
#[tokio::test]
async fn test_delayed_insert() {
let now = Utc::now();
let state =
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 0),
now,
Time::from_timestamp_nanos(0),
lp_to_entry("mem foo=1 10"),
));
@@ -789,7 +812,7 @@ mod tests {
let playback_state = Arc::clone(&read.playback_states);
let consumer = tokio::spawn(async move {
let mut stream = read.streams().pop().unwrap().1.stream;
let mut stream = map_pop_first(&mut read.streams()).unwrap().1.stream;
stream.next().await.unwrap().unwrap();
stream.next().await.unwrap().unwrap();
});
@@ -801,7 +824,7 @@ mod tests {
state.push_entry(SequencedEntry::new_from_sequence(
Sequence::new(0, 1),
now,
Time::from_timestamp_nanos(0),
lp_to_entry("mem foo=2 20"),
));
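// Hedged sketch of the wake-up mechanism this delayed-insert test exercises
// (illustrative, not the mock's exact internals): the reader's poll_fn parks
// the task's Waker once it has drained the visible entries, and a later
// push_entry wakes it so the stream yields the late entry without busy-polling.
struct SketchPlayback {
    entries: Vec<String>,
    next: usize,
    waker: Option<Waker>,
}

fn sketch_stream(state: Arc<Mutex<SketchPlayback>>) -> impl futures::Stream<Item = String> {
    stream::poll_fn(move |cx| {
        let mut guard = state.lock();
        if guard.next < guard.entries.len() {
            let item = guard.entries[guard.next].clone();
            guard.next += 1;
            Poll::Ready(Some(item))
        } else {
            // park until a producer pushes another entry and wakes us
            guard.waker = Some(cx.waker().clone());
            Poll::Pending
        }
    })
}

fn sketch_push(state: &Mutex<SketchPlayback>, entry: String) {
    let mut guard = state.lock();
    guard.entries.push(entry);
    if let Some(waker) = guard.waker.take() {
        waker.wake();
    }
}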