diff --git a/Cargo.lock b/Cargo.lock index 80a1918e25..5e5870e339 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1318,13 +1318,13 @@ dependencies = [ "data_types", "futures", "google_types", - "influxdb_line_protocol", "num_cpus", "observability_deps", + "pbjson", + "pbjson_build", "proc-macro2", "prost", "prost-build", - "prost-types", "regex", "serde", "serde_json", @@ -1392,6 +1392,8 @@ version = "0.1.0" dependencies = [ "bytes", "chrono", + "pbjson", + "pbjson_build", "prost", "prost-build", "serde", @@ -2896,6 +2898,39 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58" +[[package]] +name = "pbjson" +version = "0.1.0" +dependencies = [ + "base64 0.13.0", + "bytes", + "serde", +] + +[[package]] +name = "pbjson_build" +version = "0.1.0" +dependencies = [ + "heck", + "itertools 0.10.1", + "pbjson_test", + "prost", + "prost-types", + "tempfile", +] + +[[package]] +name = "pbjson_test" +version = "0.1.0" +dependencies = [ + "pbjson", + "pbjson_build", + "prost", + "prost-build", + "serde", + "serde_json", +] + [[package]] name = "peeking_take_while" version = "0.1.2" diff --git a/Cargo.toml b/Cargo.toml index a676c7c180..f2b5bd2b10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,9 @@ members = [ "observability_deps", "packers", "panic_logging", + "pbjson", + "pbjson_build", + "pbjson_test", "persistence_windows", "predicate", "query", diff --git a/data_types/src/job.rs b/data_types/src/job.rs index 2918c96e1c..33ca1165be 100644 --- a/data_types/src/job.rs +++ b/data_types/src/job.rs @@ -117,45 +117,3 @@ impl Job { } } } - -/// The status of a running operation -#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] -pub enum OperationStatus { - /// A task associated with the operation is running - Running, - /// All tasks associated with the operation have finished successfully - Success, - /// The operation was cancelled and no associated tasks are running - Cancelled, - /// An operation error was returned - Errored, -} - -/// A group of asynchronous tasks being performed by an IOx server -/// -/// TODO: Temporary until prost adds JSON support -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Operation { - /// ID of the running operation - pub id: usize, - // The total number of created tasks - pub total_count: u64, - // The number of pending tasks - pub pending_count: u64, - // The number of tasks that completed successfully - pub success_count: u64, - // The number of tasks that returned an error - pub error_count: u64, - // The number of tasks that were cancelled - pub cancelled_count: u64, - // The number of tasks that did not run to completion (e.g. 
panic) - pub dropped_count: u64, - /// Wall time spent executing this operation - pub wall_time: std::time::Duration, - /// CPU time spent executing this operation - pub cpu_time: std::time::Duration, - /// Additional job metadata - pub job: Option, - /// The status of the running operation - pub status: OperationStatus, -} diff --git a/generated_types/Cargo.toml b/generated_types/Cargo.toml index 7158d32911..9def5d286e 100644 --- a/generated_types/Cargo.toml +++ b/generated_types/Cargo.toml @@ -7,16 +7,12 @@ edition = "2018" [dependencies] # In alphabetical order bytes = { version = "1.0", features = ["serde"] } data_types = { path = "../data_types" } -# See docs/regenerating_flatbuffers.md about updating generated code when updating the -# version of the flatbuffers crate -#flatbuffers = "2" futures = "0.3" google_types = { path = "../google_types" } -influxdb_line_protocol = { path = "../influxdb_line_protocol" } -observability_deps = { path = "../observability_deps" } num_cpus = "1.13.0" +observability_deps = { path = "../observability_deps" } +pbjson = { path = "../pbjson" } prost = "0.8" -prost-types = "0.8" regex = "1.4" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.67" @@ -31,3 +27,4 @@ chrono = { version = "0.4", features = ["serde"] } proc-macro2 = "=1.0.27" tonic-build = "0.5" prost-build = "0.8" +pbjson_build = { path = "../pbjson_build" } diff --git a/generated_types/build.rs b/generated_types/build.rs index 276e289e71..ff248f19b0 100644 --- a/generated_types/build.rs +++ b/generated_types/build.rs @@ -64,12 +64,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> { config .compile_well_known_types() .disable_comments(&[".google"]) - // approximates jsonpb. This is still not enough to deal with the special cases like Any - // Tracking issue for proper jsonpb support in prost: https://github.com/danburkert/prost/issues/277 - .type_attribute( - ".", - "#[derive(serde::Serialize,serde::Deserialize)] #[serde(rename_all = \"camelCase\")]", - ) .extern_path(".google.protobuf", "::google_types::protobuf") .bytes(&[".influxdata.iox.catalog.v1.AddParquet.metadata"]); @@ -79,5 +73,11 @@ fn generate_grpc_types(root: &Path) -> Result<()> { .format(true) .compile_with_config(config, &proto_files, &[root.into()])?; + let descriptor_set = std::fs::read(descriptor_path)?; + + pbjson_build::Builder::new() + .register_descriptors(&descriptor_set)? + .build(&[".influxdata", ".google.longrunning", ".google.rpc"])?; + Ok(()) } diff --git a/generated_types/protos/influxdata/iox/management/v1/jobs.proto b/generated_types/protos/influxdata/iox/management/v1/jobs.proto index 2e5985f144..5a021c3d1b 100644 --- a/generated_types/protos/influxdata/iox/management/v1/jobs.proto +++ b/generated_types/protos/influxdata/iox/management/v1/jobs.proto @@ -27,12 +27,11 @@ message OperationMetadata { // The number of tasks that did not run to completion (e.g. panic) uint64 dropped_count = 16; + reserved 6; + // What kind of job is it? 
   oneof job {
     Dummy dummy = 5;
-    /* historical artifact
-    PersistSegment persist_segment = 6;
-    */
     CloseChunk close_chunk = 7;
     WriteChunk write_chunk = 8;
     WipePreservedCatalog wipe_preserved_catalog = 9;
diff --git a/generated_types/src/google.rs b/generated_types/src/google.rs
index bc9211d0ee..9ab7287fef 100644
--- a/generated_types/src/google.rs
+++ b/generated_types/src/google.rs
@@ -5,10 +5,17 @@ pub use google_types::*;
 
 pub mod rpc {
     include!(concat!(env!("OUT_DIR"), "/google.rpc.rs"));
+    include!(concat!(env!("OUT_DIR"), "/google.rpc.serde.rs"));
 }
 
 pub mod longrunning {
     include!(concat!(env!("OUT_DIR"), "/google.longrunning.rs"));
+    include!(concat!(env!("OUT_DIR"), "/google.longrunning.serde.rs"));
+
+    use crate::google::{FieldViolation, FieldViolationExt};
+    use crate::influxdata::iox::management::v1::{OperationMetadata, OPERATION_METADATA};
+    use prost::{bytes::Bytes, Message};
+    use std::convert::TryFrom;
 
     impl Operation {
         /// Return the IOx operation `id`. This `id` can
@@ -19,6 +26,46 @@ pub mod longrunning {
                 .parse()
                 .expect("Internal error: id returned from server was not an integer")
         }
+
+        /// Decodes an IOx `OperationMetadata` metadata payload
+        pub fn iox_metadata(&self) -> Result<OperationMetadata, FieldViolation> {
+            let metadata = self
+                .metadata
+                .as_ref()
+                .ok_or_else(|| FieldViolation::required("metadata"))?;
+
+            if !crate::protobuf_type_url_eq(&metadata.type_url, OPERATION_METADATA) {
+                return Err(FieldViolation {
+                    field: "metadata.type_url".to_string(),
+                    description: "Unexpected field type".to_string(),
+                });
+            }
+
+            Message::decode(Bytes::clone(&metadata.value)).field("metadata.value")
+        }
+    }
+
+    /// Groups together an `Operation` with a decoded `OperationMetadata`
+    ///
+    /// When serialized this will serialize the encoded Any field on `Operation` along
+    /// with its decoded representation as `OperationMetadata`
+    #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
+    pub struct IoxOperation {
+        /// The `Operation` message returned from the API
+        pub operation: Operation,
+        /// The decoded `Operation::metadata` contained within `IoxOperation::operation`
+        pub metadata: OperationMetadata,
+    }
+
+    impl TryFrom<Operation> for IoxOperation {
+        type Error = FieldViolation;
+
+        fn try_from(operation: Operation) -> Result<Self, Self::Error> {
+            Ok(Self {
+                metadata: operation.iox_metadata()?,
+                operation,
+            })
+        }
     }
 }
diff --git a/generated_types/src/job.rs b/generated_types/src/job.rs
index 499826f455..d3d1429875 100644
--- a/generated_types/src/job.rs
+++ b/generated_types/src/job.rs
@@ -1,11 +1,5 @@
-use crate::google::{longrunning, protobuf::Any, FieldViolation, FieldViolationExt};
 use crate::influxdata::iox::management::v1 as management;
-use crate::protobuf_type_url_eq;
-use data_types::chunk_metadata::ChunkAddr;
-use data_types::job::{Job, OperationStatus};
-use data_types::partition_metadata::PartitionAddr;
-use std::convert::TryFrom;
-use std::sync::Arc;
+use data_types::job::Job;
 
 impl From<Job> for management::operation_metadata::Job {
     fn from(job: Job) -> Self {
@@ -61,142 +55,3 @@ impl From<Job> for management::operation_metadata::Job {
         }
     }
 }
-
-impl From<management::operation_metadata::Job> for Job {
-    fn from(value: management::operation_metadata::Job) -> Self {
-        use management::operation_metadata::Job;
-        match value {
-            Job::Dummy(management::Dummy { nanos, db_name }) => Self::Dummy {
-                nanos,
-                db_name: (!db_name.is_empty()).then(|| Arc::from(db_name.as_str())),
-            },
-            Job::CloseChunk(management::CloseChunk {
-                db_name,
-                partition_key,
-                table_name,
-                chunk_id,
-            }) => Self::CompactChunk {
-                chunk: ChunkAddr {
-                    db_name:
Arc::from(db_name.as_str()), - table_name: Arc::from(table_name.as_str()), - partition_key: Arc::from(partition_key.as_str()), - chunk_id, - }, - }, - Job::WriteChunk(management::WriteChunk { - db_name, - partition_key, - table_name, - chunk_id, - }) => Self::WriteChunk { - chunk: ChunkAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from(table_name.as_str()), - partition_key: Arc::from(partition_key.as_str()), - chunk_id, - }, - }, - Job::WipePreservedCatalog(management::WipePreservedCatalog { db_name }) => { - Self::WipePreservedCatalog { - db_name: Arc::from(db_name.as_str()), - } - } - Job::CompactChunks(management::CompactChunks { - db_name, - partition_key, - table_name, - chunks, - }) => Self::CompactChunks { - partition: PartitionAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from(table_name.as_str()), - partition_key: Arc::from(partition_key.as_str()), - }, - chunks, - }, - Job::PersistChunks(management::PersistChunks { - db_name, - partition_key, - table_name, - chunks, - }) => Self::PersistChunks { - partition: PartitionAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from(table_name.as_str()), - partition_key: Arc::from(partition_key.as_str()), - }, - chunks, - }, - Job::DropChunk(management::DropChunk { - db_name, - partition_key, - table_name, - chunk_id, - }) => Self::DropChunk { - chunk: ChunkAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from(table_name.as_str()), - partition_key: Arc::from(partition_key.as_str()), - chunk_id, - }, - }, - Job::DropPartition(management::DropPartition { - db_name, - partition_key, - table_name, - }) => Self::DropPartition { - partition: PartitionAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from(table_name.as_str()), - partition_key: Arc::from(partition_key.as_str()), - }, - }, - } - } -} - -impl TryFrom for data_types::job::Operation { - type Error = FieldViolation; - - fn try_from(operation: longrunning::Operation) -> Result { - let metadata: Any = operation - .metadata - .ok_or_else(|| FieldViolation::required("metadata"))?; - - if !protobuf_type_url_eq(&metadata.type_url, management::OPERATION_METADATA) { - return Err(FieldViolation { - field: "metadata.type_url".to_string(), - description: "Unexpected field type".to_string(), - }); - } - - let meta: management::OperationMetadata = - prost::Message::decode(metadata.value).field("metadata.value")?; - - let status = match &operation.result { - None => OperationStatus::Running, - Some(longrunning::operation::Result::Response(_)) => OperationStatus::Success, - Some(longrunning::operation::Result::Error(status)) => { - if status.code == tonic::Code::Cancelled as i32 { - OperationStatus::Cancelled - } else { - OperationStatus::Errored - } - } - }; - - Ok(Self { - id: operation.name.parse().field("name")?, - total_count: meta.total_count, - pending_count: meta.pending_count, - success_count: meta.success_count, - error_count: meta.error_count, - cancelled_count: meta.cancelled_count, - dropped_count: meta.dropped_count, - wall_time: std::time::Duration::from_nanos(meta.wall_nanos), - cpu_time: std::time::Duration::from_nanos(meta.cpu_nanos), - job: meta.job.map(Into::into), - status, - }) - } -} diff --git a/generated_types/src/lib.rs b/generated_types/src/lib.rs index 56f376a368..0176d23b84 100644 --- a/generated_types/src/lib.rs +++ b/generated_types/src/lib.rs @@ -10,6 +10,10 @@ pub mod influxdata { pub mod platform { pub mod storage { include!(concat!(env!("OUT_DIR"), 
"/influxdata.platform.storage.rs")); + include!(concat!( + env!("OUT_DIR"), + "/influxdata.platform.storage.serde.rs" + )); // Can't implement `Default` because `prost::Message` implements `Default` impl TimestampRange { @@ -27,6 +31,10 @@ pub mod influxdata { pub mod catalog { pub mod v1 { include!(concat!(env!("OUT_DIR"), "/influxdata.iox.catalog.v1.rs")); + include!(concat!( + env!("OUT_DIR"), + "/influxdata.iox.catalog.v1.serde.rs" + )); } } @@ -37,12 +45,20 @@ pub mod influxdata { "influxdata.iox.management.v1.OperationMetadata"; include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs")); + include!(concat!( + env!("OUT_DIR"), + "/influxdata.iox.management.v1.serde.rs" + )); } } pub mod write { pub mod v1 { include!(concat!(env!("OUT_DIR"), "/influxdata.iox.write.v1.rs")); + include!(concat!( + env!("OUT_DIR"), + "/influxdata.iox.write.v1.serde.rs" + )); } } } @@ -50,6 +66,7 @@ pub mod influxdata { pub mod pbdata { pub mod v1 { include!(concat!(env!("OUT_DIR"), "/influxdata.pbdata.v1.rs")); + include!(concat!(env!("OUT_DIR"), "/influxdata.pbdata.v1.serde.rs")); } } } diff --git a/google_types/Cargo.toml b/google_types/Cargo.toml index cca54af075..1c18f2c3d7 100644 --- a/google_types/Cargo.toml +++ b/google_types/Cargo.toml @@ -8,8 +8,10 @@ edition = "2018" [dependencies] # In alphabetical order bytes = { version = "1.0", features = ["serde"] } chrono = "0.4" +pbjson = { path = "../pbjson" } prost = "0.8" serde = { version = "1.0", features = ["derive"] } [build-dependencies] # In alphabetical order prost-build = "0.8" +pbjson_build = { path = "../pbjson_build" } diff --git a/google_types/build.rs b/google_types/build.rs index df67311be4..28818c1e0d 100644 --- a/google_types/build.rs +++ b/google_types/build.rs @@ -1,6 +1,7 @@ //! Compiles Protocol Buffers and FlatBuffers schema definitions into //! native Rust types. +use std::env; use std::path::PathBuf; type Error = Box; @@ -16,16 +17,18 @@ fn main() -> Result<()> { println!("cargo:rerun-if-changed={}", proto_file.display()); } + let descriptor_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("proto_descriptor.bin"); prost_build::Config::new() + .file_descriptor_set_path(&descriptor_path) .compile_well_known_types() .disable_comments(&["."]) - // approximates jsonpb. This is still not enough to deal with the special cases like Any. - .type_attribute( - ".google", - "#[derive(serde::Serialize,serde::Deserialize)] #[serde(rename_all = \"camelCase\")]", - ) .bytes(&[".google"]) .compile_protos(&proto_files, &[root])?; + let descriptor_set = std::fs::read(descriptor_path)?; + pbjson_build::Builder::new() + .register_descriptors(&descriptor_set)? 
+        .build(&[".google"])?;
+
     Ok(())
 }
diff --git a/google_types/src/lib.rs b/google_types/src/lib.rs
index 13d3fad7e2..078916f103 100644
--- a/google_types/src/lib.rs
+++ b/google_types/src/lib.rs
@@ -17,6 +17,7 @@ mod pb {
     use std::convert::{TryFrom, TryInto};
 
     include!(concat!(env!("OUT_DIR"), "/google.protobuf.rs"));
+    include!(concat!(env!("OUT_DIR"), "/google.protobuf.serde.rs"));
 
     impl TryFrom<Duration> for std::time::Duration {
         type Error = std::num::TryFromIntError;
diff --git a/influxdb_iox_client/src/client/management.rs b/influxdb_iox_client/src/client/management.rs
index 5b6392281e..8a1ea54f9c 100644
--- a/influxdb_iox_client/src/client/management.rs
+++ b/influxdb_iox_client/src/client/management.rs
@@ -3,8 +3,8 @@ use thiserror::Error;
 
 use self::generated_types::{management_service_client::ManagementServiceClient, *};
 use crate::connection::Connection;
-use ::generated_types::google::longrunning::Operation;
+use crate::google::{longrunning::IoxOperation, FieldViolation};
 
 use std::convert::TryInto;
 use std::num::NonZeroU32;
@@ -200,6 +200,10 @@ pub enum CreateDummyJobError {
     #[error("Server returned an empty response")]
     EmptyResponse,
 
+    /// Response payload was invalid
+    #[error("Invalid response: {0}")]
+    InvalidResponse(#[from] FieldViolation),
+
     /// Client received an unexpected error from the server
     #[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
     ServerError(tonic::Status),
@@ -284,6 +288,10 @@ pub enum ClosePartitionChunkError {
     #[error("Server unavailable: {}", .0.message())]
     Unavailable(tonic::Status),
 
+    /// Response payload was invalid
+    #[error("Invalid response: {0}")]
+    InvalidResponse(#[from] FieldViolation),
+
     /// Client received an unexpected error from the server
     #[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
     ServerError(tonic::Status),
@@ -336,6 +344,10 @@ pub enum WipePersistedCatalogError {
     #[error("Server returned an empty response")]
     EmptyResponse,
 
+    /// Response payload was invalid
+    #[error("Invalid response: {0}")]
+    InvalidResponse(#[from] FieldViolation),
+
     /// Client received an unexpected error from the server
     #[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
     ServerError(tonic::Status),
@@ -842,7 +854,7 @@ impl Client {
     pub async fn create_dummy_job(
         &mut self,
         nanos: Vec<u64>,
-    ) -> Result<Operation, CreateDummyJobError> {
+    ) -> Result<IoxOperation, CreateDummyJobError> {
         let response = self
             .inner
             .create_dummy_job(CreateDummyJobRequest { nanos })
@@ -852,7 +864,8 @@ impl Client {
         Ok(response
             .into_inner()
             .operation
-            .ok_or(CreateDummyJobError::EmptyResponse)?)
+            .ok_or(CreateDummyJobError::EmptyResponse)?
+            .try_into()?)
     }
 
     /// Closes the specified chunk in the specified partition and
@@ -865,7 +878,7 @@ impl Client {
         table_name: impl Into<String> + Send,
         partition_key: impl Into<String> + Send,
         chunk_id: u32,
-    ) -> Result<Operation, ClosePartitionChunkError> {
+    ) -> Result<IoxOperation, ClosePartitionChunkError> {
         let db_name = db_name.into();
         let partition_key = partition_key.into();
         let table_name = table_name.into();
@@ -888,7 +901,8 @@ impl Client {
         Ok(response
             .into_inner()
             .operation
-            .ok_or(ClosePartitionChunkError::EmptyResponse)?)
+            .ok_or(ClosePartitionChunkError::EmptyResponse)?
+            .try_into()?)
     }
 
     /// Unload chunk from read buffer but keep it in object store.
@@ -929,7 +943,7 @@ impl Client {
     pub async fn wipe_persisted_catalog(
         &mut self,
         db_name: impl Into<String> + Send,
-    ) -> Result<Operation, WipePersistedCatalogError> {
+    ) -> Result<IoxOperation, WipePersistedCatalogError> {
         let db_name = db_name.into();
 
         let response = self
@@ -947,7 +961,8 @@ impl Client {
         Ok(response
             .into_inner()
             .operation
-            .ok_or(WipePersistedCatalogError::EmptyResponse)?)
+            .ok_or(WipePersistedCatalogError::EmptyResponse)?
+            .try_into()?)
     }
 
     /// Skip replay of an uninitialized database.
diff --git a/influxdb_iox_client/src/client/operations.rs b/influxdb_iox_client/src/client/operations.rs
index dcc0fb6fb0..781f2ec557 100644
--- a/influxdb_iox_client/src/client/operations.rs
+++ b/influxdb_iox_client/src/client/operations.rs
@@ -1,11 +1,9 @@
 use thiserror::Error;
 
-use ::generated_types::{
-    google::FieldViolation, influxdata::iox::management::v1 as management, protobuf_type_url_eq,
-};
-
 use self::generated_types::{operations_client::OperationsClient, *};
 use crate::connection::Connection;
 
+use std::convert::TryInto;
+
 /// Re-export generated_types
 pub mod generated_types {
     pub use generated_types::google::longrunning::*;
@@ -16,7 +14,7 @@ pub enum Error {
     /// Client received an invalid response
     #[error("Invalid server response: {}", .0)]
-    InvalidResponse(#[from] FieldViolation),
+    InvalidResponse(#[from] ::generated_types::google::FieldViolation),
 
     /// Operation was not found
     #[error("Operation not found: {}", .0)]
@@ -66,7 +64,7 @@ impl Client {
     }
 
     /// Get information of all client operation
-    pub async fn list_operations(&mut self) -> Result<Vec<ClientOperation>> {
+    pub async fn list_operations(&mut self) -> Result<Vec<IoxOperation>> {
         Ok(self
             .inner
             .list_operations(ListOperationsRequest::default())
@@ -75,12 +73,12 @@
             .into_inner()
             .operations
             .into_iter()
-            .map(|o| ClientOperation::try_new(o).unwrap())
-            .collect())
+            .map(TryInto::try_into)
+            .collect::<Result<_, _>>()?)
     }
 
     /// Get information about a specific operation
-    pub async fn get_operation(&mut self, id: usize) -> Result<Operation> {
+    pub async fn get_operation(&mut self, id: usize) -> Result<IoxOperation> {
         Ok(self
             .inner
             .get_operation(GetOperationRequest {
                 name: id.to_string(),
             })
             .await
             .map_err(|e| match e.code() {
                 tonic::Code::NotFound => Error::NotFound(id),
                 _ => Error::ServerError(e),
             })?
-            .into_inner())
+            .into_inner()
+            .try_into()?)
     }
 
     /// Cancel a given operation
@@ -115,7 +114,7 @@
         &mut self,
         id: usize,
         timeout: Option<std::time::Duration>,
-    ) -> Result<Operation> {
+    ) -> Result<IoxOperation> {
         Ok(self
             .inner
             .wait_operation(WaitOperationRequest {
@@ -127,50 +126,7 @@
                 tonic::Code::NotFound => Error::NotFound(id),
                 _ => Error::ServerError(e),
             })?
-            .into_inner())
-    }
-
-    /// Return the Client Operation
-    pub async fn client_operation(&mut self, id: usize) -> Result<ClientOperation> {
-        let operation = self.get_operation(id).await?;
-        ClientOperation::try_new(operation)
-    }
-}
-
-/// IOx's Client Operation
-#[derive(Debug, Clone)]
-pub struct ClientOperation {
-    inner: generated_types::Operation,
-}
-
-impl ClientOperation {
-    /// Create a new Cient Operation
-    pub fn try_new(operation: generated_types::Operation) -> Result<Self> {
-        if operation.metadata.is_some() {
-            let metadata = operation.metadata.clone().unwrap();
-            if !protobuf_type_url_eq(&metadata.type_url, management::OPERATION_METADATA) {
-                return Err(Error::WrongOperationMetaData);
-            }
-        } else {
-            return Err(Error::NotFound(0));
-        }
-
-        Ok(Self { inner: operation })
-    }
-
-    /// Return Metadata for this client operation
-    pub fn metadata(&self) -> management::OperationMetadata {
-        prost::Message::decode(self.inner.metadata.clone().unwrap().value)
-            .expect("failed to decode metadata")
-    }
-
-    /// Return name of this operation
-    pub fn name(&self) -> &str {
-        &self.inner.name
-    }
-
-    /// Return the inner's Operation
-    pub fn operation(self) -> Operation {
-        self.inner
+            .into_inner()
+            .try_into()?)
} } diff --git a/iox_data_generator/src/lib.rs b/iox_data_generator/src/lib.rs index b57edf3103..b14eeb0ec1 100644 --- a/iox_data_generator/src/lib.rs +++ b/iox_data_generator/src/lib.rs @@ -77,6 +77,15 @@ pub enum Error { /// Underlying `agent` module error that caused this problem source: agent::Error, }, + + /// Error that may happen when constructing an agent's writer + #[snafu(display("Could not create writer for agent `{}`, caused by:\n{}", name, source))] + CouldNotCreateAgentWriter { + /// The name of the relevant agent + name: String, + /// Underlying `write` module error that caused this problem + source: write::Error, + }, } type Result = std::result::Result; @@ -135,7 +144,9 @@ pub async fn generate( ) .context(CouldNotCreateAgent { name: &agent_name })?; - let agent_points_writer = points_writer_builder.build_for_agent(&agent_name); + let agent_points_writer = points_writer_builder + .build_for_agent(&agent_name) + .context(CouldNotCreateAgentWriter { name: &agent_name })?; handles.push(tokio::task::spawn(async move { agent.generate_all(agent_points_writer, batch_size).await diff --git a/iox_data_generator/src/write.rs b/iox_data_generator/src/write.rs index fb0966f8d0..ae68d93940 100644 --- a/iox_data_generator/src/write.rs +++ b/iox_data_generator/src/write.rs @@ -10,14 +10,23 @@ use std::{ }; use std::{ fs, - fs::OpenOptions, + fs::{File, OpenOptions}, + io::BufWriter, path::{Path, PathBuf}, }; -use tracing::info; /// Errors that may happen while writing points. #[derive(Snafu, Debug)] pub enum Error { + /// Error that may happen when writing line protocol to a file + #[snafu(display("Could open line protocol file {}: {}", filename.display(), source))] + CantOpenLineProtocolFile { + /// The location of the file we tried to open + filename: PathBuf, + /// Underlying IO error that caused this problem + source: std::io::Error, + }, + /// Error that may happen when writing line protocol to a no-op sink #[snafu(display("Could not generate line protocol: {}", source))] CantWriteToNoOp { @@ -174,7 +183,7 @@ impl PointsWriterBuilder { /// Create a writer out of this writer's configuration for a particular /// agent that runs in a separate thread/task. 
- pub fn build_for_agent(&mut self, agent_name: &str) -> PointsWriter { + pub fn build_for_agent(&mut self, agent_name: &str) -> Result { let inner_writer = match &mut self.config { PointsWriterConfig::Api { client, @@ -189,7 +198,16 @@ impl PointsWriterBuilder { let mut filename = dir_path.clone(); filename.push(agent_name); filename.set_extension("txt"); - InnerPointsWriter::File(filename) + + let file = OpenOptions::new() + .append(true) + .create(true) + .open(&filename) + .context(CantOpenLineProtocolFile { filename })?; + + let file = BufWriter::new(file); + + InnerPointsWriter::File { file } } PointsWriterConfig::NoOp { perform_write } => InnerPointsWriter::NoOp { perform_write: *perform_write, @@ -204,7 +222,7 @@ impl PointsWriterBuilder { PointsWriterConfig::Stdout => InnerPointsWriter::Stdout, }; - PointsWriter { inner_writer } + Ok(PointsWriter { inner_writer }) } } @@ -228,7 +246,9 @@ enum InnerPointsWriter { org: String, bucket: String, }, - File(PathBuf), + File { + file: BufWriter, + }, NoOp { perform_write: bool, }, @@ -250,22 +270,12 @@ impl InnerPointsWriter { .await .context(CantWriteToApi)?; } - Self::File(filename) => { - info!("Opening file {:?}", filename); - let num_points = points.len(); - let file = OpenOptions::new() - .append(true) - .create(true) - .open(&filename) - .context(CantWriteToLineProtocolFile)?; - - let mut file = std::io::BufWriter::new(file); + Self::File { file } => { for point in points { point - .write_data_point_to(&mut file) + .write_data_point_to(&mut *file) .context(CantWriteToLineProtocolFile)?; } - info!("Wrote {} points to {:?}", num_points, filename); } Self::NoOp { perform_write } => { if *perform_write { diff --git a/parquet_file/src/catalog/cleanup.rs b/parquet_file/src/catalog/cleanup.rs index eb6d9d303e..85bb8f2e58 100644 --- a/parquet_file/src/catalog/cleanup.rs +++ b/parquet_file/src/catalog/cleanup.rs @@ -1,7 +1,6 @@ //! Methods to cleanup the object store. 
use std::{collections::HashSet, sync::Arc}; -use crate::catalog::api::{CatalogParquetInfo, CatalogState, PreservedCatalog}; use futures::TryStreamExt; use iox_object_store::{IoxObjectStore, ParquetFilePath}; use object_store::{ObjectStore, ObjectStoreApi}; @@ -9,6 +8,11 @@ use observability_deps::tracing::info; use parking_lot::Mutex; use snafu::{ResultExt, Snafu}; +use crate::catalog::{ + core::PreservedCatalog, + interface::{CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError}, +}; + #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Error from read operation while cleaning object store: {}", source))] @@ -22,7 +26,7 @@ pub enum Error { }, #[snafu(display("Error from catalog loading while cleaning object store: {}", source))] - CatalogLoadError { source: crate::catalog::api::Error }, + CatalogLoadError { source: crate::catalog::core::Error }, } pub type Result = std::result::Result; @@ -124,12 +128,12 @@ impl CatalogState for TracerCatalogState { &mut self, _iox_object_store: Arc, info: CatalogParquetInfo, - ) -> crate::catalog::api::Result<()> { + ) -> Result<(), CatalogStateAddError> { self.files.lock().insert(info.path); Ok(()) } - fn remove(&mut self, _path: &ParquetFilePath) -> crate::catalog::api::Result<()> { + fn remove(&mut self, _path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> { // Do NOT remove the file since we still need it for time travel Ok(()) } diff --git a/parquet_file/src/catalog/api.rs b/parquet_file/src/catalog/core.rs similarity index 95% rename from parquet_file/src/catalog/api.rs rename to parquet_file/src/catalog/core.rs index 507c974a7f..5550d6fdf6 100644 --- a/parquet_file/src/catalog/api.rs +++ b/parquet_file/src/catalog/core.rs @@ -1,10 +1,13 @@ //! Catalog preservation and transaction handling. 
use crate::{ - catalog::internals::{ - proto_io::{load_transaction_proto, store_transaction_proto}, - proto_parse, - types::{FileType, TransactionKey}, + catalog::{ + interface::{CatalogParquetInfo, CatalogState, CheckpointData}, + internals::{ + proto_io::{load_transaction_proto, store_transaction_proto}, + proto_parse, + types::{FileType, TransactionKey}, + }, }, metadata::IoxParquetMetaData, }; @@ -113,55 +116,9 @@ pub enum Error { #[snafu(display("Upgrade path not implemented/supported: {}", format))] UnsupportedUpgrade { format: String }, - #[snafu(display("Parquet already exists in catalog: {:?}", path))] - ParquetFileAlreadyExists { path: ParquetFilePath }, - - #[snafu(display("Parquet does not exist in catalog: {:?}", path))] - ParquetFileDoesNotExist { path: ParquetFilePath }, - #[snafu(display("Cannot decode parquet metadata: {}", source))] MetadataDecodingFailed { source: crate::metadata::Error }, - #[snafu( - display("Cannot extract metadata from {:?}: {}", path, source), - visibility(pub) - )] - MetadataExtractFailed { - source: crate::metadata::Error, - path: ParquetFilePath, - }, - - #[snafu( - display("Schema for {:?} does not work with existing schema: {}", path, source), - visibility(pub) - )] - SchemaError { - source: Box, - path: ParquetFilePath, - }, - - #[snafu( - display( - "Internal error: Using checkpoints from {:?} leads to broken replay plan: {}, catalog likely broken", - path, - source - ), - visibility(pub) - )] - ReplayPlanError { - source: Box, - path: ParquetFilePath, - }, - - #[snafu( - display("Cannot create parquet chunk from {:?}: {}", path, source), - visibility(pub) - )] - ChunkCreationFailed { - source: crate::chunk::Error, - path: ParquetFilePath, - }, - #[snafu(display("Catalog already exists"))] AlreadyExists {}, @@ -177,44 +134,20 @@ pub enum Error { #[snafu(display("Cannot commit transaction: {}", source))] CommitError { source: Box }, + + #[snafu(display("Cannot add parquet file during load: {}", source))] + AddError { + source: crate::catalog::interface::CatalogStateAddError, + }, + + #[snafu(display("Cannot remove parquet file during load: {}", source))] + RemoveError { + source: crate::catalog::interface::CatalogStateRemoveError, + }, } pub type Result = std::result::Result; -/// Struct containing all information that a catalog received for a new parquet file. -#[derive(Debug, Clone)] -pub struct CatalogParquetInfo { - /// Path within this database. - pub path: ParquetFilePath, - - /// Size of the parquet file, in bytes - pub file_size_bytes: usize, - - /// Associated parquet metadata. - pub metadata: Arc, -} - -/// Abstraction over how the in-memory state of the catalog works. -pub trait CatalogState { - /// Input to create a new empty instance. - /// - /// See [`new_empty`](Self::new_empty) for details. - type EmptyInput: Send; - - /// Create empty state w/o any known files. - fn new_empty(db_name: &str, data: Self::EmptyInput) -> Self; - - /// Add parquet file to state. - fn add( - &mut self, - iox_object_store: Arc, - info: CatalogParquetInfo, - ) -> Result<()>; - - /// Remove parquet file from state. - fn remove(&mut self, path: &ParquetFilePath) -> Result<()>; -} - /// In-memory view of the preserved catalog. 
pub struct PreservedCatalog { // We need an RWLock AND a semaphore, so that readers are NOT blocked during an open @@ -605,20 +538,22 @@ impl OpenTransaction { let metadata = Arc::new(metadata); - state.add( - Arc::clone(iox_object_store), - CatalogParquetInfo { - path, - file_size_bytes, - metadata, - }, - )?; + state + .add( + Arc::clone(iox_object_store), + CatalogParquetInfo { + path, + file_size_bytes, + metadata, + }, + ) + .context(AddError)?; } proto::transaction::action::Action::RemoveParquet(a) => { let path = proto_parse::parse_dirs_and_filename(a.path.as_ref().context(PathRequired)?) .context(ProtobufParseError)?; - state.remove(&path)?; + state.remove(&path).context(RemoveError)?; } }; Ok(()) @@ -737,20 +672,6 @@ impl OpenTransaction { } } -/// Structure that holds all information required to create a checkpoint. -/// -/// Note that while checkpoint are addressed using the same schema as we use for transaction -/// (revision counter, UUID), they contain the changes at the end (aka including) the transaction -/// they refer. -#[derive(Debug)] -pub struct CheckpointData { - /// List of all Parquet files that are currently (i.e. by the current version) tracked by the - /// catalog. - /// - /// If a file was once added but later removed it MUST NOT appear in the result. - pub files: HashMap, -} - /// Handle for an open uncommitted transaction. /// /// Dropping this object w/o calling [`commit`](Self::commit) will issue a warning. diff --git a/parquet_file/src/catalog/dump.rs b/parquet_file/src/catalog/dump.rs index 5fb7668e5c..55d83ffb20 100644 --- a/parquet_file/src/catalog/dump.rs +++ b/parquet_file/src/catalog/dump.rs @@ -223,8 +223,7 @@ mod tests { use crate::{ catalog::{ - api::{CatalogParquetInfo, PreservedCatalog}, - test_helpers::TestCatalogState, + core::PreservedCatalog, interface::CatalogParquetInfo, test_helpers::TestCatalogState, }, test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize}, }; diff --git a/parquet_file/src/catalog/interface.rs b/parquet_file/src/catalog/interface.rs new file mode 100644 index 0000000000..d0b609d814 --- /dev/null +++ b/parquet_file/src/catalog/interface.rs @@ -0,0 +1,99 @@ +//! Abstract interfaces to make different users work with the perserved catalog. +use std::{collections::HashMap, sync::Arc}; + +use iox_object_store::{IoxObjectStore, ParquetFilePath}; +use snafu::Snafu; + +use crate::metadata::IoxParquetMetaData; + +/// Struct containing all information that a catalog received for a new parquet file. +#[derive(Debug, Clone)] +pub struct CatalogParquetInfo { + /// Path within this database. + pub path: ParquetFilePath, + + /// Size of the parquet file, in bytes + pub file_size_bytes: usize, + + /// Associated parquet metadata. 
+ pub metadata: Arc, +} + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub))] +pub enum CatalogStateAddError { + #[snafu(display("Cannot extract metadata from {:?}: {}", path, source))] + MetadataExtractFailed { + source: crate::metadata::Error, + path: ParquetFilePath, + }, + + #[snafu(display("Schema for {:?} does not work with existing schema: {}", path, source))] + SchemaError { + source: Box, + path: ParquetFilePath, + }, + + #[snafu( + display( + "Internal error: Using checkpoints from {:?} leads to broken replay plan: {}, catalog likely broken", + path, + source + ), + )] + ReplayPlanError { + source: Box, + path: ParquetFilePath, + }, + + #[snafu(display("Cannot create parquet chunk from {:?}: {}", path, source))] + ChunkCreationFailed { + source: crate::chunk::Error, + path: ParquetFilePath, + }, + + #[snafu(display("Parquet already exists in catalog: {:?}", path))] + ParquetFileAlreadyExists { path: ParquetFilePath }, +} + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub))] +pub enum CatalogStateRemoveError { + #[snafu(display("Parquet does not exist in catalog: {:?}", path))] + ParquetFileDoesNotExist { path: ParquetFilePath }, +} + +/// Abstraction over how the in-memory state of the catalog works. +pub trait CatalogState { + /// Input to create a new empty instance. + /// + /// See [`new_empty`](Self::new_empty) for details. + type EmptyInput: Send; + + /// Create empty state w/o any known files. + fn new_empty(db_name: &str, data: Self::EmptyInput) -> Self; + + /// Add parquet file to state. + fn add( + &mut self, + iox_object_store: Arc, + info: CatalogParquetInfo, + ) -> Result<(), CatalogStateAddError>; + + /// Remove parquet file from state. + fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError>; +} + +/// Structure that holds all information required to create a checkpoint. +/// +/// Note that while checkpoint are addressed using the same schema as we use for transaction +/// (revision counter, UUID), they contain the changes at the end (aka including) the transaction +/// they refer. +#[derive(Debug)] +pub struct CheckpointData { + /// List of all Parquet files that are currently (i.e. by the current version) tracked by the + /// catalog. + /// + /// If a file was once added but later removed it MUST NOT appear in the result. + pub files: HashMap, +} diff --git a/parquet_file/src/catalog/mod.rs b/parquet_file/src/catalog/mod.rs index 70132e8c7f..ebdb5ae680 100644 --- a/parquet_file/src/catalog/mod.rs +++ b/parquet_file/src/catalog/mod.rs @@ -1,6 +1,7 @@ -pub mod api; pub mod cleanup; +pub mod core; pub mod dump; +pub mod interface; mod internals; pub mod prune; pub mod rebuild; diff --git a/parquet_file/src/catalog/prune.rs b/parquet_file/src/catalog/prune.rs index 442747712b..4104ab9ae4 100644 --- a/parquet_file/src/catalog/prune.rs +++ b/parquet_file/src/catalog/prune.rs @@ -8,7 +8,7 @@ use object_store::{ObjectStore, ObjectStoreApi}; use snafu::{ResultExt, Snafu}; use crate::catalog::{ - api::{ProtoIOError, ProtoParseError}, + core::{ProtoIOError, ProtoParseError}, internals::{proto_io::load_transaction_proto, proto_parse::parse_timestamp}, }; @@ -33,7 +33,7 @@ pub enum Error { pub type Result = std::result::Result; -/// Prune history of [`PreservedCatalog`](crate::catalog::api::PreservedCatalog). +/// Prune history of [`PreservedCatalog`](crate::catalog::core::PreservedCatalog). /// /// This deletes all transactions and checkpoints that were started prior to `before`. 
Note that this only deletes data /// that is safe to delete when time travel to `before` is allowed. For example image the following transactions: @@ -133,8 +133,7 @@ fn is_checkpoint_or_zero(path: &TransactionFilePath) -> bool { mod tests { use crate::{ catalog::{ - api::{CheckpointData, PreservedCatalog}, - test_helpers::TestCatalogState, + core::PreservedCatalog, interface::CheckpointData, test_helpers::TestCatalogState, }, test_utils::make_iox_object_store, }; diff --git a/parquet_file/src/catalog/rebuild.rs b/parquet_file/src/catalog/rebuild.rs index 663741d0df..0c2fd528bd 100644 --- a/parquet_file/src/catalog/rebuild.rs +++ b/parquet_file/src/catalog/rebuild.rs @@ -7,13 +7,17 @@ use observability_deps::tracing::error; use snafu::{ResultExt, Snafu}; use crate::{ - catalog::api::{CatalogParquetInfo, CatalogState, PreservedCatalog}, + catalog::{ + core::PreservedCatalog, + interface::{CatalogParquetInfo, CatalogState}, + }, metadata::IoxParquetMetaData, }; + #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Cannot create new empty catalog: {}", source))] - NewEmptyFailure { source: crate::catalog::api::Error }, + NewEmptyFailure { source: crate::catalog::core::Error }, #[snafu(display("Cannot read store: {}", source))] ReadFailure { source: object_store::Error }, @@ -25,13 +29,15 @@ pub enum Error { }, #[snafu(display("Cannot add file to transaction: {}", source))] - FileRecordFailure { source: crate::catalog::api::Error }, + FileRecordFailure { + source: crate::catalog::interface::CatalogStateAddError, + }, #[snafu(display("Cannot commit transaction: {}", source))] - CommitFailure { source: crate::catalog::api::Error }, + CommitFailure { source: crate::catalog::core::Error }, #[snafu(display("Cannot create checkpoint: {}", source))] - CheckpointFailure { source: crate::catalog::api::Error }, + CheckpointFailure { source: crate::catalog::core::Error }, } pub type Result = std::result::Result; @@ -164,7 +170,7 @@ async fn read_parquet( mod tests { use super::*; use crate::{ - catalog::{api::PreservedCatalog, test_helpers::TestCatalogState}, + catalog::{core::PreservedCatalog, test_helpers::TestCatalogState}, metadata::IoxMetadata, storage::{MemWriter, Storage}, test_utils::{ diff --git a/parquet_file/src/catalog/test_helpers.rs b/parquet_file/src/catalog/test_helpers.rs index 804b51ad02..ba2f4b5c46 100644 --- a/parquet_file/src/catalog/test_helpers.rs +++ b/parquet_file/src/catalog/test_helpers.rs @@ -12,7 +12,11 @@ use snafu::ResultExt; use crate::{ catalog::{ - api::{CatalogParquetInfo, CatalogState, CheckpointData, PreservedCatalog}, + core::PreservedCatalog, + interface::{ + CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError, + CheckpointData, + }, internals::{ proto_io::{load_transaction_proto, store_transaction_proto}, types::TransactionKey, @@ -61,8 +65,8 @@ impl TestCatalogState { } /// Inserts a file into this catalog state - pub fn insert(&mut self, info: CatalogParquetInfo) -> crate::catalog::api::Result<()> { - use crate::catalog::api::{Error, MetadataExtractFailed}; + pub fn insert(&mut self, info: CatalogParquetInfo) -> Result<(), CatalogStateAddError> { + use crate::catalog::interface::MetadataExtractFailed; let iox_md = info .metadata @@ -80,7 +84,7 @@ impl TestCatalogState { match partition.chunks.entry(iox_md.chunk_id) { Occupied(o) => { - return Err(Error::ParquetFileAlreadyExists { + return Err(CatalogStateAddError::ParquetFileAlreadyExists { path: o.get().path.clone(), }); } @@ -104,13 +108,11 @@ impl CatalogState for 
TestCatalogState { &mut self, _iox_object_store: Arc, info: CatalogParquetInfo, - ) -> crate::catalog::api::Result<()> { + ) -> Result<(), CatalogStateAddError> { self.insert(info) } - fn remove(&mut self, path: &ParquetFilePath) -> crate::catalog::api::Result<()> { - use crate::catalog::api::Error; - + fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> { let partitions = self .tables .values_mut() @@ -136,7 +138,7 @@ impl CatalogState for TestCatalogState { } match removed { - 0 => Err(Error::ParquetFileDoesNotExist { path: path.clone() }), + 0 => Err(CatalogStateRemoveError::ParquetFileDoesNotExist { path: path.clone() }), _ => Ok(()), } } @@ -173,8 +175,6 @@ where S: CatalogState + Debug + Send + Sync, F: Fn(&S) -> CheckpointData + Send, { - use crate::catalog::api::Error; - // empty state let iox_object_store = make_iox_object_store().await; let (_catalog, mut state) = @@ -317,11 +317,17 @@ where }, ) .unwrap_err(); - assert!(matches!(err, Error::ParquetFileAlreadyExists { .. })); + assert!(matches!( + err, + CatalogStateAddError::ParquetFileAlreadyExists { .. } + )); // does not exist as has a different UUID let err = state.remove(&path).unwrap_err(); - assert!(matches!(err, Error::ParquetFileDoesNotExist { .. })); + assert!(matches!( + err, + CatalogStateRemoveError::ParquetFileDoesNotExist { .. } + )); } assert_checkpoint(&state, &f, &expected); @@ -340,7 +346,10 @@ where }, ) .unwrap_err(); - assert!(matches!(err, Error::ParquetFileAlreadyExists { .. })); + assert!(matches!( + err, + CatalogStateAddError::ParquetFileAlreadyExists { .. } + )); // this transaction will still work let (path, metadata) = @@ -369,12 +378,18 @@ where }, ) .unwrap_err(); - assert!(matches!(err, Error::ParquetFileAlreadyExists { .. })); + assert!(matches!( + err, + CatalogStateAddError::ParquetFileAlreadyExists { .. } + )); // does not exist - as different UUID let path = ParquetFilePath::new(&chunk_addr(7)); let err = state.remove(&path).unwrap_err(); - assert!(matches!(err, Error::ParquetFileDoesNotExist { .. })); + assert!(matches!( + err, + CatalogStateRemoveError::ParquetFileDoesNotExist { .. } + )); // this still works let (path, _) = expected.remove(&7).unwrap(); @@ -382,7 +397,10 @@ where // recently removed let err = state.remove(&path).unwrap_err(); - assert!(matches!(err, Error::ParquetFileDoesNotExist { .. })); + assert!(matches!( + err, + CatalogStateRemoveError::ParquetFileDoesNotExist { .. } + )); } assert_checkpoint(&state, &f, &expected); } diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index f530cfcfc8..450958bd55 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -120,7 +120,7 @@ use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TOutputPro /// For breaking changes, this will change. /// /// **Important: When changing this structure, consider bumping the -/// [catalog transaction version](crate::catalog::api::TRANSACTION_VERSION)!** +/// [catalog transaction version](crate::catalog::core::TRANSACTION_VERSION)!** pub const METADATA_VERSION: u32 = 6; /// File-level metadata key to store the IOx-specific data. 
diff --git a/pbjson/Cargo.toml b/pbjson/Cargo.toml
new file mode 100644
index 0000000000..04ce2185f3
--- /dev/null
+++ b/pbjson/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "pbjson"
+version = "0.1.0"
+authors = ["Raphael Taylor-Davies "]
+edition = "2018"
+description = "Utilities for pbjson conversion"
+
+[dependencies]
+
+serde = { version = "1.0", features = ["derive"] }
+base64 = "0.13"
+
+[dev-dependencies]
+bytes = "1.0"
diff --git a/pbjson/src/lib.rs b/pbjson/src/lib.rs
new file mode 100644
index 0000000000..cb9bdd0b3b
--- /dev/null
+++ b/pbjson/src/lib.rs
@@ -0,0 +1,82 @@
+#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
+#![warn(
+    missing_debug_implementations,
+    clippy::explicit_iter_loop,
+    clippy::use_self,
+    clippy::clone_on_ref_ptr,
+    clippy::future_not_send
+)]
+
+#[doc(hidden)]
+pub mod private {
+    /// Re-export base64
+    pub use base64;
+
+    use serde::Deserialize;
+    use std::str::FromStr;
+
+    /// Used to parse a number from either a string or its raw representation
+    #[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Hash, Ord, Eq)]
+    pub struct NumberDeserialize<T>(pub T);
+
+    #[derive(Deserialize)]
+    #[serde(untagged)]
+    enum Content<'a, T> {
+        Str(&'a str),
+        Number(T),
+    }
+
+    impl<'de, T> serde::Deserialize<'de> for NumberDeserialize<T>
+    where
+        T: FromStr + serde::Deserialize<'de>,
+        <T as FromStr>::Err: std::error::Error,
+    {
+        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where
+            D: serde::Deserializer<'de>,
+        {
+            let content = Content::deserialize(deserializer)?;
+            Ok(Self(match content {
+                Content::Str(v) => v.parse().map_err(serde::de::Error::custom)?,
+                Content::Number(v) => v,
+            }))
+        }
+    }
+
+    #[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Hash, Ord, Eq)]
+    pub struct BytesDeserialize<T>(pub T);
+
+    impl<'de, T> Deserialize<'de> for BytesDeserialize<T>
+    where
+        T: From<Vec<u8>>,
+    {
+        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where
+            D: serde::Deserializer<'de>,
+        {
+            let s: &str = Deserialize::deserialize(deserializer)?;
+            let decoded = base64::decode(s).map_err(serde::de::Error::custom)?;
+            Ok(Self(decoded.into()))
+        }
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::*;
+        use bytes::Bytes;
+        use serde::de::value::{BorrowedStrDeserializer, Error};
+
+        #[test]
+        fn test_bytes() {
+            let raw = vec![2, 5, 62, 2, 5, 7, 8, 43, 5, 8, 4, 23, 5, 7, 7, 3, 2, 5, 196];
+            let encoded = base64::encode(&raw);
+
+            let deserializer = BorrowedStrDeserializer::<'_, Error>::new(&encoded);
+            let a: Bytes = BytesDeserialize::deserialize(deserializer).unwrap().0;
+            let b: Vec<u8> = BytesDeserialize::deserialize(deserializer).unwrap().0;
+
+            assert_eq!(raw.as_slice(), &a);
+            assert_eq!(raw.as_slice(), &b);
+        }
+    }
+}
diff --git a/pbjson_build/Cargo.toml b/pbjson_build/Cargo.toml
new file mode 100644
index 0000000000..41983411dc
--- /dev/null
+++ b/pbjson_build/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "pbjson_build"
+version = "0.1.0"
+authors = ["Raphael Taylor-Davies "]
+edition = "2018"
+description = "Generates Serialize and Deserialize implementations for prost message types"
+
+[dependencies]
+
+heck = "0.3"
+prost = "0.8"
+prost-types = "0.8"
+itertools = "0.10"
+
+[dev-dependencies]
+tempfile = "3.1"
+pbjson_test = { path = "../pbjson_test" }
\ No newline at end of file
diff --git a/pbjson_build/src/descriptor.rs b/pbjson_build/src/descriptor.rs
new file mode 100644
index 0000000000..16501ea93a
--- /dev/null
+++ b/pbjson_build/src/descriptor.rs
@@ -0,0 +1,260 @@
+//! This module contains code to parse and extract the protobuf descriptor
+//!
format for use by the rest of the codebase + +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::fmt::{Display, Formatter}; +use std::io::{Error, ErrorKind, Result}; + +use itertools::{EitherOrBoth, Itertools}; +use prost_types::{ + DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto, + FileDescriptorSet, MessageOptions, OneofDescriptorProto, +}; + +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct Package(String); + +impl Display for Package { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl Package { + pub fn new(s: impl Into) -> Self { + let s = s.into(); + assert!( + !s.starts_with('.'), + "package cannot start with \'.\', got \"{}\"", + s + ); + Self(s) + } +} + +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct TypeName(String); + +impl Display for TypeName { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl TypeName { + pub fn new(s: impl Into) -> Self { + let s = s.into(); + assert!( + !s.contains('.'), + "type name cannot contain \'.\', got \"{}\"", + s + ); + Self(s) + } + + pub fn to_snake_case(&self) -> String { + use heck::SnakeCase; + self.0.to_snake_case() + } + + pub fn to_camel_case(&self) -> String { + use heck::CamelCase; + self.0.to_camel_case() + } + + pub fn to_shouty_snake_case(&self) -> String { + use heck::ShoutySnakeCase; + self.0.to_shouty_snake_case() + } +} + +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct TypePath { + package: Package, + path: Vec, +} + +impl Display for TypePath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.package.fmt(f)?; + for element in &self.path { + write!(f, ".{}", element)?; + } + Ok(()) + } +} + +impl TypePath { + pub fn new(package: Package) -> Self { + Self { + package, + path: Default::default(), + } + } + + pub fn package(&self) -> &Package { + &self.package + } + + pub fn path(&self) -> &[TypeName] { + self.path.as_slice() + } + + pub fn child(&self, name: TypeName) -> Self { + let path = self + .path + .iter() + .cloned() + .chain(std::iter::once(name)) + .collect(); + Self { + package: self.package.clone(), + path, + } + } + + pub fn matches_prefix(&self, prefix: &str) -> bool { + let prefix = match prefix.strip_prefix('.') { + Some(prefix) => prefix, + None => return false, + }; + + if prefix.len() <= self.package.0.len() { + return self.package.0.starts_with(prefix); + } + + match prefix.strip_prefix(&self.package.0) { + Some(prefix) => { + let split = prefix.split('.').skip(1); + for zipped in self.path.iter().zip_longest(split) { + match zipped { + EitherOrBoth::Both(a, b) if a.0.as_str() == b => continue, + EitherOrBoth::Left(_) => return true, + _ => return false, + } + } + true + } + None => false, + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct DescriptorSet { + descriptors: BTreeMap, +} + +impl DescriptorSet { + pub fn new() -> Self { + Self::default() + } + + pub fn register_encoded(&mut self, encoded: &[u8]) -> Result<()> { + let descriptors: FileDescriptorSet = + prost::Message::decode(encoded).map_err(|e| Error::new(ErrorKind::InvalidData, e))?; + + for file in descriptors.file { + let syntax = match file.syntax.as_deref() { + None | Some("proto2") => Syntax::Proto2, + Some("proto3") => Syntax::Proto3, + Some(s) => panic!("unknown syntax: {}", s), + }; + + let package = Package::new(file.package.expect("expected package")); + let path = 
TypePath::new(package); + + for descriptor in file.message_type { + self.register_message(&path, descriptor, syntax) + } + + for descriptor in file.enum_type { + self.register_enum(&path, descriptor) + } + } + + Ok(()) + } + + pub fn iter(&self) -> impl Iterator { + self.descriptors.iter() + } + + fn register_message(&mut self, path: &TypePath, descriptor: DescriptorProto, syntax: Syntax) { + let name = TypeName::new(descriptor.name.expect("expected name")); + let child_path = path.child(name); + + for child_descriptor in descriptor.enum_type { + self.register_enum(&child_path, child_descriptor) + } + + for child_descriptor in descriptor.nested_type { + self.register_message(&child_path, child_descriptor, syntax) + } + + self.register_descriptor( + child_path.clone(), + Descriptor::Message(MessageDescriptor { + path: child_path, + options: descriptor.options, + one_of: descriptor.oneof_decl, + fields: descriptor.field, + syntax, + }), + ); + } + + fn register_enum(&mut self, path: &TypePath, descriptor: EnumDescriptorProto) { + let name = TypeName::new(descriptor.name.expect("expected name")); + self.register_descriptor( + path.child(name), + Descriptor::Enum(EnumDescriptor { + values: descriptor.value, + }), + ); + } + + fn register_descriptor(&mut self, path: TypePath, descriptor: Descriptor) { + match self.descriptors.entry(path) { + Entry::Occupied(o) => panic!("descriptor already registered for {}", o.key()), + Entry::Vacant(v) => v.insert(descriptor), + }; + } +} + +#[derive(Debug, Clone, Copy)] +pub enum Syntax { + Proto2, + Proto3, +} + +#[derive(Debug, Clone)] +pub enum Descriptor { + Enum(EnumDescriptor), + Message(MessageDescriptor), +} + +#[derive(Debug, Clone)] +pub struct EnumDescriptor { + pub values: Vec, +} + +#[derive(Debug, Clone)] +pub struct MessageDescriptor { + pub path: TypePath, + pub options: Option, + pub one_of: Vec, + pub fields: Vec, + pub syntax: Syntax, +} + +impl MessageDescriptor { + /// Whether this is an auto-generated type for the map field + pub fn is_map(&self) -> bool { + self.options + .as_ref() + .and_then(|options| options.map_entry) + .unwrap_or(false) + } +} diff --git a/pbjson_build/src/escape.rs b/pbjson_build/src/escape.rs new file mode 100644 index 0000000000..554ac85eec --- /dev/null +++ b/pbjson_build/src/escape.rs @@ -0,0 +1,26 @@ +///! Contains code to escape strings to avoid collisions with reserved Rust keywords + +pub fn escape_ident(mut ident: String) -> String { + // Copied from prost-build::ident + // + // Use a raw identifier if the identifier matches a Rust keyword: + // https://doc.rust-lang.org/reference/keywords.html. + match ident.as_str() { + // 2015 strict keywords. + | "as" | "break" | "const" | "continue" | "else" | "enum" | "false" + | "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "match" | "mod" | "move" | "mut" + | "pub" | "ref" | "return" | "static" | "struct" | "trait" | "true" + | "type" | "unsafe" | "use" | "where" | "while" + // 2018 strict keywords. + | "dyn" + // 2015 reserved keywords. + | "abstract" | "become" | "box" | "do" | "final" | "macro" | "override" | "priv" | "typeof" + | "unsized" | "virtual" | "yield" + // 2018 reserved keywords. + | "async" | "await" | "try" => ident.insert_str(0, "r#"), + // the following keywords are not supported as raw identifiers and are therefore suffixed with an underscore. 
+ "self" | "super" | "extern" | "crate" => ident += "_", + _ => (), + }; + ident +} diff --git a/pbjson_build/src/generator.rs b/pbjson_build/src/generator.rs new file mode 100644 index 0000000000..f9d205998d --- /dev/null +++ b/pbjson_build/src/generator.rs @@ -0,0 +1,129 @@ +//! This module contains the actual code generation logic + +use std::collections::BTreeMap; +use std::fmt::{Display, Formatter}; +use std::io::{Result, Write}; + +use crate::descriptor::TypePath; + +mod enumeration; +mod message; + +pub use enumeration::generate_enum; +pub use message::generate_message; + +#[derive(Debug, Clone, Copy)] +struct Indent(usize); + +impl Display for Indent { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for _ in 0..self.0 { + write!(f, " ")?; + } + Ok(()) + } +} + +#[derive(Debug)] +pub struct Config { + pub extern_types: BTreeMap, +} + +impl Config { + fn rust_type(&self, path: &TypePath) -> String { + if let Some(t) = self.extern_types.get(path) { + return t.clone(); + } + + let mut ret = String::new(); + let path = path.path(); + assert!(!path.is_empty(), "path cannot be empty"); + + for i in &path[..(path.len() - 1)] { + ret.push_str(i.to_snake_case().as_str()); + ret.push_str("::"); + } + ret.push_str(path.last().unwrap().to_camel_case().as_str()); + ret + } + + fn rust_variant(&self, enumeration: &TypePath, variant: &str) -> String { + use heck::CamelCase; + assert!( + variant + .chars() + .all(|c| matches!(c, '0'..='9' | 'A'..='Z' | '_')), + "illegal variant - {}", + variant + ); + + // TODO: Config to disable stripping prefix + + let enumeration_name = enumeration.path().last().unwrap().to_shouty_snake_case(); + let variant = match variant.strip_prefix(&enumeration_name) { + Some("") => variant, + Some(stripped) => stripped, + None => variant, + }; + variant.to_camel_case() + } +} + +fn write_fields_array<'a, W: Write, I: Iterator>( + writer: &mut W, + indent: usize, + variants: I, +) -> Result<()> { + writeln!(writer, "{}const FIELDS: &[&str] = &[", Indent(indent))?; + for name in variants { + writeln!(writer, "{}\"{}\",", Indent(indent + 1), name)?; + } + writeln!(writer, "{}];", Indent(indent))?; + writeln!(writer) +} + +fn write_serialize_start(indent: usize, rust_type: &str, writer: &mut W) -> Result<()> { + writeln!( + writer, + r#"{indent}impl serde::Serialize for {rust_type} {{ +{indent} #[allow(deprecated)] +{indent} fn serialize(&self, serializer: S) -> Result +{indent} where +{indent} S: serde::Serializer, +{indent} {{"#, + indent = Indent(indent), + rust_type = rust_type + ) +} + +fn write_serialize_end(indent: usize, writer: &mut W) -> Result<()> { + writeln!( + writer, + r#"{indent} }} +{indent}}}"#, + indent = Indent(indent), + ) +} + +fn write_deserialize_start(indent: usize, rust_type: &str, writer: &mut W) -> Result<()> { + writeln!( + writer, + r#"{indent}impl<'de> serde::Deserialize<'de> for {rust_type} {{ +{indent} #[allow(deprecated)] +{indent} fn deserialize(deserializer: D) -> Result +{indent} where +{indent} D: serde::Deserializer<'de>, +{indent} {{"#, + indent = Indent(indent), + rust_type = rust_type + ) +} + +fn write_deserialize_end(indent: usize, writer: &mut W) -> Result<()> { + writeln!( + writer, + r#"{indent} }} +{indent}}}"#, + indent = Indent(indent), + ) +} diff --git a/pbjson_build/src/generator/enumeration.rs b/pbjson_build/src/generator/enumeration.rs new file mode 100644 index 0000000000..d5e2736bd1 --- /dev/null +++ b/pbjson_build/src/generator/enumeration.rs @@ -0,0 +1,138 @@ +//! 
This module contains the code to generate Serialize and Deserialize +//! implementations for enumeration type +//! +//! An enumeration should be decode-able from the full string variant name +//! or its integer tag number, and should encode to the string representation + +use super::{ + write_deserialize_end, write_deserialize_start, write_serialize_end, write_serialize_start, + Config, Indent, +}; +use crate::descriptor::{EnumDescriptor, TypePath}; +use crate::generator::write_fields_array; +use std::io::{Result, Write}; + +pub fn generate_enum( + config: &Config, + path: &TypePath, + descriptor: &EnumDescriptor, + writer: &mut W, +) -> Result<()> { + let rust_type = config.rust_type(path); + + let variants: Vec<_> = descriptor + .values + .iter() + .map(|variant| { + let variant_name = variant.name.clone().unwrap(); + let rust_variant = config.rust_variant(path, &variant_name); + (variant_name, rust_variant) + }) + .collect(); + + // Generate Serialize + write_serialize_start(0, &rust_type, writer)?; + writeln!(writer, "{}let variant = match self {{", Indent(2))?; + for (variant_name, rust_variant) in &variants { + writeln!( + writer, + "{}Self::{} => \"{}\",", + Indent(3), + rust_variant, + variant_name + )?; + } + writeln!(writer, "{}}};", Indent(2))?; + + writeln!(writer, "{}serializer.serialize_str(variant)", Indent(2))?; + write_serialize_end(0, writer)?; + + // Generate Deserialize + write_deserialize_start(0, &rust_type, writer)?; + write_fields_array(writer, 2, variants.iter().map(|(name, _)| name.as_str()))?; + write_visitor(writer, 2, &rust_type, &variants)?; + + // Use deserialize_any to allow users to provide integers or strings + writeln!( + writer, + "{}deserializer.deserialize_any(GeneratedVisitor)", + Indent(2) + )?; + + write_deserialize_end(0, writer)?; + Ok(()) +} + +fn write_visitor( + writer: &mut W, + indent: usize, + rust_type: &str, + variants: &[(String, String)], +) -> Result<()> { + // Protobuf supports deserialization of enumerations both from string and integer values + writeln!( + writer, + r#"{indent}struct GeneratedVisitor; + +{indent}impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {{ +{indent} type Value = {rust_type}; + +{indent} fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{ +{indent} write!(formatter, "expected one of: {{:?}}", &FIELDS) +{indent} }} + +{indent} fn visit_i64(self, v: i64) -> Result +{indent} where +{indent} E: serde::de::Error, +{indent} {{ +{indent} use std::convert::TryFrom; +{indent} i32::try_from(v) +{indent} .ok() +{indent} .and_then({rust_type}::from_i32) +{indent} .ok_or_else(|| {{ +{indent} serde::de::Error::invalid_value(serde::de::Unexpected::Signed(v), &self) +{indent} }}) +{indent} }} + +{indent} fn visit_u64(self, v: u64) -> Result +{indent} where +{indent} E: serde::de::Error, +{indent} {{ +{indent} use std::convert::TryFrom; +{indent} i32::try_from(v) +{indent} .ok() +{indent} .and_then({rust_type}::from_i32) +{indent} .ok_or_else(|| {{ +{indent} serde::de::Error::invalid_value(serde::de::Unexpected::Unsigned(v), &self) +{indent} }}) +{indent} }} + +{indent} fn visit_str(self, value: &str) -> Result +{indent} where +{indent} E: serde::de::Error, +{indent} {{"#, + indent = Indent(indent), + rust_type = rust_type, + )?; + + writeln!(writer, "{}match value {{", Indent(indent + 2))?; + for (variant_name, rust_variant) in variants { + writeln!( + writer, + "{}\"{}\" => Ok({}::{}),", + Indent(indent + 3), + variant_name, + rust_type, + rust_variant + )?; + } + + writeln!( + writer, + 
"{indent}_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),", + indent = Indent(indent + 3) + )?; + writeln!(writer, "{}}}", Indent(indent + 2))?; + writeln!(writer, "{}}}", Indent(indent + 1))?; + writeln!(writer, "{}}}", Indent(indent)) +} diff --git a/pbjson_build/src/generator/message.rs b/pbjson_build/src/generator/message.rs new file mode 100644 index 0000000000..9625554af3 --- /dev/null +++ b/pbjson_build/src/generator/message.rs @@ -0,0 +1,809 @@ +//! This module contains the code to generate Serialize and Deserialize +//! implementations for message types +//! +//! The implementation follows the proto3 [JSON mapping][1] with the default options +//! +//! Importantly: +//! - numeric types can be decoded from either a string or number +//! - 32-bit integers and floats are encoded as numbers +//! - 64-bit integers are encoded as strings +//! - repeated fields are encoded as arrays +//! - bytes are base64 encoded (NOT CURRENTLY SUPPORTED) +//! - messages and maps are encoded as objects +//! - fields are lowerCamelCase except where overridden by the proto definition +//! - default values are not emitted on encode +//! - unrecognised fields error on decode +//! +//! Note: This will not generate code to correctly serialize/deserialize well-known-types +//! such as google.protobuf.Any, google.protobuf.Duration, etc... conversions for these +//! special-cased messages will need to be manually implemented. Once done so, however, +//! any messages containing these types will serialize/deserialize correctly +//! +//! [1]: https://developers.google.com/protocol-buffers/docs/proto3#json + +use std::io::{Result, Write}; + +use crate::message::{Field, FieldModifier, FieldType, Message, OneOf, ScalarType}; + +use super::{ + write_deserialize_end, write_deserialize_start, write_serialize_end, write_serialize_start, + Config, Indent, +}; +use crate::descriptor::TypePath; +use crate::generator::write_fields_array; + +pub fn generate_message( + config: &Config, + message: &Message, + writer: &mut W, +) -> Result<()> { + let rust_type = config.rust_type(&message.path); + + // Generate Serialize + write_serialize_start(0, &rust_type, writer)?; + write_message_serialize(config, 2, message, writer)?; + write_serialize_end(0, writer)?; + + // Generate Deserialize + write_deserialize_start(0, &rust_type, writer)?; + write_deserialize_message(config, 2, message, &rust_type, writer)?; + write_deserialize_end(0, writer)?; + Ok(()) +} + +fn write_field_empty_predicate(member: &Field, writer: &mut W) -> Result<()> { + match (&member.field_type, &member.field_modifier) { + (_, FieldModifier::Required) => unreachable!(), + (_, FieldModifier::Repeated) + | (FieldType::Map(_, _), _) + | (FieldType::Scalar(ScalarType::String), FieldModifier::UseDefault) + | (FieldType::Scalar(ScalarType::Bytes), FieldModifier::UseDefault) => { + write!(writer, "!self.{}.is_empty()", member.rust_field_name()) + } + (_, FieldModifier::Optional) | (FieldType::Message(_), _) => { + write!(writer, "self.{}.is_some()", member.rust_field_name()) + } + (FieldType::Scalar(ScalarType::F64), FieldModifier::UseDefault) + | (FieldType::Scalar(ScalarType::F32), FieldModifier::UseDefault) => { + write!(writer, "self.{} != 0.", member.rust_field_name()) + } + (FieldType::Scalar(ScalarType::Bool), FieldModifier::UseDefault) => { + write!(writer, "self.{}", member.rust_field_name()) + } + (FieldType::Enum(_), FieldModifier::UseDefault) + | (FieldType::Scalar(ScalarType::I64), FieldModifier::UseDefault) + | (FieldType::Scalar(ScalarType::I32), 
FieldModifier::UseDefault) + | (FieldType::Scalar(ScalarType::U32), FieldModifier::UseDefault) + | (FieldType::Scalar(ScalarType::U64), FieldModifier::UseDefault) => { + write!(writer, "self.{} != 0", member.rust_field_name()) + } + } +} + +fn write_message_serialize( + config: &Config, + indent: usize, + message: &Message, + writer: &mut W, +) -> Result<()> { + write_struct_serialize_start(indent, message, writer)?; + + for field in &message.fields { + write_serialize_field(config, indent, field, writer)?; + } + + for one_of in &message.one_ofs { + write_serialize_one_of(indent, config, one_of, writer)?; + } + + write_struct_serialize_end(indent, writer) +} + +fn write_struct_serialize_start( + indent: usize, + message: &Message, + writer: &mut W, +) -> Result<()> { + writeln!(writer, "{}use serde::ser::SerializeStruct;", Indent(indent))?; + + let required_len = message + .fields + .iter() + .filter(|member| member.field_modifier.is_required()) + .count(); + + if required_len != message.fields.len() || !message.one_ofs.is_empty() { + writeln!(writer, "{}let mut len = {};", Indent(indent), required_len)?; + } else { + writeln!(writer, "{}let len = {};", Indent(indent), required_len)?; + } + + for field in &message.fields { + if field.field_modifier.is_required() { + continue; + } + write!(writer, "{}if ", Indent(indent))?; + write_field_empty_predicate(field, writer)?; + writeln!(writer, " {{")?; + writeln!(writer, "{}len += 1;", Indent(indent + 1))?; + writeln!(writer, "{}}}", Indent(indent))?; + } + + for one_of in &message.one_ofs { + writeln!( + writer, + "{}if self.{}.is_some() {{", + Indent(indent), + one_of.rust_field_name() + )?; + writeln!(writer, "{}len += 1;", Indent(indent + 1))?; + writeln!(writer, "{}}}", Indent(indent))?; + } + + if !message.fields.is_empty() || !message.one_ofs.is_empty() { + writeln!( + writer, + "{}let mut struct_ser = serializer.serialize_struct(\"{}\", len)?;", + Indent(indent), + message.path + )?; + } else { + writeln!( + writer, + "{}let struct_ser = serializer.serialize_struct(\"{}\", len)?;", + Indent(indent), + message.path + )?; + } + Ok(()) +} + +fn write_struct_serialize_end(indent: usize, writer: &mut W) -> Result<()> { + writeln!(writer, "{}struct_ser.end()", Indent(indent)) +} + +fn write_decode_variant( + config: &Config, + indent: usize, + value: &str, + path: &TypePath, + writer: &mut W, +) -> Result<()> { + writeln!(writer, "{}::from_i32({})", config.rust_type(path), value)?; + write!( + writer, + "{}.ok_or_else(|| serde::ser::Error::custom(format!(\"Invalid variant {{}}\", {})))", + Indent(indent), + value + ) +} + +/// Depending on the type of the field different ways of accessing field's value +/// are needed - this allows decoupling the type serialization logic from the logic +/// that manipulates its container e.g. 
Vec, Option, HashMap +struct Variable<'a> { + /// A reference to the field's value + as_ref: &'a str, + /// The field's value + as_unref: &'a str, + /// The field without any leading "&" or "*" + raw: &'a str, +} + +fn write_serialize_variable( + config: &Config, + indent: usize, + field: &Field, + variable: Variable<'_>, + writer: &mut W, +) -> Result<()> { + match &field.field_type { + FieldType::Scalar(scalar) => write_serialize_scalar_variable( + indent, + *scalar, + field.field_modifier, + variable, + field.json_name(), + writer, + ), + FieldType::Enum(path) => { + write!(writer, "{}let v = ", Indent(indent))?; + match field.field_modifier { + FieldModifier::Repeated => { + writeln!(writer, "{}.iter().cloned().map(|v| {{", variable.raw)?; + write!(writer, "{}", Indent(indent + 1))?; + write_decode_variant(config, indent + 2, "v", path, writer)?; + writeln!(writer)?; + write!( + writer, + "{}}}).collect::, _>>()", + Indent(indent + 1) + ) + } + _ => write_decode_variant(config, indent + 1, variable.as_unref, path, writer), + }?; + + writeln!(writer, "?;")?; + writeln!( + writer, + "{}struct_ser.serialize_field(\"{}\", &v)?;", + Indent(indent), + field.json_name() + ) + } + FieldType::Map(_, value_type) + if matches!( + value_type.as_ref(), + FieldType::Scalar(ScalarType::I64) + | FieldType::Scalar(ScalarType::U64) + | FieldType::Enum(_) + ) => + { + writeln!( + writer, + "{}let v: std::collections::HashMap<_, _> = {}.iter()", + Indent(indent), + variable.raw + )?; + + match value_type.as_ref() { + FieldType::Scalar(ScalarType::I64) | FieldType::Scalar(ScalarType::U64) => { + writeln!( + writer, + "{}.map(|(k, v)| (k, v.to_string())).collect();", + Indent(indent + 1) + )?; + } + FieldType::Enum(path) => { + writeln!(writer, "{}.map(|(k, v)| {{", Indent(indent + 1))?; + write!(writer, "{}let v = ", Indent(indent + 2))?; + write_decode_variant(config, indent + 3, "*v", path, writer)?; + writeln!(writer, "?;")?; + writeln!(writer, "{}Ok((k, v))", Indent(indent + 2))?; + writeln!( + writer, + "{}}}).collect::>()?;", + Indent(indent + 1) + )?; + } + _ => unreachable!(), + } + writeln!( + writer, + "{}struct_ser.serialize_field(\"{}\", &v)?;", + Indent(indent), + field.json_name() + ) + } + _ => { + writeln!( + writer, + "{}struct_ser.serialize_field(\"{}\", {})?;", + Indent(indent), + field.json_name(), + variable.as_ref + ) + } + } +} + +fn write_serialize_scalar_variable( + indent: usize, + scalar: ScalarType, + field_modifier: FieldModifier, + variable: Variable<'_>, + json_name: String, + writer: &mut W, +) -> Result<()> { + let conversion = match scalar { + ScalarType::I64 | ScalarType::U64 => "ToString::to_string", + ScalarType::Bytes => "pbjson::private::base64::encode", + _ => { + return writeln!( + writer, + "{}struct_ser.serialize_field(\"{}\", {})?;", + Indent(indent), + json_name, + variable.as_ref + ) + } + }; + + match field_modifier { + FieldModifier::Repeated => { + writeln!( + writer, + "{}struct_ser.serialize_field(\"{}\", &{}.iter().map({}).collect::>())?;", + Indent(indent), + json_name, + variable.raw, + conversion + ) + } + _ => { + writeln!( + writer, + "{}struct_ser.serialize_field(\"{}\", {}(&{}).as_str())?;", + Indent(indent), + json_name, + conversion, + variable.raw, + ) + } + } +} + +fn write_serialize_field( + config: &Config, + indent: usize, + field: &Field, + writer: &mut W, +) -> Result<()> { + let as_ref = format!("&self.{}", field.rust_field_name()); + let variable = Variable { + as_ref: as_ref.as_str(), + as_unref: &as_ref.as_str()[1..], + raw: 
&as_ref.as_str()[1..], + }; + + match &field.field_modifier { + FieldModifier::Required => { + write_serialize_variable(config, indent, field, variable, writer)?; + } + FieldModifier::Optional => { + writeln!( + writer, + "{}if let Some(v) = {}.as_ref() {{", + Indent(indent), + variable.as_unref + )?; + let variable = Variable { + as_ref: "v", + as_unref: "*v", + raw: "v", + }; + write_serialize_variable(config, indent + 1, field, variable, writer)?; + writeln!(writer, "{}}}", Indent(indent))?; + } + FieldModifier::Repeated | FieldModifier::UseDefault => { + write!(writer, "{}if ", Indent(indent))?; + write_field_empty_predicate(field, writer)?; + writeln!(writer, " {{")?; + write_serialize_variable(config, indent + 1, field, variable, writer)?; + writeln!(writer, "{}}}", Indent(indent))?; + } + } + Ok(()) +} + +fn write_serialize_one_of( + indent: usize, + config: &Config, + one_of: &OneOf, + writer: &mut W, +) -> Result<()> { + writeln!( + writer, + "{}if let Some(v) = self.{}.as_ref() {{", + Indent(indent), + one_of.rust_field_name() + )?; + + writeln!(writer, "{}match v {{", Indent(indent + 1))?; + for field in &one_of.fields { + writeln!( + writer, + "{}{}::{}(v) => {{", + Indent(indent + 2), + config.rust_type(&one_of.path), + field.rust_type_name(), + )?; + let variable = Variable { + as_ref: "v", + as_unref: "*v", + raw: "v", + }; + write_serialize_variable(config, indent + 3, field, variable, writer)?; + writeln!(writer, "{}}}", Indent(indent + 2))?; + } + + writeln!(writer, "{}}}", Indent(indent + 1),)?; + writeln!(writer, "{}}}", Indent(indent)) +} + +fn write_deserialize_message( + config: &Config, + indent: usize, + message: &Message, + rust_type: &str, + writer: &mut W, +) -> Result<()> { + write_deserialize_field_name(2, message, writer)?; + + writeln!(writer, "{}struct GeneratedVisitor;", Indent(indent))?; + + writeln!( + writer, + r#"{indent}impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {{ +{indent} type Value = {rust_type}; + +{indent} fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{ +{indent} formatter.write_str("struct {name}") +{indent} }} + +{indent} fn visit_map(self, mut map: V) -> Result<{rust_type}, V::Error> +{indent} where +{indent} V: serde::de::MapAccess<'de>, +{indent} {{"#, + indent = Indent(indent), + name = message.path, + rust_type = rust_type, + )?; + + for field in &message.fields { + writeln!( + writer, + "{}let mut {} = None;", + Indent(indent + 2), + field.rust_field_name(), + )?; + } + + for one_of in &message.one_ofs { + writeln!( + writer, + "{}let mut {} = None;", + Indent(indent + 2), + one_of.rust_field_name(), + )?; + } + + if !message.fields.is_empty() || !message.one_ofs.is_empty() { + writeln!( + writer, + "{}while let Some(k) = map.next_key()? 
{{", + Indent(indent + 2) + )?; + + writeln!(writer, "{}match k {{", Indent(indent + 3))?; + + for field in &message.fields { + write_deserialize_field(config, indent + 4, field, None, writer)?; + } + + for one_of in &message.one_ofs { + for field in &one_of.fields { + write_deserialize_field(config, indent + 4, field, Some(one_of), writer)?; + } + } + + writeln!(writer, "{}}}", Indent(indent + 3))?; + writeln!(writer, "{}}}", Indent(indent + 2))?; + } else { + writeln!( + writer, + "{}while map.next_key::()?.is_some() {{}}", + Indent(indent + 2) + )?; + } + + writeln!(writer, "{}Ok({} {{", Indent(indent + 2), rust_type)?; + for field in &message.fields { + match field.field_modifier { + FieldModifier::Required => { + writeln!( + writer, + "{indent}{field}: {field}.ok_or_else(|| serde::de::Error::missing_field(\"{json_name}\"))?,", + indent=Indent(indent + 3), + field= field.rust_field_name(), + json_name= field.json_name() + )?; + } + FieldModifier::UseDefault | FieldModifier::Repeated => { + // Note: this currently does not hydrate optional proto2 fields with defaults + writeln!( + writer, + "{indent}{field}: {field}.unwrap_or_default(),", + indent = Indent(indent + 3), + field = field.rust_field_name() + )?; + } + _ => { + writeln!( + writer, + "{indent}{field},", + indent = Indent(indent + 3), + field = field.rust_field_name() + )?; + } + } + } + for one_of in &message.one_ofs { + writeln!( + writer, + "{indent}{field},", + indent = Indent(indent + 3), + field = one_of.rust_field_name(), + )?; + } + + writeln!(writer, "{}}})", Indent(indent + 2))?; + writeln!(writer, "{}}}", Indent(indent + 1))?; + writeln!(writer, "{}}}", Indent(indent))?; + writeln!( + writer, + "{}deserializer.deserialize_struct(\"{}\", FIELDS, GeneratedVisitor)", + Indent(indent), + message.path + ) +} + +fn write_deserialize_field_name( + indent: usize, + message: &Message, + writer: &mut W, +) -> Result<()> { + let fields: Vec<_> = message + .all_fields() + .map(|field| (field.json_name(), field.rust_type_name())) + .collect(); + + write_fields_array(writer, indent, fields.iter().map(|(name, _)| name.as_str()))?; + write_fields_enum(writer, indent, fields.iter().map(|(_, name)| name.as_str()))?; + + writeln!( + writer, + r#"{indent}impl<'de> serde::Deserialize<'de> for GeneratedField {{ +{indent} fn deserialize(deserializer: D) -> Result +{indent} where +{indent} D: serde::Deserializer<'de>, +{indent} {{ +{indent} struct GeneratedVisitor; + +{indent} impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {{ +{indent} type Value = GeneratedField; + +{indent} fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{ +{indent} write!(formatter, "expected one of: {{:?}}", &FIELDS) +{indent} }} + +{indent} fn visit_str(self, value: &str) -> Result +{indent} where +{indent} E: serde::de::Error, +{indent} {{"#, + indent = Indent(indent) + )?; + + if !fields.is_empty() { + writeln!(writer, "{}match value {{", Indent(indent + 4))?; + for (json_name, type_name) in &fields { + writeln!( + writer, + "{}\"{}\" => Ok(GeneratedField::{}),", + Indent(indent + 5), + json_name, + type_name + )?; + } + writeln!( + writer, + "{}_ => Err(serde::de::Error::unknown_field(value, FIELDS)),", + Indent(indent + 5) + )?; + writeln!(writer, "{}}}", Indent(indent + 4))?; + } else { + writeln!( + writer, + "{}Err(serde::de::Error::unknown_field(value, FIELDS))", + Indent(indent + 4) + )?; + } + + writeln!( + writer, + r#"{indent} }} +{indent} }} +{indent} deserializer.deserialize_identifier(GeneratedVisitor) +{indent} 
}} +{indent}}}"#, + indent = Indent(indent) + ) +} + +fn write_fields_enum<'a, W: Write, I: Iterator>( + writer: &mut W, + indent: usize, + fields: I, +) -> Result<()> { + writeln!( + writer, + "{}#[allow(clippy::enum_variant_names)]", + Indent(indent) + )?; + writeln!(writer, "{}enum GeneratedField {{", Indent(indent))?; + for type_name in fields { + writeln!(writer, "{}{},", Indent(indent + 1), type_name)?; + } + writeln!(writer, "{}}}", Indent(indent)) +} + +fn write_deserialize_field( + config: &Config, + indent: usize, + field: &Field, + one_of: Option<&OneOf>, + writer: &mut W, +) -> Result<()> { + let field_name = match one_of { + Some(one_of) => one_of.rust_field_name(), + None => field.rust_field_name(), + }; + + let json_name = field.json_name(); + writeln!( + writer, + "{}GeneratedField::{} => {{", + Indent(indent), + field.rust_type_name() + )?; + writeln!( + writer, + "{}if {}.is_some() {{", + Indent(indent + 1), + field_name + )?; + + // Note: this will report duplicate field if multiple value are specified for a one of + writeln!( + writer, + "{}return Err(serde::de::Error::duplicate_field(\"{}\"));", + Indent(indent + 2), + json_name + )?; + writeln!(writer, "{}}}", Indent(indent + 1))?; + write!(writer, "{}{} = Some(", Indent(indent + 1), field_name)?; + + if let Some(one_of) = one_of { + write!( + writer, + "{}::{}(", + config.rust_type(&one_of.path), + field.rust_type_name() + )?; + } + + match &field.field_type { + FieldType::Scalar(scalar) => { + write_encode_scalar_field(indent + 1, *scalar, field.field_modifier, writer)?; + } + FieldType::Enum(path) => match field.field_modifier { + FieldModifier::Repeated => { + write!( + writer, + "map.next_value::>()?.into_iter().map(|x| x as i32).collect()", + config.rust_type(path) + )?; + } + _ => { + write!( + writer, + "map.next_value::<{}>()? 
as i32", + config.rust_type(path) + )?; + } + }, + FieldType::Map(key, value) => { + writeln!(writer)?; + write!( + writer, + "{}map.next_value:: { + // https://github.com/tokio-rs/prost/issues/531 + panic!("bytes are not currently supported as map keys") + } + _ if key.is_numeric() => { + write!( + writer, + "::pbjson::private::NumberDeserialize<{}>", + key.rust_type() + )?; + "k.0" + } + _ => { + write!(writer, "_")?; + "k" + } + }; + write!(writer, ", ")?; + let map_v = match value.as_ref() { + FieldType::Scalar(scalar) if scalar.is_numeric() => { + write!( + writer, + "::pbjson::private::NumberDeserialize<{}>", + scalar.rust_type() + )?; + "v.0" + } + FieldType::Scalar(ScalarType::Bytes) => { + // https://github.com/tokio-rs/prost/issues/531 + panic!("bytes are not currently supported as map values") + } + FieldType::Enum(path) => { + write!(writer, "{}", config.rust_type(path))?; + "v as i32" + } + FieldType::Map(_, _) => panic!("protobuf disallows nested maps"), + _ => { + write!(writer, "_")?; + "v" + } + }; + + writeln!(writer, ">>()?")?; + if map_k != "k" || map_v != "v" { + writeln!( + writer, + "{}.into_iter().map(|(k,v)| ({}, {})).collect()", + Indent(indent + 3), + map_k, + map_v, + )?; + } + write!(writer, "{}", Indent(indent + 1))?; + } + _ => { + write!(writer, "map.next_value()?",)?; + } + }; + + if one_of.is_some() { + write!(writer, ")")?; + } + + writeln!(writer, ");")?; + writeln!(writer, "{}}}", Indent(indent)) +} + +fn write_encode_scalar_field( + indent: usize, + scalar: ScalarType, + field_modifier: FieldModifier, + writer: &mut W, +) -> Result<()> { + let deserializer = match scalar { + ScalarType::Bytes => "BytesDeserialize", + _ if scalar.is_numeric() => "NumberDeserialize", + _ => return write!(writer, "map.next_value()?",), + }; + + writeln!(writer)?; + + match field_modifier { + FieldModifier::Repeated => { + writeln!( + writer, + "{}map.next_value::>>()?", + Indent(indent + 1), + deserializer + )?; + writeln!( + writer, + "{}.into_iter().map(|x| x.0).collect()", + Indent(indent + 2) + )?; + } + _ => { + writeln!( + writer, + "{}map.next_value::<::pbjson::private::{}<_>>()?.0", + Indent(indent + 1), + deserializer + )?; + } + } + write!(writer, "{}", Indent(indent)) +} diff --git a/pbjson_build/src/lib.rs b/pbjson_build/src/lib.rs new file mode 100644 index 0000000000..b985473ac4 --- /dev/null +++ b/pbjson_build/src/lib.rs @@ -0,0 +1,113 @@ +#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![warn( + missing_debug_implementations, + clippy::explicit_iter_loop, + clippy::use_self, + clippy::clone_on_ref_ptr, + clippy::future_not_send +)] + +use crate::descriptor::{Descriptor, DescriptorSet, Package}; +use crate::generator::{generate_enum, generate_message, Config}; +use crate::message::resolve_message; +use std::io::{BufWriter, Error, ErrorKind, Result, Write}; +use std::path::PathBuf; + +mod descriptor; +mod escape; +mod generator; +mod message; + +#[derive(Debug, Default)] +pub struct Builder { + descriptors: descriptor::DescriptorSet, + out_dir: Option, +} + +impl Builder { + /// Create a new `Builder` + pub fn new() -> Self { + Self { + descriptors: DescriptorSet::new(), + out_dir: None, + } + } + + /// Register an encoded `FileDescriptorSet` with this `Builder` + pub fn register_descriptors(&mut self, descriptors: &[u8]) -> Result<&mut Self> { + self.descriptors.register_encoded(descriptors)?; + Ok(self) + } + + /// Generates code for all registered types where `prefixes` contains a prefix of + /// the fully-qualified path 
of the type + pub fn build>(&mut self, prefixes: &[S]) -> Result<()> { + let mut output: PathBuf = self.out_dir.clone().map(Ok).unwrap_or_else(|| { + std::env::var_os("OUT_DIR") + .ok_or_else(|| { + Error::new(ErrorKind::Other, "OUT_DIR environment variable is not set") + }) + .map(Into::into) + })?; + output.push("FILENAME"); + + let write_factory = move |package: &Package| { + output.set_file_name(format!("{}.serde.rs", package)); + + let file = std::fs::OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(&output)?; + + Ok(BufWriter::new(file)) + }; + + let writers = generate(&self.descriptors, prefixes, write_factory)?; + for (_, mut writer) in writers { + writer.flush()?; + } + + Ok(()) + } +} + +fn generate, W: Write, F: FnMut(&Package) -> Result>( + descriptors: &DescriptorSet, + prefixes: &[S], + mut write_factory: F, +) -> Result> { + let config = Config { + extern_types: Default::default(), + }; + + let iter = descriptors.iter().filter(move |(t, _)| { + prefixes + .iter() + .any(|prefix| t.matches_prefix(prefix.as_ref())) + }); + + // Exploit the fact descriptors is ordered to group together types from the same package + let mut ret: Vec<(Package, W)> = Vec::new(); + for (type_path, descriptor) in iter { + let writer = match ret.last_mut() { + Some((package, writer)) if package == type_path.package() => writer, + _ => { + let package = type_path.package(); + ret.push((package.clone(), write_factory(package)?)); + &mut ret.last_mut().unwrap().1 + } + }; + + match descriptor { + Descriptor::Enum(descriptor) => generate_enum(&config, type_path, descriptor, writer)?, + Descriptor::Message(descriptor) => { + if let Some(message) = resolve_message(descriptors, descriptor) { + generate_message(&config, &message, writer)? + } + } + } + } + + Ok(ret) +} diff --git a/pbjson_build/src/message.rs b/pbjson_build/src/message.rs new file mode 100644 index 0000000000..5927a1f520 --- /dev/null +++ b/pbjson_build/src/message.rs @@ -0,0 +1,275 @@ +//! The raw descriptor format is not very easy to work with, a fact not aided +//! by prost making almost all members of proto2 syntax message optional +//! +//! This module therefore extracts a slightly less obtuse representation of a +//! 
message that can be used by the code generation logic + +use prost_types::{ + field_descriptor_proto::{Label, Type}, + FieldDescriptorProto, +}; + +use crate::descriptor::{Descriptor, DescriptorSet, MessageDescriptor, Syntax, TypeName, TypePath}; +use crate::escape::escape_ident; + +#[derive(Debug, Clone, Copy)] +pub enum ScalarType { + F64, + F32, + I32, + I64, + U32, + U64, + Bool, + String, + Bytes, +} + +impl ScalarType { + pub fn rust_type(&self) -> &'static str { + match self { + ScalarType::F64 => "f64", + ScalarType::F32 => "f32", + ScalarType::I32 => "i32", + ScalarType::I64 => "i64", + ScalarType::U32 => "u32", + ScalarType::U64 => "u64", + ScalarType::Bool => "bool", + ScalarType::String => "String", + ScalarType::Bytes => "Vec", + } + } + + pub fn is_numeric(&self) -> bool { + matches!( + self, + ScalarType::F64 + | ScalarType::F32 + | ScalarType::I32 + | ScalarType::I64 + | ScalarType::U32 + | ScalarType::U64 + ) + } +} + +#[derive(Debug, Clone)] +pub enum FieldType { + Scalar(ScalarType), + Enum(TypePath), + Message(TypePath), + Map(ScalarType, Box), +} + +#[derive(Debug, Clone, Copy)] +pub enum FieldModifier { + Required, + Optional, + UseDefault, + Repeated, +} + +impl FieldModifier { + pub fn is_required(&self) -> bool { + matches!(self, Self::Required) + } +} + +#[derive(Debug, Clone)] +pub struct Field { + pub name: String, + pub json_name: Option, + pub field_modifier: FieldModifier, + pub field_type: FieldType, +} + +impl Field { + pub fn rust_type_name(&self) -> String { + use heck::CamelCase; + self.name.to_camel_case() + } + + pub fn rust_field_name(&self) -> String { + use heck::SnakeCase; + escape_ident(self.name.to_snake_case()) + } + + pub fn json_name(&self) -> String { + use heck::MixedCase; + self.json_name + .clone() + .unwrap_or_else(|| self.name.to_mixed_case()) + } +} + +#[derive(Debug, Clone)] +pub struct OneOf { + pub name: String, + pub path: TypePath, + pub fields: Vec, +} + +impl OneOf { + pub fn rust_field_name(&self) -> String { + use heck::SnakeCase; + escape_ident(self.name.to_snake_case()) + } +} + +#[derive(Debug, Clone)] +pub struct Message { + pub path: TypePath, + pub fields: Vec, + pub one_ofs: Vec, +} + +impl Message { + pub fn all_fields(&self) -> impl Iterator + '_ { + self.fields + .iter() + .chain(self.one_ofs.iter().flat_map(|one_of| one_of.fields.iter())) + } +} + +/// Resolve the provided message descriptor into a slightly less obtuse representation +/// +/// Returns None if the provided provided message is auto-generated +pub fn resolve_message( + descriptors: &DescriptorSet, + message: &MessageDescriptor, +) -> Option { + if message.is_map() { + return None; + } + + let mut fields = Vec::new(); + let mut one_of_fields = vec![Vec::new(); message.one_of.len()]; + + for field in &message.fields { + let field_type = field_type(descriptors, field); + let field_modifier = field_modifier(message, field, &field_type); + + let resolved = Field { + name: field.name.clone().expect("expected field to have name"), + json_name: field.json_name.clone(), + field_type, + field_modifier, + }; + + // Treat synthetic one-of as normal + let proto3_optional = field.proto3_optional.unwrap_or(false); + match (field.oneof_index, proto3_optional) { + (Some(idx), false) => one_of_fields[idx as usize].push(resolved), + _ => fields.push(resolved), + } + } + + let mut one_ofs = Vec::new(); + + for (fields, descriptor) in one_of_fields.into_iter().zip(&message.one_of) { + // Might be empty in the event of a synthetic one-of + if !fields.is_empty() { + let name 
= descriptor.name.clone().expect("oneof with no name"); + let path = message.path.child(TypeName::new(&name)); + + one_ofs.push(OneOf { name, path, fields }) + } + } + + Some(Message { + path: message.path.clone(), + fields, + one_ofs, + }) +} + +fn field_modifier( + message: &MessageDescriptor, + field: &FieldDescriptorProto, + field_type: &FieldType, +) -> FieldModifier { + let label = Label::from_i32(field.label.expect("expected label")).expect("valid label"); + if field.proto3_optional.unwrap_or(false) { + assert_eq!(label, Label::Optional); + return FieldModifier::Optional; + } + + if field.oneof_index.is_some() { + assert_eq!(label, Label::Optional); + return FieldModifier::Optional; + } + + if matches!(field_type, FieldType::Map(_, _)) { + assert_eq!(label, Label::Repeated); + return FieldModifier::Repeated; + } + + match label { + Label::Optional => match message.syntax { + Syntax::Proto2 => FieldModifier::Optional, + Syntax::Proto3 => match field_type { + FieldType::Message(_) => FieldModifier::Optional, + _ => FieldModifier::UseDefault, + }, + }, + Label::Required => FieldModifier::Required, + Label::Repeated => FieldModifier::Repeated, + } +} + +fn field_type(descriptors: &DescriptorSet, field: &FieldDescriptorProto) -> FieldType { + match field.type_name.as_ref() { + Some(type_name) => resolve_type(descriptors, type_name.as_str()), + None => { + let scalar = + match Type::from_i32(field.r#type.expect("expected type")).expect("valid type") { + Type::Double => ScalarType::F64, + Type::Float => ScalarType::F32, + Type::Int64 | Type::Sfixed64 | Type::Sint64 => ScalarType::I64, + Type::Int32 | Type::Sfixed32 | Type::Sint32 => ScalarType::I32, + Type::Uint64 | Type::Fixed64 => ScalarType::U64, + Type::Uint32 | Type::Fixed32 => ScalarType::U32, + Type::Bool => ScalarType::Bool, + Type::String => ScalarType::String, + Type::Bytes => ScalarType::Bytes, + Type::Message | Type::Enum | Type::Group => panic!("no type name specified"), + }; + FieldType::Scalar(scalar) + } + } +} + +fn resolve_type(descriptors: &DescriptorSet, type_name: &str) -> FieldType { + assert!( + type_name.starts_with('.'), + "pbjson does not currently support resolving relative types" + ); + let maybe_descriptor = descriptors + .iter() + .find(|(path, _)| path.matches_prefix(type_name)); + + match maybe_descriptor { + Some((path, Descriptor::Enum(_))) => FieldType::Enum(path.clone()), + Some((path, Descriptor::Message(descriptor))) => match descriptor.is_map() { + true => { + assert_eq!(descriptor.fields.len(), 2, "expected map to have 2 fields"); + let key = &descriptor.fields[0]; + let value = &descriptor.fields[1]; + + assert_eq!("key", key.name()); + assert_eq!("value", value.name()); + + let key_type = match field_type(descriptors, key) { + FieldType::Scalar(scalar) => scalar, + _ => panic!("non scalar map key"), + }; + let value_type = field_type(descriptors, value); + FieldType::Map(key_type, Box::new(value_type)) + } + // Note: This may actually be a group but it is non-trivial to detect this, + // they're deprecated, and pbjson doesn't need to be able to distinguish + false => FieldType::Message(path.clone()), + }, + None => panic!("failed to resolve type: {}", type_name), + } +} diff --git a/pbjson_test/Cargo.toml b/pbjson_test/Cargo.toml new file mode 100644 index 0000000000..86c0566005 --- /dev/null +++ b/pbjson_test/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "pbjson_test" +version = "0.1.0" +authors = ["Raphael Taylor-Davies "] +edition = "2018" +description = "Test resources for pbjson converion" + 
+[dependencies] +prost = "0.8" +pbjson = { path = "../pbjson" } +serde = { version = "1.0", features = ["derive"] } + +[dev-dependencies] +serde_json = "1.0" + +[build-dependencies] +prost-build = "0.8" +pbjson_build = { path = "../pbjson_build" } diff --git a/pbjson_test/build.rs b/pbjson_test/build.rs new file mode 100644 index 0000000000..a9b294440a --- /dev/null +++ b/pbjson_test/build.rs @@ -0,0 +1,33 @@ +//! Compiles Protocol Buffers definitions into native Rust types + +use std::env; +use std::path::PathBuf; + +type Error = Box; +type Result = std::result::Result; + +fn main() -> Result<()> { + let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos"); + + let proto_files = vec![root.join("syntax3.proto")]; + + // Tell cargo to recompile if any of these proto files are changed + for proto_file in &proto_files { + println!("cargo:rerun-if-changed={}", proto_file.display()); + } + + let descriptor_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("proto_descriptor.bin"); + prost_build::Config::new() + .file_descriptor_set_path(&descriptor_path) + .compile_well_known_types() + .disable_comments(&["."]) + .bytes(&[".test"]) + .compile_protos(&proto_files, &[root])?; + + let descriptor_set = std::fs::read(descriptor_path)?; + pbjson_build::Builder::new() + .register_descriptors(&descriptor_set)? + .build(&[".test"])?; + + Ok(()) +} diff --git a/pbjson_test/protos/syntax3.proto b/pbjson_test/protos/syntax3.proto new file mode 100644 index 0000000000..73ef4fded7 --- /dev/null +++ b/pbjson_test/protos/syntax3.proto @@ -0,0 +1,72 @@ +syntax = "proto3"; + +package test.syntax3; + +message Empty {} + +message KitchenSink { + // Standard enum + enum Value { + VALUE_UNKNOWN = 0; + VALUE_A = 45; + VALUE_B = 63; + } + + // An enumeration without prefixed variants + enum Prefix { + UNKNOWN = 0; + A = 66; + B = 20; + } + + int32 i32 = 1; + optional int32 optional_i32 = 2; + repeated int32 repeated_i32 = 3; + + uint32 u32 = 4; + optional uint32 optional_u32 = 5; + repeated uint32 repeated_u32 = 6; + + int64 i64 = 7; + optional int64 optional_i64 = 8; + repeated int64 repeated_i64 = 9; + + uint64 u64 = 10; + optional uint64 optional_u64 = 11; + repeated uint64 repeated_u64 = 12; + + Value value = 13; + optional Value optional_value = 14; + repeated Value repeated_value = 15; + + Prefix prefix = 16; + Empty empty = 17; + + map string_dict = 18; + map message_dict = 19; + map enum_dict = 20; + map int64_dict = 21; + map int32_dict = 22; + map integer_dict = 23; + + bool bool = 24; + optional bool optional_bool = 25; + repeated bool repeated_bool = 26; + + oneof one_of { + int32 one_of_i32 = 27; + bool one_of_bool = 28; + Value one_of_value = 29; + Empty one_of_message = 30; + } + + bytes bytes = 31; + optional bytes optional_bytes = 32; + repeated bytes repeated_bytes = 33; + + // Bytes support is currently broken - https://github.com/tokio-rs/prost/issues/531 + // map bytes_dict = 34; + + string string = 35; + optional string optional_string = 36; +} \ No newline at end of file diff --git a/pbjson_test/src/lib.rs b/pbjson_test/src/lib.rs new file mode 100644 index 0000000000..70a9253aff --- /dev/null +++ b/pbjson_test/src/lib.rs @@ -0,0 +1,241 @@ +include!(concat!(env!("OUT_DIR"), "/test.syntax3.rs")); +include!(concat!(env!("OUT_DIR"), "/test.syntax3.serde.rs")); + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_empty() { + let message = Empty {}; + + let encoded = serde_json::to_string(&message).unwrap(); + let _decoded: Empty = serde_json::from_str(&encoded).unwrap(); 
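+        // For illustration: with no fields set, the proto3 JSON mapping omits
+        // default values, so an Empty message encodes to an empty object.
+        assert_eq!(encoded, "{}");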
+ + let err = serde_json::from_str::("343").unwrap_err(); + assert_eq!( + err.to_string().as_str(), + "invalid type: integer `343`, expected struct test.syntax3.Empty at line 1 column 3" + ); + + let err = serde_json::from_str::("{\"foo\": \"bar\"}").unwrap_err(); + assert_eq!( + err.to_string().as_str(), + "unknown field `foo`, there are no fields at line 1 column 6" + ); + } + + #[test] + fn test_kitchen_sink() { + let mut decoded: KitchenSink = serde_json::from_str("{}").unwrap(); + + let verify_encode = |decoded: &KitchenSink, expected: &str| { + assert_eq!(serde_json::to_string(&decoded).unwrap().as_str(), expected); + }; + + let verify_decode = |decoded: &KitchenSink, expected: &str| { + assert_eq!(decoded, &serde_json::from_str(expected).unwrap()); + }; + + let verify = |decoded: &KitchenSink, expected: &str| { + verify_encode(decoded, expected); + verify_decode(decoded, expected); + }; + + verify(&decoded, "{}"); + decoded.i32 = 24; + verify(&decoded, r#"{"i32":24}"#); + decoded.i32 = 0; + verify_decode(&decoded, "{}"); + + // Explicit optional fields can distinguish between no value and default value + decoded.optional_i32 = Some(2); + verify(&decoded, r#"{"optionalI32":2}"#); + + decoded.optional_i32 = Some(0); + verify(&decoded, r#"{"optionalI32":0}"#); + + // Can also decode from string + verify_decode(&decoded, r#"{"optionalI32":"0"}"#); + + decoded.optional_i32 = None; + verify_decode(&decoded, "{}"); + + // 64-bit integers are encoded as strings + decoded.i64 = 123125; + verify(&decoded, r#"{"i64":"123125"}"#); + + decoded.i64 = 0; + verify_decode(&decoded, "{}"); + + decoded.optional_i64 = Some(532); + verify(&decoded, r#"{"optionalI64":"532"}"#); + + decoded.optional_i64 = Some(0); + verify(&decoded, r#"{"optionalI64":"0"}"#); + + // Can also decode from non-string + verify_decode(&decoded, r#"{"optionalI64":0}"#); + + decoded.optional_i64 = None; + verify_decode(&decoded, "{}"); + + decoded.u64 = 34346; + decoded.u32 = 567094456; + decoded.optional_u32 = Some(0); + decoded.optional_u64 = Some(3); + verify( + &decoded, + r#"{"u32":567094456,"optionalU32":0,"u64":"34346","optionalU64":"3"}"#, + ); + + decoded.u64 = 0; + decoded.u32 = 0; + decoded.optional_u32 = None; + decoded.optional_u64 = None; + verify_decode(&decoded, "{}"); + + decoded.repeated_i32 = vec![0, 23, 5, 6, 2, 34]; + verify(&decoded, r#"{"repeatedI32":[0,23,5,6,2,34]}"#); + // Can also mix in some strings + verify_decode(&decoded, r#"{"repeatedI32":[0,"23",5,6,"2",34]}"#); + + decoded.repeated_i32 = vec![]; + verify_decode(&decoded, "{}"); + + decoded.repeated_u64 = vec![0, 532, 2]; + verify(&decoded, r#"{"repeatedU64":["0","532","2"]}"#); + // Can also mix in some non-strings + verify_decode(&decoded, r#"{"repeatedU64":["0",532,"2"]}"#); + + decoded.repeated_u64 = vec![]; + verify_decode(&decoded, "{}"); + + // Enumerations should be encoded as strings + decoded.value = kitchen_sink::Value::A as i32; + verify(&decoded, r#"{"value":"VALUE_A"}"#); + + // Can also use variant number + verify_decode(&decoded, r#"{"value":45}"#); + + decoded.value = kitchen_sink::Value::Unknown as i32; + verify_decode(&decoded, "{}"); + + decoded.optional_value = Some(kitchen_sink::Value::Unknown as i32); + verify(&decoded, r#"{"optionalValue":"VALUE_UNKNOWN"}"#); + + // Can also use variant number + verify_decode(&decoded, r#"{"optionalValue":0}"#); + + decoded.optional_value = None; + verify_decode(&decoded, "{}"); + + decoded + .string_dict + .insert("foo".to_string(), "bar".to_string()); + verify(&decoded, 
r#"{"stringDict":{"foo":"bar"}}"#); + + decoded.string_dict = Default::default(); + verify_decode(&decoded, "{}"); + + decoded + .int32_dict + .insert(343, kitchen_sink::Prefix::A as i32); + // Dictionary keys should always be strings + // Enum dictionary values should be encoded as strings + verify(&decoded, r#"{"int32Dict":{"343":"A"}}"#); + // Enum dictionary values can be decoded from integers + verify_decode(&decoded, r#"{"int32Dict":{"343":66}}"#); + + decoded.int32_dict = Default::default(); + verify_decode(&decoded, "{}"); + + // 64-bit dictionary values should be encoded as strings + decoded.integer_dict.insert(12, 13); + verify(&decoded, r#"{"integerDict":{"12":"13"}}"#); + // 64-bit dictionary values can be decoded from numeric types + verify_decode(&decoded, r#"{"integerDict":{"12":13}}"#); + + decoded.integer_dict = Default::default(); + verify_decode(&decoded, "{}"); + + decoded.one_of = Some(kitchen_sink::OneOf::OneOfI32(0)); + verify(&decoded, r#"{"oneOfI32":0}"#); + // Can also specify string + verify_decode(&decoded, r#"{"oneOfI32":"0"}"#); + + decoded.one_of = Some(kitchen_sink::OneOf::OneOfI32(12)); + verify(&decoded, r#"{"oneOfI32":12}"#); + + decoded.one_of = Some(kitchen_sink::OneOf::OneOfBool(false)); + verify(&decoded, r#"{"oneOfBool":false}"#); + + decoded.one_of = Some(kitchen_sink::OneOf::OneOfBool(true)); + verify(&decoded, r#"{"oneOfBool":true}"#); + + decoded.one_of = Some(kitchen_sink::OneOf::OneOfValue( + kitchen_sink::Value::B as i32, + )); + verify(&decoded, r#"{"oneOfValue":"VALUE_B"}"#); + // Can also specify enum variant + verify_decode(&decoded, r#"{"oneOfValue":63}"#); + + decoded.one_of = None; + verify_decode(&decoded, "{}"); + + decoded.repeated_value = vec![ + kitchen_sink::Value::B as i32, + kitchen_sink::Value::B as i32, + kitchen_sink::Value::A as i32, + ]; + verify( + &decoded, + r#"{"repeatedValue":["VALUE_B","VALUE_B","VALUE_A"]}"#, + ); + verify_decode(&decoded, r#"{"repeatedValue":[63,"VALUE_B","VALUE_A"]}"#); + + decoded.repeated_value = Default::default(); + verify_decode(&decoded, "{}"); + + decoded.bytes = prost::bytes::Bytes::from_static(b"kjkjkj"); + verify(&decoded, r#"{"bytes":"a2pramtq"}"#); + + decoded.bytes = Default::default(); + verify_decode(&decoded, "{}"); + + decoded.optional_bytes = Some(prost::bytes::Bytes::from_static(b"kjkjkj")); + verify(&decoded, r#"{"optionalBytes":"a2pramtq"}"#); + + decoded.optional_bytes = Some(Default::default()); + verify(&decoded, r#"{"optionalBytes":""}"#); + + decoded.optional_bytes = None; + verify_decode(&decoded, "{}"); + + decoded.repeated_bytes = vec![ + prost::bytes::Bytes::from_static(b"sdfsd"), + prost::bytes::Bytes::from_static(b"fghfg"), + ]; + verify(&decoded, r#"{"repeatedBytes":["c2Rmc2Q=","ZmdoZmc="]}"#); + + decoded.repeated_bytes = Default::default(); + verify_decode(&decoded, "{}"); + + // decoded.bytes_dict.insert( + // "test".to_string(), + // prost::bytes::Bytes::from_static(b"asdf"), + // ); + // verify(&decoded, r#"{"bytesDict":{"test":"YXNkZgo="}}"#); + // + // decoded.bytes_dict = Default::default(); + // verify_decode(&decoded, "{}"); + + decoded.string = "test".to_string(); + verify(&decoded, r#"{"string":"test"}"#); + + decoded.string = Default::default(); + verify_decode(&decoded, "{}"); + + decoded.optional_string = Some(String::new()); + verify(&decoded, r#"{"optionalString":""}"#); + } +} diff --git a/predicate/src/predicate.rs b/predicate/src/predicate.rs index 2bbc879298..0cc3f0c16f 100644 --- a/predicate/src/predicate.rs +++ b/predicate/src/predicate.rs @@ 
-161,6 +161,51 @@ impl Predicate { pub fn is_empty(&self) -> bool { self == &EMPTY_PREDICATE } + + /// Add each range [start, stop] of the delete_predicates into the predicate in + /// the form "time < start OR time > stop" to eliminate that range from the query + pub fn add_delete_ranges(&mut self, delete_predicates: &[S]) + where + S: AsRef, + { + for pred in delete_predicates { + let pred = pred.as_ref(); + + if let Some(range) = pred.range { + let expr = col(TIME_COLUMN_NAME) + .lt(lit(range.start)) + .or(col(TIME_COLUMN_NAME).gt(lit(range.end))); + self.exprs.push(expr); + } + } + } + + /// Add a list of disjunctive negated expressions. + /// Example: there are two deletes as follows + /// . Delete_1: WHERE city != "Boston" AND temp = 70 + /// . Delete 2: WHERE state = "NY" AND route != "I90" + /// The negated list will be "NOT(Delete_1)", NOT(Delete_2)" which means + /// NOT(city != "Boston" AND temp = 70), NOT(state = "NY" AND route != "I90") which means + /// [NOT(city = Boston") OR NOT(temp = 70)], [NOT(state = "NY") OR NOT(route != "I90")] + pub fn add_delete_exprs(&mut self, delete_predicates: &[S]) + where + S: AsRef, + { + for pred in delete_predicates { + let pred = pred.as_ref(); + + let mut expr: Option = None; + for exp in &pred.exprs { + match expr { + None => expr = Some(exp.clone().not()), + Some(e) => expr = Some(e.or(exp.clone().not())), + } + } + if let Some(e) = expr { + self.exprs.push(e); + } + } + } } impl fmt::Display for Predicate { diff --git a/query/src/exec/context.rs b/query/src/exec/context.rs index b526a58773..ce72f303fb 100644 --- a/query/src/exec/context.rs +++ b/query/src/exec/context.rs @@ -11,18 +11,19 @@ use datafusion::{ logical_plan::{LogicalPlan, UserDefinedLogicalNode}, physical_plan::{ coalesce_partitions::CoalescePartitionsExec, - collect, displayable, + displayable, planner::{DefaultPhysicalPlanner, ExtensionPlanner}, ExecutionPlan, PhysicalPlanner, SendableRecordBatchStream, }, prelude::*, }; +use futures::TryStreamExt; use observability_deps::tracing::{debug, trace}; use trace::{ctx::SpanContext, span::SpanRecorder}; use crate::exec::{ fieldlist::{FieldList, IntoFieldList}, - query_tracing::send_metrics_to_tracing, + query_tracing::TracedStream, schema_pivot::{SchemaPivotExec, SchemaPivotNode}, seriesset::{SeriesSetConverter, SeriesSetItem}, split::StreamSplitExec, @@ -272,45 +273,63 @@ impl IOxExecutionContext { /// Executes the logical plan using DataFusion on a separate /// thread pool and produces RecordBatches pub async fn collect(&self, physical_plan: Arc) -> Result> { - let ctx = self.child_ctx("collect"); debug!( "Running plan, physical:\n{}", displayable(physical_plan.as_ref()).indent() ); + let ctx = self.child_ctx("collect"); + let stream = ctx.execute_stream(physical_plan).await?; - let res = ctx.run(collect(Arc::clone(&physical_plan))).await; - - // send metrics to tracing, even on error - ctx.save_metrics(physical_plan); - res + ctx.run( + stream + .err_into() // convert to DataFusionError + .try_collect(), + ) + .await } - /// Executes the physical plan and produces a RecordBatchStream to stream - /// over the result that iterates over the results. - pub async fn execute( + /// Executes the physical plan and produces a + /// `SendableRecordBatchStream` to stream over the result that + /// iterates over the results. The creation of the stream is + /// performed in a separate thread pool. 
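+    ///
+    /// Illustrative usage sketch (`ctx` is an `IOxExecutionContext`, `physical_plan`
+    /// a prepared `Arc<dyn ExecutionPlan>`; requires `futures::StreamExt`):
+    ///
+    /// ```ignore
+    /// let mut stream = ctx.execute_stream(physical_plan).await?;
+    /// while let Some(batch) = stream.next().await {
+    ///     let batch = batch?;
+    ///     // each RecordBatch is produced as the plan executes
+    /// }
+    /// ```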
+ pub async fn execute_stream( &self, physical_plan: Arc, ) -> Result { match physical_plan.output_partitioning().partition_count() { 0 => unreachable!(), - 1 => self.execute_partition(physical_plan, 0).await, + 1 => self.execute_stream_partitioned(physical_plan, 0).await, _ => { // Merge into a single partition - self.execute_partition(Arc::new(CoalescePartitionsExec::new(physical_plan)), 0) - .await + self.execute_stream_partitioned( + Arc::new(CoalescePartitionsExec::new(physical_plan)), + 0, + ) + .await } } } - /// Executes a single partition of a physical plan and produces a RecordBatchStream to stream - /// over the result that iterates over the results. - pub async fn execute_partition( + /// Executes a single partition of a physical plan and produces a + /// `SendableRecordBatchStream` to stream over the result that + /// iterates over the results. The creation of the stream is + /// performed in a separate thread pool. + pub async fn execute_stream_partitioned( &self, physical_plan: Arc, partition: usize, ) -> Result { - self.run(async move { physical_plan.execute(partition).await }) - .await + let span = self + .recorder + .span() + .map(|span| span.child("execute_stream_partitioned")); + + self.run(async move { + let stream = physical_plan.execute(partition).await?; + let stream = TracedStream::new(stream, span, physical_plan); + Ok(Box::pin(stream) as _) + }) + .await } /// Executes the SeriesSetPlans on the query executor, in @@ -349,7 +368,7 @@ impl IOxExecutionContext { let physical_plan = ctx.prepare_plan(&plan)?; - let it = ctx.execute(physical_plan).await?; + let it = ctx.execute_stream(physical_plan).await?; SeriesSetConverter::default() .convert( @@ -486,19 +505,4 @@ impl IOxExecutionContext { recorder: self.recorder.child(name), } } - - /// Saves any DataFusion metrics that are currently present in - /// `physical_plan` to the span recorder so they show up in - /// distributed traces (e.g. Jaeger) - /// - /// This function should be invoked after `physical_plan` has - /// fully `collect`ed, meaning that `PhysicalPlan::execute()` has - /// been invoked and the resulting streams have been completely - /// consumed. Calling `save_metrics` metrics prior to this point - /// may result in saving incomplete information. - pub fn save_metrics(&self, physical_plan: Arc) { - if let Some(span) = self.recorder.span() { - send_metrics_to_tracing(span, physical_plan.as_ref()) - } - } } diff --git a/query/src/exec/query_tracing.rs b/query/src/exec/query_tracing.rs index 9dadc5e588..7fce5f545e 100644 --- a/query/src/exec/query_tracing.rs +++ b/query/src/exec/query_tracing.rs @@ -1,15 +1,67 @@ //! This module contains the code to map DataFusion metrics to `Span`s //! for use in distributed tracing (e.g. Jaeger) -use std::{borrow::Cow, fmt}; +use std::{borrow::Cow, fmt, sync::Arc}; +use arrow::record_batch::RecordBatch; use chrono::{DateTime, Utc}; use datafusion::physical_plan::{ metrics::{MetricValue, MetricsSet}, - DisplayFormatType, ExecutionPlan, + DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; +use futures::StreamExt; use observability_deps::tracing::debug; -use trace::span::Span; +use trace::span::{Span, SpanRecorder}; + +/// Stream wrapper that records DataFusion `MetricSets` into IOx +/// [`Span`]s when it is dropped. 
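+///
+/// Sketch of the intended wrapping, mirroring `IOxExecutionContext::execute_stream_partitioned`
+/// (`span` is an `Option<Span>`, `physical_plan` an `Arc<dyn ExecutionPlan>`):
+///
+/// ```ignore
+/// let stream = physical_plan.execute(partition).await?;
+/// // DataFusion metrics from `physical_plan` are exported to `span` when this drops
+/// let stream = TracedStream::new(stream, span, physical_plan);
+/// ```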
+pub(crate) struct TracedStream { + inner: SendableRecordBatchStream, + span_recorder: SpanRecorder, + physical_plan: Arc, +} + +impl TracedStream { + /// Return a stream that records DataFusion `MetricSets` from + /// `physical_plan` into `span` when dropped. + pub(crate) fn new( + inner: SendableRecordBatchStream, + span: Option, + physical_plan: Arc, + ) -> Self { + Self { + inner, + span_recorder: SpanRecorder::new(span), + physical_plan, + } + } +} + +impl RecordBatchStream for TracedStream { + fn schema(&self) -> arrow::datatypes::SchemaRef { + self.inner.schema() + } +} + +impl futures::Stream for TracedStream { + type Item = arrow::error::Result; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + self.inner.poll_next_unpin(cx) + } +} + +impl Drop for TracedStream { + fn drop(&mut self) { + if let Some(span) = self.span_recorder.span() { + let default_end_time = Utc::now(); + send_metrics_to_tracing(default_end_time, span, self.physical_plan.as_ref()); + } + } +} /// This function translates data in DataFusion `MetricSets` into IOx /// [`Span`]s. It records a snapshot of the current state of the @@ -26,15 +78,7 @@ use trace::span::Span; /// 1. If the ExecutionPlan had no metrics /// 2. The total number of rows produced by the ExecutionPlan (if available) /// 3. The elapsed compute time taken by the ExecutionPlan -pub(crate) fn send_metrics_to_tracing(parent_span: &Span, physical_plan: &dyn ExecutionPlan) { - // The parent span may be open, but since the physical_plan is - // assumed to be fully collected, using `now()` is a conservative - // estimate of the end time - let default_end_time = Utc::now(); - send_metrics_to_tracing_inner(default_end_time, parent_span, physical_plan) -} - -fn send_metrics_to_tracing_inner( +fn send_metrics_to_tracing( default_end_time: DateTime, parent_span: &Span, physical_plan: &dyn ExecutionPlan, @@ -101,7 +145,7 @@ fn send_metrics_to_tracing_inner( // recurse for child in physical_plan.children() { - send_metrics_to_tracing_inner(span_end, &span, child.as_ref()) + send_metrics_to_tracing(span_end, &span, child.as_ref()) } span.export() @@ -173,17 +217,9 @@ mod tests { Metric, }; - use std::{ - num::{NonZeroU128, NonZeroU64}, - sync::Arc, - time::Duration, - }; + use std::{sync::Arc, time::Duration}; - use trace::{ - ctx::{SpanContext, SpanId, TraceId}, - span::{MetaValue, SpanStatus}, - RingBufferTraceCollector, - }; + use trace::{ctx::SpanContext, span::MetaValue, RingBufferTraceCollector}; use super::*; @@ -193,7 +229,7 @@ mod tests { let exec = TestExec::new(name, Default::default()); let traces = TraceBuilder::new(); - send_metrics_to_tracing(&traces.make_span(), &exec); + send_metrics_to_tracing(Utc::now(), &traces.make_span(), &exec); let spans = traces.spans(); assert_eq!(spans.len(), 1); @@ -224,7 +260,7 @@ mod tests { exec.new_child("child4", make_time_metricset(None, None)); let traces = TraceBuilder::new(); - send_metrics_to_tracing_inner(ts5, &traces.make_span(), &exec); + send_metrics_to_tracing(ts5, &traces.make_span(), &exec); let spans = traces.spans(); println!("Spans: \n\n{:#?}", spans); @@ -250,7 +286,7 @@ mod tests { exec.metrics = None; let traces = TraceBuilder::new(); - send_metrics_to_tracing(&traces.make_span(), &exec); + send_metrics_to_tracing(Utc::now(), &traces.make_span(), &exec); let spans = traces.spans(); assert_eq!(spans.len(), 1); @@ -274,7 +310,7 @@ mod tests { add_elapsed_compute(exec.metrics_mut(), 2000, 2); let traces = TraceBuilder::new(); - 
send_metrics_to_tracing(&traces.make_span(), &exec); + send_metrics_to_tracing(Utc::now(), &traces.make_span(), &exec); // aggregated metrics should be reported let spans = traces.spans(); @@ -358,23 +394,7 @@ mod tests { // create a new span connected to the collector fn make_span(&self) -> Span { - let collector = Arc::clone(&self.collector); - - // lifted from make_span in trace/src/span.rs - Span { - name: "foo".into(), - ctx: SpanContext { - trace_id: TraceId(NonZeroU128::new(23948923).unwrap()), - parent_span_id: None, - span_id: SpanId(NonZeroU64::new(3498394).unwrap()), - collector: Some(collector), - }, - start: None, - end: None, - status: SpanStatus::Unknown, - metadata: Default::default(), - events: vec![], - } + SpanContext::new(Arc::clone(&self.collector) as _).child("foo") } /// return all collected spans diff --git a/query/src/frontend/influxrpc.rs b/query/src/frontend/influxrpc.rs index 689c371cb1..e5367ae81d 100644 --- a/query/src/frontend/influxrpc.rs +++ b/query/src/frontend/influxrpc.rs @@ -524,6 +524,8 @@ impl InfluxRpcPlanner { /// The data is sorted on (tag_col1, tag_col2, ...) so that all /// rows for a particular series (groups where all tags are the /// same) occur together in the plan + // NGA todo: may need to add delete predicate here to eliminate deleted data at read time + // https://github.com/influxdata/influxdb_iox/issues/2548 pub fn read_filter(&self, database: &D, predicate: Predicate) -> Result where D: QueryDatabase + 'static, diff --git a/query/src/lib.rs b/query/src/lib.rs index 9352a9a0e1..f1d6d4e8e1 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -18,7 +18,7 @@ use internal_types::{ schema::{sort::SortKey, Schema, TIME_COLUMN_NAME}, selection::Selection, }; -use observability_deps::tracing::trace; +use observability_deps::tracing::{debug, trace}; use predicate::predicate::{Predicate, PredicateMatch}; use hashbrown::HashMap; @@ -46,7 +46,7 @@ pub trait QueryChunkMeta: Sized { fn schema(&self) -> Arc; // return a reference to delete predicates of the chunk - fn delete_predicates(&self) -> Arc>; + fn delete_predicates(&self) -> &[Arc]; } /// A `Database` is the main trait implemented by the IOx subsystems @@ -137,6 +137,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync { &self, predicate: &Predicate, selection: Selection<'_>, + delete_predicates: &[Arc], ) -> Result; /// Returns true if data of this chunk is sorted @@ -165,8 +166,10 @@ where self.as_ref().schema() } - fn delete_predicates(&self) -> Arc> { - self.as_ref().delete_predicates() + fn delete_predicates(&self) -> &[Arc] { + let pred = self.as_ref().delete_predicates(); + debug!(?pred, "Delete predicate in QueryChunkMeta"); + pred } } diff --git a/query/src/provider/physical.rs b/query/src/provider/physical.rs index d76bd7217f..9f4bb0ce5a 100644 --- a/query/src/provider/physical.rs +++ b/query/src/provider/physical.rs @@ -116,14 +116,18 @@ impl ExecutionPlan for IOxReadFilterNode { let selection_cols = restrict_selection(selection_cols, &chunk_table_schema); let selection = Selection::Some(&selection_cols); - let stream = chunk.read_filter(&self.predicate, selection).map_err(|e| { - DataFusionError::Execution(format!( - "Error creating scan for table {} chunk {}: {}", - self.table_name, - chunk.id(), - e - )) - })?; + let del_preds = chunk.delete_predicates(); + + let stream = chunk + .read_filter(&self.predicate, selection, del_preds) + .map_err(|e| { + DataFusionError::Execution(format!( + "Error creating scan for table {} chunk {}: {}", + self.table_name, + 
chunk.id(), + e + )) + })?; // all CPU time is now done, pass in baseline metrics to adapter timer.done(); diff --git a/query/src/test.rs b/query/src/test.rs index 7f404ffe7a..d7c2661852 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -25,6 +25,7 @@ use internal_types::{ schema::{builder::SchemaBuilder, merge::SchemaMerger, InfluxColumnType, Schema}, selection::Selection, }; +use observability_deps::tracing::debug; use parking_lot::Mutex; use snafu::Snafu; use std::num::NonZeroU64; @@ -173,6 +174,9 @@ pub struct TestChunk { /// Return value for apply_predicate, if desired predicate_match: Option, + /// Copy of delete predicates passed + delete_predicates: Vec>, + /// Order of this chunk relative to other overlapping chunks. order: ChunkOrder, } @@ -248,6 +252,7 @@ impl TestChunk { table_data: Default::default(), saved_error: Default::default(), predicate_match: Default::default(), + delete_predicates: Default::default(), order: ChunkOrder::new(0), } } @@ -819,6 +824,7 @@ impl QueryChunk for TestChunk { &self, predicate: &Predicate, _selection: Selection<'_>, + _delete_predicates: &[Arc], ) -> Result { self.check_error()?; @@ -908,12 +914,11 @@ impl QueryChunkMeta for TestChunk { } // return a reference to delete predicates of the chunk - fn delete_predicates(&self) -> Arc> { - // Since this is the test chunk and its focus is not (yet) on - // deletion, return an empty delete predicate now which means nothing is deleted - // from this test chunk - let pred: Vec = vec![]; - Arc::new(pred) + fn delete_predicates(&self) -> &[Arc] { + let pred = &self.delete_predicates; + debug!(?pred, "Delete predicate in Test Chunk"); + + pred } } @@ -923,8 +928,9 @@ pub async fn raw_data(chunks: &[Arc]) -> Vec { for c in chunks { let pred = Predicate::default(); let selection = Selection::All; + let delete_predicates: Vec> = vec![]; let mut stream = c - .read_filter(&pred, selection) + .read_filter(&pred, selection, &delete_predicates) .expect("Error in read_filter"); while let Some(b) = stream.next().await { let b = b.expect("Error in stream"); diff --git a/query_tests/Cargo.toml b/query_tests/Cargo.toml index bdc1a6a427..4ac25e1f69 100644 --- a/query_tests/Cargo.toml +++ b/query_tests/Cargo.toml @@ -10,19 +10,19 @@ description = "Tests of the query engine against different database configuratio [dependencies] async-trait = "0.1" data_types = { path = "../data_types" } +datafusion = { path = "../datafusion" } once_cell = { version = "1.4.0", features = ["parking_lot"] } +predicate = { path = "../predicate" } query = { path = "../query" } server = { path = "../server" } [dev-dependencies] arrow = { version = "5.0", features = ["prettyprint"] } arrow_util = { path = "../arrow_util" } -datafusion = { path = "../datafusion" } data_types = { path = "../data_types" } internal_types = { path = "../internal_types" } metric = { path = "../metric" } object_store = { path = "../object_store" } -predicate = { path = "../predicate" } snafu = "0.6.3" tempfile = "3.1.0" test_helpers = { path = "../test_helpers" } diff --git a/query_tests/src/scenarios.rs b/query_tests/src/scenarios.rs index f6d100960f..4c50397d92 100644 --- a/query_tests/src/scenarios.rs +++ b/query_tests/src/scenarios.rs @@ -1,12 +1,13 @@ //! 
This module contains testing scenarios for Db - use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; +use datafusion::logical_plan::{col, lit}; use once_cell::sync::OnceCell; -#[allow(unused_imports, dead_code, unused_macros)] +use predicate::predicate::{Predicate, PredicateBuilder}; + use query::QueryChunk; use async_trait::async_trait; @@ -1198,3 +1199,650 @@ impl DbSetup for ChunkOrder { vec![scenario] } } + +#[derive(Debug)] +/// Setup for delete query test with one table and one chunk moved from MUB to RUB to OS +pub struct DeleteFromMubOneMeasurementOneChunk {} +#[async_trait] +impl DbSetup for DeleteFromMubOneMeasurementOneChunk { + async fn make(&self) -> Vec { + // The main purpose of these scenarios is the delete predicate is added in MUB and + // is moved with chunk moving + + // General setup for all scenarios + let partition_key = "1970-01-01T00"; + let table_name = "cpu"; + // chunk data + let lp_lines = vec!["cpu bar=1 10", "cpu bar=2 20"]; + // delete predicate + let i: f64 = 1.0; + let expr = col("bar").eq(lit(i)); + let pred = PredicateBuilder::new() + .table("cpu") + .timestamp_range(0, 15) + .add_expr(expr) + .build(); + + // delete happens when data in MUB + let _scenario_mub = make_delete_mub(lp_lines.clone(), pred.clone()).await; + + // delete happens when data in MUB then moved to RUB + let scenario_rub = + make_delete_mub_to_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await; + + // delete happens when data in MUB then moved to RUB and then persisted + let scenario_rub_os = make_delete_mub_to_rub_and_os( + lp_lines.clone(), + pred.clone(), + table_name, + partition_key, + ) + .await; + + // delete happens when data in MUB then moved to RUB, then persisted, and then RUB is unloaded + let scenario_os = + make_delete_mub_to_os(lp_lines.clone(), pred, table_name, partition_key).await; + + // return scenarios to run queries + // NGA todo: add scenario_mub in this after the deleted data is removed in the scan + // right now MUB does not push predicate down so the result is not correct yet + vec![scenario_rub, scenario_rub_os, scenario_os] + } +} + +#[derive(Debug)] +/// Setup for delete query test with one table and one chunk moved from RUB to OS +pub struct DeleteFromRubOneMeasurementOneChunk {} +#[async_trait] +impl DbSetup for DeleteFromRubOneMeasurementOneChunk { + async fn make(&self) -> Vec { + // The main purpose of these scenarios is the delete predicate is added in RUB + // and is moved with chunk moving + + // General setup for all scenarios + let partition_key = "1970-01-01T00"; + let table_name = "cpu"; + // chunk data + let lp_lines = vec!["cpu bar=1 10", "cpu bar=2 20"]; + // delete predicate + let i: f64 = 1.0; + let expr = col("bar").eq(lit(i)); + let pred = PredicateBuilder::new() + .table("cpu") + .timestamp_range(0, 15) + .add_expr(expr) + .build(); + + // delete happens to data in RUB + let scenario_rub = + make_delete_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await; + + // delete happens to data in RUB then persisted + let scenario_rub_os = + make_delete_rub_to_os(lp_lines.clone(), pred.clone(), table_name, partition_key).await; + + // delete happens to data in RUB then persisted then RUB unloaded + let scenario_os = + make_delete_rub_to_os_and_unload_rub(lp_lines.clone(), pred, table_name, partition_key) + .await; + + // return scenarios to run queries + vec![scenario_rub, scenario_rub_os, scenario_os] + } +} + +#[derive(Debug)] +/// Setup for delete query test with one table 
and one chunk in both RUB and OS +pub struct DeleteFromOsOneMeasurementOneChunk {} +#[async_trait] +impl DbSetup for DeleteFromOsOneMeasurementOneChunk { + async fn make(&self) -> Vec { + // The main purpose of these scenarios is the delete predicate is added to persisted chunks + + // General setup for all scenarios + let partition_key = "1970-01-01T00"; + let table_name = "cpu"; + // chunk data + let lp_lines = vec!["cpu bar=1 10", "cpu bar=2 20"]; + // delete predicate + let i: f64 = 1.0; + let expr = col("bar").eq(lit(i)); + let pred = PredicateBuilder::new() + .table("cpu") + .timestamp_range(0, 15) + .add_expr(expr) + .build(); + + // delete happens after data is persisted but still in RUB + let scenario_rub_os = + make_delete_os_with_rub(lp_lines.clone(), pred.clone(), table_name, partition_key) + .await; + + // delete happens after data is persisted but still in RUB and then unload RUB + let _scenario_rub_os_unload_rub = make_delete_os_with_rub_then_unload_rub( + lp_lines.clone(), + pred.clone(), + table_name, + partition_key, + ) + .await; + + // delete happens after data is persisted and RUB is unloaded + let _scenario_os = make_delete_os(lp_lines.clone(), pred, table_name, partition_key).await; + + // return scenarios to run queries + //vec![scenario_rub_os, scenario_rub_os_unload_rub, scenario_os] + // NGA todo: turn the last 2 scenarios on when #2518 and #2550 are done + vec![scenario_rub_os] + } +} + +#[derive(Debug)] +/// Setup for multi-expression delete query test with one table and one chunk moved from MUB to RUB to OS +pub struct DeleteMultiExprsFromMubOneMeasurementOneChunk {} +#[async_trait] +impl DbSetup for DeleteMultiExprsFromMubOneMeasurementOneChunk { + async fn make(&self) -> Vec { + // The main purpose of these scenarios is the multi-expression delete predicate is added in MUB and + // is moved with chunk moving + + // General setup for all scenarios + let partition_key = "1970-01-01T00"; + let table_name = "cpu"; + // chunk data + let lp_lines = vec![ + "cpu,foo=me bar=1 10", + "cpu,foo=you bar=2 20", + "cpu,foo=me bar=1 30", + "cpu,foo=me bar=1 40", + ]; + // delete predicate + let i: f64 = 1.0; + let expr1 = col("bar").eq(lit(i)); + let expr2 = col("foo").eq(lit("me")); + let pred = PredicateBuilder::new() + .table("cpu") + .timestamp_range(0, 32) + .add_expr(expr1) + .add_expr(expr2) + .build(); + + // delete happens when data in MUB + let _scenario_mub = make_delete_mub(lp_lines.clone(), pred.clone()).await; + + // delete happens when data in MUB then moved to RUB + let scenario_rub = + make_delete_mub_to_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await; + + // delete happens when data in MUB then moved to RUB and then persisted + let scenario_rub_os = make_delete_mub_to_rub_and_os( + lp_lines.clone(), + pred.clone(), + table_name, + partition_key, + ) + .await; + + // delete happens when data in MUB then moved to RUB, then persisted, and then RUB is unloaded + let scenario_os = + make_delete_mub_to_os(lp_lines.clone(), pred, table_name, partition_key).await; + + // return scenarios to run queries + // NGA todo: add scenario_mub in this after the deleted data is removed in the scan + // right now MUB does not push predicate down so the result is not correct yet + vec![scenario_rub, scenario_rub_os, scenario_os] + } +} + +#[derive(Debug)] +/// Setup for multi-expression delete query test with one table and one chunk moved from MUB to RUB to OS +pub struct DeleteMultiExprsFromRubOneMeasurementOneChunk {} +#[async_trait] +impl DbSetup for 
DeleteMultiExprsFromRubOneMeasurementOneChunk { + async fn make(&self) -> Vec { + // The main purpose of these scenarios is the multi-expression delete predicate is added in MUB and + // is moved with chunk moving + + // General setup for all scenarios + let partition_key = "1970-01-01T00"; + let table_name = "cpu"; + // chunk data + let lp_lines = vec![ + "cpu,foo=me bar=1 10", + "cpu,foo=you bar=2 20", + "cpu,foo=me bar=1 30", + "cpu,foo=me bar=1 40", + ]; + // delete predicate + let i: f64 = 1.0; + let expr1 = col("bar").eq(lit(i)); + let expr2 = col("foo").eq(lit("me")); + let pred = PredicateBuilder::new() + .table("cpu") + .timestamp_range(0, 32) + .add_expr(expr1) + .add_expr(expr2) + .build(); + + // delete happens to data in RUB + let scenario_rub = + make_delete_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await; + + // delete happens to data in RUB then persisted + let scenario_rub_os = + make_delete_rub_to_os(lp_lines.clone(), pred.clone(), table_name, partition_key).await; + + // delete happens to data in RUB then persisted then RUB unloaded + let scenario_os = + make_delete_rub_to_os_and_unload_rub(lp_lines.clone(), pred, table_name, partition_key) + .await; + + // return scenarios to run queries + vec![scenario_rub, scenario_rub_os, scenario_os] + } +} + +#[derive(Debug)] +/// Setup for multi-expression delete query test with one table and one chunk moved from MUB to RUB to OS +pub struct DeleteMultiExprsFromOsOneMeasurementOneChunk {} +#[async_trait] +impl DbSetup for DeleteMultiExprsFromOsOneMeasurementOneChunk { + async fn make(&self) -> Vec { + // The main purpose of these scenarios is the multi-expression delete predicate is added in MUB and + // is moved with chunk moving + + // General setup for all scenarios + let partition_key = "1970-01-01T00"; + let table_name = "cpu"; + // chunk data + let lp_lines = vec![ + "cpu,foo=me bar=1 10", + "cpu,foo=you bar=2 20", + "cpu,foo=me bar=1 30", + "cpu,foo=me bar=1 40", + ]; + // delete predicate + let i: f64 = 1.0; + let expr1 = col("bar").eq(lit(i)); + let expr2 = col("foo").eq(lit("me")); + let pred = PredicateBuilder::new() + .table("cpu") + .timestamp_range(0, 32) + .add_expr(expr1) + .add_expr(expr2) + .build(); + + // delete happens after data is persisted but still in RUB + let scenario_rub_os = + make_delete_os_with_rub(lp_lines.clone(), pred.clone(), table_name, partition_key) + .await; + + // delete happens after data is persisted but still in RUB and then unload RUB + let _scenario_rub_os_unload_rub = make_delete_os_with_rub_then_unload_rub( + lp_lines.clone(), + pred.clone(), + table_name, + partition_key, + ) + .await; + + // delete happens after data is persisted and RUB is unloaded + let _scenario_os = make_delete_os(lp_lines.clone(), pred, table_name, partition_key).await; + + // return scenarios to run queries + //vec![scenario_rub_os, scenario_rub_os_unload_rub, scenario_os] + // NGA todo: turn the last 2 scenarios on when #2518band #2550 are done + vec![scenario_rub_os] + } +} + +// NGA todo: Add these scenarios after deleted data is eliminated from scan +// 1. Many deletes, each has one or/and multi expressions +// 2. Many different-type chunks when a delete happens +// 3. 
Combination of above + +async fn make_delete_mub(lp_lines: Vec<&str>, pred: Predicate) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // One open MUB, no RUB, no OS + assert_eq!(count_mutable_buffer_chunks(&db), 1); + assert_eq!(count_read_buffer_chunks(&db), 0); + assert_eq!(count_object_store_chunks(&db), 0); + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // Still one but frozen MUB, no RUB, no OS + assert_eq!(count_mutable_buffer_chunks(&db), 1); + assert_eq!(count_read_buffer_chunks(&db), 0); + assert_eq!(count_object_store_chunks(&db), 0); + + DbScenario { + scenario_name: "Deleted data in MUB".into(), + db, + } +} + +async fn make_delete_mub_to_rub( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // delete data in MUB + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // move MUB to RUB and the delete predicate will be automatically included in RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // No MUB, one RUB, no OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 1); + assert_eq!(count_object_store_chunks(&db), 0); + + DbScenario { + scenario_name: "Deleted data in RUB moved from MUB".into(), + db, + } +} + +async fn make_delete_mub_to_rub_and_os( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // delete data in MUB + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // move MUB to RUB and the delete predicate will be automatically included in RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + .unwrap(); + // No MUB, one RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 1); + assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in RUB and OS".into(), + db, + } +} + +async fn make_delete_mub_to_os( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // delete data in MUB + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // move MUB to RUB and the delete predicate will be automatically included in RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + .unwrap(); + // remove RUB + db.unload_read_buffer(table_name, partition_key, 1).unwrap(); + // No MUB, no RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 0); + 
assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in OS".into(), + db, + } +} + +async fn make_delete_rub( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // move MUB to RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // delete data in RUB + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // No MUB, one RUB, no OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 1); + assert_eq!(count_object_store_chunks(&db), 0); + + DbScenario { + scenario_name: "Deleted data in RUB".into(), + db, + } +} + +async fn make_delete_rub_to_os( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // move MUB to RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // delete data in RUB + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + .unwrap(); + // No MUB, one RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 1); + assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in RUB and then persisted to OS".into(), + db, + } +} + +async fn make_delete_rub_to_os_and_unload_rub( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // move MUB to RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // delete data in RUB + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + .unwrap(); + // remove RUB + db.unload_read_buffer(table_name, partition_key, 1).unwrap(); + // No MUB, no RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 0); + assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in RUB then persisted to OS then RUB unloaded".into(), + db, + } +} + +async fn make_delete_os_with_rub( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // move MUB to RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + 
.unwrap(); + // delete data after persisted but RUB still available + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // No MUB, one RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 1); + assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in OS with RUB".into(), + db, + } +} + +async fn make_delete_os_with_rub_then_unload_rub( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // move MUB to RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + .unwrap(); + // delete data after persisted but RUB still available + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // remove RUB + db.unload_read_buffer(table_name, partition_key, 1).unwrap(); + // No MUB, no RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 0); + assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in OS only but the delete happens before RUB is unloaded" + .into(), + db, + } +} + +async fn make_delete_os( + lp_lines: Vec<&str>, + pred: Predicate, + table_name: &str, + partition_key: &str, +) -> DbScenario { + let db = make_db().await.db; + // create an open MUB + write_lp(&db, &lp_lines.join("\n")).await; + // move MUB to RUB + db.rollover_partition(table_name, partition_key) + .await + .unwrap(); + db.move_chunk_to_read_buffer(table_name, partition_key, 0) + .await + .unwrap(); + // persist RUB and the delete predicate will be automatically included in the OS chunk + db.persist_partition( + table_name, + partition_key, + Instant::now() + Duration::from_secs(1), + ) + .await + .unwrap(); + // remove RUB + db.unload_read_buffer(table_name, partition_key, 1).unwrap(); + // delete data after persisted but RUB still available + db.delete("cpu", Arc::new(pred)).await.unwrap(); + // No MUB, no RUB, one OS + assert_eq!(count_mutable_buffer_chunks(&db), 0); + assert_eq!(count_read_buffer_chunks(&db), 0); + assert_eq!(count_object_store_chunks(&db), 1); + + DbScenario { + scenario_name: "Deleted data in OS and the delete happens after RUB is unloaded".into(), + db, + } +} diff --git a/query_tests/src/sql.rs b/query_tests/src/sql.rs index 325de8d0c1..7c49f0408a 100644 --- a/query_tests/src/sql.rs +++ b/query_tests/src/sql.rs @@ -808,3 +808,68 @@ async fn sql_select_all_different_tags_chunks() { &expected ); } + +#[tokio::test] +async fn sql_select_with_deleted_data_from_one_expr() { + let expected = vec![ + "+-----+--------------------------------+", + "| bar | time |", + "+-----+--------------------------------+", + "| 2 | 1970-01-01T00:00:00.000000020Z |", + "+-----+--------------------------------+", + ]; + + // Data deleted when it is in MUB, and then moved to RUB and OS + run_sql_test_case!( + DeleteFromMubOneMeasurementOneChunk {}, + "SELECT * from cpu", + &expected + ); + + // Data deleted when it is in RUB, and then moved OS + run_sql_test_case!( + DeleteFromRubOneMeasurementOneChunk {}, + "SELECT * from cpu", + &expected + ); + + // Data deleted when it is in OS + 
run_sql_test_case!( + DeleteFromOsOneMeasurementOneChunk {}, + "SELECT * from cpu", + &expected + ); +} + +#[tokio::test] +async fn sql_select_with_deleted_data_from_multi_exprs() { + let expected = vec![ + "+-----+-----+--------------------------------+", + "| bar | foo | time |", + "+-----+-----+--------------------------------+", + "| 1 | me | 1970-01-01T00:00:00.000000040Z |", + "| 2 | you | 1970-01-01T00:00:00.000000020Z |", + "+-----+-----+--------------------------------+", + ]; + + // Data deleted when it is in MUB, and then moved to RUB and OS + run_sql_test_case!( + DeleteMultiExprsFromMubOneMeasurementOneChunk {}, + "SELECT * from cpu", + &expected + ); + + // Data deleted when it is in RUB, and then moved OS + run_sql_test_case!( + DeleteMultiExprsFromRubOneMeasurementOneChunk {}, + "SELECT * from cpu", + &expected + ); + + // Data deleted when it is in OS + run_sql_test_case!( + DeleteMultiExprsFromOsOneMeasurementOneChunk {}, + "SELECT * from cpu", + &expected + ); +} diff --git a/read_buffer/src/column.rs b/read_buffer/src/column.rs index 6a565e0031..85349b917c 100644 --- a/read_buffer/src/column.rs +++ b/read_buffer/src/column.rs @@ -2067,6 +2067,40 @@ mod test { assert!(matches!(row_ids, RowIDsOption::All(_))); } + #[test] + fn row_ids_filter_float_trimmed() { + let input = &[100.0, 200.0, 300.0, 2.0, 22.0, 30.0]; + + let col = Column::from(&input[..]); + let mut row_ids = col.row_ids_filter( + &cmp::Operator::Equal, + &Value::from(200.0), + RowIDs::new_bitmap(), + ); + assert_eq!(row_ids.unwrap().to_vec(), vec![1]); + + row_ids = col.row_ids_filter( + &cmp::Operator::LT, + &Value::from(64000.0), + RowIDs::new_bitmap(), + ); + assert!(matches!(row_ids, RowIDsOption::All(_))); + + row_ids = col.row_ids_filter( + &cmp::Operator::GTE, + &Value::from(-1_000_000.0), + RowIDs::new_bitmap(), + ); + assert!(matches!(row_ids, RowIDsOption::All(_))); + + row_ids = col.row_ids_filter( + &cmp::Operator::NotEqual, + &Value::from(1_000_000.3), + RowIDs::new_bitmap(), + ); + assert!(matches!(row_ids, RowIDsOption::All(_))); + } + #[test] fn row_ids_range() { let input = &[100_i64, 200, 300, 2, 200, 22, 30]; diff --git a/read_buffer/src/column/encoding/scalar/fixed.rs b/read_buffer/src/column/encoding/scalar/fixed.rs index 6cf6b0b63e..f8d8193a0b 100644 --- a/read_buffer/src/column/encoding/scalar/fixed.rs +++ b/read_buffer/src/column/encoding/scalar/fixed.rs @@ -7,6 +7,7 @@ //! allow results to be emitted as some logical type `L` via a transformation //! `T`. use either::Either; +use observability_deps::tracing::debug; use std::cmp::Ordering; use std::fmt::Debug; use std::marker::PhantomData; @@ -373,14 +374,36 @@ where Some(self.transcoder.decode(max)) } - fn row_ids_filter(&self, value: L, op: &cmp::Operator, dst: RowIDs) -> RowIDs { - let value = self.transcoder.encode(value); + fn row_ids_filter(&self, value: L, op: &cmp::Operator, mut dst: RowIDs) -> RowIDs { + debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter"); + let (value, op) = match self.transcoder.encode_comparable(value, *op) { + Some((value, op)) => (value, op), + None => { + // The value is not encodable. This can happen with the == or != + // operator. In the case of ==, no values in the encoding could + // possible satisfy the expression. In the case of !=, all + // values would satisfy the expression. 
+ dst.clear(); + return match op { + cmp::Operator::Equal => dst, + cmp::Operator::NotEqual => { + dst.add_range(0, self.num_rows()); + dst + } + op => panic!("operator {:?} not expected", op), + }; + } + }; + debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter encoded expr"); + + // N.B, the transcoder may have changed the operator depending on the + // value provided. match op { cmp::Operator::GT => self.row_ids_cmp_order(&value, PartialOrd::gt, dst), cmp::Operator::GTE => self.row_ids_cmp_order(&value, PartialOrd::ge, dst), cmp::Operator::LT => self.row_ids_cmp_order(&value, PartialOrd::lt, dst), cmp::Operator::LTE => self.row_ids_cmp_order(&value, PartialOrd::le, dst), - _ => self.row_ids_equal(&value, op, dst), + _ => self.row_ids_equal(&value, &op, dst), } } @@ -390,8 +413,16 @@ where right: (L, &cmp::Operator), dst: RowIDs, ) -> RowIDs { - let left = (self.transcoder.encode(left.0), left.1); - let right = (self.transcoder.encode(right.0), right.1); + debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range"); + let left = self + .transcoder + .encode_comparable(left.0, *left.1) + .expect("transcoder must return Some variant"); + let right = self + .transcoder + .encode_comparable(right.0, *right.1) + .expect("transcoder must return Some variant"); + debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range encoded expr"); match (&left.1, &right.1) { (cmp::Operator::GT, cmp::Operator::LT) @@ -402,8 +433,8 @@ where | (cmp::Operator::LT, cmp::Operator::GTE) | (cmp::Operator::LTE, cmp::Operator::GT) | (cmp::Operator::LTE, cmp::Operator::GTE) => self.row_ids_cmp_range_order( - (&left.0, Self::ord_from_op(left.1)), - (&right.0, Self::ord_from_op(right.1)), + (&left.0, Self::ord_from_op(&left.1)), + (&right.0, Self::ord_from_op(&right.1)), dst, ), diff --git a/read_buffer/src/column/encoding/scalar/fixed_null.rs b/read_buffer/src/column/encoding/scalar/fixed_null.rs index 7168a28d75..ed7a2e73bd 100644 --- a/read_buffer/src/column/encoding/scalar/fixed_null.rs +++ b/read_buffer/src/column/encoding/scalar/fixed_null.rs @@ -3,6 +3,7 @@ //! This encoding stores a column of fixed-width numerical values backed by an //! an Arrow array, allowing for storage of NULL values. use either::Either; +use observability_deps::tracing::debug; use std::fmt::Debug; use std::marker::PhantomData; use std::mem::size_of; @@ -227,6 +228,43 @@ where } dst } + + // Identify all row IDs that contain a non-null value. + fn all_non_null_row_ids(&self, mut dst: RowIDs) -> RowIDs { + dst.clear(); + + if self.null_count() == 0 { + dst.add_range(0, self.num_rows()); + return dst; + } + + let mut found = false; + let mut count = 0; + for i in 0..self.num_rows() as usize { + if self.arr.is_null(i) { + if found { + // add the non-null range + let (min, max) = (i as u32 - count, i as u32); + dst.add_range(min, max); + found = false; + count = 0; + } + continue; + } + + if !found { + found = true; + } + count += 1; + } + + // add any remaining range. 
+ if found { + let (min, max) = (self.num_rows() - count, self.num_rows()); + dst.add_range(min, max); + } + dst + } } impl ScalarEncoding for FixedNull @@ -411,14 +449,34 @@ where max.map(|v| self.transcoder.decode(v)) } - fn row_ids_filter(&self, value: L, op: &cmp::Operator, dst: RowIDs) -> RowIDs { - let value = self.transcoder.encode(value); + fn row_ids_filter(&self, value: L, op: &cmp::Operator, mut dst: RowIDs) -> RowIDs { + debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter"); + let (value, op) = match self.transcoder.encode_comparable(value, *op) { + Some((value, op)) => (value, op), + None => { + // The value is not encodable. This can happen with the == or != + // operator. In the case of ==, no values in the encoding could + // possible satisfy the expression. In the case of !=, all + // non-null values would satisfy the expression. + dst.clear(); + return match op { + cmp::Operator::Equal => dst, + cmp::Operator::NotEqual => { + dst = self.all_non_null_row_ids(dst); + dst + } + op => panic!("operator {:?} not expected", op), + }; + } + }; + debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter encoded expr"); + match op { - cmp::Operator::GT => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst), - cmp::Operator::GTE => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst), - cmp::Operator::LT => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst), - cmp::Operator::LTE => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst), - _ => self.row_ids_equal(value, op, dst), + cmp::Operator::GT => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst), + cmp::Operator::GTE => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst), + cmp::Operator::LT => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst), + cmp::Operator::LTE => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst), + _ => self.row_ids_equal(value, &op, dst), } } @@ -428,8 +486,16 @@ where right: (L, &cmp::Operator), dst: RowIDs, ) -> RowIDs { - let left = (self.transcoder.encode(left.0), left.1); - let right = (self.transcoder.encode(right.0), right.1); + debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range"); + let left = self + .transcoder + .encode_comparable(left.0, *left.1) + .expect("transcoder must return Some variant"); + let right = self + .transcoder + .encode_comparable(right.0, *right.1) + .expect("transcoder must return Some variant"); + debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range encoded expr"); match (left.1, right.1) { (cmp::Operator::GT, cmp::Operator::LT) @@ -440,8 +506,8 @@ where | (cmp::Operator::LT, cmp::Operator::GTE) | (cmp::Operator::LTE, cmp::Operator::GT) | (cmp::Operator::LTE, cmp::Operator::GTE) => self.row_ids_cmp_range_order( - (left.0, Self::ord_from_op(left.1)), - (right.0, Self::ord_from_op(right.1)), + (left.0, Self::ord_from_op(&left.1)), + (right.0, Self::ord_from_op(&right.1)), dst, ), @@ -789,6 +855,38 @@ mod test { assert_eq!(row_ids.to_vec(), vec![1, 2, 4]); } + #[test] + fn row_ids_filter_range_all_non_null() { + let cases = vec![ + (vec![None], vec![]), + (vec![None, None, None], vec![]), + (vec![Some(22)], vec![0_u32]), + (vec![Some(22), Some(3), Some(3)], vec![0, 1, 2]), + (vec![Some(22), None], vec![0]), + ( + vec![Some(22), None, Some(1), None, Some(3), None], + vec![0, 2, 4], + ), + (vec![Some(22), None, None, Some(33)], vec![0, 3]), + (vec![None, None, Some(33)], vec![2]), + ( + vec![None, None, Some(33), None, None, 
Some(3), Some(3), Some(1)], + vec![2, 5, 6, 7], + ), + ]; + + for (i, (data, exp)) in cases.into_iter().enumerate() { + let (v, _) = new_encoding(data); + let dst = RowIDs::new_vector(); + assert_eq!( + v.all_non_null_row_ids(dst).unwrap_vector(), + &exp, + "example {:?} failed", + i + ); + } + } + #[test] fn has_non_null_value() { let (v, _) = new_encoding(vec![None, None]); diff --git a/read_buffer/src/column/encoding/scalar/rle.rs b/read_buffer/src/column/encoding/scalar/rle.rs index fc8c1b3bc2..80d81e777f 100644 --- a/read_buffer/src/column/encoding/scalar/rle.rs +++ b/read_buffer/src/column/encoding/scalar/rle.rs @@ -1,4 +1,5 @@ use either::Either; +use observability_deps::tracing::debug; use crate::column::cmp; use crate::column::RowIDs; @@ -288,18 +289,35 @@ where let left_cmp_result = next.partial_cmp(left.0); let right_cmp_result = next.partial_cmp(right.0); - // TODO(edd): eurgh I still don't understand how I got this to - // be correct. Need to revisit to make it simpler. let left_result_ok = - !(left_cmp_result == Some(left_op.0) || left_cmp_result == Some(left_op.1)); + left_cmp_result == Some(left_op.0) || left_cmp_result == Some(left_op.1); let right_result_ok = - !(right_cmp_result == Some(right_op.0) || right_cmp_result == Some(right_op.1)); + right_cmp_result == Some(right_op.0) || right_cmp_result == Some(right_op.1); - if !(left_result_ok || right_result_ok) { + if left_result_ok && right_result_ok { dst.add_range(curr_logical_row_id, curr_logical_row_id + rl); } - curr_logical_row_id += rl; } + curr_logical_row_id += rl; + } + + dst + } + + fn all_non_null_row_ids(&self, mut dst: RowIDs) -> RowIDs { + dst.clear(); + + if self.null_count() == 0 { + dst.add_range(0, self.num_rows()); + return dst; + } + + let mut curr_logical_row_id = 0; + for (rl, next) in &self.run_lengths { + if next.is_some() { + dst.add_range(curr_logical_row_id, curr_logical_row_id + rl); + } + curr_logical_row_id += rl; } dst @@ -375,15 +393,34 @@ where self.num_rows } - fn row_ids_filter(&self, value: L, op: &cmp::Operator, dst: RowIDs) -> RowIDs { - let value = self.transcoder.encode(value); + fn row_ids_filter(&self, value: L, op: &cmp::Operator, mut dst: RowIDs) -> RowIDs { + debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter"); + let (value, op) = match self.transcoder.encode_comparable(value, *op) { + Some((value, op)) => (value, op), + None => { + // The value is not encodable. This can happen with the == or != + // operator. In the case of ==, no values in the encoding could + // possible satisfy the expression. In the case of !=, all + // non-null values would satisfy the expression. 
+ dst.clear(); + return match op { + cmp::Operator::Equal => dst, + cmp::Operator::NotEqual => { + dst = self.all_non_null_row_ids(dst); + dst + } + op => panic!("operator {:?} not expected", op), + }; + } + }; + debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter encoded expr"); + match op { - cmp::Operator::Equal | cmp::Operator::NotEqual => { - self.row_ids_cmp_equal(value, op, dst) - } - cmp::Operator::LT | cmp::Operator::LTE | cmp::Operator::GT | cmp::Operator::GTE => { - self.row_ids_cmp(value, op, dst) - } + cmp::Operator::GT => self.row_ids_cmp(value, &op, dst), + cmp::Operator::GTE => self.row_ids_cmp(value, &op, dst), + cmp::Operator::LT => self.row_ids_cmp(value, &op, dst), + cmp::Operator::LTE => self.row_ids_cmp(value, &op, dst), + _ => self.row_ids_cmp_equal(value, &op, dst), } } @@ -393,8 +430,16 @@ where right: (L, &cmp::Operator), dst: RowIDs, ) -> RowIDs { - let left = (self.transcoder.encode(left.0), left.1); - let right = (self.transcoder.encode(right.0), right.1); + debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range"); + let left = self + .transcoder + .encode_comparable(left.0, *left.1) + .expect("transcoder must return Some variant"); + let right = self + .transcoder + .encode_comparable(right.0, *right.1) + .expect("transcoder must return Some variant"); + debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range encoded expr"); match (&left.1, &right.1) { (cmp::Operator::GT, cmp::Operator::LT) @@ -405,8 +450,8 @@ where | (cmp::Operator::LT, cmp::Operator::GTE) | (cmp::Operator::LTE, cmp::Operator::GT) | (cmp::Operator::LTE, cmp::Operator::GTE) => self.row_ids_cmp_range( - (&left.0, Self::ord_from_op(left.1)), - (&right.0, Self::ord_from_op(right.1)), + (&left.0, Self::ord_from_op(&left.1)), + (&right.0, Self::ord_from_op(&right.1)), dst, ), @@ -622,6 +667,16 @@ mod test { ) } + fn new_encoding_opt( + values: Vec>, + ) -> (RLE>, Arc) { + let mock = Arc::new(MockTranscoder::default()); + ( + RLE::new_from_iter_opt(values.into_iter(), Arc::clone(&mock)), + mock, + ) + } + #[test] fn new_from_iter() { let cases = vec![ @@ -981,6 +1036,38 @@ mod test { } } + #[test] + fn row_ids_filter_range_all_non_null() { + let cases = vec![ + (vec![None], vec![]), + (vec![None, None, None], vec![]), + (vec![Some(22)], vec![0_u32]), + (vec![Some(22), Some(3), Some(3)], vec![0, 1, 2]), + (vec![Some(22), None], vec![0]), + ( + vec![Some(22), None, Some(1), None, Some(3), None], + vec![0, 2, 4], + ), + (vec![Some(22), None, None, Some(33)], vec![0, 3]), + (vec![None, None, Some(33)], vec![2]), + ( + vec![None, None, Some(33), None, None, Some(3), Some(3), Some(1)], + vec![2, 5, 6, 7], + ), + ]; + + for (i, (data, exp)) in cases.into_iter().enumerate() { + let (v, _) = new_encoding_opt(data); + let dst = RowIDs::new_vector(); + assert_eq!( + v.all_non_null_row_ids(dst).unwrap_vector(), + &exp, + "example {:?} failed", + i + ); + } + } + #[test] fn row_ids_filter_range() { let (enc, transcoder) = new_encoding(vec![ @@ -1016,6 +1103,37 @@ mod test { assert_eq!(transcoder.encodings(), calls * 2); } + #[test] + fn row_ids_filter_range_nulls() { + let (enc, transcoder) = new_encoding_opt(vec![ + Some(100), + None, + None, + None, + Some(100), + Some(101), + Some(101), + ]); + + let cases = vec![ + ( + (100, &Operator::GTE), + (240, &Operator::LT), + vec![0, 4, 5, 6], + ), + ((100, &Operator::GT), (240, &Operator::LT), vec![5, 6]), + ((10, &Operator::LT), (-100, &Operator::GT), vec![]), + ((21, &Operator::GTE), (100, 
&Operator::LTE), vec![0, 4]), + ]; + + let calls = cases.len(); + for (left, right, exp) in cases { + let dst = enc.row_ids_filter_range(left, right, RowIDs::new_vector()); + assert_eq!(dst.unwrap_vector(), &exp); + } + assert_eq!(transcoder.encodings(), calls * 2); + } + #[test] fn estimate_rle_size() { let cases = vec![ diff --git a/read_buffer/src/column/encoding/scalar/transcoders.rs b/read_buffer/src/column/encoding/scalar/transcoders.rs index 2628fde983..da5db2c6b7 100644 --- a/read_buffer/src/column/encoding/scalar/transcoders.rs +++ b/read_buffer/src/column/encoding/scalar/transcoders.rs @@ -1,3 +1,4 @@ +use crate::column::cmp::Operator; use std::{ convert::TryFrom, fmt::{Debug, Display}, @@ -13,7 +14,17 @@ use std::{ // `P` is a physical type that is stored directly within an encoding, `L` is // a logical type callers expect to be returned. pub trait Transcoder: Debug + Display { + /// A function that encodes a logical value into a physical representation. fn encode(&self, _: L) -> P; + + /// A function that attempts to encode a logical value, within the context + /// of a comparison operator, into a physical representation. + /// + /// Implementation should return a suitable operator for the physical + /// representation, which may differ from the provided operator. + fn encode_comparable(&self, _: L, _: Operator) -> Option<(P, Operator)>; + + /// A function to decode a physical representation back into a logical value. fn decode(&self, _: P) -> L; } @@ -29,6 +40,10 @@ impl Transcoder for NoOpTranscoder { v } + fn encode_comparable(&self, v: T, op: Operator) -> Option<(T, Operator)> { + Some((v, op)) + } + fn decode(&self, v: T) -> T { v } @@ -56,13 +71,17 @@ pub struct ByteTrimmer {} impl Transcoder for ByteTrimmer where L: From

<P>,
-    P: TryFrom<L>,
+    P: TryFrom<L> + PartialEq + PartialOrd,
    <P as TryFrom<L>
>::Error: std::fmt::Debug, { fn encode(&self, v: L) -> P { P::try_from(v).unwrap() } + fn encode_comparable(&self, v: L, op: Operator) -> Option<(P, Operator)> { + P::try_from(v).ok().map(|p| (p, op)) + } + fn decode(&self, v: P) -> L { L::from(v) } @@ -91,6 +110,54 @@ macro_rules! make_float_trimmer { v as $type } + fn encode_comparable(&self, v: f64, op: Operator) -> Option<($type, Operator)> { + assert!(v <= <$type>::MAX as f64); + if v == ((v as $type) as f64) { + return Some((v as $type, op)); + } + + match op { + Operator::Equal => { + None // no encoded values will == v + } + Operator::NotEqual => { + None // all encoded values will != v + } + Operator::LT => { + // convert to next highest encodable value. For example + // given '< 23.2` return 24.0 encoded as the physical + // type. < 23.2 is logically equivalent to < 24.0 since + // there are no valid values in the domain (23.2, 24.0). + Some((v.ceil() as $type, op)) + } + Operator::LTE => { + // convert to next highest encodable value and change + // operator to <. + // For example given '<= 23.2` return 24.0 encoded as + // the physical type. <= 23.2 is logically equivalent + // to < 24.0 since there are no valid values in the + // domain [23.2, 24.0). + Some((v.ceil() as $type, Operator::LT)) + } + Operator::GT => { + // convert to next lowest encodable value. For example + // given '> 23.2` return 23.0 encoded as the physical + // type. > 23.2 is logically equivalent to > 23.0 since + // there are no valid values in the domain (23.0, 23.2]. + Some((v.floor() as $type, op)) + } + Operator::GTE => { + // convert to next lowest encodable value and change + // operator to >. + // For example given '>= 23.2` return 23.0 encoded as + // the physical type. >= 23.2 is logically equivalent + // to > 24.0 since there are no valid values in the + // domain [23.2, 24.0). + Some((v.floor() as $type, Operator::GT)) + } + } + } + fn decode(&self, v: $type) -> f64 { v.into() } @@ -119,7 +186,7 @@ impl Display for FloatByteTrimmer { // result. #[cfg(test)] -use std::{sync::atomic::AtomicUsize, sync::atomic::Ordering, sync::Arc}; +use std::{sync::atomic, sync::atomic::AtomicUsize, sync::Arc}; #[cfg(test)] /// A mock implementation of Transcoder that tracks calls to encode and decode. /// This is useful for testing encoder implementations. 
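A note on the FloatByteTrimmer comparison handling added above: when the predicate literal is not exactly representable in the trimmed integer type, the bound is rounded toward the operator's open side and `<=`/`>=` become strict comparisons, so the rewritten predicate selects exactly the same encoded rows. Below is a minimal standalone sketch of that rewrite with simplified, hypothetical names (it is not the crate's actual `Transcoder` trait or `FloatByteTrimmer` type):

// Simplified sketch of the operator rewrite performed for an i32-backed
// float-trimmed encoding.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Op { Eq, NotEq, Lt, Lte, Gt, Gte }

// Returns None when no encoded value can match (==) or when all non-null
// values match (!=); callers handle those two cases separately.
fn encode_comparable_f64_to_i32(v: f64, op: Op) -> Option<(i32, Op)> {
    if v == (v as i32) as f64 {
        // Exactly representable: keep the operator unchanged.
        return Some((v as i32, op));
    }
    match op {
        Op::Eq | Op::NotEq => None,
        // `< 23.2` is equivalent to `< 24` over integer-encoded values.
        Op::Lt => Some((v.ceil() as i32, Op::Lt)),
        // `<= 23.2` is equivalent to `< 24`.
        Op::Lte => Some((v.ceil() as i32, Op::Lt)),
        // `> 23.2` is equivalent to `> 23`.
        Op::Gt => Some((v.floor() as i32, Op::Gt)),
        // `>= 23.2` is equivalent to `> 23`.
        Op::Gte => Some((v.floor() as i32, Op::Gt)),
    }
}

fn main() {
    assert_eq!(encode_comparable_f64_to_i32(23.2, Op::Lte), Some((24, Op::Lt)));
    assert_eq!(encode_comparable_f64_to_i32(23.2, Op::Gte), Some((23, Op::Gt)));
    assert_eq!(encode_comparable_f64_to_i32(23.0, Op::Lte), Some((23, Op::Lte)));
    assert_eq!(encode_comparable_f64_to_i32(23.2, Op::Eq), None);
}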
@@ -127,6 +194,7 @@ use std::{sync::atomic::AtomicUsize, sync::atomic::Ordering, sync::Arc}; pub struct MockTranscoder { encoding_calls: AtomicUsize, decoding_calls: AtomicUsize, + partial_cmp_calls: AtomicUsize, } #[cfg(test)] @@ -135,6 +203,7 @@ impl Default for MockTranscoder { Self { encoding_calls: AtomicUsize::default(), decoding_calls: AtomicUsize::default(), + partial_cmp_calls: AtomicUsize::default(), } } } @@ -142,23 +211,28 @@ impl Default for MockTranscoder { #[cfg(test)] impl MockTranscoder { pub fn encodings(&self) -> usize { - self.encoding_calls.load(Ordering::Relaxed) + self.encoding_calls.load(atomic::Ordering::Relaxed) } pub fn decodings(&self) -> usize { - self.decoding_calls.load(Ordering::Relaxed) + self.decoding_calls.load(atomic::Ordering::Relaxed) } } #[cfg(test)] impl Transcoder for MockTranscoder { fn encode(&self, v: T) -> T { - self.encoding_calls.fetch_add(1, Ordering::Relaxed); + self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed); v } + fn encode_comparable(&self, v: T, op: Operator) -> Option<(T, Operator)> { + self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed); + Some((v, op)) + } + fn decode(&self, v: T) -> T { - self.decoding_calls.fetch_add(1, Ordering::Relaxed); + self.decoding_calls.fetch_add(1, atomic::Ordering::Relaxed); v } } @@ -166,12 +240,17 @@ impl Transcoder for MockTranscoder { #[cfg(test)] impl Transcoder for Arc { fn encode(&self, v: T) -> T { - self.encoding_calls.fetch_add(1, Ordering::Relaxed); + self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed); v } + fn encode_comparable(&self, v: T, op: Operator) -> Option<(T, Operator)> { + self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed); + Some((v, op)) + } + fn decode(&self, v: T) -> T { - self.decoding_calls.fetch_add(1, Ordering::Relaxed); + self.decoding_calls.fetch_add(1, atomic::Ordering::Relaxed); v } } diff --git a/read_buffer/src/column/float.rs b/read_buffer/src/column/float.rs index 56022c180f..d0755115f4 100644 --- a/read_buffer/src/column/float.rs +++ b/read_buffer/src/column/float.rs @@ -754,4 +754,303 @@ mod test { //assert_eq!(dst.unwrap_vector(), &exp, "example '{} {:?}' failed", op, v); } } + + #[test] + fn row_ids_filter_float_trimmer() { + let data = vec![100.0, 200.0, 100.0, 300.0, 400.0]; + + let float_trimmer = FloatByteTrimmer {}; + let data_float_trimmed = data + .iter() + .cloned() + .map::(|x| float_trimmer.encode(x)) + .collect::>(); + + let cases: Vec>> = vec![ + Box::new(RLE::::new_from_iter( + data_float_trimmed.iter().cloned(), + float_trimmer, + )), + Box::new(Fixed::::new( + data_float_trimmed.clone(), + FloatByteTrimmer {}, + )), + Box::new(FixedNull::::new( + PrimitiveArray::from(data_float_trimmed), + FloatByteTrimmer {}, + )), + ]; + + for enc in cases { + _row_ids_filter_float_trimmer(enc) + } + } + + fn _row_ids_filter_float_trimmer(enc: Box>) { + // [100.0, 200.0, 100.0, 300.0, 400.0] + let cases = vec![ + (100.0, Operator::Equal, vec![0, 2]), // 100.0, 100.0 + (100.0, Operator::NotEqual, vec![1, 3, 4]), // 200.0, 300.0, 400.0 + (100.0, Operator::LT, vec![]), // + (100.0, Operator::LTE, vec![0, 2]), // 100.0, 100.0 + (100.0, Operator::GT, vec![1, 3, 4]), // 200.0, 300.0, 400.0 + (100.0, Operator::GTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (200.0, Operator::Equal, vec![1]), // 200.0 + (200.0, Operator::NotEqual, vec![0, 2, 3, 4]), // 100.0, 100.0, 300.0, 400.0 + (200.0, Operator::LT, vec![0, 2]), // 100.0, 100.0 + (200.0, Operator::LTE, vec![0, 1, 2]), // 100.0, 200.0, 100.0 + (200.0, Operator::GT, 
vec![3, 4]), // 300.0, 400.0 + (200.0, Operator::GTE, vec![1, 3, 4]), // 200.0, 300.0, 400.0 + (400.0, Operator::Equal, vec![4]), // 400.0 + (400.0, Operator::NotEqual, vec![0, 1, 2, 3]), // 100.0, 200.0, 100.0, 300.0 + (400.0, Operator::LT, vec![0, 1, 2, 3]), // 100.0, 200.0, 100.0, 300.0 + (400.0, Operator::LTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (400.0, Operator::GT, vec![]), // + (400.0, Operator::GTE, vec![4]), // 400.0 + // Values not present in the column + (99.0, Operator::Equal, vec![]), // + (99.0, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (99.0, Operator::LT, vec![]), // + (99.0, Operator::LTE, vec![]), // + (99.0, Operator::GT, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (99.0, Operator::GTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (200.4, Operator::Equal, vec![]), // + (200.4, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (200.4, Operator::LT, vec![0, 1, 2]), // 100.0, 200.0, 100.0 + (200.4, Operator::LTE, vec![0, 1, 2]), // 100.0, 200.0, 100.0 + (200.4, Operator::GT, vec![3, 4]), // 300.0, 400.0 + (200.4, Operator::GTE, vec![3, 4]), // 300.0, 400.0 + (201.0, Operator::Equal, vec![]), // + (201.0, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (201.0, Operator::LT, vec![0, 1, 2]), // 100.0, 200.0, 100.0 + (201.0, Operator::LTE, vec![0, 1, 2]), // 100.0, 200.0, 100.0 + (201.0, Operator::GT, vec![3, 4]), // 300.0, 400.0 + (201.0, Operator::GTE, vec![3, 4]), // 300.0, 400.0 + (401.0, Operator::Equal, vec![]), // + (401.0, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (401.0, Operator::LT, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (401.0, Operator::LTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0 + (401.0, Operator::GT, vec![]), // + (401.0, Operator::GTE, vec![]), // + ]; + + for (v, op, exp) in cases { + let dst = enc.row_ids_filter(v, &op, RowIDs::new_vector()); + assert_eq!( + dst.unwrap_vector(), + &exp, + "example '{} {:?}' failed for {:?}", + op, + v, + enc.name() + ); + } + } + + #[test] + fn row_ids_filter_float_trimmer_with_nulls() { + let data = vec![Some(100.0), None, None, Some(200.0), None]; + + let float_trimmer = FloatByteTrimmer {}; + + let cases: Vec>> = vec![ + Box::new(RLE::::new_from_iter_opt( + data.iter() + .cloned() + .map(|x| x.map(|v| float_trimmer.encode(v))), + FloatByteTrimmer {}, + )), + Box::new(FixedNull::::new( + data.iter() + .cloned() + .map(|v| v.map(|v| float_trimmer.encode(v))) + .collect(), + FloatByteTrimmer {}, + )), + ]; + + for enc in cases { + _row_ids_filter_float_trimmer_with_nulls(enc) + } + } + + fn _row_ids_filter_float_trimmer_with_nulls(enc: Box>) { + // [100.0, NULL, NULL, 200.0] + let cases = vec![ + (100.0, Operator::Equal, vec![0]), // 100.0 + (100.0, Operator::NotEqual, vec![3]), // 200.0 + (100.0, Operator::LT, vec![]), // + (100.0, Operator::LTE, vec![0]), // 100.0 + (100.0, Operator::GT, vec![3]), // 200.0 + (100.0, Operator::GTE, vec![0, 3]), // 100.0, 200.0 + (200.0, Operator::Equal, vec![3]), // 200.0 + (200.0, Operator::NotEqual, vec![0]), // 100.0 + (200.0, Operator::LT, vec![0]), // 100.0 + (200.0, Operator::LTE, vec![0, 3]), // 100.0, 200.0 + (200.0, Operator::GT, vec![]), // + (200.0, Operator::GTE, vec![3]), // 200.0 + // Values not present in the column + (99.0, Operator::Equal, vec![]), // + (99.0, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0 + (99.0, Operator::LT, 
vec![]), // + (99.0, Operator::LTE, vec![]), // + (99.0, Operator::GT, vec![0, 3]), // 100.0, 200.0 + (99.0, Operator::GTE, vec![0, 3]), // 100.0, 200.0 + (200.4, Operator::Equal, vec![]), // + (200.4, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0 + (200.4, Operator::LT, vec![0, 3]), // 100.0,200.0 + (200.4, Operator::LTE, vec![0, 3]), // 100.0, 200.0 + (200.4, Operator::GT, vec![]), // + (200.4, Operator::GTE, vec![]), // + (201.0, Operator::Equal, vec![]), // + (201.0, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0 + (201.0, Operator::LT, vec![0, 3]), // 100.0, 200.0 + (201.0, Operator::LTE, vec![0, 3]), // 100.0, 200.0 + (201.0, Operator::GT, vec![]), // + (201.0, Operator::GTE, vec![]), // + (401.0, Operator::Equal, vec![]), // + (401.0, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0 + (401.0, Operator::LT, vec![0, 3]), // 100.0, 200.0 + (401.0, Operator::LTE, vec![0, 3]), // 100.0, 200.0 + (401.0, Operator::GT, vec![]), // + (401.0, Operator::GTE, vec![]), // + ]; + + for (v, op, exp) in cases { + let dst = enc.row_ids_filter(v, &op, RowIDs::new_vector()); + assert_eq!( + dst.unwrap_vector(), + &exp, + "example '{} {:?}' failed for {:?}", + op, + v, + enc.name() + ); + } + } + + #[test] + fn row_ids_filter_range_float_trimmer() { + let data = vec![100.0, 200.0, 100.0, 300.0, 400.0]; + + let float_trimmer = FloatByteTrimmer {}; + let data_float_trimmed = data + .iter() + .cloned() + .map::(|x| float_trimmer.encode(x)) + .collect::>(); + + let cases: Vec>> = vec![ + Box::new(RLE::::new_from_iter( + data_float_trimmed.iter().cloned(), + float_trimmer, + )), + Box::new(Fixed::::new( + data_float_trimmed.clone(), + FloatByteTrimmer {}, + )), + Box::new(FixedNull::::new( + PrimitiveArray::from(data_float_trimmed), + FloatByteTrimmer {}, + )), + ]; + + for enc in cases { + _row_ids_filter_range_float_trimmer(enc) + } + } + + fn _row_ids_filter_range_float_trimmer(enc: Box>) { + // [100.0, 200.0, 100.0, 300.0, 400.0] + let cases = vec![ + ((100.0, &Operator::LT), (99.0, &Operator::GT), vec![]), // + ((100.0, &Operator::LTE), (100.0, &Operator::GTE), vec![0, 2]), // 100.0, 100.0 + ( + (100.0, &Operator::GT), + (400.0, &Operator::LTE), + vec![1, 3, 4], + ), // 200.0, 300.0, 400.0 + ( + (100.0, &Operator::GTE), + (401.0, &Operator::LTE), + vec![0, 1, 2, 3, 4], + ), // 100.0, 200.0, 100.0, 300.0, 400.0 + ((200.0, &Operator::LT), (99.6, &Operator::GT), vec![0, 2]), // 100.0, 100.0 + ((200.0, &Operator::GT), (401.2, &Operator::LTE), vec![3, 4]), // 300.0, 400.0 + ( + (200.0, &Operator::GTE), + (400.9, &Operator::LT), + vec![1, 3, 4], + ), // 200.0, 300.0, 400.0 + ( + (99.8, &Operator::GT), + (500.87, &Operator::LT), + vec![0, 1, 2, 3, 4], + ), // 100.0, 200.0, 100.0, 300.0, 400.0 + ]; + + for (left, right, exp) in cases { + let dst = enc.row_ids_filter_range(left, right, RowIDs::new_vector()); + assert_eq!( + dst.unwrap_vector(), + &exp, + "example '{:?} {:?}' failed for {:?}", + left, + right, + enc.name(), + ); + } + } + + #[test] + fn row_ids_filter_range_float_trimmer_with_nulls() { + let data = vec![Some(100.0), None, None, Some(200.0), None]; + + let float_trimmer = FloatByteTrimmer {}; + + let cases: Vec>> = vec![ + Box::new(RLE::::new_from_iter_opt( + data.iter() + .cloned() + .map(|x| x.map(|v| float_trimmer.encode(v))), + FloatByteTrimmer {}, + )), + Box::new(FixedNull::::new( + data.iter() + .cloned() + .map(|v| v.map(|v| float_trimmer.encode(v))) + .collect(), + FloatByteTrimmer {}, + )), + ]; + + for enc in cases { + _row_ids_filter_range_float_trimmer_with_nulls(enc) + } + } + 
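The `NotEqual` expectations in the NULL-column cases above come from the new `all_non_null_row_ids` helpers added to the `FixedNull` and `RLE` encodings earlier in this diff: when a float literal is not encodable, `!=` degenerates to "every non-null row". A rough sketch of the same run-accumulation idea over a plain `Option` slice, using simplified types rather than the crate's `RowIDs` (assumptions: half-open ranges and u32 row ids, as in the encodings above):

// Collect half-open [start, end) ranges of consecutive non-null positions.
fn non_null_ranges<T>(values: &[Option<T>]) -> Vec<(u32, u32)> {
    let mut ranges = Vec::new();
    let mut run_start: Option<u32> = None;
    for (i, v) in values.iter().enumerate() {
        match (v.is_some(), run_start) {
            (true, None) => run_start = Some(i as u32), // start a new run
            (false, Some(start)) => {
                ranges.push((start, i as u32));          // close the run
                run_start = None;
            }
            _ => {}
        }
    }
    if let Some(start) = run_start {
        ranges.push((start, values.len() as u32));       // trailing run
    }
    ranges
}

fn main() {
    let col = vec![Some(100.0), None, None, Some(200.0), None];
    // Rows 0 and 3 are non-null, matching the `NotEqual` expectations above.
    assert_eq!(non_null_ranges(&col), vec![(0, 1), (3, 4)]);
}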
+ fn _row_ids_filter_range_float_trimmer_with_nulls(enc: Box>) { + // [100.0, NULL, NULL, 200.0, NULL] + let cases = vec![ + ((100.0, &Operator::LT), (99.0, &Operator::GT), vec![]), // + ((100.0, &Operator::LTE), (100.0, &Operator::GTE), vec![0]), // 100.0 + ((100.0, &Operator::GT), (400.0, &Operator::LTE), vec![3]), // 200.0 + ((100.0, &Operator::GTE), (401.0, &Operator::LTE), vec![0, 3]), // 100.0, 200.0 + ((200.0, &Operator::LT), (99.6, &Operator::GT), vec![0]), // 100.0 + ((200.0, &Operator::GT), (401.2, &Operator::LTE), vec![]), // + ((99.8, &Operator::GT), (500.87, &Operator::LT), vec![0, 3]), // 100.0, 200.0 + ]; + + for (left, right, exp) in cases { + let dst = enc.row_ids_filter_range(left, right, RowIDs::new_vector()); + assert_eq!( + dst.unwrap_vector(), + &exp, + "example '{:?} {:?}' failed for {:?}", + left, + right, + enc.name(), + ); + } + } } diff --git a/server/src/database.rs b/server/src/database.rs index 60b62992a0..1cbbe5eee6 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -21,7 +21,7 @@ use internal_types::freezable::Freezable; use iox_object_store::IoxObjectStore; use observability_deps::tracing::{error, info, warn}; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; -use parquet_file::catalog::api::PreservedCatalog; +use parquet_file::catalog::core::PreservedCatalog; use persistence_windows::checkpoint::ReplayPlan; use snafu::{ensure, OptionExt, ResultExt, Snafu}; use std::{future::Future, sync::Arc, time::Duration}; @@ -63,7 +63,7 @@ pub enum Error { ))] WipePreservedCatalog { db_name: String, - source: Box, + source: Box, }, #[snafu(display("failed to skip replay for database ({}): {}", db_name, source))] diff --git a/server/src/db.rs b/server/src/db.rs index 1b75e37638..22e65d36dd 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -32,8 +32,9 @@ use iox_object_store::IoxObjectStore; use mutable_buffer::chunk::{ChunkMetrics as MutableBufferChunkMetrics, MBChunk}; use observability_deps::tracing::{debug, error, info}; use parquet_file::catalog::{ - api::{CatalogParquetInfo, CheckpointData, PreservedCatalog}, cleanup::{delete_files as delete_parquet_files, get_unreferenced_parquet_files}, + core::PreservedCatalog, + interface::{CatalogParquetInfo, CheckpointData}, prune::prune_history as prune_catalog_transaction_history, }; use persistence_windows::{checkpoint::ReplayPlan, persistence_windows::PersistenceWindows}; @@ -518,7 +519,7 @@ impl Db { pub async fn delete( self: &Arc, table_name: &str, - delete_predicate: &Predicate, + delete_predicate: Arc, ) -> Result<()> { // get all partitions of this table let table = self @@ -533,7 +534,7 @@ impl Db { // save the delete predicate in the chunk let mut chunk = chunk.write(); chunk - .add_delete_predicate(delete_predicate) + .add_delete_predicate(Arc::clone(&delete_predicate)) .context(AddDeletePredicateError)?; } } @@ -1331,7 +1332,7 @@ mod tests { assert_store_sequenced_entry_failures, db::{ catalog::chunk::ChunkStage, - test_helpers::{run_query, try_write_lp, write_lp}, + test_helpers::{run_query, try_write_lp, write_lp, write_lp_with_time}, }, utils::{make_db, TestDb}, }; @@ -1875,9 +1876,8 @@ mod tests { let test_db = make_db().await; let db = Arc::new(test_db.db); - let time0 = Utc::now(); - write_lp(db.as_ref(), "cpu bar=1 10").await; - let time1 = Utc::now(); + let t_write1 = Utc::now(); + write_lp_with_time(db.as_ref(), "cpu bar=1 10", t_write1).await; let partition_key = "1970-01-01T00"; let mb_chunk = db @@ -1892,14 +1892,12 @@ mod tests { let first_old_rb_write = 
old_rb_chunk.time_of_first_write(); let last_old_rb_write = old_rb_chunk.time_of_last_write(); - assert!(time0 < first_old_rb_write); assert_eq!(first_old_rb_write, last_old_rb_write); - assert!(first_old_rb_write < time1); + assert_eq!(first_old_rb_write, t_write1); // Put new data into the mutable buffer - let time2 = Utc::now(); - write_lp(db.as_ref(), "cpu bar=2 20").await; - let time3 = Utc::now(); + let t_write2 = Utc::now(); + write_lp_with_time(db.as_ref(), "cpu bar=2 20", t_write2).await; // now, compact it let compacted_rb_chunk = db.compact_partition("cpu", partition_key).await.unwrap(); @@ -1917,8 +1915,7 @@ mod tests { let last_compacted_write = compacted_rb_chunk.time_of_last_write(); assert_eq!(first_old_rb_write, first_compacted_write); assert_ne!(last_old_rb_write, last_compacted_write); - assert!(time2 < last_compacted_write); - assert!(last_compacted_write < time3); + assert_eq!(last_compacted_write, t_write2); // data should be readable let expected = vec![ @@ -1935,7 +1932,7 @@ mod tests { async fn collect_read_filter(chunk: &DbChunk) -> Vec { chunk - .read_filter(&Default::default(), Selection::All) + .read_filter(&Default::default(), Selection::All, &[]) .unwrap() .collect::>() .await @@ -2483,12 +2480,16 @@ mod tests { #[tokio::test] async fn partition_chunk_summaries_timestamp() { let db = Arc::new(make_db().await.db); - let start = Utc::now(); - write_lp(&db, "cpu bar=1 1").await; - let after_first_write = Utc::now(); - write_lp(&db, "cpu bar=2 2").await; + + let t_first_write = Utc::now(); + write_lp_with_time(&db, "cpu bar=1 1", t_first_write).await; + + let t_second_write = Utc::now(); + write_lp_with_time(&db, "cpu bar=2 2", t_second_write).await; + + let t_close_before = Utc::now(); db.rollover_partition("cpu", "1970-01-01T00").await.unwrap(); - let after_close = Utc::now(); + let t_close_after = Utc::now(); let mut chunk_summaries = db.chunk_summaries().unwrap(); @@ -2496,59 +2497,18 @@ mod tests { let summary = &chunk_summaries[0]; assert_eq!(summary.id, 0, "summary; {:#?}", summary); - assert!( - summary.time_of_first_write > start, - "summary; {:#?}", - summary - ); - assert!( - summary.time_of_first_write < after_close, - "summary; {:#?}", - summary - ); - - assert!( - summary.time_of_last_write > after_first_write, - "summary; {:#?}", - summary - ); - assert!( - summary.time_of_last_write < after_close, - "summary; {:#?}", - summary - ); - - assert!( - summary.time_closed.unwrap() > after_first_write, - "summary; {:#?}", - summary - ); - assert!( - summary.time_closed.unwrap() < after_close, - "summary; {:#?}", - summary - ); + assert_eq!(summary.time_of_first_write, t_first_write); + assert_eq!(summary.time_of_last_write, t_second_write); + assert!(t_close_before <= summary.time_closed.unwrap()); + assert!(summary.time_closed.unwrap() <= t_close_after); } - fn assert_first_last_times_eq(chunk_summary: &ChunkSummary) { + fn assert_first_last_times_eq(chunk_summary: &ChunkSummary, expected: DateTime) { let first_write = chunk_summary.time_of_first_write; let last_write = chunk_summary.time_of_last_write; assert_eq!(first_write, last_write); - } - - fn assert_first_last_times_between( - chunk_summary: &ChunkSummary, - before: DateTime, - after: DateTime, - ) { - let first_write = chunk_summary.time_of_first_write; - let last_write = chunk_summary.time_of_last_write; - - assert!(before < first_write); - assert!(before < last_write); - assert!(first_write < after); - assert!(last_write < after); + assert_eq!(first_write, expected); } fn 
assert_chunks_times_ordered(before: &ChunkSummary, after: &ChunkSummary) { @@ -2581,21 +2541,17 @@ mod tests { let db = make_db().await.db; // get three chunks: one open, one closed in mb and one close in rb - // TIME 0 --------------------------------------------------------------------------------- - let time0 = Utc::now(); // In open chunk, will end up in rb/os - write_lp(&db, "cpu bar=1 1").await; - // TIME 1 --------------------------------------------------------------------------------- - let time1 = Utc::now(); + let t_write1 = Utc::now(); + write_lp_with_time(&db, "cpu bar=1 1", t_write1).await; + // Move open chunk to closed db.rollover_partition("cpu", "1970-01-01T00").await.unwrap(); - // TIME 2 --------------------------------------------------------------------------------- - let time2 = Utc::now(); + // New open chunk in mb // This point will end up in rb/os - write_lp(&db, "cpu bar=1,baz=2 2").await; - // TIME 3 --------------------------------------------------------------------------------- - let time3 = Utc::now(); + let t_write2 = Utc::now(); + write_lp_with_time(&db, "cpu bar=1,baz=2 2", t_write2).await; // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2604,19 +2560,15 @@ mod tests { // Each chunk has one write, so both chunks should have first write == last write let closed_mb_t3 = chunk_summaries[0].clone(); assert_eq!(closed_mb_t3.storage, ChunkStorage::ClosedMutableBuffer); - assert_first_last_times_eq(&closed_mb_t3); - assert_first_last_times_between(&closed_mb_t3, time0, time1); + assert_first_last_times_eq(&closed_mb_t3, t_write1); let open_mb_t3 = chunk_summaries[1].clone(); assert_eq!(open_mb_t3.storage, ChunkStorage::OpenMutableBuffer); - assert_first_last_times_eq(&open_mb_t3); - assert_first_last_times_between(&open_mb_t3, time2, time3); + assert_first_last_times_eq(&open_mb_t3, t_write2); assert_chunks_times_ordered(&closed_mb_t3, &open_mb_t3); // This point makes a new open mb chunk and will end up in the closed mb chunk - write_lp(&db, "cpu bar=1,baz=2,frob=3 400000000000000").await; - // TIME 4 --------------------------------------------------------------------------------- - // we don't need to check this value with anything because no timestamps - // should be between time3 and time4 + let t_write3 = Utc::now(); + write_lp_with_time(&db, "cpu bar=1,baz=2,frob=3 400000000000000", t_write3).await; // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2639,9 +2591,6 @@ mod tests { db.move_chunk_to_read_buffer("cpu", "1970-01-01T00", 0) .await .unwrap(); - // TIME 5 --------------------------------------------------------------------------------- - // we don't need to check this value with anything because no timestamps - // should be between time4 and time5 // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2668,9 +2617,6 @@ mod tests { ) .await .unwrap(); - // TIME 6 --------------------------------------------------------------------------------- - // we don't need to check this value with anything because no timestamps - // should be between time5 and time6 // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2692,8 +2638,6 @@ mod tests { // Move open chunk to closed 
db.rollover_partition("cpu", "1970-01-05T15").await.unwrap(); - // TIME 7 --------------------------------------------------------------------------------- - let time7 = Utc::now(); // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2710,9 +2654,8 @@ mod tests { // New open chunk in mb // This point will stay in this open mb chunk - write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000").await; - // TIME 8 --------------------------------------------------------------------------------- - let time8 = Utc::now(); + let t_write4 = Utc::now(); + write_lp_with_time(&db, "cpu bar=1,baz=3,blargh=3 400000000000000", t_write4).await; // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2730,8 +2673,7 @@ mod tests { // times should be the same let open_mb_t8 = chunk_summaries[2].clone(); assert_eq!(open_mb_t8.storage, ChunkStorage::OpenMutableBuffer); - assert_first_last_times_eq(&open_mb_t8); - assert_first_last_times_between(&open_mb_t8, time7, time8); + assert_first_last_times_eq(&open_mb_t8, t_write4); let lifecycle_action = None; diff --git a/server/src/db/catalog/chunk.rs b/server/src/db/catalog/chunk.rs index dcba0b4576..bf474d3403 100644 --- a/server/src/db/catalog/chunk.rs +++ b/server/src/db/catalog/chunk.rs @@ -16,7 +16,7 @@ use internal_types::{access::AccessRecorder, schema::Schema}; use mutable_buffer::chunk::{snapshot::ChunkSnapshot as MBChunkSnapshot, MBChunk}; use observability_deps::tracing::debug; use parquet_file::chunk::ParquetChunk; -use predicate::predicate::{Predicate, PredicateBuilder}; +use predicate::predicate::Predicate; use read_buffer::RBChunk; use tracker::{TaskRegistration, TaskTracker}; @@ -80,7 +80,7 @@ pub struct ChunkMetadata { pub schema: Arc, /// Delete predicates of this chunk - pub delete_predicates: Arc>, + pub delete_predicates: Vec>, } /// Different memory representations of a frozen chunk. @@ -307,14 +307,14 @@ impl CatalogChunk { time_of_last_write: DateTime, schema: Arc, metrics: ChunkMetrics, - delete_predicates: Arc>, + delete_predicates: Vec>, order: ChunkOrder, ) -> Self { let stage = ChunkStage::Frozen { meta: Arc::new(ChunkMetadata { table_summary: Arc::new(chunk.table_summary()), schema, - delete_predicates: Arc::clone(&delete_predicates), + delete_predicates, }), representation: ChunkStageFrozenRepr::ReadBuffer(Arc::new(chunk)), }; @@ -342,7 +342,7 @@ impl CatalogChunk { time_of_first_write: DateTime, time_of_last_write: DateTime, metrics: ChunkMetrics, - delete_predicates: Arc>, + delete_predicates: Vec>, order: ChunkOrder, ) -> Self { assert_eq!(chunk.table_name(), addr.table_name.as_ref()); @@ -469,30 +469,24 @@ impl CatalogChunk { } } - pub fn add_delete_predicate(&mut self, delete_predicate: &Predicate) -> Result<()> { + pub fn add_delete_predicate(&mut self, delete_predicate: Arc) -> Result<()> { + debug!( + ?delete_predicate, + "Input delete predicate to CatalogChunk add_delete_predicate" + ); match &mut self.stage { ChunkStage::Open { mb_chunk: _ } => { // Freeze/close this chunk and add delete_predicate to its frozen one self.freeze_with_predicate(delete_predicate)?; } - ChunkStage::Frozen { meta, .. } => { + ChunkStage::Frozen { meta, .. } | ChunkStage::Persisted { meta, .. 
} => {
                 // Add the delete_predicate into the chunk's metadata
-                let mut del_preds: Vec<Predicate> = (*meta.delete_predicates).clone();
-                del_preds.push(delete_predicate.clone());
+                let mut del_preds = meta.delete_predicates.clone();
+                del_preds.push(delete_predicate);
                 *meta = Arc::new(ChunkMetadata {
                     table_summary: Arc::clone(&meta.table_summary),
                     schema: Arc::clone(&meta.schema),
-                    delete_predicates: Arc::new(del_preds),
-                });
-            }
-            ChunkStage::Persisted { meta, .. } => {
-                // Add the delete_predicate into the chunk's metadata
-                let mut del_preds: Vec<Predicate> = (*meta.delete_predicates).clone();
-                del_preds.push(delete_predicate.clone());
-                *meta = Arc::new(ChunkMetadata {
-                    table_summary: Arc::clone(&meta.table_summary),
-                    schema: Arc::clone(&meta.schema),
-                    delete_predicates: Arc::new(del_preds),
+                    delete_predicates: del_preds,
                 });
             }
         }
@@ -500,14 +494,23 @@ impl CatalogChunk {
         Ok(())
     }
 
-    pub fn delete_predicates(&mut self) -> Arc<Vec<Predicate>> {
+    pub fn delete_predicates(&mut self) -> &[Arc<Predicate>] {
         match &self.stage {
             ChunkStage::Open { mb_chunk: _ } => {
                 // no delete predicate for open chunk
-                Arc::new(vec![])
+                debug!("delete_predicates of Open chunk is empty");
+                &[]
+            }
+            ChunkStage::Frozen { meta, .. } => {
+                let preds = &meta.delete_predicates;
+                debug!(?preds, "delete_predicates of Frozen chunk");
+                preds
+            }
+            ChunkStage::Persisted { meta, .. } => {
+                let preds = &meta.delete_predicates;
+                debug!(?preds, "delete_predicates of Persisted chunk");
+                preds
             }
-            ChunkStage::Frozen { meta, .. } => Arc::clone(&meta.delete_predicates),
-            ChunkStage::Persisted { meta, .. } => Arc::clone(&meta.delete_predicates),
         }
     }
 
@@ -679,7 +682,14 @@ impl CatalogChunk {
     ///
     /// This only works for chunks in the _open_ stage (chunk is converted) and the _frozen_ stage
     /// (no-op) and will fail for other stages.
-    pub fn freeze_with_predicate(&mut self, delete_predicate: &Predicate) -> Result<()> {
+    pub fn freeze_with_predicate(&mut self, delete_predicate: Arc<Predicate>) -> Result<()> {
+        self.freeze_with_delete_predicates(vec![delete_predicate])
+    }
+
+    fn freeze_with_delete_predicates(
+        &mut self,
+        delete_predicates: Vec<Arc<Predicate>>,
+    ) -> Result<()> {
         match &self.stage {
             ChunkStage::Open { mb_chunk, ..
} => { debug!(%self.addr, row_count=mb_chunk.rows(), "freezing chunk"); @@ -692,7 +702,7 @@ impl CatalogChunk { let metadata = ChunkMetadata { table_summary: Arc::new(mb_chunk.table_summary()), schema: s.full_schema(), - delete_predicates: Arc::new(vec![delete_predicate.clone()]), + delete_predicates, }; self.stage = ChunkStage::Frozen { @@ -714,7 +724,7 @@ impl CatalogChunk { } pub fn freeze(&mut self) -> Result<()> { - self.freeze_with_predicate(&PredicateBuilder::default().build()) + self.freeze_with_delete_predicates(vec![]) } /// Set the chunk to the Moving state, returning a handle to the underlying storage @@ -776,7 +786,7 @@ impl CatalogChunk { *meta = Arc::new(ChunkMetadata { table_summary: Arc::clone(&meta.table_summary), schema, - delete_predicates: Arc::clone(&meta.delete_predicates), + delete_predicates: meta.delete_predicates.clone(), }); match &representation { @@ -1151,7 +1161,7 @@ mod tests { expected_exprs1.push(e); // Add a delete predicate into a chunk the open chunk = delete simulation for open chunk - chunk.add_delete_predicate(&del_pred1).unwrap(); + chunk.add_delete_predicate(Arc::new(del_pred1)).unwrap(); // chunk must be in frozen stage now assert_eq!(chunk.stage().name(), "Frozen"); // chunk must have a delete predicate @@ -1182,7 +1192,7 @@ mod tests { let mut expected_exprs2 = vec![]; let e = col("cost").not_eq(lit(15)); expected_exprs2.push(e); - chunk.add_delete_predicate(&del_pred2).unwrap(); + chunk.add_delete_predicate(Arc::new(del_pred2)).unwrap(); // chunk still must be in frozen stage now assert_eq!(chunk.stage().name(), "Frozen"); // chunk must have 2 delete predicates @@ -1248,7 +1258,7 @@ mod tests { now, now, ChunkMetrics::new_unregistered(), - Arc::new(vec![] as Vec), + vec![], ChunkOrder::new(6), ) } diff --git a/server/src/db/catalog/partition.rs b/server/src/db/catalog/partition.rs index 3c989e6a7f..bcd07e8435 100644 --- a/server/src/db/catalog/partition.rs +++ b/server/src/db/catalog/partition.rs @@ -176,7 +176,7 @@ impl Partition { time_of_first_write: DateTime, time_of_last_write: DateTime, schema: Arc, - delete_predicates: Arc>, + delete_predicates: Vec>, chunk_order: ChunkOrder, ) -> (u32, Arc>) { let chunk_id = Self::pick_next(&mut self.next_chunk_id, "Chunk ID Overflow"); @@ -231,7 +231,7 @@ impl Partition { chunk: Arc, time_of_first_write: DateTime, time_of_last_write: DateTime, - delete_predicates: Arc>, + delete_predicates: Vec>, chunk_order: ChunkOrder, ) -> Arc> { assert_eq!(chunk.table_name(), self.table_name()); @@ -246,7 +246,7 @@ impl Partition { time_of_first_write, time_of_last_write, self.metrics.new_chunk_metrics(), - Arc::clone(&delete_predicates), + delete_predicates, chunk_order, )), ); diff --git a/server/src/db/chunk.rs b/server/src/db/chunk.rs index 854c219180..b9386d93c4 100644 --- a/server/src/db/chunk.rs +++ b/server/src/db/chunk.rs @@ -121,7 +121,7 @@ impl DbChunk { let meta = ChunkMetadata { table_summary: Arc::new(mb_chunk.table_summary()), schema: snapshot.full_schema(), - delete_predicates: Arc::new(vec![]), //todo: consider to use the one of the given chunk if appropriate + delete_predicates: vec![], // open chunk does not have delete predicate }; (state, Arc::new(meta)) } @@ -224,6 +224,19 @@ impl DbChunk { pub fn time_of_last_write(&self) -> DateTime { self.time_of_last_write } + + pub fn to_rub_negated_predicates( + delete_predicates: &[Arc], + ) -> Result> { + let mut rub_preds: Vec = vec![]; + for pred in delete_predicates { + let rub_pred = to_read_buffer_predicate(pred).context(PredicateConversion)?; + 
rub_preds.push(rub_pred); + } + + debug!(?rub_preds, "RUB delete predicates"); + Ok(rub_preds) + } } impl QueryChunk for DbChunk { @@ -314,11 +327,11 @@ impl QueryChunk for DbChunk { Ok(pred_result) } - // NGA todo: add delete predicate here to eliminate data at query time fn read_filter( &self, predicate: &Predicate, selection: Selection<'_>, + delete_predicates: &[Arc], ) -> Result { // Predicate is not required to be applied for correctness. We only pushed it down // when possible for performance gain @@ -326,6 +339,24 @@ impl QueryChunk for DbChunk { debug!(?predicate, "Input Predicate to read_filter"); self.access_recorder.record_access_now(); + debug!(?delete_predicates, "Input Delete Predicates to read_filter"); + + // add negated deleted ranges to the predicate + let mut pred_with_deleted_ranges = predicate.clone(); + pred_with_deleted_ranges.add_delete_ranges(delete_predicates); + debug!( + ?pred_with_deleted_ranges, + "Input Predicate plus deleted ranges" + ); + + // add negated deleted predicates + let mut pred_wth_deleted_exprs = pred_with_deleted_ranges.clone(); + pred_wth_deleted_exprs.add_delete_exprs(delete_predicates); + debug!( + ?pred_wth_deleted_exprs, + "Input Predicate plus deleted ranges and deleted predicates" + ); + match &self.state { State::MutableBuffer { chunk, .. } => { let batch = chunk.read_filter(selection).context(MutableBufferChunk)?; @@ -339,12 +370,16 @@ impl QueryChunk for DbChunk { Ok(predicate) => predicate, Err(_) => read_buffer::Predicate::default(), }; - debug!(?rb_predicate, "Predicate pushed down to RUB"); - // TODO: add collection of delete predicates associated with - // this chunk. - let read_results = chunk.read_filter(rb_predicate, selection, vec![]); + // combine all delete expressions to RUB's negated ones + let negated_delete_exprs = Self::to_rub_negated_predicates(delete_predicates)?; + debug!( + ?negated_delete_exprs, + "Negated Predicate pushed down to RUB" + ); + + let read_results = chunk.read_filter(rb_predicate, selection, negated_delete_exprs); let schema = chunk .read_filter_table_schema(selection) @@ -357,13 +392,11 @@ impl QueryChunk for DbChunk { schema.into(), ))) } - State::ParquetFile { chunk, .. } => { - chunk - .read_filter(predicate, selection) - .context(ParquetFileChunkError { - chunk_id: self.id(), - }) - } + State::ParquetFile { chunk, .. 
} => chunk + .read_filter(&pred_wth_deleted_exprs, selection) + .context(ParquetFileChunkError { + chunk_id: self.id(), + }), } } @@ -503,8 +536,11 @@ impl QueryChunkMeta for DbChunk { } // return a reference to delete predicates of the chunk - fn delete_predicates(&self) -> Arc> { - Arc::clone(&self.meta.delete_predicates) + fn delete_predicates(&self) -> &[Arc] { + let pred = &self.meta.delete_predicates; + debug!(?pred, "Delete predicate in DbChunk"); + + pred } } @@ -514,7 +550,7 @@ mod tests { use crate::{ db::{ catalog::chunk::{CatalogChunk, ChunkStage}, - test_helpers::write_lp, + test_helpers::{write_lp, write_lp_with_time}, }, utils::make_db, }; @@ -527,7 +563,7 @@ mod tests { let t2 = chunk.access_recorder().get_metrics(); snapshot - .read_filter(&Default::default(), Selection::All) + .read_filter(&Default::default(), Selection::All, &[]) .unwrap(); let t3 = chunk.access_recorder().get_metrics(); @@ -608,9 +644,8 @@ mod tests { async fn parquet_records_access() { let db = make_db().await.db; - let before_creation = Utc::now(); - write_lp(&db, "cpu,tag=1 bar=1 1").await; - let after_creation = Utc::now(); + let creation_time = Utc::now(); + write_lp_with_time(&db, "cpu,tag=1 bar=1 1", creation_time).await; let id = db .persist_partition( @@ -632,8 +667,7 @@ mod tests { let first_write = chunk.time_of_first_write(); let last_write = chunk.time_of_last_write(); assert_eq!(first_write, last_write); - assert!(before_creation < first_write); - assert!(last_write < after_creation); + assert_eq!(first_write, creation_time); test_chunk_access(&chunk).await } diff --git a/server/src/db/lifecycle/compact.rs b/server/src/db/lifecycle/compact.rs index 576fa328a3..b661dfc546 100644 --- a/server/src/db/lifecycle/compact.rs +++ b/server/src/db/lifecycle/compact.rs @@ -45,7 +45,7 @@ pub(crate) fn compact_chunks( let mut input_rows = 0; let mut time_of_first_write: Option> = None; let mut time_of_last_write: Option> = None; - let mut delete_predicates: Vec = vec![]; + let mut delete_predicates: Vec> = vec![]; let mut min_order = ChunkOrder::MAX; let query_chunks = chunks .into_iter() @@ -66,8 +66,7 @@ pub(crate) fn compact_chunks( .map(|prev_last| prev_last.max(candidate_last)) .or(Some(candidate_last)); - let mut preds = (*chunk.delete_predicates()).clone(); - delete_predicates.append(&mut preds); + delete_predicates.extend(chunk.delete_predicates().iter().cloned()); min_order = min_order.min(chunk.order()); @@ -103,7 +102,7 @@ pub(crate) fn compact_chunks( ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?; let physical_plan = ctx.prepare_plan(&plan)?; - let stream = ctx.execute(physical_plan).await?; + let stream = ctx.execute_stream(physical_plan).await?; let rb_chunk = collect_rub(stream, &addr, metric_registry.as_ref()) .await? 
.expect("chunk has zero rows"); @@ -119,7 +118,7 @@ pub(crate) fn compact_chunks( time_of_first_write, time_of_last_write, schema, - Arc::new(delete_predicates), + delete_predicates, min_order, ) }; @@ -148,7 +147,7 @@ pub(crate) fn compact_chunks( #[cfg(test)] mod tests { use super::*; - use crate::{db::test_helpers::write_lp, utils::make_db}; + use crate::{db::test_helpers::write_lp_with_time, utils::make_db}; use data_types::chunk_metadata::ChunkStorage; use lifecycle::{LockableChunk, LockablePartition}; use query::QueryDatabase; @@ -158,15 +157,13 @@ mod tests { let test_db = make_db().await; let db = test_db.db; - let time0 = Utc::now(); - - write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await; - write_lp(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20").await; - write_lp(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10").await; - write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20").await; - write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10").await; - - let time1 = Utc::now(); + let t_first_write = Utc::now(); + write_lp_with_time(db.as_ref(), "cpu,tag1=cupcakes bar=1 10", t_first_write).await; + write_lp_with_time(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20", Utc::now()).await; + write_lp_with_time(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10", Utc::now()).await; + write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20", Utc::now()).await; + let t_last_write = Utc::now(); + write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10", t_last_write).await; let partition_keys = db.partition_keys().unwrap(); assert_eq!(partition_keys.len(), 1); @@ -182,9 +179,8 @@ mod tests { let (_, fut) = compact_chunks(partition.upgrade(), vec![chunk.upgrade()]).unwrap(); // NB: perform the write before spawning the background task that performs the compaction - let time2 = Utc::now(); - write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40").await; - let time3 = Utc::now(); + let t_later_write = Utc::now(); + write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40", t_later_write).await; tokio::spawn(fut).await.unwrap().unwrap().unwrap(); let mut chunk_summaries: Vec<_> = db_partition.read().chunk_summaries().collect(); @@ -194,16 +190,14 @@ mod tests { let mub_summary = &chunk_summaries[0]; let first_mub_write = mub_summary.time_of_first_write; let last_mub_write = mub_summary.time_of_last_write; - assert!(time2 < first_mub_write); assert_eq!(first_mub_write, last_mub_write); - assert!(first_mub_write < time3); + assert_eq!(first_mub_write, t_later_write); let rub_summary = &chunk_summaries[1]; let first_rub_write = rub_summary.time_of_first_write; let last_rub_write = rub_summary.time_of_last_write; - assert!(time0 < first_rub_write); - assert!(first_rub_write < last_rub_write); - assert!(last_rub_write < time1); + assert_eq!(first_rub_write, t_first_write); + assert_eq!(last_rub_write, t_last_write); let summaries: Vec<_> = chunk_summaries .iter() diff --git a/server/src/db/lifecycle/error.rs b/server/src/db/lifecycle/error.rs index 4f9a15838b..036edea016 100644 --- a/server/src/db/lifecycle/error.rs +++ b/server/src/db/lifecycle/error.rs @@ -49,7 +49,7 @@ pub enum Error { #[snafu(display("Error while commiting transaction on preserved catalog: {}", source))] CommitError { - source: parquet_file::catalog::api::Error, + source: parquet_file::catalog::core::Error, }, #[snafu(display("Cannot write chunk: {}", addr))] diff --git a/server/src/db/lifecycle/move_chunk.rs b/server/src/db/lifecycle/move_chunk.rs index e0ddf48b36..2dbe716eb1 100644 --- 
a/server/src/db/lifecycle/move_chunk.rs +++ b/server/src/db/lifecycle/move_chunk.rs @@ -54,7 +54,7 @@ pub fn move_chunk_to_read_buffer( ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?; let physical_plan = ctx.prepare_plan(&plan)?; - let stream = ctx.execute(physical_plan).await?; + let stream = ctx.execute_stream(physical_plan).await?; let rb_chunk = collect_rub( stream, &addr.clone().into_partition(), diff --git a/server/src/db/lifecycle/persist.rs b/server/src/db/lifecycle/persist.rs index 8bf5bddc78..7fa38818f7 100644 --- a/server/src/db/lifecycle/persist.rs +++ b/server/src/db/lifecycle/persist.rs @@ -52,7 +52,7 @@ pub fn persist_chunks( let mut time_of_first_write: Option> = None; let mut time_of_last_write: Option> = None; let mut query_chunks = vec![]; - let mut delete_predicates: Vec = vec![]; + let mut delete_predicates: Vec> = vec![]; let mut min_order = ChunkOrder::MAX; for mut chunk in chunks { // Sanity-check @@ -72,8 +72,7 @@ pub fn persist_chunks( .map(|prev_last| prev_last.max(candidate_last)) .or(Some(candidate_last)); - let mut preds = (*chunk.delete_predicates()).clone(); - delete_predicates.append(&mut preds); + delete_predicates.extend(chunk.delete_predicates().iter().cloned()); min_order = min_order.min(chunk.order()); @@ -112,8 +111,10 @@ pub fn persist_chunks( "Expected split plan to produce exactly 2 partitions" ); - let to_persist_stream = ctx.execute_partition(Arc::clone(&physical_plan), 0).await?; - let remainder_stream = ctx.execute_partition(physical_plan, 1).await?; + let to_persist_stream = ctx + .execute_stream_partitioned(Arc::clone(&physical_plan), 0) + .await?; + let remainder_stream = ctx.execute_stream_partitioned(physical_plan, 1).await?; let (to_persist, remainder) = futures::future::try_join( collect_rub(to_persist_stream, &addr, metric_registry.as_ref()), @@ -131,7 +132,6 @@ pub fn persist_chunks( partition_write.force_drop_chunk(id) } - let del_preds = Arc::new(delete_predicates); // Upsert remainder to catalog if let Some(remainder) = remainder { partition_write.create_rub_chunk( @@ -139,11 +139,13 @@ pub fn persist_chunks( time_of_first_write, time_of_last_write, Arc::clone(&schema), - Arc::clone(&del_preds), + delete_predicates.clone(), min_order, ); } + // NGA todo: we hit this error if there are rows but they are deleted + // Need to think a way to handle this (https://github.com/influxdata/influxdb_iox/issues/2546) let to_persist = to_persist.expect("should be rows to persist"); let (new_chunk_id, new_chunk) = partition_write.create_rub_chunk( @@ -151,7 +153,7 @@ pub fn persist_chunks( time_of_first_write, time_of_last_write, schema, - del_preds, + delete_predicates, min_order, ); let to_persist = LockableCatalogChunk { diff --git a/server/src/db/lifecycle/write.rs b/server/src/db/lifecycle/write.rs index 1b28f8f1e2..bfd7b7df71 100644 --- a/server/src/db/lifecycle/write.rs +++ b/server/src/db/lifecycle/write.rs @@ -17,7 +17,7 @@ use data_types::{chunk_metadata::ChunkLifecycleAction, job::Job}; use internal_types::selection::Selection; use observability_deps::tracing::{debug, warn}; use parquet_file::{ - catalog::api::CatalogParquetInfo, + catalog::interface::CatalogParquetInfo, chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk}, metadata::IoxMetadata, storage::Storage, @@ -26,7 +26,7 @@ use persistence_windows::{ checkpoint::{DatabaseCheckpoint, PartitionCheckpoint, PersistCheckpointBuilder}, persistence_windows::FlushHandle, }; -use query::QueryChunk; +use query::{QueryChunk, QueryChunkMeta}; use 
snafu::ResultExt; use std::{future::Future, sync::Arc}; use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt}; @@ -89,7 +89,11 @@ pub(super) fn write_chunk_to_object_store( // Get RecordBatchStream of data from the read buffer chunk let stream = db_chunk - .read_filter(&Default::default(), Selection::All) + .read_filter( + &Default::default(), + Selection::All, + db_chunk.delete_predicates(), + ) .expect("read filter should be infallible"); // check that the upcoming state change will very likely succeed diff --git a/server/src/db/load.rs b/server/src/db/load.rs index 1afd1b0eb0..d0cdd1817d 100644 --- a/server/src/db/load.rs +++ b/server/src/db/load.rs @@ -1,11 +1,17 @@ //! Functionality to load a [`Catalog`](crate::db::catalog::Catalog) and other information from a -//! [`PreservedCatalog`](parquet_file::catalog::api::PreservedCatalog). +//! [`PreservedCatalog`](parquet_file::catalog::core::PreservedCatalog). use super::catalog::{chunk::ChunkStage, table::TableSchemaUpsertHandle, Catalog}; use iox_object_store::{IoxObjectStore, ParquetFilePath}; use observability_deps::tracing::{error, info}; use parquet_file::{ - catalog::api::{CatalogParquetInfo, CatalogState, ChunkCreationFailed, PreservedCatalog}, + catalog::{ + core::PreservedCatalog, + interface::{ + CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError, + ChunkCreationFailed, + }, + }, chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk}, }; use persistence_windows::checkpoint::{ReplayPlan, ReplayPlanner}; @@ -22,17 +28,17 @@ pub enum Error { #[snafu(display("Cannot create new empty preserved catalog: {}", source))] CannotCreateCatalog { - source: parquet_file::catalog::api::Error, + source: parquet_file::catalog::core::Error, }, #[snafu(display("Cannot load preserved catalog: {}", source))] CannotLoadCatalog { - source: parquet_file::catalog::api::Error, + source: parquet_file::catalog::core::Error, }, #[snafu(display("Cannot wipe preserved catalog: {}", source))] CannotWipeCatalog { - source: parquet_file::catalog::api::Error, + source: parquet_file::catalog::core::Error, }, } pub type Result = std::result::Result; @@ -166,8 +172,10 @@ impl CatalogState for Loader { &mut self, iox_object_store: Arc, info: CatalogParquetInfo, - ) -> parquet_file::catalog::api::Result<()> { - use parquet_file::catalog::api::{MetadataExtractFailed, ReplayPlanError, SchemaError}; + ) -> Result<(), CatalogStateAddError> { + use parquet_file::catalog::interface::{ + MetadataExtractFailed, ReplayPlanError, SchemaError, + }; // extract relevant bits from parquet file metadata let iox_md = info @@ -212,15 +220,13 @@ impl CatalogState for Loader { .get_or_create_partition(&iox_md.table_name, &iox_md.partition_key); let mut partition = partition.write(); if partition.chunk(iox_md.chunk_id).is_some() { - return Err( - parquet_file::catalog::api::Error::ParquetFileAlreadyExists { path: info.path }, - ); + return Err(CatalogStateAddError::ParquetFileAlreadyExists { path: info.path }); } let schema_handle = TableSchemaUpsertHandle::new(&table_schema, &parquet_chunk.schema()) .map_err(|e| Box::new(e) as _) .context(SchemaError { path: info.path })?; - let delete_predicates: Arc> = Arc::new(vec![]); // NGA todo: After Marco save delete predicate into the catalog, it will need to extract into this variable + let delete_predicates: Vec> = vec![]; // NGA todo: After Marco saves delete predicate into the catalog, it will need to get extracted into this variable partition.insert_object_store_only_chunk( iox_md.chunk_id, 
parquet_chunk, @@ -234,7 +240,7 @@ impl CatalogState for Loader { Ok(()) } - fn remove(&mut self, path: &ParquetFilePath) -> parquet_file::catalog::api::Result<()> { + fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> { let mut removed_any = false; for partition in self.catalog.partitions() { @@ -261,7 +267,7 @@ impl CatalogState for Loader { if removed_any { Ok(()) } else { - Err(parquet_file::catalog::api::Error::ParquetFileDoesNotExist { path: path.clone() }) + Err(CatalogStateRemoveError::ParquetFileDoesNotExist { path: path.clone() }) } } } @@ -273,7 +279,7 @@ mod tests { use data_types::{server_id::ServerId, DatabaseName}; use object_store::ObjectStore; use parquet_file::catalog::{ - api::CheckpointData, + interface::CheckpointData, test_helpers::{assert_catalog_state_implementation, TestCatalogState}, }; use std::convert::TryFrom; diff --git a/server/src/lib.rs b/server/src/lib.rs index 10f29cd73b..804aee59d2 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -1195,7 +1195,7 @@ mod tests { path::{parsed::DirsAndFileName, ObjectStorePath}, ObjectStore, ObjectStoreApi, }; - use parquet_file::catalog::{api::PreservedCatalog, test_helpers::TestCatalogState}; + use parquet_file::catalog::{core::PreservedCatalog, test_helpers::TestCatalogState}; use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner, QueryDatabase}; use std::{ convert::{TryFrom, TryInto}, diff --git a/src/commands/database/partition.rs b/src/commands/database/partition.rs index 22046f8046..72d8dd9988 100644 --- a/src/commands/database/partition.rs +++ b/src/commands/database/partition.rs @@ -1,6 +1,5 @@ //! This module implements the `partition` CLI command use data_types::chunk_metadata::ChunkSummary; -use data_types::job::Operation; use generated_types::google::FieldViolation; use influxdb_iox_client::{ connection::Connection, @@ -10,7 +9,7 @@ use influxdb_iox_client::{ PersistPartitionError, UnloadPartitionChunkError, }, }; -use std::convert::{TryFrom, TryInto}; +use std::convert::TryFrom; use structopt::StructOpt; use thiserror::Error; @@ -283,10 +282,9 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> { chunk_id, } = close_chunk; - let operation: Operation = client + let operation = client .close_partition_chunk(db_name, table_name, partition_key, chunk_id) - .await? - .try_into()?; + .await?; serde_json::to_writer_pretty(std::io::stdout(), &operation)?; } diff --git a/src/commands/database/recover.rs b/src/commands/database/recover.rs index 2df1dee01b..5b9b8aa6eb 100644 --- a/src/commands/database/recover.rs +++ b/src/commands/database/recover.rs @@ -1,6 +1,3 @@ -use std::convert::TryInto; - -use data_types::job::Operation; use generated_types::google::FieldViolation; use influxdb_iox_client::{connection::Connection, management}; use snafu::{ResultExt, Snafu}; @@ -74,12 +71,10 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> { return Err(Error::NeedsTheForceError); } - let operation: Operation = client + let operation = client .wipe_persisted_catalog(db_name) .await - .context(WipeError)? 
- .try_into() - .context(InvalidResponse)?; + .context(WipeError)?; serde_json::to_writer_pretty(std::io::stdout(), &operation).context(WritingJson)?; } diff --git a/src/commands/operations.rs b/src/commands/operations.rs index 44d284f0f2..af741e446d 100644 --- a/src/commands/operations.rs +++ b/src/commands/operations.rs @@ -1,11 +1,8 @@ -use data_types::job::Operation; -use generated_types::google::FieldViolation; use influxdb_iox_client::{ connection::Connection, management, operations::{self, Client}, }; -use std::convert::TryInto; use structopt::StructOpt; use thiserror::Error; @@ -15,9 +12,6 @@ pub enum Error { #[error("Client error: {0}")] ClientError(#[from] operations::Error), - #[error("Received invalid response: {0}")] - InvalidResponse(#[from] FieldViolation), - #[error("Failed to create dummy job: {0}")] CreateDummyJobError(#[from] management::CreateDummyJobError), @@ -68,29 +62,16 @@ enum Command { pub async fn command(connection: Connection, config: Config) -> Result<()> { match config.command { Command::List => { - let result: Result, _> = Client::new(connection) - .list_operations() - .await? - .into_iter() - .map(|c| c.operation()) - .map(TryInto::try_into) - .collect(); - let operations = result?; + let operations = Client::new(connection).list_operations().await?; serde_json::to_writer_pretty(std::io::stdout(), &operations)?; } Command::Get { id } => { - let operation: Operation = Client::new(connection) - .get_operation(id) - .await? - .try_into()?; + let operation = Client::new(connection).get_operation(id).await?; serde_json::to_writer_pretty(std::io::stdout(), &operation)?; } Command::Wait { id, nanos } => { let timeout = nanos.map(std::time::Duration::from_nanos); - let operation: Operation = Client::new(connection) - .wait_operation(id, timeout) - .await? - .try_into()?; + let operation = Client::new(connection).wait_operation(id, timeout).await?; serde_json::to_writer_pretty(std::io::stdout(), &operation)?; } Command::Cancel { id } => { @@ -98,10 +79,9 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> { println!("Ok"); } Command::Test { nanos } => { - let operation: Operation = management::Client::new(connection) + let operation = management::Client::new(connection) .create_dummy_job(nanos) - .await? 
- .try_into()?; + .await?; serde_json::to_writer_pretty(std::io::stdout(), &operation)?; } } diff --git a/src/influxdb_ioxd.rs b/src/influxdb_ioxd.rs index 1a22ffea0e..afcffc61bd 100644 --- a/src/influxdb_ioxd.rs +++ b/src/influxdb_ioxd.rs @@ -753,9 +753,11 @@ mod tests { child(prepare_sql_span, "prepare_plan").unwrap(); let collect_span = child(ctx_span, "collect").unwrap(); + let execute_span = child(collect_span, "execute_stream_partitioned").unwrap(); + let coalesce_span = child(execute_span, "CoalescePartitionsEx").unwrap(); // validate spans from DataFusion ExecutionPlan are present - child(collect_span, "ProjectionExec: expr").unwrap(); + child(coalesce_span, "ProjectionExec: expr").unwrap(); let database_not_found = root_spans[3]; assert_eq!(database_not_found.status, SpanStatus::Err); diff --git a/src/influxdb_ioxd/http.rs b/src/influxdb_ioxd/http.rs index 76ac71d79a..04d4df9e20 100644 --- a/src/influxdb_ioxd/http.rs +++ b/src/influxdb_ioxd/http.rs @@ -28,7 +28,7 @@ use data_types::{ }; use influxdb_iox_client::format::QueryOutputFormat; use influxdb_line_protocol::parse_lines; -use query::{exec::ExecutionContextProvider, QueryDatabase}; +use query::exec::ExecutionContextProvider; use server::{ApplicationState, ConnectionManager, Error, Server as AppServer}; // External crates @@ -392,7 +392,6 @@ where .get("/health", health::) .get("/metrics", handle_metrics::) .get("/iox/api/v1/databases/:name/query", query::) - .get("/api/v1/partitions", list_partitions::) .get("/debug/pprof", pprof_home::) .get("/debug/pprof/profile", pprof_profile::) .get("/debug/pprof/allocs", pprof_heappy_profile::) @@ -644,43 +643,6 @@ async fn handle_metrics( Ok(Response::new(Body::from(body))) } -#[derive(Deserialize, Debug)] -/// Arguments in the query string of the request to /partitions -struct DatabaseInfo { - org: String, - bucket: String, -} - -#[tracing::instrument(level = "debug")] -async fn list_partitions( - req: Request, -) -> Result, ApplicationError> { - let server = Arc::clone(&req.data::>().expect("server state").app_server); - - let query = req.uri().query().context(ExpectedQueryString {})?; - - let info: DatabaseInfo = serde_urlencoded::from_str(query).context(InvalidQueryString { - query_string: query, - })?; - - let db_name = - org_and_bucket_to_database(&info.org, &info.bucket).context(BucketMappingError)?; - - let db = server.db(&db_name)?; - - let partition_keys = - db.partition_keys() - .map_err(|e| Box::new(e) as _) - .context(BucketByName { - org: &info.org, - bucket_name: &info.bucket, - })?; - - let result = serde_json::to_string(&partition_keys).context(JsonGenerationError)?; - - Ok(Response::new(Body::from(result))) -} - #[derive(Deserialize, Debug)] /// Arguments in the query string of the request to /snapshot struct SnapshotInfo { diff --git a/src/influxdb_ioxd/rpc/management.rs b/src/influxdb_ioxd/rpc/management.rs index 278ee303ac..e9ac840c8d 100644 --- a/src/influxdb_ioxd/rpc/management.rs +++ b/src/influxdb_ioxd/rpc/management.rs @@ -631,7 +631,7 @@ where del_predicate.exprs.push(expr); } - db.delete(&table_name, &del_predicate) + db.delete(&table_name, Arc::new(del_predicate)) .await .map_err(default_db_error_handler)?; } diff --git a/tests/end_to_end_cases/management_api.rs b/tests/end_to_end_cases/management_api.rs index e8c025ff0e..5bbcee73cf 100644 --- a/tests/end_to_end_cases/management_api.rs +++ b/tests/end_to_end_cases/management_api.rs @@ -9,7 +9,6 @@ use generated_types::{ }; use influxdb_iox_client::{ management::{Client, CreateDatabaseError}, - operations, 
write::WriteError, }; @@ -880,20 +879,16 @@ async fn test_close_partition_chunk() { assert_eq!(chunks[0].storage, ChunkStorage::OpenMutableBuffer as i32); // Move the chunk to read buffer - let operation = management_client + let iox_operation = management_client .close_partition_chunk(&db_name, table_name, partition_key, 0) .await .expect("new partition chunk"); - println!("Operation response is {:?}", operation); - let operation_id = operation.id(); - - let meta = operations::ClientOperation::try_new(operation) - .unwrap() - .metadata(); + println!("Operation response is {:?}", iox_operation); + let operation_id = iox_operation.operation.id(); // ensure we got a legit job description back - if let Some(Job::CloseChunk(close_chunk)) = meta.job { + if let Some(Job::CloseChunk(close_chunk)) = iox_operation.metadata.job { assert_eq!(close_chunk.db_name, db_name); assert_eq!(close_chunk.partition_key, partition_key); assert_eq!(close_chunk.chunk_id, 0); @@ -1020,20 +1015,16 @@ async fn test_wipe_preserved_catalog() { // Recover by wiping preserved catalog // - let operation = management_client + let iox_operation = management_client .wipe_persisted_catalog(&db_name) .await .expect("wipe persisted catalog"); - println!("Operation response is {:?}", operation); - let operation_id = operation.id(); - - let meta = operations::ClientOperation::try_new(operation) - .unwrap() - .metadata(); + println!("Operation response is {:?}", iox_operation); + let operation_id = iox_operation.operation.id(); // ensure we got a legit job description back - if let Some(Job::WipePreservedCatalog(wipe_persisted_catalog)) = meta.job { + if let Some(Job::WipePreservedCatalog(wipe_persisted_catalog)) = iox_operation.metadata.job { assert_eq!(wipe_persisted_catalog.db_name, db_name); } else { panic!("unexpected job returned") diff --git a/tests/end_to_end_cases/management_cli.rs b/tests/end_to_end_cases/management_cli.rs index ce03987073..8236398a34 100644 --- a/tests/end_to_end_cases/management_cli.rs +++ b/tests/end_to_end_cases/management_cli.rs @@ -1,13 +1,12 @@ -use std::sync::Arc; use std::time::Duration; use assert_cmd::Command; use predicates::prelude::*; -use data_types::chunk_metadata::ChunkAddr; -use data_types::{ - chunk_metadata::ChunkStorage, - job::{Job, Operation}, +use data_types::chunk_metadata::ChunkStorage; +use generated_types::google::longrunning::IoxOperation; +use generated_types::influxdata::iox::management::v1::{ + operation_metadata::Job, CloseChunk, WipePreservedCatalog, }; use test_helpers::make_temp_file; use write_buffer::maybe_skip_kafka_integration; @@ -112,7 +111,7 @@ async fn test_create_database() { .and(predicate::str::contains(format!(r#""name": "{}"#, db))) // validate the defaults have been set reasonably .and(predicate::str::contains("%Y-%m-%d %H:00:00")) - .and(predicate::str::contains(r#""bufferSizeHard": 104857600"#)) + .and(predicate::str::contains(r#""bufferSizeHard": "104857600""#)) .and(predicate::str::contains("lifecycleRules")), ); } @@ -147,7 +146,7 @@ async fn test_create_database_size() { .assert() .success() .stdout( - predicate::str::contains(r#""bufferSizeHard": 1000"#) + predicate::str::contains(r#""bufferSizeHard": "1000""#) .and(predicate::str::contains("lifecycleRules")), ); } @@ -765,7 +764,7 @@ async fn test_close_partition_chunk() { let lp_data = vec!["cpu,region=west user=23.2 100"]; load_lp(addr, &db_name, lp_data); - let stdout: Operation = serde_json::from_slice( + let stdout: IoxOperation = serde_json::from_slice( &Command::cargo_bin("influxdb_iox") 
.unwrap() .arg("database") @@ -784,18 +783,16 @@ async fn test_close_partition_chunk() { ) .expect("Expected JSON output"); - let expected_job = Job::CompactChunk { - chunk: ChunkAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from("cpu"), - partition_key: Arc::from("cpu"), - chunk_id: 0, - }, - }; + let expected_job = Job::CloseChunk(CloseChunk { + db_name, + table_name: "cpu".to_string(), + partition_key: "cpu".to_string(), + chunk_id: 0, + }); assert_eq!( Some(expected_job), - stdout.job, + stdout.metadata.job, "operation was {:#?}", stdout ); @@ -828,7 +825,7 @@ async fn test_wipe_persisted_catalog() { let server_fixture = fixture_broken_catalog(&db_name).await; let addr = server_fixture.grpc_base(); - let stdout: Operation = serde_json::from_slice( + let stdout: IoxOperation = serde_json::from_slice( &Command::cargo_bin("influxdb_iox") .unwrap() .arg("database") @@ -845,13 +842,11 @@ async fn test_wipe_persisted_catalog() { ) .expect("Expected JSON output"); - let expected_job = Job::WipePreservedCatalog { - db_name: Arc::from(db_name.as_str()), - }; + let expected_job = Job::WipePreservedCatalog(WipePreservedCatalog { db_name }); assert_eq!( Some(expected_job), - stdout.job, + stdout.metadata.job, "operation was {:#?}", stdout ); diff --git a/tests/end_to_end_cases/operations_api.rs b/tests/end_to_end_cases/operations_api.rs index b42ce4a887..798ddad7b9 100644 --- a/tests/end_to_end_cases/operations_api.rs +++ b/tests/end_to_end_cases/operations_api.rs @@ -17,7 +17,7 @@ async fn test_operations() { let nanos = vec![Duration::from_secs(20).as_nanos() as _, 1]; - let operation = management_client + let iox_operation = management_client .create_dummy_job(nanos.clone()) .await .expect("create dummy job failed"); @@ -28,20 +28,15 @@ async fn test_operations() { .expect("list operations failed"); assert_eq!(running_ops.len(), 1); - assert_eq!(running_ops[0].name(), operation.name); + assert_eq!(running_ops[0].operation.name, iox_operation.operation.name); - let id = operation.name.parse().expect("not an integer"); + let id = iox_operation.operation.id(); + let iox_operation = operations_client.get_operation(id).await.unwrap(); - let meta = operations_client - .client_operation(id) - .await - .unwrap() - .metadata(); + let job = iox_operation.metadata.job.expect("expected a job"); - let job = meta.job.expect("expected a job"); - - assert_eq!(meta.total_count, 2); - assert_eq!(meta.pending_count, 1); + assert_eq!(iox_operation.metadata.total_count, 2); + assert_eq!(iox_operation.metadata.pending_count, 1); assert_eq!( job, operation_metadata::Job::Dummy(Dummy { @@ -51,14 +46,14 @@ async fn test_operations() { ); // Check wait times out correctly - let fetched = operations_client + let iox_operation = operations_client .wait_operation(id, Some(Duration::from_micros(10))) .await .expect("failed to wait operation"); - assert!(!fetched.done); + assert!(!iox_operation.operation.done); // Shouldn't specify wall_nanos as not complete - assert_eq!(meta.wall_nanos, 0); + assert_eq!(iox_operation.metadata.wall_nanos, 0); let wait = tokio::spawn(async move { let mut operations_client = server_fixture.operations_client(); @@ -74,18 +69,15 @@ async fn test_operations() { .expect("failed to cancel operation"); let waited = wait.await.unwrap(); - let meta = operations::ClientOperation::try_new(waited.clone()) - .unwrap() - .metadata(); - assert!(waited.done); - assert!(meta.wall_nanos > 0); - assert!(meta.cpu_nanos > 0); - assert_eq!(meta.pending_count, 0); - assert_eq!(meta.total_count, 
2); - assert_eq!(meta.cancelled_count, 1); + assert!(waited.operation.done); + assert!(waited.metadata.wall_nanos > 0); + assert!(waited.metadata.cpu_nanos > 0); + assert_eq!(waited.metadata.pending_count, 0); + assert_eq!(waited.metadata.total_count, 2); + assert_eq!(waited.metadata.cancelled_count, 1); - match waited.result { + match waited.operation.result { Some(operations::generated_types::operation::Result::Error(status)) => { assert_eq!(status.code, tonic::Code::Cancelled as i32) } diff --git a/tests/end_to_end_cases/operations_cli.rs b/tests/end_to_end_cases/operations_cli.rs index 24e63c1c22..0284e55a69 100644 --- a/tests/end_to_end_cases/operations_cli.rs +++ b/tests/end_to_end_cases/operations_cli.rs @@ -1,6 +1,7 @@ use crate::common::server_fixture::ServerFixture; use assert_cmd::Command; -use data_types::job::{Job, Operation, OperationStatus}; +use generated_types::google::longrunning::IoxOperation; +use generated_types::influxdata::iox::management::v1::{operation_metadata::Job, Dummy}; use predicates::prelude::*; #[tokio::test] @@ -9,7 +10,7 @@ async fn test_start_stop() { let addr = server_fixture.grpc_base(); let duration = std::time::Duration::from_secs(10).as_nanos() as u64; - let stdout: Operation = serde_json::from_slice( + let stdout: IoxOperation = serde_json::from_slice( &Command::cargo_bin("influxdb_iox") .unwrap() .arg("operation") @@ -24,13 +25,13 @@ async fn test_start_stop() { ) .expect("expected JSON output"); - assert_eq!(stdout.total_count, 1); - match stdout.job { - Some(Job::Dummy { nanos, .. }) => assert_eq!(nanos, vec![duration]), - _ => panic!("expected dummy job got {:?}", stdout.job), + assert_eq!(stdout.metadata.total_count, 1); + match stdout.metadata.job { + Some(Job::Dummy(Dummy { nanos, .. })) => assert_eq!(nanos, vec![duration]), + _ => panic!("expected dummy job got {:?}", stdout.metadata.job), } - let operations: Vec = serde_json::from_slice( + let operations: Vec = serde_json::from_slice( &Command::cargo_bin("influxdb_iox") .unwrap() .arg("operation") @@ -45,33 +46,33 @@ async fn test_start_stop() { .expect("expected JSON output"); assert_eq!(operations.len(), 1); - match &operations[0].job { - Some(Job::Dummy { nanos, .. }) => { + match &operations[0].metadata.job { + Some(Job::Dummy(Dummy { nanos, .. 
})) => { assert_eq!(nanos.len(), 1); assert_eq!(nanos[0], duration); } - _ => panic!("expected dummy job got {:?}", &operations[0].job), + _ => panic!("expected dummy job got {:?}", &operations[0].metadata.job), } - let id = operations[0].id; + let name = &operations[0].operation.name; Command::cargo_bin("influxdb_iox") .unwrap() .arg("operation") .arg("cancel") - .arg(id.to_string()) + .arg(name.clone()) .arg("--host") .arg(addr) .assert() .success() .stdout(predicate::str::contains("Ok")); - let completed: Operation = serde_json::from_slice( + let completed: IoxOperation = serde_json::from_slice( &Command::cargo_bin("influxdb_iox") .unwrap() .arg("operation") .arg("wait") - .arg(id.to_string()) + .arg(name.to_string()) .arg("--host") .arg(addr) .assert() @@ -81,9 +82,8 @@ async fn test_start_stop() { ) .expect("expected JSON output"); - assert_eq!(completed.pending_count, 0); - assert_eq!(completed.total_count, 1); - assert_eq!(completed.cancelled_count, 1); - assert_eq!(completed.status, OperationStatus::Cancelled); - assert_eq!(&completed.job, &operations[0].job) + assert_eq!(completed.metadata.pending_count, 0); + assert_eq!(completed.metadata.total_count, 1); + assert_eq!(completed.metadata.cancelled_count, 1); + assert_eq!(&completed.metadata.job, &operations[0].metadata.job) } diff --git a/tests/end_to_end_cases/persistence.rs b/tests/end_to_end_cases/persistence.rs index 46a6f8c23e..2061e71642 100644 --- a/tests/end_to_end_cases/persistence.rs +++ b/tests/end_to_end_cases/persistence.rs @@ -2,7 +2,6 @@ use itertools::Itertools; use arrow_util::assert_batches_eq; use data_types::chunk_metadata::ChunkStorage; -use influxdb_iox_client::operations; use crate::{ common::server_fixture::ServerFixture, @@ -125,11 +124,11 @@ async fn test_full_lifecycle() { .await .unwrap() .iter() - .any(|operation| match operation.metadata().job { + .any(|operation| match &operation.metadata.job { Some(Job::CompactChunks(CompactChunks { db_name: operation_db_name, .. 
- })) => operation_db_name == db_name, + })) => operation_db_name == &db_name, _ => false, }); assert!(performed_compaction); @@ -269,20 +268,16 @@ async fn create_readbuffer_chunk(fixture: &ServerFixture, db_name: &str) -> u32 assert_eq!(chunks[0].storage, ChunkStorage::OpenMutableBuffer); // Move the chunk to read buffer - let operation = management_client + let iox_operation = management_client .close_partition_chunk(db_name, table_name, partition_key, 0) .await .expect("new partition chunk"); - println!("Operation response is {:?}", operation); - let operation_id = operation.id(); - - let meta = operations::ClientOperation::try_new(operation) - .unwrap() - .metadata(); + println!("Operation response is {:?}", iox_operation); + let operation_id = iox_operation.operation.id(); // ensure we got a legit job description back - if let Some(Job::CloseChunk(close_chunk)) = meta.job { + if let Some(Job::CloseChunk(close_chunk)) = iox_operation.metadata.job { assert_eq!(close_chunk.db_name, db_name); assert_eq!(close_chunk.partition_key, partition_key); assert_eq!(close_chunk.chunk_id, 0); diff --git a/tests/end_to_end_cases/scenario.rs b/tests/end_to_end_cases/scenario.rs index 8dfd6a7ebb..03c889ee71 100644 --- a/tests/end_to_end_cases/scenario.rs +++ b/tests/end_to_end_cases/scenario.rs @@ -556,12 +556,8 @@ where } if t_start.elapsed() >= wait_time { - let operations = fixture.operations_client().list_operations().await.unwrap(); - let mut operations: Vec<_> = operations - .into_iter() - .map(|x| (x.name().parse::().unwrap(), x.metadata())) - .collect(); - operations.sort_by_key(|x| x.0); + let mut operations = fixture.operations_client().list_operations().await.unwrap(); + operations.sort_by(|a, b| a.operation.name.cmp(&b.operation.name)); panic!( "Could not find {} within {:?}.\nChunks were: {:#?}\nOperations were: {:#?}", diff --git a/tests/end_to_end_cases/sql_cli.rs b/tests/end_to_end_cases/sql_cli.rs index 6c4a128926..e670710d1b 100644 --- a/tests/end_to_end_cases/sql_cli.rs +++ b/tests/end_to_end_cases/sql_cli.rs @@ -306,17 +306,20 @@ async fn test_sql_observer_operations() { let partition_key = "cpu"; let table_name = "cpu"; // Move the chunk to read buffer - let operation = management_client + let iox_operation = management_client .close_partition_chunk(&db_name, table_name, partition_key, 0) .await .expect("new partition chunk"); - println!("Operation response is {:?}", operation); + println!("Operation response is {:?}", iox_operation); // wait for the job to be done fixture .operations_client() - .wait_operation(operation.id(), Some(std::time::Duration::from_secs(1))) + .wait_operation( + iox_operation.operation.id(), + Some(std::time::Duration::from_secs(1)), + ) .await .expect("failed to wait operation"); diff --git a/tests/end_to_end_cases/system_tables.rs b/tests/end_to_end_cases/system_tables.rs index 530911f236..594ad0dba8 100644 --- a/tests/end_to_end_cases/system_tables.rs +++ b/tests/end_to_end_cases/system_tables.rs @@ -27,12 +27,12 @@ async fn test_operations() { .expect("write succeded"); // Move the chunk to read buffer - let operation = management_client + let iox_operation = management_client .close_partition_chunk(&db_name1, table_name, partition_key, 0) .await .expect("new partition chunk"); - let operation_id = operation.id(); + let operation_id = iox_operation.operation.id(); operations_client .wait_operation(operation_id, Some(std::time::Duration::from_secs(1))) .await diff --git a/trace/src/ctx.rs b/trace/src/ctx.rs index 94d4a41f47..22a7739044 100644 --- 
a/trace/src/ctx.rs
+++ b/trace/src/ctx.rs
@@ -55,6 +55,22 @@ pub struct SpanContext {
 }
 
 impl SpanContext {
+    /// Create a new root span context, sent to `collector`. The
+    /// new span context has a random trace_id and span_id, and thus
+    /// is not connected to any existing span or trace.
+    pub fn new(collector: Arc<dyn TraceCollector>) -> Self {
+        let mut rng = rand::thread_rng();
+        let trace_id: u128 = rng.gen_range(1..u128::MAX);
+        let span_id: u64 = rng.gen_range(1..u64::MAX);
+
+        Self {
+            trace_id: TraceId(NonZeroU128::new(trace_id).unwrap()),
+            parent_span_id: None,
+            span_id: SpanId(NonZeroU64::new(span_id).unwrap()),
+            collector: Some(collector),
+        }
+    }
+
     /// Creates a new child of the Span described by this TraceContext
     pub fn child(&self, name: impl Into<Cow<'static, str>>) -> Span {
         Span {
@@ -73,3 +89,22 @@ impl SpanContext {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::RingBufferTraceCollector;
+
+    #[test]
+    fn test_new() {
+        // two newly created spans should not have duplicated trace or span ids
+        let collector = Arc::new(RingBufferTraceCollector::new(5)) as _;
+
+        let ctx1 = SpanContext::new(Arc::clone(&collector));
+        let ctx2 = SpanContext::new(collector);
+
+        assert_ne!(ctx1.trace_id, ctx2.trace_id);
+        assert_ne!(ctx1.span_id, ctx2.span_id);
+    }
+}
diff --git a/trace/src/span.rs b/trace/src/span.rs
index 55c5808a92..953cc7b99e 100644
--- a/trace/src/span.rs
+++ b/trace/src/span.rs
@@ -201,29 +201,14 @@ impl<'a> Drop for SpanRecorder {
 
 #[cfg(test)]
 mod tests {
-    use std::num::{NonZeroU128, NonZeroU64};
     use std::sync::Arc;
 
-    use crate::ctx::{SpanId, TraceId};
     use crate::{RingBufferTraceCollector, TraceCollector};
 
     use super::*;
 
     fn make_span(collector: Arc<dyn TraceCollector>) -> Span {
-        Span {
-            name: "foo".into(),
-            ctx: SpanContext {
-                trace_id: TraceId(NonZeroU128::new(23948923).unwrap()),
-                parent_span_id: None,
-                span_id: SpanId(NonZeroU64::new(3498394).unwrap()),
-                collector: Some(collector),
-            },
-            start: None,
-            end: None,
-            status: SpanStatus::Unknown,
-            metadata: Default::default(),
-            events: vec![],
-        }
+        SpanContext::new(collector).child("foo")
    }
 
    #[test]