Merge branch 'main' into cn/restore
commit 23cc980d9e
@@ -1318,13 +1318,13 @@ dependencies = [
 "data_types",
 "futures",
 "google_types",
 "influxdb_line_protocol",
 "num_cpus",
 "observability_deps",
 "pbjson",
 "pbjson_build",
 "proc-macro2",
 "prost",
 "prost-build",
 "prost-types",
 "regex",
 "serde",
 "serde_json",
@@ -1392,6 +1392,8 @@ version = "0.1.0"
dependencies = [
 "bytes",
 "chrono",
 "pbjson",
 "pbjson_build",
 "prost",
 "prost-build",
 "serde",
@@ -2896,6 +2898,39 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58"

[[package]]
name = "pbjson"
version = "0.1.0"
dependencies = [
 "base64 0.13.0",
 "bytes",
 "serde",
]

[[package]]
name = "pbjson_build"
version = "0.1.0"
dependencies = [
 "heck",
 "itertools 0.10.1",
 "pbjson_test",
 "prost",
 "prost-types",
 "tempfile",
]

[[package]]
name = "pbjson_test"
version = "0.1.0"
dependencies = [
 "pbjson",
 "pbjson_build",
 "prost",
 "prost-build",
 "serde",
 "serde_json",
]

[[package]]
name = "peeking_take_while"
version = "0.1.2"
@@ -57,6 +57,9 @@ members = [
    "observability_deps",
    "packers",
    "panic_logging",
    "pbjson",
    "pbjson_build",
    "pbjson_test",
    "persistence_windows",
    "predicate",
    "query",
@@ -117,45 +117,3 @@ impl Job {
        }
    }
}

/// The status of a running operation
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum OperationStatus {
    /// A task associated with the operation is running
    Running,
    /// All tasks associated with the operation have finished successfully
    Success,
    /// The operation was cancelled and no associated tasks are running
    Cancelled,
    /// An operation error was returned
    Errored,
}

/// A group of asynchronous tasks being performed by an IOx server
///
/// TODO: Temporary until prost adds JSON support
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Operation {
    /// ID of the running operation
    pub id: usize,
    // The total number of created tasks
    pub total_count: u64,
    // The number of pending tasks
    pub pending_count: u64,
    // The number of tasks that completed successfully
    pub success_count: u64,
    // The number of tasks that returned an error
    pub error_count: u64,
    // The number of tasks that were cancelled
    pub cancelled_count: u64,
    // The number of tasks that did not run to completion (e.g. panic)
    pub dropped_count: u64,
    /// Wall time spent executing this operation
    pub wall_time: std::time::Duration,
    /// CPU time spent executing this operation
    pub cpu_time: std::time::Duration,
    /// Additional job metadata
    pub job: Option<Job>,
    /// The status of the running operation
    pub status: OperationStatus,
}
@@ -7,16 +7,12 @@ edition = "2018"
[dependencies] # In alphabetical order
bytes = { version = "1.0", features = ["serde"] }
data_types = { path = "../data_types" }
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
# version of the flatbuffers crate
#flatbuffers = "2"
futures = "0.3"
google_types = { path = "../google_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
observability_deps = { path = "../observability_deps" }
num_cpus = "1.13.0"
observability_deps = { path = "../observability_deps" }
pbjson = { path = "../pbjson" }
prost = "0.8"
prost-types = "0.8"
regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.67"

@@ -31,3 +27,4 @@ chrono = { version = "0.4", features = ["serde"] }
proc-macro2 = "=1.0.27"
tonic-build = "0.5"
prost-build = "0.8"
pbjson_build = { path = "../pbjson_build" }
@@ -64,12 +64,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
    config
        .compile_well_known_types()
        .disable_comments(&[".google"])
        // approximates jsonpb. This is still not enough to deal with the special cases like Any
        // Tracking issue for proper jsonpb support in prost: https://github.com/danburkert/prost/issues/277
        .type_attribute(
            ".",
            "#[derive(serde::Serialize,serde::Deserialize)] #[serde(rename_all = \"camelCase\")]",
        )
        .extern_path(".google.protobuf", "::google_types::protobuf")
        .bytes(&[".influxdata.iox.catalog.v1.AddParquet.metadata"]);

@@ -79,5 +73,11 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
        .format(true)
        .compile_with_config(config, &proto_files, &[root.into()])?;

    let descriptor_set = std::fs::read(descriptor_path)?;

    pbjson_build::Builder::new()
        .register_descriptors(&descriptor_set)?
        .build(&[".influxdata", ".google.longrunning", ".google.rpc"])?;

    Ok(())
}
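Aside: once pbjson_build has generated the `.serde.rs` impls registered above, the prost-generated types can be round-tripped through JSON. A minimal sketch, assuming the generated `OperationMetadata` type and the `Default`/`PartialEq` impls that prost derives for messages:

```rust
use generated_types::influxdata::iox::management::v1::OperationMetadata;

fn json_round_trip() -> Result<(), serde_json::Error> {
    // Field names serialize in camelCase per the type_attribute above
    let meta = OperationMetadata {
        total_count: 3,
        ..Default::default()
    };
    let json = serde_json::to_string(&meta)?;
    let back: OperationMetadata = serde_json::from_str(&json)?;
    assert_eq!(meta, back);
    Ok(())
}
```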
@@ -27,12 +27,11 @@ message OperationMetadata {
  // The number of tasks that did not run to completion (e.g. panic)
  uint64 dropped_count = 16;

  reserved 6;

  // What kind of job is it?
  oneof job {
    Dummy dummy = 5;
    /* historical artifact
    PersistSegment persist_segment = 6;
    */
    CloseChunk close_chunk = 7;
    WriteChunk write_chunk = 8;
    WipePreservedCatalog wipe_preserved_catalog = 9;
@@ -5,10 +5,17 @@ pub use google_types::*;

pub mod rpc {
    include!(concat!(env!("OUT_DIR"), "/google.rpc.rs"));
    include!(concat!(env!("OUT_DIR"), "/google.rpc.serde.rs"));
}

pub mod longrunning {
    include!(concat!(env!("OUT_DIR"), "/google.longrunning.rs"));
    include!(concat!(env!("OUT_DIR"), "/google.longrunning.serde.rs"));

    use crate::google::{FieldViolation, FieldViolationExt};
    use crate::influxdata::iox::management::v1::{OperationMetadata, OPERATION_METADATA};
    use prost::{bytes::Bytes, Message};
    use std::convert::TryFrom;

    impl Operation {
        /// Return the IOx operation `id`. This `id` can

@@ -19,6 +26,46 @@ pub mod longrunning {
                .parse()
                .expect("Internal error: id returned from server was not an integer")
        }

        /// Decodes an IOx `OperationMetadata` metadata payload
        pub fn iox_metadata(&self) -> Result<OperationMetadata, FieldViolation> {
            let metadata = self
                .metadata
                .as_ref()
                .ok_or_else(|| FieldViolation::required("metadata"))?;

            if !crate::protobuf_type_url_eq(&metadata.type_url, OPERATION_METADATA) {
                return Err(FieldViolation {
                    field: "metadata.type_url".to_string(),
                    description: "Unexpected field type".to_string(),
                });
            }

            Message::decode(Bytes::clone(&metadata.value)).field("metadata.value")
        }
    }

    /// Groups together an `Operation` with a decoded `OperationMetadata`
    ///
    /// When serialized this will serialize the encoded Any field on `Operation` along
    /// with its decoded representation as `OperationMetadata`
    #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
    pub struct IoxOperation {
        /// The `Operation` message returned from the API
        pub operation: Operation,
        /// The decoded `Operation::metadata` contained within `IoxOperation::operation`
        pub metadata: OperationMetadata,
    }

    impl TryFrom<Operation> for IoxOperation {
        type Error = FieldViolation;

        fn try_from(operation: Operation) -> Result<Self, Self::Error> {
            Ok(Self {
                metadata: operation.iox_metadata()?,
                operation,
            })
        }
    }
}
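The doc comment above explains that `IoxOperation` pairs the raw `Operation` with its decoded metadata. A minimal usage sketch, assuming an `Operation` obtained from the gRPC API and the crate paths shown in this diff:

```rust
use std::convert::TryInto;

use generated_types::google::longrunning::{IoxOperation, Operation};
use generated_types::google::FieldViolation;

fn decode(operation: Operation) -> Result<IoxOperation, FieldViolation> {
    // Fails if `metadata` is missing or its type_url is not OPERATION_METADATA
    let iox: IoxOperation = operation.try_into()?;
    println!(
        "operation {} has {} pending tasks",
        iox.operation.name, iox.metadata.pending_count
    );
    Ok(iox)
}
```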
@@ -1,11 +1,5 @@
use crate::google::{longrunning, protobuf::Any, FieldViolation, FieldViolationExt};
use crate::influxdata::iox::management::v1 as management;
use crate::protobuf_type_url_eq;
use data_types::chunk_metadata::ChunkAddr;
use data_types::job::{Job, OperationStatus};
use data_types::partition_metadata::PartitionAddr;
use std::convert::TryFrom;
use std::sync::Arc;
use data_types::job::Job;

impl From<Job> for management::operation_metadata::Job {
    fn from(job: Job) -> Self {
@ -61,142 +55,3 @@ impl From<Job> for management::operation_metadata::Job {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<management::operation_metadata::Job> for Job {
|
||||
fn from(value: management::operation_metadata::Job) -> Self {
|
||||
use management::operation_metadata::Job;
|
||||
match value {
|
||||
Job::Dummy(management::Dummy { nanos, db_name }) => Self::Dummy {
|
||||
nanos,
|
||||
db_name: (!db_name.is_empty()).then(|| Arc::from(db_name.as_str())),
|
||||
},
|
||||
Job::CloseChunk(management::CloseChunk {
|
||||
db_name,
|
||||
partition_key,
|
||||
table_name,
|
||||
chunk_id,
|
||||
}) => Self::CompactChunk {
|
||||
chunk: ChunkAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from(table_name.as_str()),
|
||||
partition_key: Arc::from(partition_key.as_str()),
|
||||
chunk_id,
|
||||
},
|
||||
},
|
||||
Job::WriteChunk(management::WriteChunk {
|
||||
db_name,
|
||||
partition_key,
|
||||
table_name,
|
||||
chunk_id,
|
||||
}) => Self::WriteChunk {
|
||||
chunk: ChunkAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from(table_name.as_str()),
|
||||
partition_key: Arc::from(partition_key.as_str()),
|
||||
chunk_id,
|
||||
},
|
||||
},
|
||||
Job::WipePreservedCatalog(management::WipePreservedCatalog { db_name }) => {
|
||||
Self::WipePreservedCatalog {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
}
|
||||
}
|
||||
Job::CompactChunks(management::CompactChunks {
|
||||
db_name,
|
||||
partition_key,
|
||||
table_name,
|
||||
chunks,
|
||||
}) => Self::CompactChunks {
|
||||
partition: PartitionAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from(table_name.as_str()),
|
||||
partition_key: Arc::from(partition_key.as_str()),
|
||||
},
|
||||
chunks,
|
||||
},
|
||||
Job::PersistChunks(management::PersistChunks {
|
||||
db_name,
|
||||
partition_key,
|
||||
table_name,
|
||||
chunks,
|
||||
}) => Self::PersistChunks {
|
||||
partition: PartitionAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from(table_name.as_str()),
|
||||
partition_key: Arc::from(partition_key.as_str()),
|
||||
},
|
||||
chunks,
|
||||
},
|
||||
Job::DropChunk(management::DropChunk {
|
||||
db_name,
|
||||
partition_key,
|
||||
table_name,
|
||||
chunk_id,
|
||||
}) => Self::DropChunk {
|
||||
chunk: ChunkAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from(table_name.as_str()),
|
||||
partition_key: Arc::from(partition_key.as_str()),
|
||||
chunk_id,
|
||||
},
|
||||
},
|
||||
Job::DropPartition(management::DropPartition {
|
||||
db_name,
|
||||
partition_key,
|
||||
table_name,
|
||||
}) => Self::DropPartition {
|
||||
partition: PartitionAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from(table_name.as_str()),
|
||||
partition_key: Arc::from(partition_key.as_str()),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<longrunning::Operation> for data_types::job::Operation {
|
||||
type Error = FieldViolation;
|
||||
|
||||
fn try_from(operation: longrunning::Operation) -> Result<Self, Self::Error> {
|
||||
let metadata: Any = operation
|
||||
.metadata
|
||||
.ok_or_else(|| FieldViolation::required("metadata"))?;
|
||||
|
||||
if !protobuf_type_url_eq(&metadata.type_url, management::OPERATION_METADATA) {
|
||||
return Err(FieldViolation {
|
||||
field: "metadata.type_url".to_string(),
|
||||
description: "Unexpected field type".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
let meta: management::OperationMetadata =
|
||||
prost::Message::decode(metadata.value).field("metadata.value")?;
|
||||
|
||||
let status = match &operation.result {
|
||||
None => OperationStatus::Running,
|
||||
Some(longrunning::operation::Result::Response(_)) => OperationStatus::Success,
|
||||
Some(longrunning::operation::Result::Error(status)) => {
|
||||
if status.code == tonic::Code::Cancelled as i32 {
|
||||
OperationStatus::Cancelled
|
||||
} else {
|
||||
OperationStatus::Errored
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
id: operation.name.parse().field("name")?,
|
||||
total_count: meta.total_count,
|
||||
pending_count: meta.pending_count,
|
||||
success_count: meta.success_count,
|
||||
error_count: meta.error_count,
|
||||
cancelled_count: meta.cancelled_count,
|
||||
dropped_count: meta.dropped_count,
|
||||
wall_time: std::time::Duration::from_nanos(meta.wall_nanos),
|
||||
cpu_time: std::time::Duration::from_nanos(meta.cpu_nanos),
|
||||
job: meta.job.map(Into::into),
|
||||
status,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -10,6 +10,10 @@ pub mod influxdata {
    pub mod platform {
        pub mod storage {
            include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
            include!(concat!(
                env!("OUT_DIR"),
                "/influxdata.platform.storage.serde.rs"
            ));

            // Can't implement `Default` because `prost::Message` implements `Default`
            impl TimestampRange {

@@ -27,6 +31,10 @@ pub mod influxdata {
        pub mod catalog {
            pub mod v1 {
                include!(concat!(env!("OUT_DIR"), "/influxdata.iox.catalog.v1.rs"));
                include!(concat!(
                    env!("OUT_DIR"),
                    "/influxdata.iox.catalog.v1.serde.rs"
                ));
            }
        }

@@ -37,12 +45,20 @@ pub mod influxdata {
                "influxdata.iox.management.v1.OperationMetadata";

            include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs"));
            include!(concat!(
                env!("OUT_DIR"),
                "/influxdata.iox.management.v1.serde.rs"
            ));
        }
    }

    pub mod write {
        pub mod v1 {
            include!(concat!(env!("OUT_DIR"), "/influxdata.iox.write.v1.rs"));
            include!(concat!(
                env!("OUT_DIR"),
                "/influxdata.iox.write.v1.serde.rs"
            ));
        }
    }
}

@@ -50,6 +66,7 @@ pub mod influxdata {
    pub mod pbdata {
        pub mod v1 {
            include!(concat!(env!("OUT_DIR"), "/influxdata.pbdata.v1.rs"));
            include!(concat!(env!("OUT_DIR"), "/influxdata.pbdata.v1.serde.rs"));
        }
    }
}
@@ -8,8 +8,10 @@ edition = "2018"
[dependencies] # In alphabetical order
bytes = { version = "1.0", features = ["serde"] }
chrono = "0.4"
pbjson = { path = "../pbjson" }
prost = "0.8"
serde = { version = "1.0", features = ["derive"] }

[build-dependencies] # In alphabetical order
prost-build = "0.8"
pbjson_build = { path = "../pbjson_build" }
@@ -1,6 +1,7 @@
//! Compiles Protocol Buffers and FlatBuffers schema definitions into
//! native Rust types.

use std::env;
use std::path::PathBuf;

type Error = Box<dyn std::error::Error>;

@@ -16,16 +17,18 @@ fn main() -> Result<()> {
        println!("cargo:rerun-if-changed={}", proto_file.display());
    }

    let descriptor_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("proto_descriptor.bin");
    prost_build::Config::new()
        .file_descriptor_set_path(&descriptor_path)
        .compile_well_known_types()
        .disable_comments(&["."])
        // approximates jsonpb. This is still not enough to deal with the special cases like Any.
        .type_attribute(
            ".google",
            "#[derive(serde::Serialize,serde::Deserialize)] #[serde(rename_all = \"camelCase\")]",
        )
        .bytes(&[".google"])
        .compile_protos(&proto_files, &[root])?;

    let descriptor_set = std::fs::read(descriptor_path)?;
    pbjson_build::Builder::new()
        .register_descriptors(&descriptor_set)?
        .build(&[".google"])?;

    Ok(())
}
@@ -17,6 +17,7 @@ mod pb {
    use std::convert::{TryFrom, TryInto};

    include!(concat!(env!("OUT_DIR"), "/google.protobuf.rs"));
    include!(concat!(env!("OUT_DIR"), "/google.protobuf.serde.rs"));

    impl TryFrom<Duration> for std::time::Duration {
        type Error = std::num::TryFromIntError;
@@ -3,8 +3,8 @@ use thiserror::Error;
use self::generated_types::{management_service_client::ManagementServiceClient, *};

use crate::connection::Connection;
use ::generated_types::google::longrunning::Operation;

use crate::google::{longrunning::IoxOperation, FieldViolation};
use std::convert::TryInto;
use std::num::NonZeroU32;
@@ -200,6 +200,10 @@ pub enum CreateDummyJobError {
    #[error("Server returned an empty response")]
    EmptyResponse,

    /// Response payload was invalid
    #[error("Invalid response: {0}")]
    InvalidResponse(#[from] FieldViolation),

    /// Client received an unexpected error from the server
    #[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
    ServerError(tonic::Status),

@@ -284,6 +288,10 @@ pub enum ClosePartitionChunkError {
    #[error("Server unavailable: {}", .0.message())]
    Unavailable(tonic::Status),

    /// Response payload was invalid
    #[error("Invalid response: {0}")]
    InvalidResponse(#[from] FieldViolation),

    /// Client received an unexpected error from the server
    #[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
    ServerError(tonic::Status),

@@ -336,6 +344,10 @@ pub enum WipePersistedCatalogError {
    #[error("Server returned an empty response")]
    EmptyResponse,

    /// Response payload was invalid
    #[error("Invalid response: {0}")]
    InvalidResponse(#[from] FieldViolation),
    /// Client received an unexpected error from the server
    #[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
    ServerError(tonic::Status),
@ -842,7 +854,7 @@ impl Client {
|
|||
pub async fn create_dummy_job(
|
||||
&mut self,
|
||||
nanos: Vec<u64>,
|
||||
) -> Result<Operation, CreateDummyJobError> {
|
||||
) -> Result<IoxOperation, CreateDummyJobError> {
|
||||
let response = self
|
||||
.inner
|
||||
.create_dummy_job(CreateDummyJobRequest { nanos })
|
||||
|
@ -852,7 +864,8 @@ impl Client {
|
|||
Ok(response
|
||||
.into_inner()
|
||||
.operation
|
||||
.ok_or(CreateDummyJobError::EmptyResponse)?)
|
||||
.ok_or(CreateDummyJobError::EmptyResponse)?
|
||||
.try_into()?)
|
||||
}
|
||||
|
||||
/// Closes the specified chunk in the specified partition and
|
||||
|
@ -865,7 +878,7 @@ impl Client {
|
|||
table_name: impl Into<String> + Send,
|
||||
partition_key: impl Into<String> + Send,
|
||||
chunk_id: u32,
|
||||
) -> Result<Operation, ClosePartitionChunkError> {
|
||||
) -> Result<IoxOperation, ClosePartitionChunkError> {
|
||||
let db_name = db_name.into();
|
||||
let partition_key = partition_key.into();
|
||||
let table_name = table_name.into();
|
||||
|
@ -888,7 +901,8 @@ impl Client {
|
|||
Ok(response
|
||||
.into_inner()
|
||||
.operation
|
||||
.ok_or(ClosePartitionChunkError::EmptyResponse)?)
|
||||
.ok_or(ClosePartitionChunkError::EmptyResponse)?
|
||||
.try_into()?)
|
||||
}
|
||||
|
||||
/// Unload chunk from read buffer but keep it in object store.
|
||||
|
@ -929,7 +943,7 @@ impl Client {
|
|||
pub async fn wipe_persisted_catalog(
|
||||
&mut self,
|
||||
db_name: impl Into<String> + Send,
|
||||
) -> Result<Operation, WipePersistedCatalogError> {
|
||||
) -> Result<IoxOperation, WipePersistedCatalogError> {
|
||||
let db_name = db_name.into();
|
||||
|
||||
let response = self
|
||||
|
@ -947,7 +961,8 @@ impl Client {
|
|||
Ok(response
|
||||
.into_inner()
|
||||
.operation
|
||||
.ok_or(WipePersistedCatalogError::EmptyResponse)?)
|
||||
.ok_or(WipePersistedCatalogError::EmptyResponse)?
|
||||
.try_into()?)
|
||||
}
|
||||
|
||||
/// Skip replay of an uninitialized database.
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
use thiserror::Error;
|
||||
|
||||
use ::generated_types::{
|
||||
google::FieldViolation, influxdata::iox::management::v1 as management, protobuf_type_url_eq,
|
||||
};
|
||||
|
||||
use self::generated_types::{operations_client::OperationsClient, *};
|
||||
use crate::connection::Connection;
|
||||
use std::convert::TryInto;
|
||||
|
||||
/// Re-export generated_types
|
||||
pub mod generated_types {
|
||||
pub use generated_types::google::longrunning::*;
|
||||
|
@ -16,7 +14,7 @@ pub mod generated_types {
|
|||
pub enum Error {
|
||||
/// Client received an invalid response
|
||||
#[error("Invalid server response: {}", .0)]
|
||||
InvalidResponse(#[from] FieldViolation),
|
||||
InvalidResponse(#[from] ::generated_types::google::FieldViolation),
|
||||
|
||||
/// Operation was not found
|
||||
#[error("Operation not found: {}", .0)]
|
||||
|
@ -66,7 +64,7 @@ impl Client {
|
|||
}
|
||||
|
||||
/// Get information about all client operations
|
||||
pub async fn list_operations(&mut self) -> Result<Vec<ClientOperation>> {
|
||||
pub async fn list_operations(&mut self) -> Result<Vec<IoxOperation>> {
|
||||
Ok(self
|
||||
.inner
|
||||
.list_operations(ListOperationsRequest::default())
|
||||
|
@ -75,12 +73,12 @@ impl Client {
|
|||
.into_inner()
|
||||
.operations
|
||||
.into_iter()
|
||||
.map(|o| ClientOperation::try_new(o).unwrap())
|
||||
.collect())
|
||||
.map(TryInto::try_into)
|
||||
.collect::<Result<_, _>>()?)
|
||||
}
|
||||
|
||||
/// Get information about a specific operation
|
||||
pub async fn get_operation(&mut self, id: usize) -> Result<Operation> {
|
||||
pub async fn get_operation(&mut self, id: usize) -> Result<IoxOperation> {
|
||||
Ok(self
|
||||
.inner
|
||||
.get_operation(GetOperationRequest {
|
||||
|
@ -91,7 +89,8 @@ impl Client {
|
|||
tonic::Code::NotFound => Error::NotFound(id),
|
||||
_ => Error::ServerError(e),
|
||||
})?
|
||||
.into_inner())
|
||||
.into_inner()
|
||||
.try_into()?)
|
||||
}
|
||||
|
||||
/// Cancel a given operation
|
||||
|
@ -115,7 +114,7 @@ impl Client {
|
|||
&mut self,
|
||||
id: usize,
|
||||
timeout: Option<std::time::Duration>,
|
||||
) -> Result<Operation> {
|
||||
) -> Result<IoxOperation> {
|
||||
Ok(self
|
||||
.inner
|
||||
.wait_operation(WaitOperationRequest {
|
||||
|
@ -127,50 +126,7 @@ impl Client {
|
|||
tonic::Code::NotFound => Error::NotFound(id),
|
||||
_ => Error::ServerError(e),
|
||||
})?
|
||||
.into_inner())
|
||||
}
|
||||
|
||||
/// Return the Client Operation
|
||||
pub async fn client_operation(&mut self, id: usize) -> Result<ClientOperation> {
|
||||
let operation = self.get_operation(id).await?;
|
||||
ClientOperation::try_new(operation)
|
||||
}
|
||||
}
|
||||
|
||||
/// IOx's Client Operation
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ClientOperation {
|
||||
inner: generated_types::Operation,
|
||||
}
|
||||
|
||||
impl ClientOperation {
|
||||
/// Create a new Client Operation
|
||||
pub fn try_new(operation: generated_types::Operation) -> Result<Self> {
|
||||
if operation.metadata.is_some() {
|
||||
let metadata = operation.metadata.clone().unwrap();
|
||||
if !protobuf_type_url_eq(&metadata.type_url, management::OPERATION_METADATA) {
|
||||
return Err(Error::WrongOperationMetaData);
|
||||
}
|
||||
} else {
|
||||
return Err(Error::NotFound(0));
|
||||
}
|
||||
|
||||
Ok(Self { inner: operation })
|
||||
}
|
||||
|
||||
/// Return Metadata for this client operation
|
||||
pub fn metadata(&self) -> management::OperationMetadata {
|
||||
prost::Message::decode(self.inner.metadata.clone().unwrap().value)
|
||||
.expect("failed to decode metadata")
|
||||
}
|
||||
|
||||
/// Return name of this operation
|
||||
pub fn name(&self) -> &str {
|
||||
&self.inner.name
|
||||
}
|
||||
|
||||
/// Return the inner's Operation
pub fn operation(self) -> Operation {
    self.inner
}
|
||||
}
|
||||
|
|
|
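The operations client above now converts each returned `Operation` into an `IoxOperation`, swapping the old panicking `unwrap` for fallible conversion; collecting an iterator of `Result`s into `Result<Vec<_>, _>` short-circuits on the first error. A generic sketch of that pattern (names are illustrative, not from the client):

```rust
use std::convert::TryInto;

// Convert a list of raw values, failing fast on the first bad element
fn convert_all<T, U>(raw: Vec<T>) -> Result<Vec<U>, <T as TryInto<U>>::Error>
where
    T: TryInto<U>,
{
    raw.into_iter().map(TryInto::try_into).collect()
}
```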
@@ -77,6 +77,15 @@ pub enum Error {
        /// Underlying `agent` module error that caused this problem
        source: agent::Error,
    },

    /// Error that may happen when constructing an agent's writer
    #[snafu(display("Could not create writer for agent `{}`, caused by:\n{}", name, source))]
    CouldNotCreateAgentWriter {
        /// The name of the relevant agent
        name: String,
        /// Underlying `write` module error that caused this problem
        source: write::Error,
    },
}

type Result<T, E = Error> = std::result::Result<T, E>;

@@ -135,7 +144,9 @@ pub async fn generate<T: DataGenRng>(
    )
    .context(CouldNotCreateAgent { name: &agent_name })?;

    let agent_points_writer = points_writer_builder.build_for_agent(&agent_name);
    let agent_points_writer = points_writer_builder
        .build_for_agent(&agent_name)
        .context(CouldNotCreateAgentWriter { name: &agent_name })?;

    handles.push(tokio::task::spawn(async move {
        agent.generate_all(agent_points_writer, batch_size).await
@ -10,14 +10,23 @@ use std::{
|
|||
};
|
||||
use std::{
|
||||
fs,
|
||||
fs::OpenOptions,
|
||||
fs::{File, OpenOptions},
|
||||
io::BufWriter,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use tracing::info;
|
||||
|
||||
/// Errors that may happen while writing points.
|
||||
#[derive(Snafu, Debug)]
|
||||
pub enum Error {
|
||||
/// Error that may happen when writing line protocol to a file
|
||||
#[snafu(display("Could open line protocol file {}: {}", filename.display(), source))]
|
||||
CantOpenLineProtocolFile {
|
||||
/// The location of the file we tried to open
|
||||
filename: PathBuf,
|
||||
/// Underlying IO error that caused this problem
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
/// Error that may happen when writing line protocol to a no-op sink
|
||||
#[snafu(display("Could not generate line protocol: {}", source))]
|
||||
CantWriteToNoOp {
|
||||
|
@ -174,7 +183,7 @@ impl PointsWriterBuilder {
|
|||
|
||||
/// Create a writer out of this writer's configuration for a particular
|
||||
/// agent that runs in a separate thread/task.
|
||||
pub fn build_for_agent(&mut self, agent_name: &str) -> PointsWriter {
|
||||
pub fn build_for_agent(&mut self, agent_name: &str) -> Result<PointsWriter> {
|
||||
let inner_writer = match &mut self.config {
|
||||
PointsWriterConfig::Api {
|
||||
client,
|
||||
|
@ -189,7 +198,16 @@ impl PointsWriterBuilder {
|
|||
let mut filename = dir_path.clone();
|
||||
filename.push(agent_name);
|
||||
filename.set_extension("txt");
|
||||
InnerPointsWriter::File(filename)
|
||||
|
||||
let file = OpenOptions::new()
|
||||
.append(true)
|
||||
.create(true)
|
||||
.open(&filename)
|
||||
.context(CantOpenLineProtocolFile { filename })?;
|
||||
|
||||
let file = BufWriter::new(file);
|
||||
|
||||
InnerPointsWriter::File { file }
|
||||
}
|
||||
PointsWriterConfig::NoOp { perform_write } => InnerPointsWriter::NoOp {
|
||||
perform_write: *perform_write,
|
||||
|
@ -204,7 +222,7 @@ impl PointsWriterBuilder {
|
|||
PointsWriterConfig::Stdout => InnerPointsWriter::Stdout,
|
||||
};
|
||||
|
||||
PointsWriter { inner_writer }
|
||||
Ok(PointsWriter { inner_writer })
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -228,7 +246,9 @@ enum InnerPointsWriter {
|
|||
org: String,
|
||||
bucket: String,
|
||||
},
|
||||
File(PathBuf),
|
||||
File {
|
||||
file: BufWriter<File>,
|
||||
},
|
||||
NoOp {
|
||||
perform_write: bool,
|
||||
},
|
||||
|
@ -250,22 +270,12 @@ impl InnerPointsWriter {
|
|||
.await
|
||||
.context(CantWriteToApi)?;
|
||||
}
|
||||
Self::File(filename) => {
|
||||
info!("Opening file {:?}", filename);
|
||||
let num_points = points.len();
|
||||
let file = OpenOptions::new()
|
||||
.append(true)
|
||||
.create(true)
|
||||
.open(&filename)
|
||||
.context(CantWriteToLineProtocolFile)?;
|
||||
|
||||
let mut file = std::io::BufWriter::new(file);
|
||||
Self::File { file } => {
|
||||
for point in points {
|
||||
point
|
||||
.write_data_point_to(&mut file)
|
||||
.write_data_point_to(&mut *file)
|
||||
.context(CantWriteToLineProtocolFile)?;
|
||||
}
|
||||
info!("Wrote {} points to {:?}", num_points, filename);
|
||||
}
|
||||
Self::NoOp { perform_write } => {
|
||||
if *perform_write {
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
//! Methods to cleanup the object store.
|
||||
use std::{collections::HashSet, sync::Arc};
|
||||
|
||||
use crate::catalog::api::{CatalogParquetInfo, CatalogState, PreservedCatalog};
|
||||
use futures::TryStreamExt;
|
||||
use iox_object_store::{IoxObjectStore, ParquetFilePath};
|
||||
use object_store::{ObjectStore, ObjectStoreApi};
|
||||
|
@ -9,6 +8,11 @@ use observability_deps::tracing::info;
|
|||
use parking_lot::Mutex;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use crate::catalog::{
|
||||
core::PreservedCatalog,
|
||||
interface::{CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError},
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error from read operation while cleaning object store: {}", source))]
|
||||
|
@ -22,7 +26,7 @@ pub enum Error {
|
|||
},
|
||||
|
||||
#[snafu(display("Error from catalog loading while cleaning object store: {}", source))]
|
||||
CatalogLoadError { source: crate::catalog::api::Error },
|
||||
CatalogLoadError { source: crate::catalog::core::Error },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -124,12 +128,12 @@ impl CatalogState for TracerCatalogState {
|
|||
&mut self,
|
||||
_iox_object_store: Arc<IoxObjectStore>,
|
||||
info: CatalogParquetInfo,
|
||||
) -> crate::catalog::api::Result<()> {
|
||||
) -> Result<(), CatalogStateAddError> {
|
||||
self.files.lock().insert(info.path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove(&mut self, _path: &ParquetFilePath) -> crate::catalog::api::Result<()> {
|
||||
fn remove(&mut self, _path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> {
|
||||
// Do NOT remove the file since we still need it for time travel
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
//! Catalog preservation and transaction handling.
|
||||
|
||||
use crate::{
|
||||
catalog::internals::{
|
||||
proto_io::{load_transaction_proto, store_transaction_proto},
|
||||
proto_parse,
|
||||
types::{FileType, TransactionKey},
|
||||
catalog::{
|
||||
interface::{CatalogParquetInfo, CatalogState, CheckpointData},
|
||||
internals::{
|
||||
proto_io::{load_transaction_proto, store_transaction_proto},
|
||||
proto_parse,
|
||||
types::{FileType, TransactionKey},
|
||||
},
|
||||
},
|
||||
metadata::IoxParquetMetaData,
|
||||
};
|
||||
|
@ -113,55 +116,9 @@ pub enum Error {
|
|||
#[snafu(display("Upgrade path not implemented/supported: {}", format))]
|
||||
UnsupportedUpgrade { format: String },
|
||||
|
||||
#[snafu(display("Parquet already exists in catalog: {:?}", path))]
|
||||
ParquetFileAlreadyExists { path: ParquetFilePath },
|
||||
|
||||
#[snafu(display("Parquet does not exist in catalog: {:?}", path))]
|
||||
ParquetFileDoesNotExist { path: ParquetFilePath },
|
||||
|
||||
#[snafu(display("Cannot decode parquet metadata: {}", source))]
|
||||
MetadataDecodingFailed { source: crate::metadata::Error },
|
||||
|
||||
#[snafu(
|
||||
display("Cannot extract metadata from {:?}: {}", path, source),
|
||||
visibility(pub)
|
||||
)]
|
||||
MetadataExtractFailed {
|
||||
source: crate::metadata::Error,
|
||||
path: ParquetFilePath,
|
||||
},
|
||||
|
||||
#[snafu(
|
||||
display("Schema for {:?} does not work with existing schema: {}", path, source),
|
||||
visibility(pub)
|
||||
)]
|
||||
SchemaError {
|
||||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
path: ParquetFilePath,
|
||||
},
|
||||
|
||||
#[snafu(
|
||||
display(
|
||||
"Internal error: Using checkpoints from {:?} leads to broken replay plan: {}, catalog likely broken",
|
||||
path,
|
||||
source
|
||||
),
|
||||
visibility(pub)
|
||||
)]
|
||||
ReplayPlanError {
|
||||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
path: ParquetFilePath,
|
||||
},
|
||||
|
||||
#[snafu(
|
||||
display("Cannot create parquet chunk from {:?}: {}", path, source),
|
||||
visibility(pub)
|
||||
)]
|
||||
ChunkCreationFailed {
|
||||
source: crate::chunk::Error,
|
||||
path: ParquetFilePath,
|
||||
},
|
||||
|
||||
#[snafu(display("Catalog already exists"))]
|
||||
AlreadyExists {},
|
||||
|
||||
|
@ -177,44 +134,20 @@ pub enum Error {
|
|||
|
||||
#[snafu(display("Cannot commit transaction: {}", source))]
|
||||
CommitError { source: Box<Error> },
|
||||
|
||||
#[snafu(display("Cannot add parquet file during load: {}", source))]
|
||||
AddError {
|
||||
source: crate::catalog::interface::CatalogStateAddError,
|
||||
},
|
||||
|
||||
#[snafu(display("Cannot remove parquet file during load: {}", source))]
|
||||
RemoveError {
|
||||
source: crate::catalog::interface::CatalogStateRemoveError,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Struct containing all information that a catalog received for a new parquet file.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CatalogParquetInfo {
|
||||
/// Path within this database.
|
||||
pub path: ParquetFilePath,
|
||||
|
||||
/// Size of the parquet file, in bytes
|
||||
pub file_size_bytes: usize,
|
||||
|
||||
/// Associated parquet metadata.
|
||||
pub metadata: Arc<IoxParquetMetaData>,
|
||||
}
|
||||
|
||||
/// Abstraction over how the in-memory state of the catalog works.
|
||||
pub trait CatalogState {
|
||||
/// Input to create a new empty instance.
|
||||
///
|
||||
/// See [`new_empty`](Self::new_empty) for details.
|
||||
type EmptyInput: Send;
|
||||
|
||||
/// Create empty state w/o any known files.
|
||||
fn new_empty(db_name: &str, data: Self::EmptyInput) -> Self;
|
||||
|
||||
/// Add parquet file to state.
|
||||
fn add(
|
||||
&mut self,
|
||||
iox_object_store: Arc<IoxObjectStore>,
|
||||
info: CatalogParquetInfo,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Remove parquet file from state.
|
||||
fn remove(&mut self, path: &ParquetFilePath) -> Result<()>;
|
||||
}
|
||||
|
||||
/// In-memory view of the preserved catalog.
|
||||
pub struct PreservedCatalog {
|
||||
// We need an RWLock AND a semaphore, so that readers are NOT blocked during an open
|
||||
|
@ -605,20 +538,22 @@ impl OpenTransaction {
|
|||
|
||||
let metadata = Arc::new(metadata);
|
||||
|
||||
state.add(
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes,
|
||||
metadata,
|
||||
},
|
||||
)?;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes,
|
||||
metadata,
|
||||
},
|
||||
)
|
||||
.context(AddError)?;
|
||||
}
|
||||
proto::transaction::action::Action::RemoveParquet(a) => {
|
||||
let path =
|
||||
proto_parse::parse_dirs_and_filename(a.path.as_ref().context(PathRequired)?)
|
||||
.context(ProtobufParseError)?;
|
||||
state.remove(&path)?;
|
||||
state.remove(&path).context(RemoveError)?;
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
|
@ -737,20 +672,6 @@ impl OpenTransaction {
|
|||
}
|
||||
}
|
||||
|
||||
/// Structure that holds all information required to create a checkpoint.
|
||||
///
|
||||
/// Note that while checkpoint are addressed using the same schema as we use for transaction
|
||||
/// (revision counter, UUID), they contain the changes at the end (aka including) the transaction
|
||||
/// they refer.
|
||||
#[derive(Debug)]
|
||||
pub struct CheckpointData {
|
||||
/// List of all Parquet files that are currently (i.e. by the current version) tracked by the
|
||||
/// catalog.
|
||||
///
|
||||
/// If a file was once added but later removed it MUST NOT appear in the result.
|
||||
pub files: HashMap<ParquetFilePath, CatalogParquetInfo>,
|
||||
}
|
||||
|
||||
/// Handle for an open uncommitted transaction.
|
||||
///
|
||||
/// Dropping this object w/o calling [`commit`](Self::commit) will issue a warning.
|
|
@@ -223,8 +223,7 @@ mod tests {

    use crate::{
        catalog::{
            api::{CatalogParquetInfo, PreservedCatalog},
            test_helpers::TestCatalogState,
            core::PreservedCatalog, interface::CatalogParquetInfo, test_helpers::TestCatalogState,
        },
        test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
    };
@@ -0,0 +1,99 @@
//! Abstract interfaces to make different users work with the preserved catalog.
use std::{collections::HashMap, sync::Arc};

use iox_object_store::{IoxObjectStore, ParquetFilePath};
use snafu::Snafu;

use crate::metadata::IoxParquetMetaData;

/// Struct containing all information that a catalog received for a new parquet file.
#[derive(Debug, Clone)]
pub struct CatalogParquetInfo {
    /// Path within this database.
    pub path: ParquetFilePath,

    /// Size of the parquet file, in bytes
    pub file_size_bytes: usize,

    /// Associated parquet metadata.
    pub metadata: Arc<IoxParquetMetaData>,
}

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum CatalogStateAddError {
    #[snafu(display("Cannot extract metadata from {:?}: {}", path, source))]
    MetadataExtractFailed {
        source: crate::metadata::Error,
        path: ParquetFilePath,
    },

    #[snafu(display("Schema for {:?} does not work with existing schema: {}", path, source))]
    SchemaError {
        source: Box<dyn std::error::Error + Send + Sync>,
        path: ParquetFilePath,
    },

    #[snafu(
        display(
            "Internal error: Using checkpoints from {:?} leads to broken replay plan: {}, catalog likely broken",
            path,
            source
        ),
    )]
    ReplayPlanError {
        source: Box<dyn std::error::Error + Send + Sync>,
        path: ParquetFilePath,
    },

    #[snafu(display("Cannot create parquet chunk from {:?}: {}", path, source))]
    ChunkCreationFailed {
        source: crate::chunk::Error,
        path: ParquetFilePath,
    },

    #[snafu(display("Parquet already exists in catalog: {:?}", path))]
    ParquetFileAlreadyExists { path: ParquetFilePath },
}

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum CatalogStateRemoveError {
    #[snafu(display("Parquet does not exist in catalog: {:?}", path))]
    ParquetFileDoesNotExist { path: ParquetFilePath },
}

/// Abstraction over how the in-memory state of the catalog works.
pub trait CatalogState {
    /// Input to create a new empty instance.
    ///
    /// See [`new_empty`](Self::new_empty) for details.
    type EmptyInput: Send;

    /// Create empty state w/o any known files.
    fn new_empty(db_name: &str, data: Self::EmptyInput) -> Self;

    /// Add parquet file to state.
    fn add(
        &mut self,
        iox_object_store: Arc<IoxObjectStore>,
        info: CatalogParquetInfo,
    ) -> Result<(), CatalogStateAddError>;

    /// Remove parquet file from state.
    fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError>;
}

/// Structure that holds all information required to create a checkpoint.
///
/// Note that while checkpoints are addressed using the same schema as we use for transactions
/// (revision counter, UUID), they contain the changes at the end of (i.e. including) the
/// transaction they refer to.
#[derive(Debug)]
pub struct CheckpointData {
    /// List of all Parquet files that are currently (i.e. by the current version) tracked by the
    /// catalog.
    ///
    /// If a file was once added but later removed it MUST NOT appear in the result.
    pub files: HashMap<ParquetFilePath, CatalogParquetInfo>,
}
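The `CatalogState` trait above is the hook the different catalog consumers implement. A minimal sketch of an implementation that only tracks file paths (hypothetical, similar in spirit to the `TracerCatalogState` used by the cleanup code; crate path assumed to be `parquet_file::catalog::interface` as in this diff):

```rust
use std::{collections::HashSet, sync::Arc};

use iox_object_store::{IoxObjectStore, ParquetFilePath};
use parquet_file::catalog::interface::{
    CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
};

/// Hypothetical state that only remembers which files exist
#[derive(Debug, Default)]
struct PathOnlyState {
    files: HashSet<ParquetFilePath>,
}

impl CatalogState for PathOnlyState {
    type EmptyInput = ();

    fn new_empty(_db_name: &str, _data: Self::EmptyInput) -> Self {
        Self::default()
    }

    fn add(
        &mut self,
        _iox_object_store: Arc<IoxObjectStore>,
        info: CatalogParquetInfo,
    ) -> Result<(), CatalogStateAddError> {
        // Track the path only; size and metadata are ignored here
        self.files.insert(info.path);
        Ok(())
    }

    fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> {
        if self.files.remove(path) {
            Ok(())
        } else {
            Err(CatalogStateRemoveError::ParquetFileDoesNotExist { path: path.clone() })
        }
    }
}
```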
@@ -1,6 +1,7 @@
pub mod api;
pub mod cleanup;
pub mod core;
pub mod dump;
pub mod interface;
mod internals;
pub mod prune;
pub mod rebuild;
@@ -8,7 +8,7 @@ use object_store::{ObjectStore, ObjectStoreApi};
use snafu::{ResultExt, Snafu};

use crate::catalog::{
    api::{ProtoIOError, ProtoParseError},
    core::{ProtoIOError, ProtoParseError},
    internals::{proto_io::load_transaction_proto, proto_parse::parse_timestamp},
};
@@ -33,7 +33,7 @@ pub enum Error {

pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Prune history of [`PreservedCatalog`](crate::catalog::api::PreservedCatalog).
/// Prune history of [`PreservedCatalog`](crate::catalog::core::PreservedCatalog).
///
/// This deletes all transactions and checkpoints that were started prior to `before`. Note that this only deletes data
/// that is safe to delete when time travel to `before` is allowed. For example imagine the following transactions:
@@ -133,8 +133,7 @@ fn is_checkpoint_or_zero(path: &TransactionFilePath) -> bool {
mod tests {
    use crate::{
        catalog::{
            api::{CheckpointData, PreservedCatalog},
            test_helpers::TestCatalogState,
            core::PreservedCatalog, interface::CheckpointData, test_helpers::TestCatalogState,
        },
        test_utils::make_iox_object_store,
    };
@ -7,13 +7,17 @@ use observability_deps::tracing::error;
|
|||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use crate::{
|
||||
catalog::api::{CatalogParquetInfo, CatalogState, PreservedCatalog},
|
||||
catalog::{
|
||||
core::PreservedCatalog,
|
||||
interface::{CatalogParquetInfo, CatalogState},
|
||||
},
|
||||
metadata::IoxParquetMetaData,
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Cannot create new empty catalog: {}", source))]
|
||||
NewEmptyFailure { source: crate::catalog::api::Error },
|
||||
NewEmptyFailure { source: crate::catalog::core::Error },
|
||||
|
||||
#[snafu(display("Cannot read store: {}", source))]
|
||||
ReadFailure { source: object_store::Error },
|
||||
|
@ -25,13 +29,15 @@ pub enum Error {
|
|||
},
|
||||
|
||||
#[snafu(display("Cannot add file to transaction: {}", source))]
|
||||
FileRecordFailure { source: crate::catalog::api::Error },
|
||||
FileRecordFailure {
|
||||
source: crate::catalog::interface::CatalogStateAddError,
|
||||
},
|
||||
|
||||
#[snafu(display("Cannot commit transaction: {}", source))]
|
||||
CommitFailure { source: crate::catalog::api::Error },
|
||||
CommitFailure { source: crate::catalog::core::Error },
|
||||
|
||||
#[snafu(display("Cannot create checkpoint: {}", source))]
|
||||
CheckpointFailure { source: crate::catalog::api::Error },
|
||||
CheckpointFailure { source: crate::catalog::core::Error },
|
||||
}
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
|
@ -164,7 +170,7 @@ async fn read_parquet(
|
|||
mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
catalog::{api::PreservedCatalog, test_helpers::TestCatalogState},
|
||||
catalog::{core::PreservedCatalog, test_helpers::TestCatalogState},
|
||||
metadata::IoxMetadata,
|
||||
storage::{MemWriter, Storage},
|
||||
test_utils::{
|
||||
|
|
|
@ -12,7 +12,11 @@ use snafu::ResultExt;
|
|||
|
||||
use crate::{
|
||||
catalog::{
|
||||
api::{CatalogParquetInfo, CatalogState, CheckpointData, PreservedCatalog},
|
||||
core::PreservedCatalog,
|
||||
interface::{
|
||||
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
|
||||
CheckpointData,
|
||||
},
|
||||
internals::{
|
||||
proto_io::{load_transaction_proto, store_transaction_proto},
|
||||
types::TransactionKey,
|
||||
|
@ -61,8 +65,8 @@ impl TestCatalogState {
|
|||
}
|
||||
|
||||
/// Inserts a file into this catalog state
|
||||
pub fn insert(&mut self, info: CatalogParquetInfo) -> crate::catalog::api::Result<()> {
|
||||
use crate::catalog::api::{Error, MetadataExtractFailed};
|
||||
pub fn insert(&mut self, info: CatalogParquetInfo) -> Result<(), CatalogStateAddError> {
|
||||
use crate::catalog::interface::MetadataExtractFailed;
|
||||
|
||||
let iox_md = info
|
||||
.metadata
|
||||
|
@ -80,7 +84,7 @@ impl TestCatalogState {
|
|||
|
||||
match partition.chunks.entry(iox_md.chunk_id) {
|
||||
Occupied(o) => {
|
||||
return Err(Error::ParquetFileAlreadyExists {
|
||||
return Err(CatalogStateAddError::ParquetFileAlreadyExists {
|
||||
path: o.get().path.clone(),
|
||||
});
|
||||
}
|
||||
|
@ -104,13 +108,11 @@ impl CatalogState for TestCatalogState {
|
|||
&mut self,
|
||||
_iox_object_store: Arc<IoxObjectStore>,
|
||||
info: CatalogParquetInfo,
|
||||
) -> crate::catalog::api::Result<()> {
|
||||
) -> Result<(), CatalogStateAddError> {
|
||||
self.insert(info)
|
||||
}
|
||||
|
||||
fn remove(&mut self, path: &ParquetFilePath) -> crate::catalog::api::Result<()> {
|
||||
use crate::catalog::api::Error;
|
||||
|
||||
fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> {
|
||||
let partitions = self
|
||||
.tables
|
||||
.values_mut()
|
||||
|
@ -136,7 +138,7 @@ impl CatalogState for TestCatalogState {
|
|||
}
|
||||
|
||||
match removed {
|
||||
0 => Err(Error::ParquetFileDoesNotExist { path: path.clone() }),
|
||||
0 => Err(CatalogStateRemoveError::ParquetFileDoesNotExist { path: path.clone() }),
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
@ -173,8 +175,6 @@ where
|
|||
S: CatalogState + Debug + Send + Sync,
|
||||
F: Fn(&S) -> CheckpointData + Send,
|
||||
{
|
||||
use crate::catalog::api::Error;
|
||||
|
||||
// empty state
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let (_catalog, mut state) =
|
||||
|
@ -317,11 +317,17 @@ where
|
|||
},
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(matches!(err, Error::ParquetFileAlreadyExists { .. }));
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateAddError::ParquetFileAlreadyExists { .. }
|
||||
));
|
||||
|
||||
// does not exist as has a different UUID
|
||||
let err = state.remove(&path).unwrap_err();
|
||||
assert!(matches!(err, Error::ParquetFileDoesNotExist { .. }));
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateRemoveError::ParquetFileDoesNotExist { .. }
|
||||
));
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected);
|
||||
|
||||
|
@ -340,7 +346,10 @@ where
|
|||
},
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(matches!(err, Error::ParquetFileAlreadyExists { .. }));
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateAddError::ParquetFileAlreadyExists { .. }
|
||||
));
|
||||
|
||||
// this transaction will still work
|
||||
let (path, metadata) =
|
||||
|
@ -369,12 +378,18 @@ where
|
|||
},
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(matches!(err, Error::ParquetFileAlreadyExists { .. }));
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateAddError::ParquetFileAlreadyExists { .. }
|
||||
));
|
||||
|
||||
// does not exist - as different UUID
|
||||
let path = ParquetFilePath::new(&chunk_addr(7));
|
||||
let err = state.remove(&path).unwrap_err();
|
||||
assert!(matches!(err, Error::ParquetFileDoesNotExist { .. }));
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateRemoveError::ParquetFileDoesNotExist { .. }
|
||||
));
|
||||
|
||||
// this still works
|
||||
let (path, _) = expected.remove(&7).unwrap();
|
||||
|
@ -382,7 +397,10 @@ where
|
|||
|
||||
// recently removed
|
||||
let err = state.remove(&path).unwrap_err();
|
||||
assert!(matches!(err, Error::ParquetFileDoesNotExist { .. }));
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateRemoveError::ParquetFileDoesNotExist { .. }
|
||||
));
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected);
|
||||
}
|
||||
|
|
|
@@ -120,7 +120,7 @@ use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TOutputPro
/// For breaking changes, this will change.
///
/// **Important: When changing this structure, consider bumping the
/// [catalog transaction version](crate::catalog::api::TRANSACTION_VERSION)!**
/// [catalog transaction version](crate::catalog::core::TRANSACTION_VERSION)!**
pub const METADATA_VERSION: u32 = 6;

/// File-level metadata key to store the IOx-specific data.
@@ -0,0 +1,14 @@
[package]
name = "pbjson"
version = "0.1.0"
authors = ["Raphael Taylor-Davies <r.taylordavies@googlemail.com>"]
edition = "2018"
description = "Utilities for pbjson conversion"

[dependencies]
serde = { version = "1.0", features = ["derive"] }
base64 = "0.13"

[dev-dependencies]
bytes = "1.0"
@ -0,0 +1,82 @@
|
|||
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
|
||||
#![warn(
|
||||
missing_debug_implementations,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::use_self,
|
||||
clippy::clone_on_ref_ptr,
|
||||
clippy::future_not_send
|
||||
)]
|
||||
|
||||
#[doc(hidden)]
|
||||
pub mod private {
|
||||
/// Re-export base64
|
||||
pub use base64;
|
||||
|
||||
use serde::Deserialize;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Used to parse a number from either a string or its raw representation
|
||||
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Hash, Ord, Eq)]
|
||||
pub struct NumberDeserialize<T>(pub T);
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum Content<'a, T> {
|
||||
Str(&'a str),
|
||||
Number(T),
|
||||
}
|
||||
|
||||
impl<'de, T> serde::Deserialize<'de> for NumberDeserialize<T>
|
||||
where
|
||||
T: FromStr + serde::Deserialize<'de>,
|
||||
<T as FromStr>::Err: std::error::Error,
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let content = Content::deserialize(deserializer)?;
|
||||
Ok(Self(match content {
|
||||
Content::Str(v) => v.parse().map_err(serde::de::Error::custom)?,
|
||||
Content::Number(v) => v,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Hash, Ord, Eq)]
|
||||
pub struct BytesDeserialize<T>(pub T);
|
||||
|
||||
impl<'de, T> Deserialize<'de> for BytesDeserialize<T>
|
||||
where
|
||||
T: From<Vec<u8>>,
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let s: &str = Deserialize::deserialize(deserializer)?;
|
||||
let decoded = base64::decode(s).map_err(serde::de::Error::custom)?;
|
||||
Ok(Self(decoded.into()))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use bytes::Bytes;
|
||||
use serde::de::value::{BorrowedStrDeserializer, Error};
|
||||
|
||||
#[test]
|
||||
fn test_bytes() {
|
||||
let raw = vec![2, 5, 62, 2, 5, 7, 8, 43, 5, 8, 4, 23, 5, 7, 7, 3, 2, 5, 196];
|
||||
let encoded = base64::encode(&raw);
|
||||
|
||||
let deserializer = BorrowedStrDeserializer::<'_, Error>::new(&encoded);
|
||||
let a: Bytes = BytesDeserialize::deserialize(deserializer).unwrap().0;
|
||||
let b: Vec<u8> = BytesDeserialize::deserialize(deserializer).unwrap().0;
|
||||
|
||||
assert_eq!(raw.as_slice(), &a);
|
||||
assert_eq!(raw.as_slice(), &b);
|
||||
}
|
||||
}
|
||||
}
|
|
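The helpers above exist so generated code can accept jsonpb's flexible encodings (64-bit numbers as strings, bytes as base64). A small sketch of `NumberDeserialize`, mirroring the `BytesDeserialize` test, assuming it is driven through serde's value deserializers:

```rust
use pbjson::private::NumberDeserialize;
use serde::de::value::{BorrowedStrDeserializer, Error};
use serde::Deserialize;

fn parse_stringified_u64() {
    // jsonpb allows 64-bit integers to arrive as JSON strings; both forms decode
    let deserializer = BorrowedStrDeserializer::<'_, Error>::new("42");
    let n: NumberDeserialize<u64> = NumberDeserialize::deserialize(deserializer).unwrap();
    assert_eq!(n.0, 42);
}
```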
@@ -0,0 +1,17 @@
[package]
name = "pbjson_build"
version = "0.1.0"
authors = ["Raphael Taylor-Davies <r.taylordavies@googlemail.com>"]
edition = "2018"
description = "Generates Serialize and Deserialize implementations for prost message types"

[dependencies]
heck = "0.3"
prost = "0.8"
prost-types = "0.8"
itertools = "0.10"

[dev-dependencies]
tempfile = "3.1"
pbjson_test = { path = "../pbjson_test" }
@ -0,0 +1,260 @@
|
|||
//! This module contains code to parse and extract the protobuf descriptor
|
||||
//! format for use by the rest of the codebase
|
||||
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::io::{Error, ErrorKind, Result};
|
||||
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use prost_types::{
|
||||
DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto,
|
||||
FileDescriptorSet, MessageOptions, OneofDescriptorProto,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||
pub struct Package(String);
|
||||
|
||||
impl Display for Package {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl Package {
|
||||
pub fn new(s: impl Into<String>) -> Self {
|
||||
let s = s.into();
|
||||
assert!(
|
||||
!s.starts_with('.'),
|
||||
"package cannot start with \'.\', got \"{}\"",
|
||||
s
|
||||
);
|
||||
Self(s)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||
pub struct TypeName(String);
|
||||
|
||||
impl Display for TypeName {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl TypeName {
|
||||
pub fn new(s: impl Into<String>) -> Self {
|
||||
let s = s.into();
|
||||
assert!(
|
||||
!s.contains('.'),
|
||||
"type name cannot contain \'.\', got \"{}\"",
|
||||
s
|
||||
);
|
||||
Self(s)
|
||||
}
|
||||
|
||||
pub fn to_snake_case(&self) -> String {
|
||||
use heck::SnakeCase;
|
||||
self.0.to_snake_case()
|
||||
}
|
||||
|
||||
pub fn to_camel_case(&self) -> String {
|
||||
use heck::CamelCase;
|
||||
self.0.to_camel_case()
|
||||
}
|
||||
|
||||
pub fn to_shouty_snake_case(&self) -> String {
|
||||
use heck::ShoutySnakeCase;
|
||||
self.0.to_shouty_snake_case()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||
pub struct TypePath {
|
||||
package: Package,
|
||||
path: Vec<TypeName>,
|
||||
}
|
||||
|
||||
impl Display for TypePath {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
self.package.fmt(f)?;
|
||||
for element in &self.path {
|
||||
write!(f, ".{}", element)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl TypePath {
|
||||
pub fn new(package: Package) -> Self {
|
||||
Self {
|
||||
package,
|
||||
path: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn package(&self) -> &Package {
|
||||
&self.package
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &[TypeName] {
|
||||
self.path.as_slice()
|
||||
}
|
||||
|
||||
pub fn child(&self, name: TypeName) -> Self {
|
||||
let path = self
|
||||
.path
|
||||
.iter()
|
||||
.cloned()
|
||||
.chain(std::iter::once(name))
|
||||
.collect();
|
||||
Self {
|
||||
package: self.package.clone(),
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn matches_prefix(&self, prefix: &str) -> bool {
|
||||
let prefix = match prefix.strip_prefix('.') {
|
||||
Some(prefix) => prefix,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
if prefix.len() <= self.package.0.len() {
|
||||
return self.package.0.starts_with(prefix);
|
||||
}
|
||||
|
||||
match prefix.strip_prefix(&self.package.0) {
|
||||
Some(prefix) => {
|
||||
let split = prefix.split('.').skip(1);
|
||||
for zipped in self.path.iter().zip_longest(split) {
|
||||
match zipped {
|
||||
EitherOrBoth::Both(a, b) if a.0.as_str() == b => continue,
|
||||
EitherOrBoth::Left(_) => return true,
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct DescriptorSet {
|
||||
descriptors: BTreeMap<TypePath, Descriptor>,
|
||||
}
|
||||
|
||||
impl DescriptorSet {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn register_encoded(&mut self, encoded: &[u8]) -> Result<()> {
|
||||
let descriptors: FileDescriptorSet =
|
||||
prost::Message::decode(encoded).map_err(|e| Error::new(ErrorKind::InvalidData, e))?;
|
||||
|
||||
for file in descriptors.file {
|
||||
let syntax = match file.syntax.as_deref() {
|
||||
None | Some("proto2") => Syntax::Proto2,
|
||||
Some("proto3") => Syntax::Proto3,
|
||||
Some(s) => panic!("unknown syntax: {}", s),
|
||||
};
|
||||
|
||||
let package = Package::new(file.package.expect("expected package"));
|
||||
let path = TypePath::new(package);
|
||||
|
||||
for descriptor in file.message_type {
|
||||
self.register_message(&path, descriptor, syntax)
|
||||
}
|
||||
|
||||
for descriptor in file.enum_type {
|
||||
self.register_enum(&path, descriptor)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&TypePath, &Descriptor)> {
|
||||
self.descriptors.iter()
|
||||
}
|
||||
|
||||
fn register_message(&mut self, path: &TypePath, descriptor: DescriptorProto, syntax: Syntax) {
|
||||
let name = TypeName::new(descriptor.name.expect("expected name"));
|
||||
let child_path = path.child(name);
|
||||
|
||||
for child_descriptor in descriptor.enum_type {
|
||||
self.register_enum(&child_path, child_descriptor)
|
||||
}
|
||||
|
||||
for child_descriptor in descriptor.nested_type {
|
||||
self.register_message(&child_path, child_descriptor, syntax)
|
||||
}
|
||||
|
||||
self.register_descriptor(
|
||||
child_path.clone(),
|
||||
Descriptor::Message(MessageDescriptor {
|
||||
path: child_path,
|
||||
options: descriptor.options,
|
||||
one_of: descriptor.oneof_decl,
|
||||
fields: descriptor.field,
|
||||
syntax,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn register_enum(&mut self, path: &TypePath, descriptor: EnumDescriptorProto) {
|
||||
let name = TypeName::new(descriptor.name.expect("expected name"));
|
||||
self.register_descriptor(
|
||||
path.child(name),
|
||||
Descriptor::Enum(EnumDescriptor {
|
||||
values: descriptor.value,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn register_descriptor(&mut self, path: TypePath, descriptor: Descriptor) {
|
||||
match self.descriptors.entry(path) {
|
||||
Entry::Occupied(o) => panic!("descriptor already registered for {}", o.key()),
|
||||
Entry::Vacant(v) => v.insert(descriptor),
|
||||
};
|
||||
}
|
||||
}
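// A minimal usage sketch (hypothetical helper, not part of the original change):
// `encoded` is assumed to be the serialized `FileDescriptorSet` written by
// prost-build via `file_descriptor_set_path`. The `BTreeMap` keeps the paths
// ordered, so nested types are iterated directly after their parent message.
#[allow(dead_code)]
fn dump_registered_types(encoded: &[u8]) -> Result<()> {
    let mut set = DescriptorSet::new();
    set.register_encoded(encoded)?;
    for (path, descriptor) in set.iter() {
        match descriptor {
            Descriptor::Message(_) => println!("message: {}", path),
            Descriptor::Enum(_) => println!("enum: {}", path),
        }
    }
    Ok(())
}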
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum Syntax {
|
||||
Proto2,
|
||||
Proto3,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Descriptor {
|
||||
Enum(EnumDescriptor),
|
||||
Message(MessageDescriptor),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EnumDescriptor {
|
||||
pub values: Vec<EnumValueDescriptorProto>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MessageDescriptor {
|
||||
pub path: TypePath,
|
||||
pub options: Option<MessageOptions>,
|
||||
pub one_of: Vec<OneofDescriptorProto>,
|
||||
pub fields: Vec<FieldDescriptorProto>,
|
||||
pub syntax: Syntax,
|
||||
}
|
||||
|
||||
impl MessageDescriptor {
|
||||
/// Whether this is an auto-generated type for the map field
|
||||
pub fn is_map(&self) -> bool {
|
||||
self.options
|
||||
.as_ref()
|
||||
.and_then(|options| options.map_entry)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
//! Contains code to escape strings to avoid collisions with reserved Rust keywords

pub fn escape_ident(mut ident: String) -> String {
    // Copied from prost-build::ident
    //
    // Use a raw identifier if the identifier matches a Rust keyword:
    // https://doc.rust-lang.org/reference/keywords.html.
    match ident.as_str() {
        // 2015 strict keywords.
        | "as" | "break" | "const" | "continue" | "else" | "enum" | "false"
        | "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "match" | "mod" | "move" | "mut"
        | "pub" | "ref" | "return" | "static" | "struct" | "trait" | "true"
        | "type" | "unsafe" | "use" | "where" | "while"
        // 2018 strict keywords.
        | "dyn"
        // 2015 reserved keywords.
        | "abstract" | "become" | "box" | "do" | "final" | "macro" | "override" | "priv" | "typeof"
        | "unsized" | "virtual" | "yield"
        // 2018 reserved keywords.
        | "async" | "await" | "try" => ident.insert_str(0, "r#"),
        // The following keywords are not supported as raw identifiers and are therefore suffixed with an underscore.
        "self" | "super" | "extern" | "crate" => ident += "_",
        _ => (),
    };
    ident
}
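// A short sketch (hypothetical test, not part of the original change) showing
// the two escaping strategies: raw identifiers for most keywords, and a trailing
// underscore for the keywords that cannot be used as raw identifiers.
#[cfg(test)]
mod escape_tests {
    use super::*;

    #[test]
    fn test_escape_ident() {
        assert_eq!(escape_ident("field".to_string()), "field");
        assert_eq!(escape_ident("type".to_string()), "r#type");
        assert_eq!(escape_ident("await".to_string()), "r#await");
        assert_eq!(escape_ident("self".to_string()), "self_");
        assert_eq!(escape_ident("crate".to_string()), "crate_");
    }
}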
|
|
@ -0,0 +1,129 @@
|
|||
//! This module contains the actual code generation logic
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::io::{Result, Write};
|
||||
|
||||
use crate::descriptor::TypePath;
|
||||
|
||||
mod enumeration;
|
||||
mod message;
|
||||
|
||||
pub use enumeration::generate_enum;
|
||||
pub use message::generate_message;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct Indent(usize);
|
||||
|
||||
impl Display for Indent {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
for _ in 0..self.0 {
|
||||
write!(f, " ")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Config {
|
||||
pub extern_types: BTreeMap<TypePath, String>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
fn rust_type(&self, path: &TypePath) -> String {
|
||||
if let Some(t) = self.extern_types.get(path) {
|
||||
return t.clone();
|
||||
}
|
||||
|
||||
let mut ret = String::new();
|
||||
let path = path.path();
|
||||
assert!(!path.is_empty(), "path cannot be empty");
|
||||
|
||||
for i in &path[..(path.len() - 1)] {
|
||||
ret.push_str(i.to_snake_case().as_str());
|
||||
ret.push_str("::");
|
||||
}
|
||||
ret.push_str(path.last().unwrap().to_camel_case().as_str());
|
||||
ret
|
||||
}
|
||||
|
||||
fn rust_variant(&self, enumeration: &TypePath, variant: &str) -> String {
|
||||
use heck::CamelCase;
|
||||
assert!(
|
||||
variant
|
||||
.chars()
|
||||
.all(|c| matches!(c, '0'..='9' | 'A'..='Z' | '_')),
|
||||
"illegal variant - {}",
|
||||
variant
|
||||
);
|
||||
|
||||
// TODO: Config to disable stripping prefix
|
||||
|
||||
let enumeration_name = enumeration.path().last().unwrap().to_shouty_snake_case();
|
||||
let variant = match variant.strip_prefix(&enumeration_name) {
|
||||
Some("") => variant,
|
||||
Some(stripped) => stripped,
|
||||
None => variant,
|
||||
};
|
||||
variant.to_camel_case()
|
||||
}
|
||||
}
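// A hedged sketch (hypothetical test, not part of the original change) of the
// naming rules above: nested message paths become `snake_case` modules ending in
// a `CamelCase` type, and enum variants have the enum-name prefix stripped to
// match what prost generates.
#[cfg(test)]
mod config_tests {
    use super::*;
    use crate::descriptor::{Package, TypeName, TypePath};

    #[test]
    fn test_naming() {
        let config = Config {
            extern_types: Default::default(),
        };

        let message = TypePath::new(Package::new("test.syntax3")).child(TypeName::new("KitchenSink"));
        let value = message.child(TypeName::new("Value"));

        assert_eq!(config.rust_type(&message), "KitchenSink");
        assert_eq!(config.rust_type(&value), "kitchen_sink::Value");
        assert_eq!(config.rust_variant(&value, "VALUE_A"), "A");
        assert_eq!(config.rust_variant(&value, "VALUE_UNKNOWN"), "Unknown");
    }
}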
|
||||
|
||||
fn write_fields_array<'a, W: Write, I: Iterator<Item = &'a str>>(
|
||||
writer: &mut W,
|
||||
indent: usize,
|
||||
variants: I,
|
||||
) -> Result<()> {
|
||||
writeln!(writer, "{}const FIELDS: &[&str] = &[", Indent(indent))?;
|
||||
for name in variants {
|
||||
writeln!(writer, "{}\"{}\",", Indent(indent + 1), name)?;
|
||||
}
|
||||
writeln!(writer, "{}];", Indent(indent))?;
|
||||
writeln!(writer)
|
||||
}
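// For reference (sketch, exact whitespace depends on `Indent`), a call such as
// `write_fields_array(writer, 0, ["i32", "optionalI32"].iter().copied())` is
// expected to emit:
//
//     const FIELDS: &[&str] = &[
//         "i32",
//         "optionalI32",
//     ];
//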
|
||||
|
||||
fn write_serialize_start<W: Write>(indent: usize, rust_type: &str, writer: &mut W) -> Result<()> {
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent}impl serde::Serialize for {rust_type} {{
|
||||
{indent} #[allow(deprecated)]
|
||||
{indent} fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
{indent} where
|
||||
{indent} S: serde::Serializer,
|
||||
{indent} {{"#,
|
||||
indent = Indent(indent),
|
||||
rust_type = rust_type
|
||||
)
|
||||
}
|
||||
|
||||
fn write_serialize_end<W: Write>(indent: usize, writer: &mut W) -> Result<()> {
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent} }}
|
||||
{indent}}}"#,
|
||||
indent = Indent(indent),
|
||||
)
|
||||
}
|
||||
|
||||
fn write_deserialize_start<W: Write>(indent: usize, rust_type: &str, writer: &mut W) -> Result<()> {
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent}impl<'de> serde::Deserialize<'de> for {rust_type} {{
|
||||
{indent} #[allow(deprecated)]
|
||||
{indent} fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
{indent} where
|
||||
{indent} D: serde::Deserializer<'de>,
|
||||
{indent} {{"#,
|
||||
indent = Indent(indent),
|
||||
rust_type = rust_type
|
||||
)
|
||||
}
|
||||
|
||||
fn write_deserialize_end<W: Write>(indent: usize, writer: &mut W) -> Result<()> {
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent} }}
|
||||
{indent}}}"#,
|
||||
indent = Indent(indent),
|
||||
)
|
||||
}
|
|
@ -0,0 +1,138 @@
|
|||
//! This module contains the code to generate Serialize and Deserialize
|
||||
//! implementations for enumeration type
|
||||
//!
|
||||
//! An enumeration should be decode-able from the full string variant name
|
||||
//! or its integer tag number, and should encode to the string representation
|
||||
|
||||
use super::{
|
||||
write_deserialize_end, write_deserialize_start, write_serialize_end, write_serialize_start,
|
||||
Config, Indent,
|
||||
};
|
||||
use crate::descriptor::{EnumDescriptor, TypePath};
|
||||
use crate::generator::write_fields_array;
|
||||
use std::io::{Result, Write};
|
||||
|
||||
pub fn generate_enum<W: Write>(
|
||||
config: &Config,
|
||||
path: &TypePath,
|
||||
descriptor: &EnumDescriptor,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let rust_type = config.rust_type(path);
|
||||
|
||||
let variants: Vec<_> = descriptor
|
||||
.values
|
||||
.iter()
|
||||
.map(|variant| {
|
||||
let variant_name = variant.name.clone().unwrap();
|
||||
let rust_variant = config.rust_variant(path, &variant_name);
|
||||
(variant_name, rust_variant)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Generate Serialize
|
||||
write_serialize_start(0, &rust_type, writer)?;
|
||||
writeln!(writer, "{}let variant = match self {{", Indent(2))?;
|
||||
for (variant_name, rust_variant) in &variants {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}Self::{} => \"{}\",",
|
||||
Indent(3),
|
||||
rust_variant,
|
||||
variant_name
|
||||
)?;
|
||||
}
|
||||
writeln!(writer, "{}}};", Indent(2))?;
|
||||
|
||||
writeln!(writer, "{}serializer.serialize_str(variant)", Indent(2))?;
|
||||
write_serialize_end(0, writer)?;
|
||||
|
||||
// Generate Deserialize
|
||||
write_deserialize_start(0, &rust_type, writer)?;
|
||||
write_fields_array(writer, 2, variants.iter().map(|(name, _)| name.as_str()))?;
|
||||
write_visitor(writer, 2, &rust_type, &variants)?;
|
||||
|
||||
// Use deserialize_any to allow users to provide integers or strings
|
||||
writeln!(
|
||||
writer,
|
||||
"{}deserializer.deserialize_any(GeneratedVisitor)",
|
||||
Indent(2)
|
||||
)?;
|
||||
|
||||
write_deserialize_end(0, writer)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_visitor<W: Write>(
|
||||
writer: &mut W,
|
||||
indent: usize,
|
||||
rust_type: &str,
|
||||
variants: &[(String, String)],
|
||||
) -> Result<()> {
|
||||
// Protobuf supports deserialization of enumerations both from string and integer values
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent}struct GeneratedVisitor;
|
||||
|
||||
{indent}impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {{
|
||||
{indent} type Value = {rust_type};
|
||||
|
||||
{indent} fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{
|
||||
{indent} write!(formatter, "expected one of: {{:?}}", &FIELDS)
|
||||
{indent} }}
|
||||
|
||||
{indent} fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
|
||||
{indent} where
|
||||
{indent} E: serde::de::Error,
|
||||
{indent} {{
|
||||
{indent} use std::convert::TryFrom;
|
||||
{indent} i32::try_from(v)
|
||||
{indent} .ok()
|
||||
{indent} .and_then({rust_type}::from_i32)
|
||||
{indent} .ok_or_else(|| {{
|
||||
{indent} serde::de::Error::invalid_value(serde::de::Unexpected::Signed(v), &self)
|
||||
{indent} }})
|
||||
{indent} }}
|
||||
|
||||
{indent} fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
|
||||
{indent} where
|
||||
{indent} E: serde::de::Error,
|
||||
{indent} {{
|
||||
{indent} use std::convert::TryFrom;
|
||||
{indent} i32::try_from(v)
|
||||
{indent} .ok()
|
||||
{indent} .and_then({rust_type}::from_i32)
|
||||
{indent} .ok_or_else(|| {{
|
||||
{indent} serde::de::Error::invalid_value(serde::de::Unexpected::Unsigned(v), &self)
|
||||
{indent} }})
|
||||
{indent} }}
|
||||
|
||||
{indent} fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
||||
{indent} where
|
||||
{indent} E: serde::de::Error,
|
||||
{indent} {{"#,
|
||||
indent = Indent(indent),
|
||||
rust_type = rust_type,
|
||||
)?;
|
||||
|
||||
writeln!(writer, "{}match value {{", Indent(indent + 2))?;
|
||||
for (variant_name, rust_variant) in variants {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}\"{}\" => Ok({}::{}),",
|
||||
Indent(indent + 3),
|
||||
variant_name,
|
||||
rust_type,
|
||||
rust_variant
|
||||
)?;
|
||||
}
|
||||
|
||||
writeln!(
|
||||
writer,
|
||||
"{indent}_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),",
|
||||
indent = Indent(indent + 3)
|
||||
)?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 2))?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 1))?;
|
||||
writeln!(writer, "{}}}", Indent(indent))
|
||||
}
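// A rough sketch (hypothetical, names and whitespace approximate) of the
// Serialize half emitted by `generate_enum` for an enum such as
// `test.syntax3.KitchenSink.Value`; the Deserialize half additionally accepts
// the integer tag number through `visit_i64`/`visit_u64` above:
//
// impl serde::Serialize for kitchen_sink::Value {
//     #[allow(deprecated)]
//     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
//     where
//         S: serde::Serializer,
//     {
//         let variant = match self {
//             Self::Unknown => "VALUE_UNKNOWN",
//             Self::A => "VALUE_A",
//             Self::B => "VALUE_B",
//         };
//         serializer.serialize_str(variant)
//     }
// }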
|
|
@ -0,0 +1,809 @@
|
|||
//! This module contains the code to generate Serialize and Deserialize
|
||||
//! implementations for message types
|
||||
//!
|
||||
//! The implementation follows the proto3 [JSON mapping][1] with the default options
|
||||
//!
|
||||
//! Importantly:
|
||||
//! - numeric types can be decoded from either a string or number
|
||||
//! - 32-bit integers and floats are encoded as numbers
|
||||
//! - 64-bit integers are encoded as strings
|
||||
//! - repeated fields are encoded as arrays
|
||||
//! - bytes are base64 encoded (NOT CURRENTLY SUPPORTED)
|
||||
//! - messages and maps are encoded as objects
|
||||
//! - fields are lowerCamelCase except where overridden by the proto definition
|
||||
//! - default values are not emitted on encode
|
||||
//! - unrecognised fields error on decode
|
||||
//!
|
||||
//! Note: This will not generate code to correctly serialize/deserialize well-known-types
|
||||
//! such as google.protobuf.Any, google.protobuf.Duration, etc... conversions for these
|
||||
//! special-cased messages will need to be manually implemented. Once that is done, however,
|
||||
//! any messages containing these types will serialize/deserialize correctly
|
||||
//!
|
||||
//! [1]: https://developers.google.com/protocol-buffers/docs/proto3#json
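//!
//! As a concrete (hypothetical) illustration of the rules above, a proto3 message
//! `message Example { int64 big = 1; repeated int32 ids = 2; string note = 3; }`
//! with `big = 5`, `ids = [1, 2]` and an empty `note` would encode as:
//!
//! ```json
//! {"big":"5","ids":[1,2]}
//! ```
//!
//! i.e. the 64-bit field is quoted, the repeated field is an array, and the
//! defaulted string field is omitted entirely.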
|
||||
|
||||
use std::io::{Result, Write};
|
||||
|
||||
use crate::message::{Field, FieldModifier, FieldType, Message, OneOf, ScalarType};
|
||||
|
||||
use super::{
|
||||
write_deserialize_end, write_deserialize_start, write_serialize_end, write_serialize_start,
|
||||
Config, Indent,
|
||||
};
|
||||
use crate::descriptor::TypePath;
|
||||
use crate::generator::write_fields_array;
|
||||
|
||||
pub fn generate_message<W: Write>(
|
||||
config: &Config,
|
||||
message: &Message,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let rust_type = config.rust_type(&message.path);
|
||||
|
||||
// Generate Serialize
|
||||
write_serialize_start(0, &rust_type, writer)?;
|
||||
write_message_serialize(config, 2, message, writer)?;
|
||||
write_serialize_end(0, writer)?;
|
||||
|
||||
// Generate Deserialize
|
||||
write_deserialize_start(0, &rust_type, writer)?;
|
||||
write_deserialize_message(config, 2, message, &rust_type, writer)?;
|
||||
write_deserialize_end(0, writer)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_field_empty_predicate<W: Write>(member: &Field, writer: &mut W) -> Result<()> {
|
||||
match (&member.field_type, &member.field_modifier) {
|
||||
(_, FieldModifier::Required) => unreachable!(),
|
||||
(_, FieldModifier::Repeated)
|
||||
| (FieldType::Map(_, _), _)
|
||||
| (FieldType::Scalar(ScalarType::String), FieldModifier::UseDefault)
|
||||
| (FieldType::Scalar(ScalarType::Bytes), FieldModifier::UseDefault) => {
|
||||
write!(writer, "!self.{}.is_empty()", member.rust_field_name())
|
||||
}
|
||||
(_, FieldModifier::Optional) | (FieldType::Message(_), _) => {
|
||||
write!(writer, "self.{}.is_some()", member.rust_field_name())
|
||||
}
|
||||
(FieldType::Scalar(ScalarType::F64), FieldModifier::UseDefault)
|
||||
| (FieldType::Scalar(ScalarType::F32), FieldModifier::UseDefault) => {
|
||||
write!(writer, "self.{} != 0.", member.rust_field_name())
|
||||
}
|
||||
(FieldType::Scalar(ScalarType::Bool), FieldModifier::UseDefault) => {
|
||||
write!(writer, "self.{}", member.rust_field_name())
|
||||
}
|
||||
(FieldType::Enum(_), FieldModifier::UseDefault)
|
||||
| (FieldType::Scalar(ScalarType::I64), FieldModifier::UseDefault)
|
||||
| (FieldType::Scalar(ScalarType::I32), FieldModifier::UseDefault)
|
||||
| (FieldType::Scalar(ScalarType::U32), FieldModifier::UseDefault)
|
||||
| (FieldType::Scalar(ScalarType::U64), FieldModifier::UseDefault) => {
|
||||
write!(writer, "self.{} != 0", member.rust_field_name())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_message_serialize<W: Write>(
|
||||
config: &Config,
|
||||
indent: usize,
|
||||
message: &Message,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
write_struct_serialize_start(indent, message, writer)?;
|
||||
|
||||
for field in &message.fields {
|
||||
write_serialize_field(config, indent, field, writer)?;
|
||||
}
|
||||
|
||||
for one_of in &message.one_ofs {
|
||||
write_serialize_one_of(indent, config, one_of, writer)?;
|
||||
}
|
||||
|
||||
write_struct_serialize_end(indent, writer)
|
||||
}
|
||||
|
||||
fn write_struct_serialize_start<W: Write>(
|
||||
indent: usize,
|
||||
message: &Message,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
writeln!(writer, "{}use serde::ser::SerializeStruct;", Indent(indent))?;
|
||||
|
||||
let required_len = message
|
||||
.fields
|
||||
.iter()
|
||||
.filter(|member| member.field_modifier.is_required())
|
||||
.count();
|
||||
|
||||
if required_len != message.fields.len() || !message.one_ofs.is_empty() {
|
||||
writeln!(writer, "{}let mut len = {};", Indent(indent), required_len)?;
|
||||
} else {
|
||||
writeln!(writer, "{}let len = {};", Indent(indent), required_len)?;
|
||||
}
|
||||
|
||||
for field in &message.fields {
|
||||
if field.field_modifier.is_required() {
|
||||
continue;
|
||||
}
|
||||
write!(writer, "{}if ", Indent(indent))?;
|
||||
write_field_empty_predicate(field, writer)?;
|
||||
writeln!(writer, " {{")?;
|
||||
writeln!(writer, "{}len += 1;", Indent(indent + 1))?;
|
||||
writeln!(writer, "{}}}", Indent(indent))?;
|
||||
}
|
||||
|
||||
for one_of in &message.one_ofs {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}if self.{}.is_some() {{",
|
||||
Indent(indent),
|
||||
one_of.rust_field_name()
|
||||
)?;
|
||||
writeln!(writer, "{}len += 1;", Indent(indent + 1))?;
|
||||
writeln!(writer, "{}}}", Indent(indent))?;
|
||||
}
|
||||
|
||||
if !message.fields.is_empty() || !message.one_ofs.is_empty() {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}let mut struct_ser = serializer.serialize_struct(\"{}\", len)?;",
|
||||
Indent(indent),
|
||||
message.path
|
||||
)?;
|
||||
} else {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}let struct_ser = serializer.serialize_struct(\"{}\", len)?;",
|
||||
Indent(indent),
|
||||
message.path
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_struct_serialize_end<W: Write>(indent: usize, writer: &mut W) -> Result<()> {
|
||||
writeln!(writer, "{}struct_ser.end()", Indent(indent))
|
||||
}
|
||||
|
||||
fn write_decode_variant<W: Write>(
|
||||
config: &Config,
|
||||
indent: usize,
|
||||
value: &str,
|
||||
path: &TypePath,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
writeln!(writer, "{}::from_i32({})", config.rust_type(path), value)?;
|
||||
write!(
|
||||
writer,
|
||||
"{}.ok_or_else(|| serde::ser::Error::custom(format!(\"Invalid variant {{}}\", {})))",
|
||||
Indent(indent),
|
||||
value
|
||||
)
|
||||
}
|
||||
|
||||
/// Depending on the type of the field, different ways of accessing the field's value
/// are needed - this allows decoupling the type serialization logic from the logic
/// that manipulates its container, e.g. Vec, Option, HashMap
|
||||
struct Variable<'a> {
|
||||
/// A reference to the field's value
|
||||
as_ref: &'a str,
|
||||
/// The field's value
|
||||
as_unref: &'a str,
|
||||
/// The field without any leading "&" or "*"
|
||||
raw: &'a str,
|
||||
}
|
||||
|
||||
fn write_serialize_variable<W: Write>(
|
||||
config: &Config,
|
||||
indent: usize,
|
||||
field: &Field,
|
||||
variable: Variable<'_>,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
match &field.field_type {
|
||||
FieldType::Scalar(scalar) => write_serialize_scalar_variable(
|
||||
indent,
|
||||
*scalar,
|
||||
field.field_modifier,
|
||||
variable,
|
||||
field.json_name(),
|
||||
writer,
|
||||
),
|
||||
FieldType::Enum(path) => {
|
||||
write!(writer, "{}let v = ", Indent(indent))?;
|
||||
match field.field_modifier {
|
||||
FieldModifier::Repeated => {
|
||||
writeln!(writer, "{}.iter().cloned().map(|v| {{", variable.raw)?;
|
||||
write!(writer, "{}", Indent(indent + 1))?;
|
||||
write_decode_variant(config, indent + 2, "v", path, writer)?;
|
||||
writeln!(writer)?;
|
||||
write!(
|
||||
writer,
|
||||
"{}}}).collect::<Result<Vec<_>, _>>()",
|
||||
Indent(indent + 1)
|
||||
)
|
||||
}
|
||||
_ => write_decode_variant(config, indent + 1, variable.as_unref, path, writer),
|
||||
}?;
|
||||
|
||||
writeln!(writer, "?;")?;
|
||||
writeln!(
|
||||
writer,
|
||||
"{}struct_ser.serialize_field(\"{}\", &v)?;",
|
||||
Indent(indent),
|
||||
field.json_name()
|
||||
)
|
||||
}
|
||||
FieldType::Map(_, value_type)
|
||||
if matches!(
|
||||
value_type.as_ref(),
|
||||
FieldType::Scalar(ScalarType::I64)
|
||||
| FieldType::Scalar(ScalarType::U64)
|
||||
| FieldType::Enum(_)
|
||||
) =>
|
||||
{
|
||||
writeln!(
|
||||
writer,
|
||||
"{}let v: std::collections::HashMap<_, _> = {}.iter()",
|
||||
Indent(indent),
|
||||
variable.raw
|
||||
)?;
|
||||
|
||||
match value_type.as_ref() {
|
||||
FieldType::Scalar(ScalarType::I64) | FieldType::Scalar(ScalarType::U64) => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}.map(|(k, v)| (k, v.to_string())).collect();",
|
||||
Indent(indent + 1)
|
||||
)?;
|
||||
}
|
||||
FieldType::Enum(path) => {
|
||||
writeln!(writer, "{}.map(|(k, v)| {{", Indent(indent + 1))?;
|
||||
write!(writer, "{}let v = ", Indent(indent + 2))?;
|
||||
write_decode_variant(config, indent + 3, "*v", path, writer)?;
|
||||
writeln!(writer, "?;")?;
|
||||
writeln!(writer, "{}Ok((k, v))", Indent(indent + 2))?;
|
||||
writeln!(
|
||||
writer,
|
||||
"{}}}).collect::<Result<_,_>>()?;",
|
||||
Indent(indent + 1)
|
||||
)?;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
writeln!(
|
||||
writer,
|
||||
"{}struct_ser.serialize_field(\"{}\", &v)?;",
|
||||
Indent(indent),
|
||||
field.json_name()
|
||||
)
|
||||
}
|
||||
_ => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}struct_ser.serialize_field(\"{}\", {})?;",
|
||||
Indent(indent),
|
||||
field.json_name(),
|
||||
variable.as_ref
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_serialize_scalar_variable<W: Write>(
|
||||
indent: usize,
|
||||
scalar: ScalarType,
|
||||
field_modifier: FieldModifier,
|
||||
variable: Variable<'_>,
|
||||
json_name: String,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let conversion = match scalar {
|
||||
ScalarType::I64 | ScalarType::U64 => "ToString::to_string",
|
||||
ScalarType::Bytes => "pbjson::private::base64::encode",
|
||||
_ => {
|
||||
return writeln!(
|
||||
writer,
|
||||
"{}struct_ser.serialize_field(\"{}\", {})?;",
|
||||
Indent(indent),
|
||||
json_name,
|
||||
variable.as_ref
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
match field_modifier {
|
||||
FieldModifier::Repeated => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}struct_ser.serialize_field(\"{}\", &{}.iter().map({}).collect::<Vec<_>>())?;",
|
||||
Indent(indent),
|
||||
json_name,
|
||||
variable.raw,
|
||||
conversion
|
||||
)
|
||||
}
|
||||
_ => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}struct_ser.serialize_field(\"{}\", {}(&{}).as_str())?;",
|
||||
Indent(indent),
|
||||
json_name,
|
||||
conversion,
|
||||
variable.raw,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_serialize_field<W: Write>(
|
||||
config: &Config,
|
||||
indent: usize,
|
||||
field: &Field,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let as_ref = format!("&self.{}", field.rust_field_name());
|
||||
let variable = Variable {
|
||||
as_ref: as_ref.as_str(),
|
||||
as_unref: &as_ref.as_str()[1..],
|
||||
raw: &as_ref.as_str()[1..],
|
||||
};
|
||||
|
||||
match &field.field_modifier {
|
||||
FieldModifier::Required => {
|
||||
write_serialize_variable(config, indent, field, variable, writer)?;
|
||||
}
|
||||
FieldModifier::Optional => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}if let Some(v) = {}.as_ref() {{",
|
||||
Indent(indent),
|
||||
variable.as_unref
|
||||
)?;
|
||||
let variable = Variable {
|
||||
as_ref: "v",
|
||||
as_unref: "*v",
|
||||
raw: "v",
|
||||
};
|
||||
write_serialize_variable(config, indent + 1, field, variable, writer)?;
|
||||
writeln!(writer, "{}}}", Indent(indent))?;
|
||||
}
|
||||
FieldModifier::Repeated | FieldModifier::UseDefault => {
|
||||
write!(writer, "{}if ", Indent(indent))?;
|
||||
write_field_empty_predicate(field, writer)?;
|
||||
writeln!(writer, " {{")?;
|
||||
write_serialize_variable(config, indent + 1, field, variable, writer)?;
|
||||
writeln!(writer, "{}}}", Indent(indent))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_serialize_one_of<W: Write>(
|
||||
indent: usize,
|
||||
config: &Config,
|
||||
one_of: &OneOf,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}if let Some(v) = self.{}.as_ref() {{",
|
||||
Indent(indent),
|
||||
one_of.rust_field_name()
|
||||
)?;
|
||||
|
||||
writeln!(writer, "{}match v {{", Indent(indent + 1))?;
|
||||
for field in &one_of.fields {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}{}::{}(v) => {{",
|
||||
Indent(indent + 2),
|
||||
config.rust_type(&one_of.path),
|
||||
field.rust_type_name(),
|
||||
)?;
|
||||
let variable = Variable {
|
||||
as_ref: "v",
|
||||
as_unref: "*v",
|
||||
raw: "v",
|
||||
};
|
||||
write_serialize_variable(config, indent + 3, field, variable, writer)?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 2))?;
|
||||
}
|
||||
|
||||
writeln!(writer, "{}}}", Indent(indent + 1),)?;
|
||||
writeln!(writer, "{}}}", Indent(indent))
|
||||
}
|
||||
|
||||
fn write_deserialize_message<W: Write>(
|
||||
config: &Config,
|
||||
indent: usize,
|
||||
message: &Message,
|
||||
rust_type: &str,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
write_deserialize_field_name(2, message, writer)?;
|
||||
|
||||
writeln!(writer, "{}struct GeneratedVisitor;", Indent(indent))?;
|
||||
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent}impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {{
|
||||
{indent} type Value = {rust_type};
|
||||
|
||||
{indent} fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{
|
||||
{indent} formatter.write_str("struct {name}")
|
||||
{indent} }}
|
||||
|
||||
{indent} fn visit_map<V>(self, mut map: V) -> Result<{rust_type}, V::Error>
|
||||
{indent} where
|
||||
{indent} V: serde::de::MapAccess<'de>,
|
||||
{indent} {{"#,
|
||||
indent = Indent(indent),
|
||||
name = message.path,
|
||||
rust_type = rust_type,
|
||||
)?;
|
||||
|
||||
for field in &message.fields {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}let mut {} = None;",
|
||||
Indent(indent + 2),
|
||||
field.rust_field_name(),
|
||||
)?;
|
||||
}
|
||||
|
||||
for one_of in &message.one_ofs {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}let mut {} = None;",
|
||||
Indent(indent + 2),
|
||||
one_of.rust_field_name(),
|
||||
)?;
|
||||
}
|
||||
|
||||
if !message.fields.is_empty() || !message.one_ofs.is_empty() {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}while let Some(k) = map.next_key()? {{",
|
||||
Indent(indent + 2)
|
||||
)?;
|
||||
|
||||
writeln!(writer, "{}match k {{", Indent(indent + 3))?;
|
||||
|
||||
for field in &message.fields {
|
||||
write_deserialize_field(config, indent + 4, field, None, writer)?;
|
||||
}
|
||||
|
||||
for one_of in &message.one_ofs {
|
||||
for field in &one_of.fields {
|
||||
write_deserialize_field(config, indent + 4, field, Some(one_of), writer)?;
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(writer, "{}}}", Indent(indent + 3))?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 2))?;
|
||||
} else {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}while map.next_key::<GeneratedField>()?.is_some() {{}}",
|
||||
Indent(indent + 2)
|
||||
)?;
|
||||
}
|
||||
|
||||
writeln!(writer, "{}Ok({} {{", Indent(indent + 2), rust_type)?;
|
||||
for field in &message.fields {
|
||||
match field.field_modifier {
|
||||
FieldModifier::Required => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{indent}{field}: {field}.ok_or_else(|| serde::de::Error::missing_field(\"{json_name}\"))?,",
|
||||
indent=Indent(indent + 3),
|
||||
field= field.rust_field_name(),
|
||||
json_name= field.json_name()
|
||||
)?;
|
||||
}
|
||||
FieldModifier::UseDefault | FieldModifier::Repeated => {
|
||||
// Note: this currently does not hydrate optional proto2 fields with defaults
|
||||
writeln!(
|
||||
writer,
|
||||
"{indent}{field}: {field}.unwrap_or_default(),",
|
||||
indent = Indent(indent + 3),
|
||||
field = field.rust_field_name()
|
||||
)?;
|
||||
}
|
||||
_ => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{indent}{field},",
|
||||
indent = Indent(indent + 3),
|
||||
field = field.rust_field_name()
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
for one_of in &message.one_ofs {
|
||||
writeln!(
|
||||
writer,
|
||||
"{indent}{field},",
|
||||
indent = Indent(indent + 3),
|
||||
field = one_of.rust_field_name(),
|
||||
)?;
|
||||
}
|
||||
|
||||
writeln!(writer, "{}}})", Indent(indent + 2))?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 1))?;
|
||||
writeln!(writer, "{}}}", Indent(indent))?;
|
||||
writeln!(
|
||||
writer,
|
||||
"{}deserializer.deserialize_struct(\"{}\", FIELDS, GeneratedVisitor)",
|
||||
Indent(indent),
|
||||
message.path
|
||||
)
|
||||
}
|
||||
|
||||
fn write_deserialize_field_name<W: Write>(
|
||||
indent: usize,
|
||||
message: &Message,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let fields: Vec<_> = message
|
||||
.all_fields()
|
||||
.map(|field| (field.json_name(), field.rust_type_name()))
|
||||
.collect();
|
||||
|
||||
write_fields_array(writer, indent, fields.iter().map(|(name, _)| name.as_str()))?;
|
||||
write_fields_enum(writer, indent, fields.iter().map(|(_, name)| name.as_str()))?;
|
||||
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent}impl<'de> serde::Deserialize<'de> for GeneratedField {{
|
||||
{indent} fn deserialize<D>(deserializer: D) -> Result<GeneratedField, D::Error>
|
||||
{indent} where
|
||||
{indent} D: serde::Deserializer<'de>,
|
||||
{indent} {{
|
||||
{indent} struct GeneratedVisitor;
|
||||
|
||||
{indent} impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {{
|
||||
{indent} type Value = GeneratedField;
|
||||
|
||||
{indent} fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{
|
||||
{indent} write!(formatter, "expected one of: {{:?}}", &FIELDS)
|
||||
{indent} }}
|
||||
|
||||
{indent} fn visit_str<E>(self, value: &str) -> Result<GeneratedField, E>
|
||||
{indent} where
|
||||
{indent} E: serde::de::Error,
|
||||
{indent} {{"#,
|
||||
indent = Indent(indent)
|
||||
)?;
|
||||
|
||||
if !fields.is_empty() {
|
||||
writeln!(writer, "{}match value {{", Indent(indent + 4))?;
|
||||
for (json_name, type_name) in &fields {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}\"{}\" => Ok(GeneratedField::{}),",
|
||||
Indent(indent + 5),
|
||||
json_name,
|
||||
type_name
|
||||
)?;
|
||||
}
|
||||
writeln!(
|
||||
writer,
|
||||
"{}_ => Err(serde::de::Error::unknown_field(value, FIELDS)),",
|
||||
Indent(indent + 5)
|
||||
)?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 4))?;
|
||||
} else {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}Err(serde::de::Error::unknown_field(value, FIELDS))",
|
||||
Indent(indent + 4)
|
||||
)?;
|
||||
}
|
||||
|
||||
writeln!(
|
||||
writer,
|
||||
r#"{indent} }}
|
||||
{indent} }}
|
||||
{indent} deserializer.deserialize_identifier(GeneratedVisitor)
|
||||
{indent} }}
|
||||
{indent}}}"#,
|
||||
indent = Indent(indent)
|
||||
)
|
||||
}
|
||||
|
||||
fn write_fields_enum<'a, W: Write, I: Iterator<Item = &'a str>>(
|
||||
writer: &mut W,
|
||||
indent: usize,
|
||||
fields: I,
|
||||
) -> Result<()> {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}#[allow(clippy::enum_variant_names)]",
|
||||
Indent(indent)
|
||||
)?;
|
||||
writeln!(writer, "{}enum GeneratedField {{", Indent(indent))?;
|
||||
for type_name in fields {
|
||||
writeln!(writer, "{}{},", Indent(indent + 1), type_name)?;
|
||||
}
|
||||
writeln!(writer, "{}}}", Indent(indent))
|
||||
}
|
||||
|
||||
fn write_deserialize_field<W: Write>(
|
||||
config: &Config,
|
||||
indent: usize,
|
||||
field: &Field,
|
||||
one_of: Option<&OneOf>,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let field_name = match one_of {
|
||||
Some(one_of) => one_of.rust_field_name(),
|
||||
None => field.rust_field_name(),
|
||||
};
|
||||
|
||||
let json_name = field.json_name();
|
||||
writeln!(
|
||||
writer,
|
||||
"{}GeneratedField::{} => {{",
|
||||
Indent(indent),
|
||||
field.rust_type_name()
|
||||
)?;
|
||||
writeln!(
|
||||
writer,
|
||||
"{}if {}.is_some() {{",
|
||||
Indent(indent + 1),
|
||||
field_name
|
||||
)?;
|
||||
|
||||
// Note: this will report a duplicate field error if multiple values are specified for a oneof
|
||||
writeln!(
|
||||
writer,
|
||||
"{}return Err(serde::de::Error::duplicate_field(\"{}\"));",
|
||||
Indent(indent + 2),
|
||||
json_name
|
||||
)?;
|
||||
writeln!(writer, "{}}}", Indent(indent + 1))?;
|
||||
write!(writer, "{}{} = Some(", Indent(indent + 1), field_name)?;
|
||||
|
||||
if let Some(one_of) = one_of {
|
||||
write!(
|
||||
writer,
|
||||
"{}::{}(",
|
||||
config.rust_type(&one_of.path),
|
||||
field.rust_type_name()
|
||||
)?;
|
||||
}
|
||||
|
||||
match &field.field_type {
|
||||
FieldType::Scalar(scalar) => {
|
||||
write_encode_scalar_field(indent + 1, *scalar, field.field_modifier, writer)?;
|
||||
}
|
||||
FieldType::Enum(path) => match field.field_modifier {
|
||||
FieldModifier::Repeated => {
|
||||
write!(
|
||||
writer,
|
||||
"map.next_value::<Vec<{}>>()?.into_iter().map(|x| x as i32).collect()",
|
||||
config.rust_type(path)
|
||||
)?;
|
||||
}
|
||||
_ => {
|
||||
write!(
|
||||
writer,
|
||||
"map.next_value::<{}>()? as i32",
|
||||
config.rust_type(path)
|
||||
)?;
|
||||
}
|
||||
},
|
||||
FieldType::Map(key, value) => {
|
||||
writeln!(writer)?;
|
||||
write!(
|
||||
writer,
|
||||
"{}map.next_value::<std::collections::HashMap<",
|
||||
Indent(indent + 2),
|
||||
)?;
|
||||
|
||||
let map_k = match key {
|
||||
ScalarType::Bytes => {
|
||||
// https://github.com/tokio-rs/prost/issues/531
|
||||
panic!("bytes are not currently supported as map keys")
|
||||
}
|
||||
_ if key.is_numeric() => {
|
||||
write!(
|
||||
writer,
|
||||
"::pbjson::private::NumberDeserialize<{}>",
|
||||
key.rust_type()
|
||||
)?;
|
||||
"k.0"
|
||||
}
|
||||
_ => {
|
||||
write!(writer, "_")?;
|
||||
"k"
|
||||
}
|
||||
};
|
||||
write!(writer, ", ")?;
|
||||
let map_v = match value.as_ref() {
|
||||
FieldType::Scalar(scalar) if scalar.is_numeric() => {
|
||||
write!(
|
||||
writer,
|
||||
"::pbjson::private::NumberDeserialize<{}>",
|
||||
scalar.rust_type()
|
||||
)?;
|
||||
"v.0"
|
||||
}
|
||||
FieldType::Scalar(ScalarType::Bytes) => {
|
||||
// https://github.com/tokio-rs/prost/issues/531
|
||||
panic!("bytes are not currently supported as map values")
|
||||
}
|
||||
FieldType::Enum(path) => {
|
||||
write!(writer, "{}", config.rust_type(path))?;
|
||||
"v as i32"
|
||||
}
|
||||
FieldType::Map(_, _) => panic!("protobuf disallows nested maps"),
|
||||
_ => {
|
||||
write!(writer, "_")?;
|
||||
"v"
|
||||
}
|
||||
};
|
||||
|
||||
writeln!(writer, ">>()?")?;
|
||||
if map_k != "k" || map_v != "v" {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}.into_iter().map(|(k,v)| ({}, {})).collect()",
|
||||
Indent(indent + 3),
|
||||
map_k,
|
||||
map_v,
|
||||
)?;
|
||||
}
|
||||
write!(writer, "{}", Indent(indent + 1))?;
|
||||
}
|
||||
_ => {
|
||||
write!(writer, "map.next_value()?",)?;
|
||||
}
|
||||
};
|
||||
|
||||
if one_of.is_some() {
|
||||
write!(writer, ")")?;
|
||||
}
|
||||
|
||||
writeln!(writer, ");")?;
|
||||
writeln!(writer, "{}}}", Indent(indent))
|
||||
}
|
||||
|
||||
fn write_encode_scalar_field<W: Write>(
|
||||
indent: usize,
|
||||
scalar: ScalarType,
|
||||
field_modifier: FieldModifier,
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
let deserializer = match scalar {
|
||||
ScalarType::Bytes => "BytesDeserialize",
|
||||
_ if scalar.is_numeric() => "NumberDeserialize",
|
||||
_ => return write!(writer, "map.next_value()?",),
|
||||
};
|
||||
|
||||
writeln!(writer)?;
|
||||
|
||||
match field_modifier {
|
||||
FieldModifier::Repeated => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}map.next_value::<Vec<::pbjson::private::{}<_>>>()?",
|
||||
Indent(indent + 1),
|
||||
deserializer
|
||||
)?;
|
||||
writeln!(
|
||||
writer,
|
||||
"{}.into_iter().map(|x| x.0).collect()",
|
||||
Indent(indent + 2)
|
||||
)?;
|
||||
}
|
||||
_ => {
|
||||
writeln!(
|
||||
writer,
|
||||
"{}map.next_value::<::pbjson::private::{}<_>>()?.0",
|
||||
Indent(indent + 1),
|
||||
deserializer
|
||||
)?;
|
||||
}
|
||||
}
|
||||
write!(writer, "{}", Indent(indent))
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
|
||||
#![warn(
|
||||
missing_debug_implementations,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::use_self,
|
||||
clippy::clone_on_ref_ptr,
|
||||
clippy::future_not_send
|
||||
)]
|
||||
|
||||
use crate::descriptor::{Descriptor, DescriptorSet, Package};
|
||||
use crate::generator::{generate_enum, generate_message, Config};
|
||||
use crate::message::resolve_message;
|
||||
use std::io::{BufWriter, Error, ErrorKind, Result, Write};
|
||||
use std::path::PathBuf;
|
||||
|
||||
mod descriptor;
|
||||
mod escape;
|
||||
mod generator;
|
||||
mod message;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Builder {
|
||||
descriptors: descriptor::DescriptorSet,
|
||||
out_dir: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
/// Create a new `Builder`
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
descriptors: DescriptorSet::new(),
|
||||
out_dir: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Register an encoded `FileDescriptorSet` with this `Builder`
|
||||
pub fn register_descriptors(&mut self, descriptors: &[u8]) -> Result<&mut Self> {
|
||||
self.descriptors.register_encoded(descriptors)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Generates code for all registered types where `prefixes` contains a prefix of
|
||||
/// the fully-qualified path of the type
|
||||
pub fn build<S: AsRef<str>>(&mut self, prefixes: &[S]) -> Result<()> {
|
||||
let mut output: PathBuf = self.out_dir.clone().map(Ok).unwrap_or_else(|| {
|
||||
std::env::var_os("OUT_DIR")
|
||||
.ok_or_else(|| {
|
||||
Error::new(ErrorKind::Other, "OUT_DIR environment variable is not set")
|
||||
})
|
||||
.map(Into::into)
|
||||
})?;
|
||||
output.push("FILENAME");
|
||||
|
||||
let write_factory = move |package: &Package| {
|
||||
output.set_file_name(format!("{}.serde.rs", package));
|
||||
|
||||
let file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.create(true)
|
||||
.open(&output)?;
|
||||
|
||||
Ok(BufWriter::new(file))
|
||||
};
|
||||
|
||||
let writers = generate(&self.descriptors, prefixes, write_factory)?;
|
||||
for (_, mut writer) in writers {
|
||||
writer.flush()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn generate<S: AsRef<str>, W: Write, F: FnMut(&Package) -> Result<W>>(
|
||||
descriptors: &DescriptorSet,
|
||||
prefixes: &[S],
|
||||
mut write_factory: F,
|
||||
) -> Result<Vec<(Package, W)>> {
|
||||
let config = Config {
|
||||
extern_types: Default::default(),
|
||||
};
|
||||
|
||||
let iter = descriptors.iter().filter(move |(t, _)| {
|
||||
prefixes
|
||||
.iter()
|
||||
.any(|prefix| t.matches_prefix(prefix.as_ref()))
|
||||
});
|
||||
|
||||
// Exploit the fact that the descriptors are ordered to group together types from the same package
|
||||
let mut ret: Vec<(Package, W)> = Vec::new();
|
||||
for (type_path, descriptor) in iter {
|
||||
let writer = match ret.last_mut() {
|
||||
Some((package, writer)) if package == type_path.package() => writer,
|
||||
_ => {
|
||||
let package = type_path.package();
|
||||
ret.push((package.clone(), write_factory(package)?));
|
||||
&mut ret.last_mut().unwrap().1
|
||||
}
|
||||
};
|
||||
|
||||
match descriptor {
|
||||
Descriptor::Enum(descriptor) => generate_enum(&config, type_path, descriptor, writer)?,
|
||||
Descriptor::Message(descriptor) => {
|
||||
if let Some(message) = resolve_message(descriptors, descriptor) {
|
||||
generate_message(&config, &message, writer)?
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
|
@ -0,0 +1,275 @@
|
|||
//! The raw descriptor format is not very easy to work with, a fact not helped
//! by prost making almost all members of proto2-syntax messages optional
//!
//! This module therefore extracts a slightly less obtuse representation of a
//! message that can be used by the code generation logic
|
||||
|
||||
use prost_types::{
|
||||
field_descriptor_proto::{Label, Type},
|
||||
FieldDescriptorProto,
|
||||
};
|
||||
|
||||
use crate::descriptor::{Descriptor, DescriptorSet, MessageDescriptor, Syntax, TypeName, TypePath};
|
||||
use crate::escape::escape_ident;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum ScalarType {
|
||||
F64,
|
||||
F32,
|
||||
I32,
|
||||
I64,
|
||||
U32,
|
||||
U64,
|
||||
Bool,
|
||||
String,
|
||||
Bytes,
|
||||
}
|
||||
|
||||
impl ScalarType {
|
||||
pub fn rust_type(&self) -> &'static str {
|
||||
match self {
|
||||
ScalarType::F64 => "f64",
|
||||
ScalarType::F32 => "f32",
|
||||
ScalarType::I32 => "i32",
|
||||
ScalarType::I64 => "i64",
|
||||
ScalarType::U32 => "u32",
|
||||
ScalarType::U64 => "u64",
|
||||
ScalarType::Bool => "bool",
|
||||
ScalarType::String => "String",
|
||||
ScalarType::Bytes => "Vec<u8>",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_numeric(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ScalarType::F64
|
||||
| ScalarType::F32
|
||||
| ScalarType::I32
|
||||
| ScalarType::I64
|
||||
| ScalarType::U32
|
||||
| ScalarType::U64
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FieldType {
|
||||
Scalar(ScalarType),
|
||||
Enum(TypePath),
|
||||
Message(TypePath),
|
||||
Map(ScalarType, Box<FieldType>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum FieldModifier {
|
||||
Required,
|
||||
Optional,
|
||||
UseDefault,
|
||||
Repeated,
|
||||
}
|
||||
|
||||
impl FieldModifier {
|
||||
pub fn is_required(&self) -> bool {
|
||||
matches!(self, Self::Required)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Field {
|
||||
pub name: String,
|
||||
pub json_name: Option<String>,
|
||||
pub field_modifier: FieldModifier,
|
||||
pub field_type: FieldType,
|
||||
}
|
||||
|
||||
impl Field {
|
||||
pub fn rust_type_name(&self) -> String {
|
||||
use heck::CamelCase;
|
||||
self.name.to_camel_case()
|
||||
}
|
||||
|
||||
pub fn rust_field_name(&self) -> String {
|
||||
use heck::SnakeCase;
|
||||
escape_ident(self.name.to_snake_case())
|
||||
}
|
||||
|
||||
pub fn json_name(&self) -> String {
|
||||
use heck::MixedCase;
|
||||
self.json_name
|
||||
.clone()
|
||||
.unwrap_or_else(|| self.name.to_mixed_case())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OneOf {
|
||||
pub name: String,
|
||||
pub path: TypePath,
|
||||
pub fields: Vec<Field>,
|
||||
}
|
||||
|
||||
impl OneOf {
|
||||
pub fn rust_field_name(&self) -> String {
|
||||
use heck::SnakeCase;
|
||||
escape_ident(self.name.to_snake_case())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Message {
|
||||
pub path: TypePath,
|
||||
pub fields: Vec<Field>,
|
||||
pub one_ofs: Vec<OneOf>,
|
||||
}
|
||||
|
||||
impl Message {
|
||||
pub fn all_fields(&self) -> impl Iterator<Item = &Field> + '_ {
|
||||
self.fields
|
||||
.iter()
|
||||
.chain(self.one_ofs.iter().flat_map(|one_of| one_of.fields.iter()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the provided message descriptor into a slightly less obtuse representation
|
||||
///
|
||||
/// Returns None if the provided message is auto-generated
|
||||
pub fn resolve_message(
|
||||
descriptors: &DescriptorSet,
|
||||
message: &MessageDescriptor,
|
||||
) -> Option<Message> {
|
||||
if message.is_map() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut fields = Vec::new();
|
||||
let mut one_of_fields = vec![Vec::new(); message.one_of.len()];
|
||||
|
||||
for field in &message.fields {
|
||||
let field_type = field_type(descriptors, field);
|
||||
let field_modifier = field_modifier(message, field, &field_type);
|
||||
|
||||
let resolved = Field {
|
||||
name: field.name.clone().expect("expected field to have name"),
|
||||
json_name: field.json_name.clone(),
|
||||
field_type,
|
||||
field_modifier,
|
||||
};
|
||||
|
||||
// Treat synthetic one-of as normal
|
||||
let proto3_optional = field.proto3_optional.unwrap_or(false);
|
||||
match (field.oneof_index, proto3_optional) {
|
||||
(Some(idx), false) => one_of_fields[idx as usize].push(resolved),
|
||||
_ => fields.push(resolved),
|
||||
}
|
||||
}
|
||||
|
||||
let mut one_ofs = Vec::new();
|
||||
|
||||
for (fields, descriptor) in one_of_fields.into_iter().zip(&message.one_of) {
|
||||
// Might be empty in the event of a synthetic one-of
|
||||
if !fields.is_empty() {
|
||||
let name = descriptor.name.clone().expect("oneof with no name");
|
||||
let path = message.path.child(TypeName::new(&name));
|
||||
|
||||
one_ofs.push(OneOf { name, path, fields })
|
||||
}
|
||||
}
|
||||
|
||||
Some(Message {
|
||||
path: message.path.clone(),
|
||||
fields,
|
||||
one_ofs,
|
||||
})
|
||||
}
|
||||
|
||||
fn field_modifier(
|
||||
message: &MessageDescriptor,
|
||||
field: &FieldDescriptorProto,
|
||||
field_type: &FieldType,
|
||||
) -> FieldModifier {
|
||||
let label = Label::from_i32(field.label.expect("expected label")).expect("valid label");
|
||||
if field.proto3_optional.unwrap_or(false) {
|
||||
assert_eq!(label, Label::Optional);
|
||||
return FieldModifier::Optional;
|
||||
}
|
||||
|
||||
if field.oneof_index.is_some() {
|
||||
assert_eq!(label, Label::Optional);
|
||||
return FieldModifier::Optional;
|
||||
}
|
||||
|
||||
if matches!(field_type, FieldType::Map(_, _)) {
|
||||
assert_eq!(label, Label::Repeated);
|
||||
return FieldModifier::Repeated;
|
||||
}
|
||||
|
||||
match label {
|
||||
Label::Optional => match message.syntax {
|
||||
Syntax::Proto2 => FieldModifier::Optional,
|
||||
Syntax::Proto3 => match field_type {
|
||||
FieldType::Message(_) => FieldModifier::Optional,
|
||||
_ => FieldModifier::UseDefault,
|
||||
},
|
||||
},
|
||||
Label::Required => FieldModifier::Required,
|
||||
Label::Repeated => FieldModifier::Repeated,
|
||||
}
|
||||
}
|
||||
|
||||
fn field_type(descriptors: &DescriptorSet, field: &FieldDescriptorProto) -> FieldType {
|
||||
match field.type_name.as_ref() {
|
||||
Some(type_name) => resolve_type(descriptors, type_name.as_str()),
|
||||
None => {
|
||||
let scalar =
|
||||
match Type::from_i32(field.r#type.expect("expected type")).expect("valid type") {
|
||||
Type::Double => ScalarType::F64,
|
||||
Type::Float => ScalarType::F32,
|
||||
Type::Int64 | Type::Sfixed64 | Type::Sint64 => ScalarType::I64,
|
||||
Type::Int32 | Type::Sfixed32 | Type::Sint32 => ScalarType::I32,
|
||||
Type::Uint64 | Type::Fixed64 => ScalarType::U64,
|
||||
Type::Uint32 | Type::Fixed32 => ScalarType::U32,
|
||||
Type::Bool => ScalarType::Bool,
|
||||
Type::String => ScalarType::String,
|
||||
Type::Bytes => ScalarType::Bytes,
|
||||
Type::Message | Type::Enum | Type::Group => panic!("no type name specified"),
|
||||
};
|
||||
FieldType::Scalar(scalar)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_type(descriptors: &DescriptorSet, type_name: &str) -> FieldType {
|
||||
assert!(
|
||||
type_name.starts_with('.'),
|
||||
"pbjson does not currently support resolving relative types"
|
||||
);
|
||||
let maybe_descriptor = descriptors
|
||||
.iter()
|
||||
.find(|(path, _)| path.matches_prefix(type_name));
|
||||
|
||||
match maybe_descriptor {
|
||||
Some((path, Descriptor::Enum(_))) => FieldType::Enum(path.clone()),
|
||||
Some((path, Descriptor::Message(descriptor))) => match descriptor.is_map() {
|
||||
true => {
|
||||
assert_eq!(descriptor.fields.len(), 2, "expected map to have 2 fields");
|
||||
let key = &descriptor.fields[0];
|
||||
let value = &descriptor.fields[1];
|
||||
|
||||
assert_eq!("key", key.name());
|
||||
assert_eq!("value", value.name());
|
||||
|
||||
let key_type = match field_type(descriptors, key) {
|
||||
FieldType::Scalar(scalar) => scalar,
|
||||
_ => panic!("non scalar map key"),
|
||||
};
|
||||
let value_type = field_type(descriptors, value);
|
||||
FieldType::Map(key_type, Box::new(value_type))
|
||||
}
|
||||
// Note: This may actually be a group but it is non-trivial to detect this,
|
||||
// they're deprecated, and pbjson doesn't need to be able to distinguish
|
||||
false => FieldType::Message(path.clone()),
|
||||
},
|
||||
None => panic!("failed to resolve type: {}", type_name),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
[package]
name = "pbjson_test"
version = "0.1.0"
authors = ["Raphael Taylor-Davies <r.taylordavies@googlemail.com>"]
edition = "2018"
description = "Test resources for pbjson conversion"

[dependencies]
prost = "0.8"
pbjson = { path = "../pbjson" }
serde = { version = "1.0", features = ["derive"] }

[dev-dependencies]
serde_json = "1.0"

[build-dependencies]
prost-build = "0.8"
pbjson_build = { path = "../pbjson_build" }
|
|
@ -0,0 +1,33 @@
|
|||
//! Compiles Protocol Buffers definitions into native Rust types

use std::env;
use std::path::PathBuf;

type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;

fn main() -> Result<()> {
    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");

    let proto_files = vec![root.join("syntax3.proto")];

    // Tell cargo to recompile if any of these proto files are changed
    for proto_file in &proto_files {
        println!("cargo:rerun-if-changed={}", proto_file.display());
    }

    let descriptor_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("proto_descriptor.bin");
    prost_build::Config::new()
        .file_descriptor_set_path(&descriptor_path)
        .compile_well_known_types()
        .disable_comments(&["."])
        .bytes(&[".test"])
        .compile_protos(&proto_files, &[root])?;

    let descriptor_set = std::fs::read(descriptor_path)?;
    pbjson_build::Builder::new()
        .register_descriptors(&descriptor_set)?
        .build(&[".test"])?;

    Ok(())
}
|
|
@ -0,0 +1,72 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package test.syntax3;
|
||||
|
||||
message Empty {}
|
||||
|
||||
message KitchenSink {
|
||||
// Standard enum
|
||||
enum Value {
|
||||
VALUE_UNKNOWN = 0;
|
||||
VALUE_A = 45;
|
||||
VALUE_B = 63;
|
||||
}
|
||||
|
||||
// An enumeration without prefixed variants
|
||||
enum Prefix {
|
||||
UNKNOWN = 0;
|
||||
A = 66;
|
||||
B = 20;
|
||||
}
|
||||
|
||||
int32 i32 = 1;
|
||||
optional int32 optional_i32 = 2;
|
||||
repeated int32 repeated_i32 = 3;
|
||||
|
||||
uint32 u32 = 4;
|
||||
optional uint32 optional_u32 = 5;
|
||||
repeated uint32 repeated_u32 = 6;
|
||||
|
||||
int64 i64 = 7;
|
||||
optional int64 optional_i64 = 8;
|
||||
repeated int64 repeated_i64 = 9;
|
||||
|
||||
uint64 u64 = 10;
|
||||
optional uint64 optional_u64 = 11;
|
||||
repeated uint64 repeated_u64 = 12;
|
||||
|
||||
Value value = 13;
|
||||
optional Value optional_value = 14;
|
||||
repeated Value repeated_value = 15;
|
||||
|
||||
Prefix prefix = 16;
|
||||
Empty empty = 17;
|
||||
|
||||
map<string, string> string_dict = 18;
|
||||
map<string, Empty> message_dict = 19;
|
||||
map<string, Prefix> enum_dict = 20;
|
||||
map<int64, Prefix> int64_dict = 21;
|
||||
map<int32, Prefix> int32_dict = 22;
|
||||
map<int32, uint64> integer_dict = 23;
|
||||
|
||||
bool bool = 24;
|
||||
optional bool optional_bool = 25;
|
||||
repeated bool repeated_bool = 26;
|
||||
|
||||
oneof one_of {
|
||||
int32 one_of_i32 = 27;
|
||||
bool one_of_bool = 28;
|
||||
Value one_of_value = 29;
|
||||
Empty one_of_message = 30;
|
||||
}
|
||||
|
||||
bytes bytes = 31;
|
||||
optional bytes optional_bytes = 32;
|
||||
repeated bytes repeated_bytes = 33;
|
||||
|
||||
// Bytes support is currently broken - https://github.com/tokio-rs/prost/issues/531
|
||||
// map<string, bytes> bytes_dict = 34;
|
||||
|
||||
string string = 35;
|
||||
optional string optional_string = 36;
|
||||
}
|
|
@ -0,0 +1,241 @@
|
|||
include!(concat!(env!("OUT_DIR"), "/test.syntax3.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/test.syntax3.serde.rs"));
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_empty() {
|
||||
let message = Empty {};
|
||||
|
||||
let encoded = serde_json::to_string(&message).unwrap();
|
||||
let _decoded: Empty = serde_json::from_str(&encoded).unwrap();
|
||||
|
||||
let err = serde_json::from_str::<Empty>("343").unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string().as_str(),
|
||||
"invalid type: integer `343`, expected struct test.syntax3.Empty at line 1 column 3"
|
||||
);
|
||||
|
||||
let err = serde_json::from_str::<Empty>("{\"foo\": \"bar\"}").unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string().as_str(),
|
||||
"unknown field `foo`, there are no fields at line 1 column 6"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_kitchen_sink() {
|
||||
let mut decoded: KitchenSink = serde_json::from_str("{}").unwrap();
|
||||
|
||||
let verify_encode = |decoded: &KitchenSink, expected: &str| {
|
||||
assert_eq!(serde_json::to_string(&decoded).unwrap().as_str(), expected);
|
||||
};
|
||||
|
||||
let verify_decode = |decoded: &KitchenSink, expected: &str| {
|
||||
assert_eq!(decoded, &serde_json::from_str(expected).unwrap());
|
||||
};
|
||||
|
||||
let verify = |decoded: &KitchenSink, expected: &str| {
|
||||
verify_encode(decoded, expected);
|
||||
verify_decode(decoded, expected);
|
||||
};
|
||||
|
||||
verify(&decoded, "{}");
|
||||
decoded.i32 = 24;
|
||||
verify(&decoded, r#"{"i32":24}"#);
|
||||
decoded.i32 = 0;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
// Explicit optional fields can distinguish between no value and default value
|
||||
decoded.optional_i32 = Some(2);
|
||||
verify(&decoded, r#"{"optionalI32":2}"#);
|
||||
|
||||
decoded.optional_i32 = Some(0);
|
||||
verify(&decoded, r#"{"optionalI32":0}"#);
|
||||
|
||||
// Can also decode from string
|
||||
verify_decode(&decoded, r#"{"optionalI32":"0"}"#);
|
||||
|
||||
decoded.optional_i32 = None;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
// 64-bit integers are encoded as strings
|
||||
decoded.i64 = 123125;
|
||||
verify(&decoded, r#"{"i64":"123125"}"#);
|
||||
|
||||
decoded.i64 = 0;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.optional_i64 = Some(532);
|
||||
verify(&decoded, r#"{"optionalI64":"532"}"#);
|
||||
|
||||
decoded.optional_i64 = Some(0);
|
||||
verify(&decoded, r#"{"optionalI64":"0"}"#);
|
||||
|
||||
// Can also decode from non-string
|
||||
verify_decode(&decoded, r#"{"optionalI64":0}"#);
|
||||
|
||||
decoded.optional_i64 = None;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.u64 = 34346;
|
||||
decoded.u32 = 567094456;
|
||||
decoded.optional_u32 = Some(0);
|
||||
decoded.optional_u64 = Some(3);
|
||||
verify(
|
||||
&decoded,
|
||||
r#"{"u32":567094456,"optionalU32":0,"u64":"34346","optionalU64":"3"}"#,
|
||||
);
|
||||
|
||||
decoded.u64 = 0;
|
||||
decoded.u32 = 0;
|
||||
decoded.optional_u32 = None;
|
||||
decoded.optional_u64 = None;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.repeated_i32 = vec![0, 23, 5, 6, 2, 34];
|
||||
verify(&decoded, r#"{"repeatedI32":[0,23,5,6,2,34]}"#);
|
||||
// Can also mix in some strings
|
||||
verify_decode(&decoded, r#"{"repeatedI32":[0,"23",5,6,"2",34]}"#);
|
||||
|
||||
decoded.repeated_i32 = vec![];
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.repeated_u64 = vec![0, 532, 2];
|
||||
verify(&decoded, r#"{"repeatedU64":["0","532","2"]}"#);
|
||||
// Can also mix in some non-strings
|
||||
verify_decode(&decoded, r#"{"repeatedU64":["0",532,"2"]}"#);
|
||||
|
||||
decoded.repeated_u64 = vec![];
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
// Enumerations should be encoded as strings
|
||||
decoded.value = kitchen_sink::Value::A as i32;
|
||||
verify(&decoded, r#"{"value":"VALUE_A"}"#);
|
||||
|
||||
// Can also use variant number
|
||||
verify_decode(&decoded, r#"{"value":45}"#);
|
||||
|
||||
decoded.value = kitchen_sink::Value::Unknown as i32;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.optional_value = Some(kitchen_sink::Value::Unknown as i32);
|
||||
verify(&decoded, r#"{"optionalValue":"VALUE_UNKNOWN"}"#);
|
||||
|
||||
// Can also use variant number
|
||||
verify_decode(&decoded, r#"{"optionalValue":0}"#);
|
||||
|
||||
decoded.optional_value = None;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded
|
||||
.string_dict
|
||||
.insert("foo".to_string(), "bar".to_string());
|
||||
verify(&decoded, r#"{"stringDict":{"foo":"bar"}}"#);
|
||||
|
||||
decoded.string_dict = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded
|
||||
.int32_dict
|
||||
.insert(343, kitchen_sink::Prefix::A as i32);
|
||||
// Dictionary keys should always be strings
|
||||
// Enum dictionary values should be encoded as strings
|
||||
verify(&decoded, r#"{"int32Dict":{"343":"A"}}"#);
|
||||
// Enum dictionary values can be decoded from integers
|
||||
verify_decode(&decoded, r#"{"int32Dict":{"343":66}}"#);
|
||||
|
||||
decoded.int32_dict = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
// 64-bit dictionary values should be encoded as strings
|
||||
decoded.integer_dict.insert(12, 13);
|
||||
verify(&decoded, r#"{"integerDict":{"12":"13"}}"#);
|
||||
// 64-bit dictionary values can be decoded from numeric types
|
||||
verify_decode(&decoded, r#"{"integerDict":{"12":13}}"#);
|
||||
|
||||
decoded.integer_dict = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.one_of = Some(kitchen_sink::OneOf::OneOfI32(0));
|
||||
verify(&decoded, r#"{"oneOfI32":0}"#);
|
||||
// Can also specify string
|
||||
verify_decode(&decoded, r#"{"oneOfI32":"0"}"#);
|
||||
|
||||
decoded.one_of = Some(kitchen_sink::OneOf::OneOfI32(12));
|
||||
verify(&decoded, r#"{"oneOfI32":12}"#);
|
||||
|
||||
decoded.one_of = Some(kitchen_sink::OneOf::OneOfBool(false));
|
||||
verify(&decoded, r#"{"oneOfBool":false}"#);
|
||||
|
||||
decoded.one_of = Some(kitchen_sink::OneOf::OneOfBool(true));
|
||||
verify(&decoded, r#"{"oneOfBool":true}"#);
|
||||
|
||||
decoded.one_of = Some(kitchen_sink::OneOf::OneOfValue(
|
||||
kitchen_sink::Value::B as i32,
|
||||
));
|
||||
verify(&decoded, r#"{"oneOfValue":"VALUE_B"}"#);
|
||||
// Can also specify enum variant
|
||||
verify_decode(&decoded, r#"{"oneOfValue":63}"#);
|
||||
|
||||
decoded.one_of = None;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.repeated_value = vec![
|
||||
kitchen_sink::Value::B as i32,
|
||||
kitchen_sink::Value::B as i32,
|
||||
kitchen_sink::Value::A as i32,
|
||||
];
|
||||
verify(
|
||||
&decoded,
|
||||
r#"{"repeatedValue":["VALUE_B","VALUE_B","VALUE_A"]}"#,
|
||||
);
|
||||
verify_decode(&decoded, r#"{"repeatedValue":[63,"VALUE_B","VALUE_A"]}"#);
|
||||
|
||||
decoded.repeated_value = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.bytes = prost::bytes::Bytes::from_static(b"kjkjkj");
|
||||
verify(&decoded, r#"{"bytes":"a2pramtq"}"#);
|
||||
|
||||
decoded.bytes = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.optional_bytes = Some(prost::bytes::Bytes::from_static(b"kjkjkj"));
|
||||
verify(&decoded, r#"{"optionalBytes":"a2pramtq"}"#);
|
||||
|
||||
decoded.optional_bytes = Some(Default::default());
|
||||
verify(&decoded, r#"{"optionalBytes":""}"#);
|
||||
|
||||
decoded.optional_bytes = None;
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.repeated_bytes = vec![
|
||||
prost::bytes::Bytes::from_static(b"sdfsd"),
|
||||
prost::bytes::Bytes::from_static(b"fghfg"),
|
||||
];
|
||||
verify(&decoded, r#"{"repeatedBytes":["c2Rmc2Q=","ZmdoZmc="]}"#);
|
||||
|
||||
decoded.repeated_bytes = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
// decoded.bytes_dict.insert(
|
||||
// "test".to_string(),
|
||||
// prost::bytes::Bytes::from_static(b"asdf"),
|
||||
// );
|
||||
// verify(&decoded, r#"{"bytesDict":{"test":"YXNkZgo="}}"#);
|
||||
//
|
||||
// decoded.bytes_dict = Default::default();
|
||||
// verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.string = "test".to_string();
|
||||
verify(&decoded, r#"{"string":"test"}"#);
|
||||
|
||||
decoded.string = Default::default();
|
||||
verify_decode(&decoded, "{}");
|
||||
|
||||
decoded.optional_string = Some(String::new());
|
||||
verify(&decoded, r#"{"optionalString":""}"#);
|
||||
}
|
||||
}
|
|
@ -161,6 +161,51 @@ impl Predicate {
|
|||
pub fn is_empty(&self) -> bool {
|
||||
self == &EMPTY_PREDICATE
|
||||
}
|
||||
|
||||
/// Add each range [start, stop] of the delete_predicates into the predicate in
|
||||
/// the form "time < start OR time > stop" to eliminate that range from the query
|
||||
pub fn add_delete_ranges<S>(&mut self, delete_predicates: &[S])
|
||||
where
|
||||
S: AsRef<Self>,
|
||||
{
|
||||
for pred in delete_predicates {
|
||||
let pred = pred.as_ref();
|
||||
|
||||
if let Some(range) = pred.range {
|
||||
let expr = col(TIME_COLUMN_NAME)
|
||||
.lt(lit(range.start))
|
||||
.or(col(TIME_COLUMN_NAME).gt(lit(range.end)));
|
||||
self.exprs.push(expr);
|
||||
}
|
||||
}
|
||||
}
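A hedged usage sketch (not part of this change): assuming `PredicateBuilder::timestamp_range` populates the predicate's `range` field, as the scenario code later in this diff suggests, a caller could fold delete ranges into a query predicate like this:

// Illustrative only; `Arc` and `PredicateBuilder` assumed in scope.
let delete = Arc::new(PredicateBuilder::new().timestamp_range(10, 20).build());
let mut query_pred = PredicateBuilder::new().table("cpu").build();
query_pred.add_delete_ranges(&[delete]);
// query_pred.exprs now ends with: time < 10 OR time > 20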
/// Add a list of disjunctive negated expressions.
/// Example: there are two deletes as follows:
/// . Delete_1: WHERE city != "Boston" AND temp = 70
/// . Delete_2: WHERE state = "NY" AND route != "I90"
/// The negated list will be "NOT(Delete_1)", "NOT(Delete_2)", which means
/// NOT(city != "Boston" AND temp = 70), NOT(state = "NY" AND route != "I90"), which means
/// [NOT(city != "Boston") OR NOT(temp = 70)], [NOT(state = "NY") OR NOT(route != "I90")]
pub fn add_delete_exprs<S>(&mut self, delete_predicates: &[S])
|
||||
where
|
||||
S: AsRef<Self>,
|
||||
{
|
||||
for pred in delete_predicates {
|
||||
let pred = pred.as_ref();
|
||||
|
||||
let mut expr: Option<Expr> = None;
|
||||
for exp in &pred.exprs {
|
||||
match expr {
|
||||
None => expr = Some(exp.clone().not()),
|
||||
Some(e) => expr = Some(e.or(exp.clone().not())),
|
||||
}
|
||||
}
|
||||
if let Some(e) = expr {
|
||||
self.exprs.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
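To make the negation concrete, here is an illustrative sketch (using only the DataFusion `Expr` combinators already used above) of what the fold in `add_delete_exprs` produces for Delete_1:

// Illustrative only; `col`, `lit` and `Expr` from DataFusion assumed in scope.
let conjuncts = vec![col("city").not_eq(lit("Boston")), col("temp").eq(lit(70))];
let negated = conjuncts
    .iter()
    .fold(None::<Expr>, |acc, e| match acc {
        None => Some(e.clone().not()),
        Some(prev) => Some(prev.or(e.clone().not())),
    })
    .expect("at least one conjunct");
// negated is: NOT(city != "Boston") OR NOT(temp = 70)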
impl fmt::Display for Predicate {
|
||||
|
|
|
@ -11,18 +11,19 @@ use datafusion::{
|
|||
logical_plan::{LogicalPlan, UserDefinedLogicalNode},
|
||||
physical_plan::{
|
||||
coalesce_partitions::CoalescePartitionsExec,
|
||||
collect, displayable,
|
||||
displayable,
|
||||
planner::{DefaultPhysicalPlanner, ExtensionPlanner},
|
||||
ExecutionPlan, PhysicalPlanner, SendableRecordBatchStream,
|
||||
},
|
||||
prelude::*,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use trace::{ctx::SpanContext, span::SpanRecorder};
|
||||
|
||||
use crate::exec::{
|
||||
fieldlist::{FieldList, IntoFieldList},
|
||||
query_tracing::send_metrics_to_tracing,
|
||||
query_tracing::TracedStream,
|
||||
schema_pivot::{SchemaPivotExec, SchemaPivotNode},
|
||||
seriesset::{SeriesSetConverter, SeriesSetItem},
|
||||
split::StreamSplitExec,
|
||||
|
@ -272,45 +273,63 @@ impl IOxExecutionContext {
|
|||
/// Executes the logical plan using DataFusion on a separate
|
||||
/// thread pool and produces RecordBatches
|
||||
pub async fn collect(&self, physical_plan: Arc<dyn ExecutionPlan>) -> Result<Vec<RecordBatch>> {
|
||||
let ctx = self.child_ctx("collect");
|
||||
debug!(
|
||||
"Running plan, physical:\n{}",
|
||||
displayable(physical_plan.as_ref()).indent()
|
||||
);
|
||||
let ctx = self.child_ctx("collect");
|
||||
let stream = ctx.execute_stream(physical_plan).await?;
|
||||
|
||||
let res = ctx.run(collect(Arc::clone(&physical_plan))).await;
|
||||
|
||||
// send metrics to tracing, even on error
|
||||
ctx.save_metrics(physical_plan);
|
||||
res
|
||||
ctx.run(
|
||||
stream
|
||||
.err_into() // convert to DataFusionError
|
||||
.try_collect(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Executes the physical plan and produces a RecordBatchStream to stream
|
||||
/// over the result that iterates over the results.
|
||||
pub async fn execute(
|
||||
/// Executes the physical plan and produces a
/// `SendableRecordBatchStream` to stream over the results.
/// The creation of the stream is performed in a separate
/// thread pool.
|
||||
pub async fn execute_stream(
|
||||
&self,
|
||||
physical_plan: Arc<dyn ExecutionPlan>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
match physical_plan.output_partitioning().partition_count() {
|
||||
0 => unreachable!(),
|
||||
1 => self.execute_partition(physical_plan, 0).await,
|
||||
1 => self.execute_stream_partitioned(physical_plan, 0).await,
|
||||
_ => {
|
||||
// Merge into a single partition
|
||||
self.execute_partition(Arc::new(CoalescePartitionsExec::new(physical_plan)), 0)
|
||||
.await
|
||||
self.execute_stream_partitioned(
|
||||
Arc::new(CoalescePartitionsExec::new(physical_plan)),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes a single partition of a physical plan and produces a RecordBatchStream to stream
|
||||
/// over the result that iterates over the results.
|
||||
pub async fn execute_partition(
|
||||
/// Executes a single partition of a physical plan and produces a
/// `SendableRecordBatchStream` to stream over the results.
/// The creation of the stream is performed in a separate
/// thread pool.
|
||||
pub async fn execute_stream_partitioned(
|
||||
&self,
|
||||
physical_plan: Arc<dyn ExecutionPlan>,
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
self.run(async move { physical_plan.execute(partition).await })
|
||||
.await
|
||||
let span = self
|
||||
.recorder
|
||||
.span()
|
||||
.map(|span| span.child("execute_stream_partitioned"));
|
||||
|
||||
self.run(async move {
|
||||
let stream = physical_plan.execute(partition).await?;
|
||||
let stream = TracedStream::new(stream, span, physical_plan);
|
||||
Ok(Box::pin(stream) as _)
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Executes the SeriesSetPlans on the query executor, in
|
||||
|
@ -349,7 +368,7 @@ impl IOxExecutionContext {
|
|||
|
||||
let physical_plan = ctx.prepare_plan(&plan)?;
|
||||
|
||||
let it = ctx.execute(physical_plan).await?;
|
||||
let it = ctx.execute_stream(physical_plan).await?;
|
||||
|
||||
SeriesSetConverter::default()
|
||||
.convert(
|
||||
|
@ -486,19 +505,4 @@ impl IOxExecutionContext {
|
|||
recorder: self.recorder.child(name),
|
||||
}
|
||||
}
|
||||
|
||||
/// Saves any DataFusion metrics that are currently present in
|
||||
/// `physical_plan` to the span recorder so they show up in
|
||||
/// distributed traces (e.g. Jaeger)
|
||||
///
|
||||
/// This function should be invoked after `physical_plan` has been
/// fully `collect`ed, meaning that `PhysicalPlan::execute()` has
/// been invoked and the resulting streams have been completely
/// consumed. Calling `save_metrics` prior to this point
/// may result in saving incomplete information.
|
||||
pub fn save_metrics(&self, physical_plan: Arc<dyn ExecutionPlan>) {
|
||||
if let Some(span) = self.recorder.span() {
|
||||
send_metrics_to_tracing(span, physical_plan.as_ref())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,67 @@
|
|||
//! This module contains the code to map DataFusion metrics to `Span`s
|
||||
//! for use in distributed tracing (e.g. Jaeger)
|
||||
|
||||
use std::{borrow::Cow, fmt};
|
||||
use std::{borrow::Cow, fmt, sync::Arc};
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use chrono::{DateTime, Utc};
|
||||
use datafusion::physical_plan::{
|
||||
metrics::{MetricValue, MetricsSet},
|
||||
DisplayFormatType, ExecutionPlan,
|
||||
DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream,
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use observability_deps::tracing::debug;
|
||||
use trace::span::Span;
|
||||
use trace::span::{Span, SpanRecorder};
|
||||
|
||||
/// Stream wrapper that records DataFusion `MetricSets` into IOx
|
||||
/// [`Span`]s when it is dropped.
|
||||
pub(crate) struct TracedStream {
|
||||
inner: SendableRecordBatchStream,
|
||||
span_recorder: SpanRecorder,
|
||||
physical_plan: Arc<dyn ExecutionPlan>,
|
||||
}
|
||||
|
||||
impl TracedStream {
|
||||
/// Return a stream that records DataFusion `MetricSets` from
|
||||
/// `physical_plan` into `span` when dropped.
|
||||
pub(crate) fn new(
|
||||
inner: SendableRecordBatchStream,
|
||||
span: Option<trace::span::Span>,
|
||||
physical_plan: Arc<dyn ExecutionPlan>,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
span_recorder: SpanRecorder::new(span),
|
||||
physical_plan,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordBatchStream for TracedStream {
|
||||
fn schema(&self) -> arrow::datatypes::SchemaRef {
|
||||
self.inner.schema()
|
||||
}
|
||||
}
|
||||
|
||||
impl futures::Stream for TracedStream {
|
||||
type Item = arrow::error::Result<RecordBatch>;
|
||||
|
||||
fn poll_next(
|
||||
mut self: std::pin::Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
) -> std::task::Poll<Option<Self::Item>> {
|
||||
self.inner.poll_next_unpin(cx)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TracedStream {
|
||||
fn drop(&mut self) {
|
||||
if let Some(span) = self.span_recorder.span() {
|
||||
let default_end_time = Utc::now();
|
||||
send_metrics_to_tracing(default_end_time, span, self.physical_plan.as_ref());
|
||||
}
|
||||
}
|
||||
}
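For context, a hedged usage sketch mirroring the call site in `execute_stream_partitioned` shown earlier in this diff: the partition stream is wrapped so that the plan's metrics are exported to the span when the consumer drops the stream. `physical_plan`, `partition`, and `span` are assumed to be in scope as at that call site.

// Illustrative only; mirrors the call site shown earlier in this diff.
let inner = physical_plan.execute(partition).await?;
let traced = TracedStream::new(inner, span, Arc::clone(&physical_plan));
let stream: SendableRecordBatchStream = Box::pin(traced);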
/// This function translates data in DataFusion `MetricSets` into IOx
|
||||
/// [`Span`]s. It records a snapshot of the current state of the
|
||||
|
@ -26,15 +78,7 @@ use trace::span::Span;
|
|||
/// 1. If the ExecutionPlan had no metrics
|
||||
/// 2. The total number of rows produced by the ExecutionPlan (if available)
|
||||
/// 3. The elapsed compute time taken by the ExecutionPlan
|
||||
pub(crate) fn send_metrics_to_tracing(parent_span: &Span, physical_plan: &dyn ExecutionPlan) {
|
||||
// The parent span may be open, but since the physical_plan is
|
||||
// assumed to be fully collected, using `now()` is a conservative
|
||||
// estimate of the end time
|
||||
let default_end_time = Utc::now();
|
||||
send_metrics_to_tracing_inner(default_end_time, parent_span, physical_plan)
|
||||
}
|
||||
|
||||
fn send_metrics_to_tracing_inner(
|
||||
fn send_metrics_to_tracing(
|
||||
default_end_time: DateTime<Utc>,
|
||||
parent_span: &Span,
|
||||
physical_plan: &dyn ExecutionPlan,
|
||||
|
@ -101,7 +145,7 @@ fn send_metrics_to_tracing_inner(
|
|||
|
||||
// recurse
|
||||
for child in physical_plan.children() {
|
||||
send_metrics_to_tracing_inner(span_end, &span, child.as_ref())
|
||||
send_metrics_to_tracing(span_end, &span, child.as_ref())
|
||||
}
|
||||
|
||||
span.export()
|
||||
|
@ -173,17 +217,9 @@ mod tests {
|
|||
Metric,
|
||||
};
|
||||
|
||||
use std::{
|
||||
num::{NonZeroU128, NonZeroU64},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use trace::{
|
||||
ctx::{SpanContext, SpanId, TraceId},
|
||||
span::{MetaValue, SpanStatus},
|
||||
RingBufferTraceCollector,
|
||||
};
|
||||
use trace::{ctx::SpanContext, span::MetaValue, RingBufferTraceCollector};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -193,7 +229,7 @@ mod tests {
|
|||
let exec = TestExec::new(name, Default::default());
|
||||
|
||||
let traces = TraceBuilder::new();
|
||||
send_metrics_to_tracing(&traces.make_span(), &exec);
|
||||
send_metrics_to_tracing(Utc::now(), &traces.make_span(), &exec);
|
||||
|
||||
let spans = traces.spans();
|
||||
assert_eq!(spans.len(), 1);
|
||||
|
@ -224,7 +260,7 @@ mod tests {
|
|||
exec.new_child("child4", make_time_metricset(None, None));
|
||||
|
||||
let traces = TraceBuilder::new();
|
||||
send_metrics_to_tracing_inner(ts5, &traces.make_span(), &exec);
|
||||
send_metrics_to_tracing(ts5, &traces.make_span(), &exec);
|
||||
|
||||
let spans = traces.spans();
|
||||
println!("Spans: \n\n{:#?}", spans);
|
||||
|
@ -250,7 +286,7 @@ mod tests {
|
|||
exec.metrics = None;
|
||||
|
||||
let traces = TraceBuilder::new();
|
||||
send_metrics_to_tracing(&traces.make_span(), &exec);
|
||||
send_metrics_to_tracing(Utc::now(), &traces.make_span(), &exec);
|
||||
|
||||
let spans = traces.spans();
|
||||
assert_eq!(spans.len(), 1);
|
||||
|
@ -274,7 +310,7 @@ mod tests {
|
|||
add_elapsed_compute(exec.metrics_mut(), 2000, 2);
|
||||
|
||||
let traces = TraceBuilder::new();
|
||||
send_metrics_to_tracing(&traces.make_span(), &exec);
|
||||
send_metrics_to_tracing(Utc::now(), &traces.make_span(), &exec);
|
||||
|
||||
// aggregated metrics should be reported
|
||||
let spans = traces.spans();
|
||||
|
@ -358,23 +394,7 @@ mod tests {
|
|||
|
||||
// create a new span connected to the collector
|
||||
fn make_span(&self) -> Span {
|
||||
let collector = Arc::clone(&self.collector);
|
||||
|
||||
// lifted from make_span in trace/src/span.rs
|
||||
Span {
|
||||
name: "foo".into(),
|
||||
ctx: SpanContext {
|
||||
trace_id: TraceId(NonZeroU128::new(23948923).unwrap()),
|
||||
parent_span_id: None,
|
||||
span_id: SpanId(NonZeroU64::new(3498394).unwrap()),
|
||||
collector: Some(collector),
|
||||
},
|
||||
start: None,
|
||||
end: None,
|
||||
status: SpanStatus::Unknown,
|
||||
metadata: Default::default(),
|
||||
events: vec![],
|
||||
}
|
||||
SpanContext::new(Arc::clone(&self.collector) as _).child("foo")
|
||||
}
|
||||
|
||||
/// return all collected spans
|
||||
|
|
|
@ -524,6 +524,8 @@ impl InfluxRpcPlanner {
|
|||
/// The data is sorted on (tag_col1, tag_col2, ...) so that all
|
||||
/// rows for a particular series (groups where all tags are the
|
||||
/// same) occur together in the plan
|
||||
// NGA todo: may need to add delete predicate here to eliminate deleted data at read time
|
||||
// https://github.com/influxdata/influxdb_iox/issues/2548
|
||||
pub fn read_filter<D>(&self, database: &D, predicate: Predicate) -> Result<SeriesSetPlans>
|
||||
where
|
||||
D: QueryDatabase + 'static,
|
||||
|
|
|
@ -18,7 +18,7 @@ use internal_types::{
|
|||
schema::{sort::SortKey, Schema, TIME_COLUMN_NAME},
|
||||
selection::Selection,
|
||||
};
|
||||
use observability_deps::tracing::trace;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::predicate::{Predicate, PredicateMatch};
|
||||
|
||||
use hashbrown::HashMap;
|
||||
|
@ -46,7 +46,7 @@ pub trait QueryChunkMeta: Sized {
|
|||
fn schema(&self) -> Arc<Schema>;
|
||||
|
||||
// return a reference to delete predicates of the chunk
|
||||
fn delete_predicates(&self) -> Arc<Vec<Predicate>>;
|
||||
fn delete_predicates(&self) -> &[Arc<Predicate>];
|
||||
}
|
||||
|
||||
/// A `Database` is the main trait implemented by the IOx subsystems
|
||||
|
@ -137,6 +137,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync {
|
|||
&self,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
delete_predicates: &[Arc<Predicate>],
|
||||
) -> Result<SendableRecordBatchStream, Self::Error>;
|
||||
|
||||
/// Returns true if data of this chunk is sorted
|
||||
|
@ -165,8 +166,10 @@ where
|
|||
self.as_ref().schema()
|
||||
}
|
||||
|
||||
fn delete_predicates(&self) -> Arc<Vec<Predicate>> {
|
||||
self.as_ref().delete_predicates()
|
||||
fn delete_predicates(&self) -> &[Arc<Predicate>] {
|
||||
let pred = self.as_ref().delete_predicates();
|
||||
debug!(?pred, "Delete predicate in QueryChunkMeta");
|
||||
pred
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -116,14 +116,18 @@ impl<C: QueryChunk + 'static> ExecutionPlan for IOxReadFilterNode<C> {
|
|||
let selection_cols = restrict_selection(selection_cols, &chunk_table_schema);
|
||||
let selection = Selection::Some(&selection_cols);
|
||||
|
||||
let stream = chunk.read_filter(&self.predicate, selection).map_err(|e| {
|
||||
DataFusionError::Execution(format!(
|
||||
"Error creating scan for table {} chunk {}: {}",
|
||||
self.table_name,
|
||||
chunk.id(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
let del_preds = chunk.delete_predicates();
|
||||
|
||||
let stream = chunk
|
||||
.read_filter(&self.predicate, selection, del_preds)
|
||||
.map_err(|e| {
|
||||
DataFusionError::Execution(format!(
|
||||
"Error creating scan for table {} chunk {}: {}",
|
||||
self.table_name,
|
||||
chunk.id(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
// all CPU time is now done, pass in baseline metrics to adapter
|
||||
timer.done();
|
||||
|
|
|
@ -25,6 +25,7 @@ use internal_types::{
|
|||
schema::{builder::SchemaBuilder, merge::SchemaMerger, InfluxColumnType, Schema},
|
||||
selection::Selection,
|
||||
};
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
use snafu::Snafu;
|
||||
use std::num::NonZeroU64;
|
||||
|
@ -173,6 +174,9 @@ pub struct TestChunk {
|
|||
/// Return value for apply_predicate, if desired
|
||||
predicate_match: Option<PredicateMatch>,
|
||||
|
||||
/// Copy of delete predicates passed
|
||||
delete_predicates: Vec<Arc<Predicate>>,
|
||||
|
||||
/// Order of this chunk relative to other overlapping chunks.
|
||||
order: ChunkOrder,
|
||||
}
|
||||
|
@ -248,6 +252,7 @@ impl TestChunk {
|
|||
table_data: Default::default(),
|
||||
saved_error: Default::default(),
|
||||
predicate_match: Default::default(),
|
||||
delete_predicates: Default::default(),
|
||||
order: ChunkOrder::new(0),
|
||||
}
|
||||
}
|
||||
|
@ -819,6 +824,7 @@ impl QueryChunk for TestChunk {
|
|||
&self,
|
||||
predicate: &Predicate,
|
||||
_selection: Selection<'_>,
|
||||
_delete_predicates: &[Arc<Predicate>],
|
||||
) -> Result<SendableRecordBatchStream, Self::Error> {
|
||||
self.check_error()?;
|
||||
|
||||
|
@ -908,12 +914,11 @@ impl QueryChunkMeta for TestChunk {
|
|||
}
|
||||
|
||||
// return a reference to delete predicates of the chunk
|
||||
fn delete_predicates(&self) -> Arc<Vec<Predicate>> {
|
||||
// Since this is the test chunk and its focus is not (yet) on
|
||||
// deletion, return an empty delete predicate now which means nothing is deleted
|
||||
// from this test chunk
|
||||
let pred: Vec<Predicate> = vec![];
|
||||
Arc::new(pred)
|
||||
fn delete_predicates(&self) -> &[Arc<Predicate>] {
|
||||
let pred = &self.delete_predicates;
|
||||
debug!(?pred, "Delete predicate in Test Chunk");
|
||||
|
||||
pred
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -923,8 +928,9 @@ pub async fn raw_data(chunks: &[Arc<TestChunk>]) -> Vec<RecordBatch> {
|
|||
for c in chunks {
|
||||
let pred = Predicate::default();
|
||||
let selection = Selection::All;
|
||||
let delete_predicates: Vec<Arc<Predicate>> = vec![];
|
||||
let mut stream = c
|
||||
.read_filter(&pred, selection)
|
||||
.read_filter(&pred, selection, &delete_predicates)
|
||||
.expect("Error in read_filter");
|
||||
while let Some(b) = stream.next().await {
|
||||
let b = b.expect("Error in stream");
|
||||
|
|
|
@ -10,19 +10,19 @@ description = "Tests of the query engine against different database configuratio
|
|||
[dependencies]
|
||||
async-trait = "0.1"
|
||||
data_types = { path = "../data_types" }
|
||||
datafusion = { path = "../datafusion" }
|
||||
once_cell = { version = "1.4.0", features = ["parking_lot"] }
|
||||
predicate = { path = "../predicate" }
|
||||
query = { path = "../query" }
|
||||
server = { path = "../server" }
|
||||
|
||||
[dev-dependencies]
|
||||
arrow = { version = "5.0", features = ["prettyprint"] }
|
||||
arrow_util = { path = "../arrow_util" }
|
||||
datafusion = { path = "../datafusion" }
|
||||
data_types = { path = "../data_types" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = { path = "../object_store" }
|
||||
predicate = { path = "../predicate" }
|
||||
snafu = "0.6.3"
|
||||
tempfile = "3.1.0"
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
//! This module contains testing scenarios for Db
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
#[allow(unused_imports, dead_code, unused_macros)]
|
||||
use predicate::predicate::{Predicate, PredicateBuilder};
|
||||
|
||||
use query::QueryChunk;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
@ -1198,3 +1199,650 @@ impl DbSetup for ChunkOrder {
|
|||
vec![scenario]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Setup for delete query test with one table and one chunk moved from MUB to RUB to OS
|
||||
pub struct DeleteFromMubOneMeasurementOneChunk {}
|
||||
#[async_trait]
|
||||
impl DbSetup for DeleteFromMubOneMeasurementOneChunk {
|
||||
async fn make(&self) -> Vec<DbScenario> {
|
||||
// The main purpose of these scenarios is that the delete predicate is added in MUB
// and is carried along as the chunk is moved
|
||||
|
||||
// General setup for all scenarios
|
||||
let partition_key = "1970-01-01T00";
|
||||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec!["cpu bar=1 10", "cpu bar=2 20"];
|
||||
// delete predicate
|
||||
let i: f64 = 1.0;
|
||||
let expr = col("bar").eq(lit(i));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 15)
|
||||
.add_expr(expr)
|
||||
.build();
|
||||
|
||||
// delete happens when data in MUB
|
||||
let _scenario_mub = make_delete_mub(lp_lines.clone(), pred.clone()).await;
|
||||
|
||||
// delete happens when data in MUB then moved to RUB
|
||||
let scenario_rub =
|
||||
make_delete_mub_to_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await;
|
||||
|
||||
// delete happens when data in MUB then moved to RUB and then persisted
|
||||
let scenario_rub_os = make_delete_mub_to_rub_and_os(
|
||||
lp_lines.clone(),
|
||||
pred.clone(),
|
||||
table_name,
|
||||
partition_key,
|
||||
)
|
||||
.await;
|
||||
|
||||
// delete happens when data in MUB then moved to RUB, then persisted, and then RUB is unloaded
|
||||
let scenario_os =
|
||||
make_delete_mub_to_os(lp_lines.clone(), pred, table_name, partition_key).await;
|
||||
|
||||
// return scenarios to run queries
|
||||
// NGA todo: add scenario_mub to this list after the deleted data is removed in the scan;
// right now the MUB does not push the predicate down, so the result is not correct yet
|
||||
vec![scenario_rub, scenario_rub_os, scenario_os]
|
||||
}
|
||||
}
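As a usage note (mirroring the SQL test harness that appears later in this diff), these setups are exercised through `run_sql_test_case!`, for example:

// Illustrative only; `run_sql_test_case!` and `expected` as used in the SQL tests below.
run_sql_test_case!(DeleteFromMubOneMeasurementOneChunk {}, "SELECT * from cpu", &expected);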
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Setup for delete query test with one table and one chunk moved from RUB to OS
|
||||
pub struct DeleteFromRubOneMeasurementOneChunk {}
|
||||
#[async_trait]
|
||||
impl DbSetup for DeleteFromRubOneMeasurementOneChunk {
|
||||
async fn make(&self) -> Vec<DbScenario> {
|
||||
// The main purpose of these scenarios is that the delete predicate is added in RUB
// and is carried along as the chunk is moved
|
||||
|
||||
// General setup for all scenarios
|
||||
let partition_key = "1970-01-01T00";
|
||||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec!["cpu bar=1 10", "cpu bar=2 20"];
|
||||
// delete predicate
|
||||
let i: f64 = 1.0;
|
||||
let expr = col("bar").eq(lit(i));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 15)
|
||||
.add_expr(expr)
|
||||
.build();
|
||||
|
||||
// delete happens to data in RUB
|
||||
let scenario_rub =
|
||||
make_delete_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await;
|
||||
|
||||
// delete happens to data in RUB then persisted
|
||||
let scenario_rub_os =
|
||||
make_delete_rub_to_os(lp_lines.clone(), pred.clone(), table_name, partition_key).await;
|
||||
|
||||
// delete happens to data in RUB then persisted then RUB unloaded
|
||||
let scenario_os =
|
||||
make_delete_rub_to_os_and_unload_rub(lp_lines.clone(), pred, table_name, partition_key)
|
||||
.await;
|
||||
|
||||
// return scenarios to run queries
|
||||
vec![scenario_rub, scenario_rub_os, scenario_os]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Setup for delete query test with one table and one chunk in both RUB and OS
|
||||
pub struct DeleteFromOsOneMeasurementOneChunk {}
|
||||
#[async_trait]
|
||||
impl DbSetup for DeleteFromOsOneMeasurementOneChunk {
|
||||
async fn make(&self) -> Vec<DbScenario> {
|
||||
// The main purpose of these scenarios is that the delete predicate is added to persisted chunks
|
||||
|
||||
// General setup for all scenarios
|
||||
let partition_key = "1970-01-01T00";
|
||||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec!["cpu bar=1 10", "cpu bar=2 20"];
|
||||
// delete predicate
|
||||
let i: f64 = 1.0;
|
||||
let expr = col("bar").eq(lit(i));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 15)
|
||||
.add_expr(expr)
|
||||
.build();
|
||||
|
||||
// delete happens after data is persisted but still in RUB
|
||||
let scenario_rub_os =
|
||||
make_delete_os_with_rub(lp_lines.clone(), pred.clone(), table_name, partition_key)
|
||||
.await;
|
||||
|
||||
// delete happens after data is persisted but still in RUB and then unload RUB
|
||||
let _scenario_rub_os_unload_rub = make_delete_os_with_rub_then_unload_rub(
|
||||
lp_lines.clone(),
|
||||
pred.clone(),
|
||||
table_name,
|
||||
partition_key,
|
||||
)
|
||||
.await;
|
||||
|
||||
// delete happens after data is persisted and RUB is unloaded
|
||||
let _scenario_os = make_delete_os(lp_lines.clone(), pred, table_name, partition_key).await;
|
||||
|
||||
// return scenarios to run queries
|
||||
//vec![scenario_rub_os, scenario_rub_os_unload_rub, scenario_os]
|
||||
// NGA todo: turn the last 2 scenarios on when #2518 and #2550 are done
|
||||
vec![scenario_rub_os]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Setup for multi-expression delete query test with one table and one chunk moved from MUB to RUB to OS
|
||||
pub struct DeleteMultiExprsFromMubOneMeasurementOneChunk {}
|
||||
#[async_trait]
|
||||
impl DbSetup for DeleteMultiExprsFromMubOneMeasurementOneChunk {
|
||||
async fn make(&self) -> Vec<DbScenario> {
|
||||
// The main purpose of these scenarios is that the multi-expression delete predicate is added
// in MUB and is carried along as the chunk is moved
|
||||
|
||||
// General setup for all scenarios
|
||||
let partition_key = "1970-01-01T00";
|
||||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec![
|
||||
"cpu,foo=me bar=1 10",
|
||||
"cpu,foo=you bar=2 20",
|
||||
"cpu,foo=me bar=1 30",
|
||||
"cpu,foo=me bar=1 40",
|
||||
];
|
||||
// delete predicate
|
||||
let i: f64 = 1.0;
|
||||
let expr1 = col("bar").eq(lit(i));
|
||||
let expr2 = col("foo").eq(lit("me"));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 32)
|
||||
.add_expr(expr1)
|
||||
.add_expr(expr2)
|
||||
.build();
|
||||
|
||||
// delete happens when data in MUB
|
||||
let _scenario_mub = make_delete_mub(lp_lines.clone(), pred.clone()).await;
|
||||
|
||||
// delete happens when data in MUB then moved to RUB
|
||||
let scenario_rub =
|
||||
make_delete_mub_to_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await;
|
||||
|
||||
// delete happens when data in MUB then moved to RUB and then persisted
|
||||
let scenario_rub_os = make_delete_mub_to_rub_and_os(
|
||||
lp_lines.clone(),
|
||||
pred.clone(),
|
||||
table_name,
|
||||
partition_key,
|
||||
)
|
||||
.await;
|
||||
|
||||
// delete happens when data in MUB then moved to RUB, then persisted, and then RUB is unloaded
|
||||
let scenario_os =
|
||||
make_delete_mub_to_os(lp_lines.clone(), pred, table_name, partition_key).await;
|
||||
|
||||
// return scenarios to run queries
|
||||
// NGA todo: add scenario_mub to this list after the deleted data is removed in the scan;
// right now the MUB does not push the predicate down, so the result is not correct yet
|
||||
vec![scenario_rub, scenario_rub_os, scenario_os]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Setup for multi-expression delete query test with one table and one chunk moved from RUB to OS
|
||||
pub struct DeleteMultiExprsFromRubOneMeasurementOneChunk {}
|
||||
#[async_trait]
|
||||
impl DbSetup for DeleteMultiExprsFromRubOneMeasurementOneChunk {
|
||||
async fn make(&self) -> Vec<DbScenario> {
|
||||
// The main purpose of these scenarios is that the multi-expression delete predicate is added
// in RUB and is carried along as the chunk is moved
|
||||
|
||||
// General setup for all scenarios
|
||||
let partition_key = "1970-01-01T00";
|
||||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec![
|
||||
"cpu,foo=me bar=1 10",
|
||||
"cpu,foo=you bar=2 20",
|
||||
"cpu,foo=me bar=1 30",
|
||||
"cpu,foo=me bar=1 40",
|
||||
];
|
||||
// delete predicate
|
||||
let i: f64 = 1.0;
|
||||
let expr1 = col("bar").eq(lit(i));
|
||||
let expr2 = col("foo").eq(lit("me"));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 32)
|
||||
.add_expr(expr1)
|
||||
.add_expr(expr2)
|
||||
.build();
|
||||
|
||||
// delete happens to data in RUB
|
||||
let scenario_rub =
|
||||
make_delete_rub(lp_lines.clone(), pred.clone(), table_name, partition_key).await;
|
||||
|
||||
// delete happens to data in RUB then persisted
|
||||
let scenario_rub_os =
|
||||
make_delete_rub_to_os(lp_lines.clone(), pred.clone(), table_name, partition_key).await;
|
||||
|
||||
// delete happens to data in RUB then persisted then RUB unloaded
|
||||
let scenario_os =
|
||||
make_delete_rub_to_os_and_unload_rub(lp_lines.clone(), pred, table_name, partition_key)
|
||||
.await;
|
||||
|
||||
// return scenarios to run queries
|
||||
vec![scenario_rub, scenario_rub_os, scenario_os]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Setup for multi-expression delete query test with one table and one chunk in both RUB and OS
|
||||
pub struct DeleteMultiExprsFromOsOneMeasurementOneChunk {}
|
||||
#[async_trait]
|
||||
impl DbSetup for DeleteMultiExprsFromOsOneMeasurementOneChunk {
|
||||
async fn make(&self) -> Vec<DbScenario> {
|
||||
// The main purpose of these scenarios is that the multi-expression delete predicate is
// added to persisted chunks
|
||||
|
||||
// General setup for all scenarios
|
||||
let partition_key = "1970-01-01T00";
|
||||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec![
|
||||
"cpu,foo=me bar=1 10",
|
||||
"cpu,foo=you bar=2 20",
|
||||
"cpu,foo=me bar=1 30",
|
||||
"cpu,foo=me bar=1 40",
|
||||
];
|
||||
// delete predicate
|
||||
let i: f64 = 1.0;
|
||||
let expr1 = col("bar").eq(lit(i));
|
||||
let expr2 = col("foo").eq(lit("me"));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 32)
|
||||
.add_expr(expr1)
|
||||
.add_expr(expr2)
|
||||
.build();
|
||||
|
||||
// delete happens after data is persisted but still in RUB
|
||||
let scenario_rub_os =
|
||||
make_delete_os_with_rub(lp_lines.clone(), pred.clone(), table_name, partition_key)
|
||||
.await;
|
||||
|
||||
// delete happens after data is persisted but still in RUB and then unload RUB
|
||||
let _scenario_rub_os_unload_rub = make_delete_os_with_rub_then_unload_rub(
|
||||
lp_lines.clone(),
|
||||
pred.clone(),
|
||||
table_name,
|
||||
partition_key,
|
||||
)
|
||||
.await;
|
||||
|
||||
// delete happens after data is persisted and RUB is unloaded
|
||||
let _scenario_os = make_delete_os(lp_lines.clone(), pred, table_name, partition_key).await;
|
||||
|
||||
// return scenarios to run queries
|
||||
//vec![scenario_rub_os, scenario_rub_os_unload_rub, scenario_os]
|
||||
// NGA todo: turn the last 2 scenarios on when #2518 and #2550 are done
|
||||
vec![scenario_rub_os]
|
||||
}
|
||||
}
|
||||
|
||||
// NGA todo: Add these scenarios after deleted data is eliminated from the scan:
// 1. Many deletes, each with one or more expressions
// 2. Many chunks of different types when a delete happens
// 3. Combinations of the above
|
||||
|
||||
async fn make_delete_mub(lp_lines: Vec<&str>, pred: Predicate) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// One open MUB, no RUB, no OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_object_store_chunks(&db), 0);
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// Still one but frozen MUB, no RUB, no OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_object_store_chunks(&db), 0);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in MUB".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_mub_to_rub(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// delete data in MUB
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// move MUB to RUB and the delete predicate will be automatically included in RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// No MUB, one RUB, no OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_object_store_chunks(&db), 0);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in RUB moved from MUB".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_mub_to_rub_and_os(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// delete data in MUB
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// move MUB to RUB and the delete predicate will be automatically included in RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// No MUB, one RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in RUB and OS".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_mub_to_os(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// delete data in MUB
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// move MUB to RUB and the delete predicate will be automatically included in RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// remove RUB
|
||||
db.unload_read_buffer(table_name, partition_key, 1).unwrap();
|
||||
// No MUB, no RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in OS".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_rub(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// move MUB to RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// delete data in RUB
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// No MUB, one RUB, no OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_object_store_chunks(&db), 0);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in RUB".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_rub_to_os(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// move MUB to RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// delete data in RUB
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// No MUB, one RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in RUB and then persisted to OS".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_rub_to_os_and_unload_rub(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// move MUB to RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// delete data in RUB
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// remove RUB
|
||||
db.unload_read_buffer(table_name, partition_key, 1).unwrap();
|
||||
// No MUB, no RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in RUB then persisted to OS then RUB unloaded".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_os_with_rub(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// move MUB to RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// delete data after persisted but RUB still available
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// No MUB, one RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 1);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in OS with RUB".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_os_with_rub_then_unload_rub(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// move MUB to RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// delete data after persisted but RUB still available
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// remove RUB
|
||||
db.unload_read_buffer(table_name, partition_key, 1).unwrap();
|
||||
// No MUB, no RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in OS only but the delete happens before RUB is unloaded"
|
||||
.into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_delete_os(
|
||||
lp_lines: Vec<&str>,
|
||||
pred: Predicate,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
) -> DbScenario {
|
||||
let db = make_db().await.db;
|
||||
// create an open MUB
|
||||
write_lp(&db, &lp_lines.join("\n")).await;
|
||||
// move MUB to RUB
|
||||
db.rollover_partition(table_name, partition_key)
|
||||
.await
|
||||
.unwrap();
|
||||
db.move_chunk_to_read_buffer(table_name, partition_key, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// persist RUB and the delete predicate will be automatically included in the OS chunk
|
||||
db.persist_partition(
|
||||
table_name,
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// remove RUB
|
||||
db.unload_read_buffer(table_name, partition_key, 1).unwrap();
|
||||
// delete data after it is persisted and the RUB is unloaded
|
||||
db.delete("cpu", Arc::new(pred)).await.unwrap();
|
||||
// No MUB, no RUB, one OS
|
||||
assert_eq!(count_mutable_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_read_buffer_chunks(&db), 0);
|
||||
assert_eq!(count_object_store_chunks(&db), 1);
|
||||
|
||||
DbScenario {
|
||||
scenario_name: "Deleted data in OS and the delete happens after RUB is unloaded".into(),
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -808,3 +808,68 @@ async fn sql_select_all_different_tags_chunks() {
|
|||
&expected
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_with_deleted_data_from_one_expr() {
|
||||
let expected = vec![
|
||||
"+-----+--------------------------------+",
|
||||
"| bar | time |",
|
||||
"+-----+--------------------------------+",
|
||||
"| 2 | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+--------------------------------+",
|
||||
];
|
||||
|
||||
// Data deleted when it is in MUB, and then moved to RUB and OS
|
||||
run_sql_test_case!(
|
||||
DeleteFromMubOneMeasurementOneChunk {},
|
||||
"SELECT * from cpu",
|
||||
&expected
|
||||
);
|
||||
|
||||
// Data deleted when it is in RUB, and then moved to OS
|
||||
run_sql_test_case!(
|
||||
DeleteFromRubOneMeasurementOneChunk {},
|
||||
"SELECT * from cpu",
|
||||
&expected
|
||||
);
|
||||
|
||||
// Data deleted when it is in OS
|
||||
run_sql_test_case!(
|
||||
DeleteFromOsOneMeasurementOneChunk {},
|
||||
"SELECT * from cpu",
|
||||
&expected
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_with_deleted_data_from_multi_exprs() {
|
||||
let expected = vec![
|
||||
"+-----+-----+--------------------------------+",
|
||||
"| bar | foo | time |",
|
||||
"+-----+-----+--------------------------------+",
|
||||
"| 1 | me | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| 2 | you | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+-----+--------------------------------+",
|
||||
];
|
||||
|
||||
// Data deleted when it is in MUB, and then moved to RUB and OS
|
||||
run_sql_test_case!(
|
||||
DeleteMultiExprsFromMubOneMeasurementOneChunk {},
|
||||
"SELECT * from cpu",
|
||||
&expected
|
||||
);
|
||||
|
||||
// Data deleted when it is in RUB, and then moved to OS
|
||||
run_sql_test_case!(
|
||||
DeleteMultiExprsFromRubOneMeasurementOneChunk {},
|
||||
"SELECT * from cpu",
|
||||
&expected
|
||||
);
|
||||
|
||||
// Data deleted when it is in OS
|
||||
run_sql_test_case!(
|
||||
DeleteMultiExprsFromOsOneMeasurementOneChunk {},
|
||||
"SELECT * from cpu",
|
||||
&expected
|
||||
);
|
||||
}
|
||||
|
|
|
@ -2067,6 +2067,40 @@ mod test {
|
|||
assert!(matches!(row_ids, RowIDsOption::All(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_float_trimmed() {
|
||||
let input = &[100.0, 200.0, 300.0, 2.0, 22.0, 30.0];
|
||||
|
||||
let col = Column::from(&input[..]);
|
||||
let mut row_ids = col.row_ids_filter(
|
||||
&cmp::Operator::Equal,
|
||||
&Value::from(200.0),
|
||||
RowIDs::new_bitmap(),
|
||||
);
|
||||
assert_eq!(row_ids.unwrap().to_vec(), vec![1]);
|
||||
|
||||
row_ids = col.row_ids_filter(
|
||||
&cmp::Operator::LT,
|
||||
&Value::from(64000.0),
|
||||
RowIDs::new_bitmap(),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All(_)));
|
||||
|
||||
row_ids = col.row_ids_filter(
|
||||
&cmp::Operator::GTE,
|
||||
&Value::from(-1_000_000.0),
|
||||
RowIDs::new_bitmap(),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All(_)));
|
||||
|
||||
row_ids = col.row_ids_filter(
|
||||
&cmp::Operator::NotEqual,
|
||||
&Value::from(1_000_000.3),
|
||||
RowIDs::new_bitmap(),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_range() {
|
||||
let input = &[100_i64, 200, 300, 2, 200, 22, 30];
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
//! allow results to be emitted as some logical type `L` via a transformation
|
||||
//! `T`.
|
||||
use either::Either;
|
||||
use observability_deps::tracing::debug;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
@ -373,14 +374,36 @@ where
|
|||
Some(self.transcoder.decode(max))
|
||||
}
|
||||
|
||||
fn row_ids_filter(&self, value: L, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
|
||||
let value = self.transcoder.encode(value);
|
||||
fn row_ids_filter(&self, value: L, op: &cmp::Operator, mut dst: RowIDs) -> RowIDs {
|
||||
debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter");
|
||||
let (value, op) = match self.transcoder.encode_comparable(value, *op) {
|
||||
Some((value, op)) => (value, op),
|
||||
None => {
|
||||
// The value is not encodable. This can happen with the == or !=
// operator. In the case of ==, no values in the encoding could
// possibly satisfy the expression. In the case of !=, all
// values would satisfy the expression.
|
||||
dst.clear();
|
||||
return match op {
|
||||
cmp::Operator::Equal => dst,
|
||||
cmp::Operator::NotEqual => {
|
||||
dst.add_range(0, self.num_rows());
|
||||
dst
|
||||
}
|
||||
op => panic!("operator {:?} not expected", op),
|
||||
};
|
||||
}
|
||||
};
|
||||
debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter encoded expr");
|
||||
|
||||
// N.B, the transcoder may have changed the operator depending on the
|
||||
// value provided.
|
||||
match op {
|
||||
cmp::Operator::GT => self.row_ids_cmp_order(&value, PartialOrd::gt, dst),
|
||||
cmp::Operator::GTE => self.row_ids_cmp_order(&value, PartialOrd::ge, dst),
|
||||
cmp::Operator::LT => self.row_ids_cmp_order(&value, PartialOrd::lt, dst),
|
||||
cmp::Operator::LTE => self.row_ids_cmp_order(&value, PartialOrd::le, dst),
|
||||
_ => self.row_ids_equal(&value, op, dst),
|
||||
_ => self.row_ids_equal(&value, &op, dst),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -390,8 +413,16 @@ where
|
|||
right: (L, &cmp::Operator),
|
||||
dst: RowIDs,
|
||||
) -> RowIDs {
|
||||
let left = (self.transcoder.encode(left.0), left.1);
|
||||
let right = (self.transcoder.encode(right.0), right.1);
|
||||
debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range");
|
||||
let left = self
|
||||
.transcoder
|
||||
.encode_comparable(left.0, *left.1)
|
||||
.expect("transcoder must return Some variant");
|
||||
let right = self
|
||||
.transcoder
|
||||
.encode_comparable(right.0, *right.1)
|
||||
.expect("transcoder must return Some variant");
|
||||
debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range encoded expr");
|
||||
|
||||
match (&left.1, &right.1) {
|
||||
(cmp::Operator::GT, cmp::Operator::LT)
|
||||
|
@ -402,8 +433,8 @@ where
|
|||
| (cmp::Operator::LT, cmp::Operator::GTE)
|
||||
| (cmp::Operator::LTE, cmp::Operator::GT)
|
||||
| (cmp::Operator::LTE, cmp::Operator::GTE) => self.row_ids_cmp_range_order(
|
||||
(&left.0, Self::ord_from_op(left.1)),
|
||||
(&right.0, Self::ord_from_op(right.1)),
|
||||
(&left.0, Self::ord_from_op(&left.1)),
|
||||
(&right.0, Self::ord_from_op(&right.1)),
|
||||
dst,
|
||||
),
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
//! This encoding stores a column of fixed-width numerical values backed by an
|
||||
//! an Arrow array, allowing for storage of NULL values.
|
||||
use either::Either;
|
||||
use observability_deps::tracing::debug;
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem::size_of;
|
||||
|
@ -227,6 +228,43 @@ where
|
|||
}
|
||||
dst
|
||||
}
|
||||
|
||||
// Identify all row IDs that contain a non-null value.
|
||||
fn all_non_null_row_ids(&self, mut dst: RowIDs) -> RowIDs {
|
||||
dst.clear();
|
||||
|
||||
if self.null_count() == 0 {
|
||||
dst.add_range(0, self.num_rows());
|
||||
return dst;
|
||||
}
|
||||
|
||||
let mut found = false;
|
||||
let mut count = 0;
|
||||
for i in 0..self.num_rows() as usize {
|
||||
if self.arr.is_null(i) {
|
||||
if found {
|
||||
// add the non-null range
|
||||
let (min, max) = (i as u32 - count, i as u32);
|
||||
dst.add_range(min, max);
|
||||
found = false;
|
||||
count = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if !found {
|
||||
found = true;
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
|
||||
// add any remaining range.
|
||||
if found {
|
||||
let (min, max) = (self.num_rows() - count, self.num_rows());
|
||||
dst.add_range(min, max);
|
||||
}
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
impl<P, L, T> ScalarEncoding<L> for FixedNull<P, L, T>
|
||||
|
@ -411,14 +449,34 @@ where
|
|||
max.map(|v| self.transcoder.decode(v))
|
||||
}
|
||||
|
||||
fn row_ids_filter(&self, value: L, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
|
||||
let value = self.transcoder.encode(value);
|
||||
fn row_ids_filter(&self, value: L, op: &cmp::Operator, mut dst: RowIDs) -> RowIDs {
|
||||
debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter");
|
||||
let (value, op) = match self.transcoder.encode_comparable(value, *op) {
|
||||
Some((value, op)) => (value, op),
|
||||
None => {
|
||||
// The value is not encodable. This can happen with the == or !=
// operator. In the case of ==, no values in the encoding could
// possibly satisfy the expression. In the case of !=, all
// non-null values would satisfy the expression.
|
||||
dst.clear();
|
||||
return match op {
|
||||
cmp::Operator::Equal => dst,
|
||||
cmp::Operator::NotEqual => {
|
||||
dst = self.all_non_null_row_ids(dst);
|
||||
dst
|
||||
}
|
||||
op => panic!("operator {:?} not expected", op),
|
||||
};
|
||||
}
|
||||
};
|
||||
debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter encoded expr");
|
||||
|
||||
match op {
|
||||
cmp::Operator::GT => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst),
|
||||
cmp::Operator::GTE => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst),
|
||||
cmp::Operator::LT => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst),
|
||||
cmp::Operator::LTE => self.row_ids_cmp_order(value, Self::ord_from_op(op), dst),
|
||||
_ => self.row_ids_equal(value, op, dst),
|
||||
cmp::Operator::GT => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst),
|
||||
cmp::Operator::GTE => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst),
|
||||
cmp::Operator::LT => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst),
|
||||
cmp::Operator::LTE => self.row_ids_cmp_order(value, Self::ord_from_op(&op), dst),
|
||||
_ => self.row_ids_equal(value, &op, dst),
|
||||
}
|
||||
}
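The fallback above is worth spelling out: when `encode_comparable` returns `None`, an `==` filter can match nothing, while a `!=` filter matches every non-null row. A rough, self-contained illustration of that branch over a plain nullable vector, with a simplified two-variant operator enum standing in for the crate's `cmp::Operator`:

#[derive(Debug, Clone, Copy)]
enum Op { Equal, NotEqual }

/// Row IDs matching `column OP value` when the f64 literal may not be
/// representable in the u16-encoded column.
fn filter_maybe_unencodable(column: &[Option<u16>], value: f64, op: Op) -> Vec<u32> {
    let encodable = value == (value as u16) as f64;
    if !encodable {
        return match op {
            // no stored u16 can equal a fractional literal
            Op::Equal => vec![],
            // every non-null row differs from a literal outside the domain
            Op::NotEqual => column
                .iter()
                .enumerate()
                .filter(|(_, v)| v.is_some())
                .map(|(i, _)| i as u32)
                .collect(),
        };
    }

    // encodable literals take the normal comparison path
    let v = value as u16;
    let mut out = Vec::new();
    for (i, x) in column.iter().enumerate() {
        if let Some(x) = x {
            let keep = match op {
                Op::Equal => *x == v,
                Op::NotEqual => *x != v,
            };
            if keep {
                out.push(i as u32);
            }
        }
    }
    out
}

fn main() {
    let col = vec![Some(100u16), None, Some(200)];
    assert_eq!(filter_maybe_unencodable(&col, 100.5, Op::Equal), Vec::<u32>::new());
    assert_eq!(filter_maybe_unencodable(&col, 100.5, Op::NotEqual), vec![0, 2]);
    assert_eq!(filter_maybe_unencodable(&col, 100.0, Op::Equal), vec![0]);
}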
@ -428,8 +486,16 @@ where
|
|||
right: (L, &cmp::Operator),
|
||||
dst: RowIDs,
|
||||
) -> RowIDs {
|
||||
let left = (self.transcoder.encode(left.0), left.1);
|
||||
let right = (self.transcoder.encode(right.0), right.1);
|
||||
debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range");
|
||||
let left = self
|
||||
.transcoder
|
||||
.encode_comparable(left.0, *left.1)
|
||||
.expect("transcoder must return Some variant");
|
||||
let right = self
|
||||
.transcoder
|
||||
.encode_comparable(right.0, *right.1)
|
||||
.expect("transcoder must return Some variant");
|
||||
debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range encoded expr");
|
||||
|
||||
match (left.1, right.1) {
|
||||
(cmp::Operator::GT, cmp::Operator::LT)
|
||||
|
@ -440,8 +506,8 @@ where
|
|||
| (cmp::Operator::LT, cmp::Operator::GTE)
|
||||
| (cmp::Operator::LTE, cmp::Operator::GT)
|
||||
| (cmp::Operator::LTE, cmp::Operator::GTE) => self.row_ids_cmp_range_order(
|
||||
(left.0, Self::ord_from_op(left.1)),
|
||||
(right.0, Self::ord_from_op(right.1)),
|
||||
(left.0, Self::ord_from_op(&left.1)),
|
||||
(right.0, Self::ord_from_op(&right.1)),
|
||||
dst,
|
||||
),
@ -789,6 +855,38 @@ mod test {
|
|||
assert_eq!(row_ids.to_vec(), vec![1, 2, 4]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_range_all_non_null() {
|
||||
let cases = vec![
|
||||
(vec![None], vec![]),
|
||||
(vec![None, None, None], vec![]),
|
||||
(vec![Some(22)], vec![0_u32]),
|
||||
(vec![Some(22), Some(3), Some(3)], vec![0, 1, 2]),
|
||||
(vec![Some(22), None], vec![0]),
|
||||
(
|
||||
vec![Some(22), None, Some(1), None, Some(3), None],
|
||||
vec![0, 2, 4],
|
||||
),
|
||||
(vec![Some(22), None, None, Some(33)], vec![0, 3]),
|
||||
(vec![None, None, Some(33)], vec![2]),
|
||||
(
|
||||
vec![None, None, Some(33), None, None, Some(3), Some(3), Some(1)],
|
||||
vec![2, 5, 6, 7],
|
||||
),
|
||||
];
|
||||
|
||||
for (i, (data, exp)) in cases.into_iter().enumerate() {
|
||||
let (v, _) = new_encoding(data);
|
||||
let dst = RowIDs::new_vector();
|
||||
assert_eq!(
|
||||
v.all_non_null_row_ids(dst).unwrap_vector(),
|
||||
&exp,
|
||||
"example {:?} failed",
|
||||
i
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn has_non_null_value() {
|
||||
let (v, _) = new_encoding(vec![None, None]);
@ -1,4 +1,5 @@
|
|||
use either::Either;
|
||||
use observability_deps::tracing::debug;
|
||||
|
||||
use crate::column::cmp;
|
||||
use crate::column::RowIDs;
|
||||
|
@ -288,18 +289,35 @@ where
|
|||
let left_cmp_result = next.partial_cmp(left.0);
|
||||
let right_cmp_result = next.partial_cmp(right.0);
|
||||
|
||||
// TODO(edd): eurgh I still don't understand how I got this to
|
||||
// be correct. Need to revisit to make it simpler.
|
||||
let left_result_ok =
|
||||
!(left_cmp_result == Some(left_op.0) || left_cmp_result == Some(left_op.1));
|
||||
left_cmp_result == Some(left_op.0) || left_cmp_result == Some(left_op.1);
|
||||
let right_result_ok =
|
||||
!(right_cmp_result == Some(right_op.0) || right_cmp_result == Some(right_op.1));
|
||||
right_cmp_result == Some(right_op.0) || right_cmp_result == Some(right_op.1);
|
||||
|
||||
if !(left_result_ok || right_result_ok) {
|
||||
if left_result_ok && right_result_ok {
|
||||
dst.add_range(curr_logical_row_id, curr_logical_row_id + rl);
|
||||
}
|
||||
curr_logical_row_id += rl;
|
||||
}
|
||||
curr_logical_row_id += rl;
|
||||
}
|
||||
|
||||
dst
|
||||
}
|
||||
|
||||
fn all_non_null_row_ids(&self, mut dst: RowIDs) -> RowIDs {
|
||||
dst.clear();
|
||||
|
||||
if self.null_count() == 0 {
|
||||
dst.add_range(0, self.num_rows());
|
||||
return dst;
|
||||
}
|
||||
|
||||
let mut curr_logical_row_id = 0;
|
||||
for (rl, next) in &self.run_lengths {
|
||||
if next.is_some() {
|
||||
dst.add_range(curr_logical_row_id, curr_logical_row_id + rl);
|
||||
}
|
||||
curr_logical_row_id += rl;
|
||||
}
|
||||
|
||||
dst
|
||||
|
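In the RLE encoding the same questions are answered per run rather than per row: each `(run_length, value)` pair contributes a whole range of logical row IDs at once. A compact sketch over a plain run list (a stand-in for the crate's internal representation):

/// Logical row ID ranges covered by non-null runs.
fn non_null_row_ranges(runs: &[(u32, Option<i64>)]) -> Vec<(u32, u32)> {
    let mut ranges = Vec::new();
    let mut row = 0u32;
    for (rl, value) in runs {
        if value.is_some() {
            ranges.push((row, row + rl));
        }
        row += rl;
    }
    ranges
}

/// Logical row IDs whose value lies in [low, high), evaluated run by run.
fn rows_in_range(runs: &[(u32, Option<i64>)], low: i64, high: i64) -> Vec<u32> {
    let mut rows = Vec::new();
    let mut row = 0u32;
    for (rl, value) in runs {
        if let Some(v) = value {
            if *v >= low && *v < high {
                rows.extend(row..row + rl);
            }
        }
        row += rl;
    }
    rows
}

fn main() {
    // 2 x 10, 3 x NULL, 1 x 25
    let runs = vec![(2, Some(10)), (3, None), (1, Some(25))];
    assert_eq!(non_null_row_ranges(&runs), vec![(0, 2), (5, 6)]);
    assert_eq!(rows_in_range(&runs, 10, 20), vec![0, 1]);
}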
@ -375,15 +393,34 @@ where
|
|||
self.num_rows
|
||||
}
|
||||
|
||||
fn row_ids_filter(&self, value: L, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
|
||||
let value = self.transcoder.encode(value);
|
||||
fn row_ids_filter(&self, value: L, op: &cmp::Operator, mut dst: RowIDs) -> RowIDs {
|
||||
debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter");
|
||||
let (value, op) = match self.transcoder.encode_comparable(value, *op) {
|
||||
Some((value, op)) => (value, op),
|
||||
None => {
|
||||
// The value is not encodable. This can happen with the == or !=
|
||||
// operator. In the case of ==, no values in the encoding could
|
||||
// possibly satisfy the expression. In the case of !=, all
|
||||
// non-null values would satisfy the expression.
|
||||
dst.clear();
|
||||
return match op {
|
||||
cmp::Operator::Equal => dst,
|
||||
cmp::Operator::NotEqual => {
|
||||
dst = self.all_non_null_row_ids(dst);
|
||||
dst
|
||||
}
|
||||
op => panic!("operator {:?} not expected", op),
|
||||
};
|
||||
}
|
||||
};
|
||||
debug!(value=?value, operator=?op, encoding=?ENCODING_NAME, "row_ids_filter encoded expr");
|
||||
|
||||
match op {
|
||||
cmp::Operator::Equal | cmp::Operator::NotEqual => {
|
||||
self.row_ids_cmp_equal(value, op, dst)
|
||||
}
|
||||
cmp::Operator::LT | cmp::Operator::LTE | cmp::Operator::GT | cmp::Operator::GTE => {
|
||||
self.row_ids_cmp(value, op, dst)
|
||||
}
|
||||
cmp::Operator::GT => self.row_ids_cmp(value, &op, dst),
|
||||
cmp::Operator::GTE => self.row_ids_cmp(value, &op, dst),
|
||||
cmp::Operator::LT => self.row_ids_cmp(value, &op, dst),
|
||||
cmp::Operator::LTE => self.row_ids_cmp(value, &op, dst),
|
||||
_ => self.row_ids_cmp_equal(value, &op, dst),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -393,8 +430,16 @@ where
|
|||
right: (L, &cmp::Operator),
|
||||
dst: RowIDs,
|
||||
) -> RowIDs {
|
||||
let left = (self.transcoder.encode(left.0), left.1);
|
||||
let right = (self.transcoder.encode(right.0), right.1);
|
||||
debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range");
|
||||
let left = self
|
||||
.transcoder
|
||||
.encode_comparable(left.0, *left.1)
|
||||
.expect("transcoder must return Some variant");
|
||||
let right = self
|
||||
.transcoder
|
||||
.encode_comparable(right.0, *right.1)
|
||||
.expect("transcoder must return Some variant");
|
||||
debug!(left=?left, right=?right, encoding=?ENCODING_NAME, "row_ids_filter_range encoded expr");
|
||||
|
||||
match (&left.1, &right.1) {
|
||||
(cmp::Operator::GT, cmp::Operator::LT)
|
||||
|
@ -405,8 +450,8 @@ where
|
|||
| (cmp::Operator::LT, cmp::Operator::GTE)
|
||||
| (cmp::Operator::LTE, cmp::Operator::GT)
|
||||
| (cmp::Operator::LTE, cmp::Operator::GTE) => self.row_ids_cmp_range(
|
||||
(&left.0, Self::ord_from_op(left.1)),
|
||||
(&right.0, Self::ord_from_op(right.1)),
|
||||
(&left.0, Self::ord_from_op(&left.1)),
|
||||
(&right.0, Self::ord_from_op(&right.1)),
|
||||
dst,
|
||||
),
@ -622,6 +667,16 @@ mod test {
|
|||
)
|
||||
}
|
||||
|
||||
fn new_encoding_opt(
|
||||
values: Vec<Option<i64>>,
|
||||
) -> (RLE<i64, i64, Arc<MockTranscoder>>, Arc<MockTranscoder>) {
|
||||
let mock = Arc::new(MockTranscoder::default());
|
||||
(
|
||||
RLE::new_from_iter_opt(values.into_iter(), Arc::clone(&mock)),
|
||||
mock,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_from_iter() {
|
||||
let cases = vec![
|
||||
|
@ -981,6 +1036,38 @@ mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_range_all_non_null() {
|
||||
let cases = vec![
|
||||
(vec![None], vec![]),
|
||||
(vec![None, None, None], vec![]),
|
||||
(vec![Some(22)], vec![0_u32]),
|
||||
(vec![Some(22), Some(3), Some(3)], vec![0, 1, 2]),
|
||||
(vec![Some(22), None], vec![0]),
|
||||
(
|
||||
vec![Some(22), None, Some(1), None, Some(3), None],
|
||||
vec![0, 2, 4],
|
||||
),
|
||||
(vec![Some(22), None, None, Some(33)], vec![0, 3]),
|
||||
(vec![None, None, Some(33)], vec![2]),
|
||||
(
|
||||
vec![None, None, Some(33), None, None, Some(3), Some(3), Some(1)],
|
||||
vec![2, 5, 6, 7],
|
||||
),
|
||||
];
|
||||
|
||||
for (i, (data, exp)) in cases.into_iter().enumerate() {
|
||||
let (v, _) = new_encoding_opt(data);
|
||||
let dst = RowIDs::new_vector();
|
||||
assert_eq!(
|
||||
v.all_non_null_row_ids(dst).unwrap_vector(),
|
||||
&exp,
|
||||
"example {:?} failed",
|
||||
i
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_range() {
|
||||
let (enc, transcoder) = new_encoding(vec![
|
||||
|
@ -1016,6 +1103,37 @@ mod test {
|
|||
assert_eq!(transcoder.encodings(), calls * 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_range_nulls() {
|
||||
let (enc, transcoder) = new_encoding_opt(vec![
|
||||
Some(100),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
Some(100),
|
||||
Some(101),
|
||||
Some(101),
|
||||
]);
|
||||
|
||||
let cases = vec![
|
||||
(
|
||||
(100, &Operator::GTE),
|
||||
(240, &Operator::LT),
|
||||
vec![0, 4, 5, 6],
|
||||
),
|
||||
((100, &Operator::GT), (240, &Operator::LT), vec![5, 6]),
|
||||
((10, &Operator::LT), (-100, &Operator::GT), vec![]),
|
||||
((21, &Operator::GTE), (100, &Operator::LTE), vec![0, 4]),
|
||||
];
|
||||
|
||||
let calls = cases.len();
|
||||
for (left, right, exp) in cases {
|
||||
let dst = enc.row_ids_filter_range(left, right, RowIDs::new_vector());
|
||||
assert_eq!(dst.unwrap_vector(), &exp);
|
||||
}
|
||||
assert_eq!(transcoder.encodings(), calls * 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn estimate_rle_size() {
|
||||
let cases = vec![
@ -1,3 +1,4 @@
|
|||
use crate::column::cmp::Operator;
|
||||
use std::{
|
||||
convert::TryFrom,
|
||||
fmt::{Debug, Display},
|
||||
|
@ -13,7 +14,17 @@ use std::{
// `P` is a physical type that is stored directly within an encoding, `L` is
// a logical type callers expect to be returned.
pub trait Transcoder<P, L>: Debug + Display {
    /// A function that encodes a logical value into a physical representation.
    fn encode(&self, _: L) -> P;

    /// A function that attempts to encode a logical value, within the context
    /// of a comparison operator, into a physical representation.
    ///
    /// Implementation should return a suitable operator for the physical
    /// representation, which may differ from the provided operator.
    fn encode_comparable(&self, _: L, _: Operator) -> Option<(P, Operator)>;

    /// A function to decode a physical representation back into a logical value.
    fn decode(&self, _: P) -> L;
}
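As a self-contained picture of the trait shape introduced above, here is a cut-down transcoder for i64 values stored as u8, where `encode_comparable` fails for values outside the physical range. The names and signatures here are simplified stand-ins for the real trait, not the crate's API:

use std::convert::TryFrom;

#[derive(Debug, Clone, Copy, PartialEq)]
enum Operator { Equal, NotEqual }

/// A cut-down transcoder: logical i64 values stored physically as u8.
trait Transcode {
    fn encode(&self, v: i64) -> u8;
    fn encode_comparable(&self, v: i64, op: Operator) -> Option<(u8, Operator)>;
    fn decode(&self, v: u8) -> i64;
}

struct ByteTrim;

impl Transcode for ByteTrim {
    fn encode(&self, v: i64) -> u8 {
        u8::try_from(v).expect("value must fit the physical type")
    }

    // A value outside u8's range cannot be represented, so the caller decides
    // what the comparison means (e.g. `== 300` matches nothing in this column).
    fn encode_comparable(&self, v: i64, op: Operator) -> Option<(u8, Operator)> {
        u8::try_from(v).ok().map(|p| (p, op))
    }

    fn decode(&self, v: u8) -> i64 {
        i64::from(v)
    }
}

fn main() {
    let t = ByteTrim;
    assert_eq!(t.encode_comparable(42, Operator::Equal), Some((42, Operator::Equal)));
    assert_eq!(t.encode_comparable(300, Operator::NotEqual), None);
    assert_eq!(t.decode(t.encode(42)), 42);
}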
@ -29,6 +40,10 @@ impl<T> Transcoder<T, T> for NoOpTranscoder {
|
|||
v
|
||||
}
|
||||
|
||||
fn encode_comparable(&self, v: T, op: Operator) -> Option<(T, Operator)> {
|
||||
Some((v, op))
|
||||
}
|
||||
|
||||
fn decode(&self, v: T) -> T {
|
||||
v
|
||||
}
|
||||
|
@ -56,13 +71,17 @@ pub struct ByteTrimmer {}
|
|||
impl<P, L> Transcoder<P, L> for ByteTrimmer
|
||||
where
|
||||
L: From<P>,
|
||||
P: TryFrom<L>,
|
||||
P: TryFrom<L> + PartialEq + PartialOrd,
|
||||
<P as TryFrom<L>>::Error: std::fmt::Debug,
|
||||
{
|
||||
fn encode(&self, v: L) -> P {
|
||||
P::try_from(v).unwrap()
|
||||
}
|
||||
|
||||
fn encode_comparable(&self, v: L, op: Operator) -> Option<(P, Operator)> {
|
||||
P::try_from(v).ok().map(|p| (p, op))
|
||||
}
|
||||
|
||||
fn decode(&self, v: P) -> L {
|
||||
L::from(v)
|
||||
}
|
||||
|
@ -91,6 +110,54 @@ macro_rules! make_float_trimmer {
|
|||
v as $type
|
||||
}
|
||||
|
||||
fn encode_comparable(&self, v: f64, op: Operator) -> Option<($type, Operator)> {
|
||||
assert!(v <= <$type>::MAX as f64);
|
||||
if v == ((v as $type) as f64) {
|
||||
return Some((v as $type, op));
|
||||
}
|
||||
|
||||
match op {
|
||||
Operator::Equal => {
|
||||
None // no encoded values will == v
|
||||
}
|
||||
Operator::NotEqual => {
|
||||
None // all encoded values will != v
|
||||
}
|
||||
Operator::LT => {
|
||||
// convert to next highest encodable value. For example
|
||||
// given '< 23.2` return 24.0 encoded as the physical
|
||||
// type. < 23.2 is logically equivalent to < 24.0 since
|
||||
// there are no valid values in the domain (23.2, 24.0).
|
||||
Some((v.ceil() as $type, op))
|
||||
}
|
||||
Operator::LTE => {
|
||||
// convert to next highest encodable value and change
|
||||
// operator to <.
|
||||
// For example given '<= 23.2` return 24.0 encoded as
|
||||
// the physical type. <= 23.2 is logically equivalent
|
||||
// to < 24.0 since there are no valid values in the
|
||||
// domain [23.2, 24.0).
|
||||
Some((v.ceil() as $type, Operator::LT))
|
||||
}
|
||||
Operator::GT => {
|
||||
// convert to next lowest encodable value. For example
|
||||
// given '> 23.2` return 23.0 encoded as the physical
|
||||
// type. > 23.2 is logically equivalent to > 23.0 since
|
||||
// there are no valid values in the domain (23.0, 23.2].
|
||||
Some((v.floor() as $type, op))
|
||||
}
|
||||
Operator::GTE => {
|
||||
// convert to next lowest encodable value and change
|
||||
// operator to >.
|
||||
// For example given '>= 23.2` return 23.0 encoded as
|
||||
// the physical type. >= 23.2 is logically equivalent
|
||||
// to > 23.0 since there are no valid values in the
|
||||
// domain (23.0, 23.2).
|
||||
Some((v.floor() as $type, Operator::GT))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn decode(&self, v: $type) -> f64 {
|
||||
v.into()
|
||||
}
|
||||
|
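The rewriting in the macro above can be summarised as: move a fractional bound to the adjacent integer and, for the inclusive operators, switch to the strict form. A free-standing sketch of that rule for a u16-backed column (the `Op` enum and `rewrite` helper are illustrative, not the crate's API):

#[derive(Debug, Clone, Copy, PartialEq)]
enum Op { Lt, Lte, Gt, Gte }

/// Rewrite `x OP bound` (bound: f64) into an equivalent predicate over the
/// integer-encoded domain. Returns the integer bound and possibly a new operator.
fn rewrite(bound: f64, op: Op) -> (u16, Op) {
    if bound == bound.trunc() {
        // the bound is exactly representable: nothing to do
        return (bound as u16, op);
    }
    match op {
        Op::Lt => (bound.ceil() as u16, Op::Lt),   // x < 23.2  ==  x < 24
        Op::Lte => (bound.ceil() as u16, Op::Lt),  // x <= 23.2 ==  x < 24
        Op::Gt => (bound.floor() as u16, Op::Gt),  // x > 23.2  ==  x > 23
        Op::Gte => (bound.floor() as u16, Op::Gt), // x >= 23.2 ==  x > 23
    }
}

fn main() {
    assert_eq!(rewrite(23.2, Op::Lte), (24, Op::Lt));
    assert_eq!(rewrite(23.2, Op::Gte), (23, Op::Gt));
    assert_eq!(rewrite(23.0, Op::Lte), (23, Op::Lte));
}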
@ -119,7 +186,7 @@ impl Display for FloatByteTrimmer {
|
|||
// result.
|
||||
|
||||
#[cfg(test)]
|
||||
use std::{sync::atomic::AtomicUsize, sync::atomic::Ordering, sync::Arc};
|
||||
use std::{sync::atomic, sync::atomic::AtomicUsize, sync::Arc};
|
||||
#[cfg(test)]
|
||||
/// A mock implementation of Transcoder that tracks calls to encode and decode.
|
||||
/// This is useful for testing encoder implementations.
|
||||
|
@ -127,6 +194,7 @@ use std::{sync::atomic::AtomicUsize, sync::atomic::Ordering, sync::Arc};
|
|||
pub struct MockTranscoder {
|
||||
encoding_calls: AtomicUsize,
|
||||
decoding_calls: AtomicUsize,
|
||||
partial_cmp_calls: AtomicUsize,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -135,6 +203,7 @@ impl Default for MockTranscoder {
|
|||
Self {
|
||||
encoding_calls: AtomicUsize::default(),
|
||||
decoding_calls: AtomicUsize::default(),
|
||||
partial_cmp_calls: AtomicUsize::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -142,23 +211,28 @@ impl Default for MockTranscoder {
|
|||
#[cfg(test)]
|
||||
impl MockTranscoder {
|
||||
pub fn encodings(&self) -> usize {
|
||||
self.encoding_calls.load(Ordering::Relaxed)
|
||||
self.encoding_calls.load(atomic::Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn decodings(&self) -> usize {
|
||||
self.decoding_calls.load(Ordering::Relaxed)
|
||||
self.decoding_calls.load(atomic::Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl<T> Transcoder<T, T> for MockTranscoder {
|
||||
fn encode(&self, v: T) -> T {
|
||||
self.encoding_calls.fetch_add(1, Ordering::Relaxed);
|
||||
self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
v
|
||||
}
|
||||
|
||||
fn encode_comparable(&self, v: T, op: Operator) -> Option<(T, Operator)> {
|
||||
self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
Some((v, op))
|
||||
}
|
||||
|
||||
fn decode(&self, v: T) -> T {
|
||||
self.decoding_calls.fetch_add(1, Ordering::Relaxed);
|
||||
self.decoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
v
|
||||
}
|
||||
}
|
||||
|
@ -166,12 +240,17 @@ impl<T> Transcoder<T, T> for MockTranscoder {
|
|||
#[cfg(test)]
|
||||
impl<T> Transcoder<T, T> for Arc<MockTranscoder> {
|
||||
fn encode(&self, v: T) -> T {
|
||||
self.encoding_calls.fetch_add(1, Ordering::Relaxed);
|
||||
self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
v
|
||||
}
|
||||
|
||||
fn encode_comparable(&self, v: T, op: Operator) -> Option<(T, Operator)> {
|
||||
self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
Some((v, op))
|
||||
}
|
||||
|
||||
fn decode(&self, v: T) -> T {
|
||||
self.decoding_calls.fetch_add(1, Ordering::Relaxed);
|
||||
self.decoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
v
|
||||
}
|
||||
}
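The mock now refers to `atomic::Ordering` by its module path, presumably so it cannot be confused with `std::cmp::Ordering`, which the encodings also use. The call-counting pattern itself is small; a stripped-down version (illustrative names only):

use std::sync::atomic::{self, AtomicUsize};
use std::sync::Arc;

/// Counts how many times a test double was asked to encode something.
#[derive(Default, Debug)]
struct CallCounter {
    encoding_calls: AtomicUsize,
}

impl CallCounter {
    fn encode(&self, v: i64) -> i64 {
        // Relaxed is enough: tests only read the total after the work is done.
        self.encoding_calls.fetch_add(1, atomic::Ordering::Relaxed);
        v
    }

    fn encodings(&self) -> usize {
        self.encoding_calls.load(atomic::Ordering::Relaxed)
    }
}

fn main() {
    let counter = Arc::new(CallCounter::default());
    for i in 0..3 {
        counter.encode(i);
    }
    assert_eq!(counter.encodings(), 3);
}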
@ -754,4 +754,303 @@ mod test {
|
|||
//assert_eq!(dst.unwrap_vector(), &exp, "example '{} {:?}' failed", op, v);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_float_trimmer() {
|
||||
let data = vec![100.0, 200.0, 100.0, 300.0, 400.0];
|
||||
|
||||
let float_trimmer = FloatByteTrimmer {};
|
||||
let data_float_trimmed = data
|
||||
.iter()
|
||||
.cloned()
|
||||
.map::<u16, _>(|x| float_trimmer.encode(x))
|
||||
.collect::<Vec<u16>>();
|
||||
|
||||
let cases: Vec<Box<dyn ScalarEncoding<f64>>> = vec![
|
||||
Box::new(RLE::<u16, f64, _>::new_from_iter(
|
||||
data_float_trimmed.iter().cloned(),
|
||||
float_trimmer,
|
||||
)),
|
||||
Box::new(Fixed::<u16, f64, _>::new(
|
||||
data_float_trimmed.clone(),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
Box::new(FixedNull::<UInt16Type, f64, _>::new(
|
||||
PrimitiveArray::from(data_float_trimmed),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
];
|
||||
|
||||
for enc in cases {
|
||||
_row_ids_filter_float_trimmer(enc)
|
||||
}
|
||||
}
|
||||
|
||||
fn _row_ids_filter_float_trimmer(enc: Box<dyn ScalarEncoding<f64>>) {
|
||||
// [100.0, 200.0, 100.0, 300.0, 400.0]
|
||||
let cases = vec![
|
||||
(100.0, Operator::Equal, vec![0, 2]), // 100.0, 100.0
|
||||
(100.0, Operator::NotEqual, vec![1, 3, 4]), // 200.0, 300.0, 400.0
|
||||
(100.0, Operator::LT, vec![]), //
|
||||
(100.0, Operator::LTE, vec![0, 2]), // 100.0, 100.0
|
||||
(100.0, Operator::GT, vec![1, 3, 4]), // 200.0, 300.0, 400.0
|
||||
(100.0, Operator::GTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(200.0, Operator::Equal, vec![1]), // 200.0
|
||||
(200.0, Operator::NotEqual, vec![0, 2, 3, 4]), // 100.0, 100.0, 300.0, 400.0
|
||||
(200.0, Operator::LT, vec![0, 2]), // 100.0, 100.0
|
||||
(200.0, Operator::LTE, vec![0, 1, 2]), // 100.0, 200.0, 100.0
|
||||
(200.0, Operator::GT, vec![3, 4]), // 300.0, 400.0
|
||||
(200.0, Operator::GTE, vec![1, 3, 4]), // 200.0, 300.0, 400.0
|
||||
(400.0, Operator::Equal, vec![4]), // 400.0
|
||||
(400.0, Operator::NotEqual, vec![0, 1, 2, 3]), // 100.0, 200.0, 100.0, 300.0
|
||||
(400.0, Operator::LT, vec![0, 1, 2, 3]), // 100.0, 200.0, 100.0, 300.0
|
||||
(400.0, Operator::LTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(400.0, Operator::GT, vec![]), //
|
||||
(400.0, Operator::GTE, vec![4]), // 400.0
|
||||
// Values not present in the column
|
||||
(99.0, Operator::Equal, vec![]), //
|
||||
(99.0, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(99.0, Operator::LT, vec![]), //
|
||||
(99.0, Operator::LTE, vec![]), //
|
||||
(99.0, Operator::GT, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(99.0, Operator::GTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(200.4, Operator::Equal, vec![]), //
|
||||
(200.4, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(200.4, Operator::LT, vec![0, 1, 2]), // 100.0, 200.0, 100.0
|
||||
(200.4, Operator::LTE, vec![0, 1, 2]), // 100.0, 200.0, 100.0
|
||||
(200.4, Operator::GT, vec![3, 4]), // 300.0, 400.0
|
||||
(200.4, Operator::GTE, vec![3, 4]), // 300.0, 400.0
|
||||
(201.0, Operator::Equal, vec![]), //
|
||||
(201.0, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(201.0, Operator::LT, vec![0, 1, 2]), // 100.0, 200.0, 100.0
|
||||
(201.0, Operator::LTE, vec![0, 1, 2]), // 100.0, 200.0, 100.0
|
||||
(201.0, Operator::GT, vec![3, 4]), // 300.0, 400.0
|
||||
(201.0, Operator::GTE, vec![3, 4]), // 300.0, 400.0
|
||||
(401.0, Operator::Equal, vec![]), //
|
||||
(401.0, Operator::NotEqual, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(401.0, Operator::LT, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(401.0, Operator::LTE, vec![0, 1, 2, 3, 4]), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
(401.0, Operator::GT, vec![]), //
|
||||
(401.0, Operator::GTE, vec![]), //
|
||||
];
|
||||
|
||||
for (v, op, exp) in cases {
|
||||
let dst = enc.row_ids_filter(v, &op, RowIDs::new_vector());
|
||||
assert_eq!(
|
||||
dst.unwrap_vector(),
|
||||
&exp,
|
||||
"example '{} {:?}' failed for {:?}",
|
||||
op,
|
||||
v,
|
||||
enc.name()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_float_trimmer_with_nulls() {
|
||||
let data = vec![Some(100.0), None, None, Some(200.0), None];
|
||||
|
||||
let float_trimmer = FloatByteTrimmer {};
|
||||
|
||||
let cases: Vec<Box<dyn ScalarEncoding<f64>>> = vec![
|
||||
Box::new(RLE::<u16, f64, _>::new_from_iter_opt(
|
||||
data.iter()
|
||||
.cloned()
|
||||
.map(|x| x.map(|v| float_trimmer.encode(v))),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
Box::new(FixedNull::<UInt16Type, f64, _>::new(
|
||||
data.iter()
|
||||
.cloned()
|
||||
.map(|v| v.map(|v| float_trimmer.encode(v)))
|
||||
.collect(),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
];
|
||||
|
||||
for enc in cases {
|
||||
_row_ids_filter_float_trimmer_with_nulls(enc)
|
||||
}
|
||||
}
|
||||
|
||||
fn _row_ids_filter_float_trimmer_with_nulls(enc: Box<dyn ScalarEncoding<f64>>) {
|
||||
// [100.0, NULL, NULL, 200.0]
|
||||
let cases = vec![
|
||||
(100.0, Operator::Equal, vec![0]), // 100.0
|
||||
(100.0, Operator::NotEqual, vec![3]), // 200.0
|
||||
(100.0, Operator::LT, vec![]), //
|
||||
(100.0, Operator::LTE, vec![0]), // 100.0
|
||||
(100.0, Operator::GT, vec![3]), // 200.0
|
||||
(100.0, Operator::GTE, vec![0, 3]), // 100.0, 200.0
|
||||
(200.0, Operator::Equal, vec![3]), // 200.0
|
||||
(200.0, Operator::NotEqual, vec![0]), // 100.0
|
||||
(200.0, Operator::LT, vec![0]), // 100.0
|
||||
(200.0, Operator::LTE, vec![0, 3]), // 100.0, 200.0
|
||||
(200.0, Operator::GT, vec![]), //
|
||||
(200.0, Operator::GTE, vec![3]), // 200.0
|
||||
// Values not present in the column
|
||||
(99.0, Operator::Equal, vec![]), //
|
||||
(99.0, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0
|
||||
(99.0, Operator::LT, vec![]), //
|
||||
(99.0, Operator::LTE, vec![]), //
|
||||
(99.0, Operator::GT, vec![0, 3]), // 100.0, 200.0
|
||||
(99.0, Operator::GTE, vec![0, 3]), // 100.0, 200.0
|
||||
(200.4, Operator::Equal, vec![]), //
|
||||
(200.4, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0
|
||||
(200.4, Operator::LT, vec![0, 3]), // 100.0,200.0
|
||||
(200.4, Operator::LTE, vec![0, 3]), // 100.0, 200.0
|
||||
(200.4, Operator::GT, vec![]), //
|
||||
(200.4, Operator::GTE, vec![]), //
|
||||
(201.0, Operator::Equal, vec![]), //
|
||||
(201.0, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0
|
||||
(201.0, Operator::LT, vec![0, 3]), // 100.0, 200.0
|
||||
(201.0, Operator::LTE, vec![0, 3]), // 100.0, 200.0
|
||||
(201.0, Operator::GT, vec![]), //
|
||||
(201.0, Operator::GTE, vec![]), //
|
||||
(401.0, Operator::Equal, vec![]), //
|
||||
(401.0, Operator::NotEqual, vec![0, 3]), // 100.0, 200.0
|
||||
(401.0, Operator::LT, vec![0, 3]), // 100.0, 200.0
|
||||
(401.0, Operator::LTE, vec![0, 3]), // 100.0, 200.0
|
||||
(401.0, Operator::GT, vec![]), //
|
||||
(401.0, Operator::GTE, vec![]), //
|
||||
];
|
||||
|
||||
for (v, op, exp) in cases {
|
||||
let dst = enc.row_ids_filter(v, &op, RowIDs::new_vector());
|
||||
assert_eq!(
|
||||
dst.unwrap_vector(),
|
||||
&exp,
|
||||
"example '{} {:?}' failed for {:?}",
|
||||
op,
|
||||
v,
|
||||
enc.name()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_range_float_trimmer() {
|
||||
let data = vec![100.0, 200.0, 100.0, 300.0, 400.0];
|
||||
|
||||
let float_trimmer = FloatByteTrimmer {};
|
||||
let data_float_trimmed = data
|
||||
.iter()
|
||||
.cloned()
|
||||
.map::<u16, _>(|x| float_trimmer.encode(x))
|
||||
.collect::<Vec<u16>>();
|
||||
|
||||
let cases: Vec<Box<dyn ScalarEncoding<f64>>> = vec![
|
||||
Box::new(RLE::<u16, f64, _>::new_from_iter(
|
||||
data_float_trimmed.iter().cloned(),
|
||||
float_trimmer,
|
||||
)),
|
||||
Box::new(Fixed::<u16, f64, _>::new(
|
||||
data_float_trimmed.clone(),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
Box::new(FixedNull::<UInt16Type, f64, _>::new(
|
||||
PrimitiveArray::from(data_float_trimmed),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
];
|
||||
|
||||
for enc in cases {
|
||||
_row_ids_filter_range_float_trimmer(enc)
|
||||
}
|
||||
}
|
||||
|
||||
fn _row_ids_filter_range_float_trimmer(enc: Box<dyn ScalarEncoding<f64>>) {
|
||||
// [100.0, 200.0, 100.0, 300.0, 400.0]
|
||||
let cases = vec![
|
||||
((100.0, &Operator::LT), (99.0, &Operator::GT), vec![]), //
|
||||
((100.0, &Operator::LTE), (100.0, &Operator::GTE), vec![0, 2]), // 100.0, 100.0
|
||||
(
|
||||
(100.0, &Operator::GT),
|
||||
(400.0, &Operator::LTE),
|
||||
vec![1, 3, 4],
|
||||
), // 200.0, 300.0, 400.0
|
||||
(
|
||||
(100.0, &Operator::GTE),
|
||||
(401.0, &Operator::LTE),
|
||||
vec![0, 1, 2, 3, 4],
|
||||
), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
((200.0, &Operator::LT), (99.6, &Operator::GT), vec![0, 2]), // 100.0, 100.0
|
||||
((200.0, &Operator::GT), (401.2, &Operator::LTE), vec![3, 4]), // 300.0, 400.0
|
||||
(
|
||||
(200.0, &Operator::GTE),
|
||||
(400.9, &Operator::LT),
|
||||
vec![1, 3, 4],
|
||||
), // 200.0, 300.0, 400.0
|
||||
(
|
||||
(99.8, &Operator::GT),
|
||||
(500.87, &Operator::LT),
|
||||
vec![0, 1, 2, 3, 4],
|
||||
), // 100.0, 200.0, 100.0, 300.0, 400.0
|
||||
];
|
||||
|
||||
for (left, right, exp) in cases {
|
||||
let dst = enc.row_ids_filter_range(left, right, RowIDs::new_vector());
|
||||
assert_eq!(
|
||||
dst.unwrap_vector(),
|
||||
&exp,
|
||||
"example '{:?} {:?}' failed for {:?}",
|
||||
left,
|
||||
right,
|
||||
enc.name(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_filter_range_float_trimmer_with_nulls() {
|
||||
let data = vec![Some(100.0), None, None, Some(200.0), None];
|
||||
|
||||
let float_trimmer = FloatByteTrimmer {};
|
||||
|
||||
let cases: Vec<Box<dyn ScalarEncoding<f64>>> = vec![
|
||||
Box::new(RLE::<u16, f64, _>::new_from_iter_opt(
|
||||
data.iter()
|
||||
.cloned()
|
||||
.map(|x| x.map(|v| float_trimmer.encode(v))),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
Box::new(FixedNull::<UInt16Type, f64, _>::new(
|
||||
data.iter()
|
||||
.cloned()
|
||||
.map(|v| v.map(|v| float_trimmer.encode(v)))
|
||||
.collect(),
|
||||
FloatByteTrimmer {},
|
||||
)),
|
||||
];
|
||||
|
||||
for enc in cases {
|
||||
_row_ids_filter_range_float_trimmer_with_nulls(enc)
|
||||
}
|
||||
}
|
||||
|
||||
fn _row_ids_filter_range_float_trimmer_with_nulls(enc: Box<dyn ScalarEncoding<f64>>) {
|
||||
// [100.0, NULL, NULL, 200.0, NULL]
|
||||
let cases = vec![
|
||||
((100.0, &Operator::LT), (99.0, &Operator::GT), vec![]), //
|
||||
((100.0, &Operator::LTE), (100.0, &Operator::GTE), vec![0]), // 100.0
|
||||
((100.0, &Operator::GT), (400.0, &Operator::LTE), vec![3]), // 200.0
|
||||
((100.0, &Operator::GTE), (401.0, &Operator::LTE), vec![0, 3]), // 100.0, 200.0
|
||||
((200.0, &Operator::LT), (99.6, &Operator::GT), vec![0]), // 100.0
|
||||
((200.0, &Operator::GT), (401.2, &Operator::LTE), vec![]), //
|
||||
((99.8, &Operator::GT), (500.87, &Operator::LT), vec![0, 3]), // 100.0, 200.0
|
||||
];
|
||||
|
||||
for (left, right, exp) in cases {
|
||||
let dst = enc.row_ids_filter_range(left, right, RowIDs::new_vector());
|
||||
assert_eq!(
|
||||
dst.unwrap_vector(),
|
||||
&exp,
|
||||
"example '{:?} {:?}' failed for {:?}",
|
||||
left,
|
||||
right,
|
||||
enc.name(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
@ -21,7 +21,7 @@ use internal_types::freezable::Freezable;
|
|||
use iox_object_store::IoxObjectStore;
|
||||
use observability_deps::tracing::{error, info, warn};
|
||||
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
|
||||
use parquet_file::catalog::api::PreservedCatalog;
|
||||
use parquet_file::catalog::core::PreservedCatalog;
|
||||
use persistence_windows::checkpoint::ReplayPlan;
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
use std::{future::Future, sync::Arc, time::Duration};
|
||||
|
@ -63,7 +63,7 @@ pub enum Error {
|
|||
))]
|
||||
WipePreservedCatalog {
|
||||
db_name: String,
|
||||
source: Box<parquet_file::catalog::api::Error>,
|
||||
source: Box<parquet_file::catalog::core::Error>,
|
||||
},
|
||||
|
||||
#[snafu(display("failed to skip replay for database ({}): {}", db_name, source))]
server/src/db.rs
@ -32,8 +32,9 @@ use iox_object_store::IoxObjectStore;
|
|||
use mutable_buffer::chunk::{ChunkMetrics as MutableBufferChunkMetrics, MBChunk};
|
||||
use observability_deps::tracing::{debug, error, info};
|
||||
use parquet_file::catalog::{
|
||||
api::{CatalogParquetInfo, CheckpointData, PreservedCatalog},
|
||||
cleanup::{delete_files as delete_parquet_files, get_unreferenced_parquet_files},
|
||||
core::PreservedCatalog,
|
||||
interface::{CatalogParquetInfo, CheckpointData},
|
||||
prune::prune_history as prune_catalog_transaction_history,
|
||||
};
|
||||
use persistence_windows::{checkpoint::ReplayPlan, persistence_windows::PersistenceWindows};
|
||||
|
@ -518,7 +519,7 @@ impl Db {
|
|||
pub async fn delete(
|
||||
self: &Arc<Self>,
|
||||
table_name: &str,
|
||||
delete_predicate: &Predicate,
|
||||
delete_predicate: Arc<Predicate>,
|
||||
) -> Result<()> {
|
||||
// get all partitions of this table
|
||||
let table = self
|
||||
|
@ -533,7 +534,7 @@ impl Db {
|
|||
// save the delete predicate in the chunk
|
||||
let mut chunk = chunk.write();
|
||||
chunk
|
||||
.add_delete_predicate(delete_predicate)
|
||||
.add_delete_predicate(Arc::clone(&delete_predicate))
|
||||
.context(AddDeletePredicateError)?;
|
||||
}
|
||||
}
|
||||
|
@ -1331,7 +1332,7 @@ mod tests {
|
|||
assert_store_sequenced_entry_failures,
|
||||
db::{
|
||||
catalog::chunk::ChunkStage,
|
||||
test_helpers::{run_query, try_write_lp, write_lp},
|
||||
test_helpers::{run_query, try_write_lp, write_lp, write_lp_with_time},
|
||||
},
|
||||
utils::{make_db, TestDb},
|
||||
};
|
||||
|
@ -1875,9 +1876,8 @@ mod tests {
|
|||
let test_db = make_db().await;
|
||||
let db = Arc::new(test_db.db);
|
||||
|
||||
let time0 = Utc::now();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10").await;
|
||||
let time1 = Utc::now();
|
||||
let t_write1 = Utc::now();
|
||||
write_lp_with_time(db.as_ref(), "cpu bar=1 10", t_write1).await;
|
||||
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db
|
||||
|
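These test changes replace "capture `Utc::now()` before and after the write, then assert the recorded time falls in between" with "pass an explicit timestamp into the write and assert equality". A small sketch of the pattern, with a toy `Chunk` standing in for the real write path (`write_lp_with_time` itself is not reproduced here):

use chrono::{DateTime, TimeZone, Utc};

/// Toy record of a chunk's first/last write times.
#[derive(Debug, Default)]
struct Chunk {
    time_of_first_write: Option<DateTime<Utc>>,
    time_of_last_write: Option<DateTime<Utc>>,
}

impl Chunk {
    /// Record a write that happened at an explicit, caller-supplied time.
    fn write_at(&mut self, t: DateTime<Utc>) {
        self.time_of_first_write.get_or_insert(t);
        self.time_of_last_write = Some(t);
    }
}

fn main() {
    let t1 = Utc.timestamp_opt(1_000, 0).unwrap();
    let t2 = Utc.timestamp_opt(2_000, 0).unwrap();

    let mut chunk = Chunk::default();
    chunk.write_at(t1);
    chunk.write_at(t2);

    // Exact assertions instead of `before < t && t < after` range checks.
    assert_eq!(chunk.time_of_first_write, Some(t1));
    assert_eq!(chunk.time_of_last_write, Some(t2));
}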
@ -1892,14 +1892,12 @@ mod tests {
|
|||
|
||||
let first_old_rb_write = old_rb_chunk.time_of_first_write();
|
||||
let last_old_rb_write = old_rb_chunk.time_of_last_write();
|
||||
assert!(time0 < first_old_rb_write);
|
||||
assert_eq!(first_old_rb_write, last_old_rb_write);
|
||||
assert!(first_old_rb_write < time1);
|
||||
assert_eq!(first_old_rb_write, t_write1);
|
||||
|
||||
// Put new data into the mutable buffer
|
||||
let time2 = Utc::now();
|
||||
write_lp(db.as_ref(), "cpu bar=2 20").await;
|
||||
let time3 = Utc::now();
|
||||
let t_write2 = Utc::now();
|
||||
write_lp_with_time(db.as_ref(), "cpu bar=2 20", t_write2).await;
|
||||
|
||||
// now, compact it
|
||||
let compacted_rb_chunk = db.compact_partition("cpu", partition_key).await.unwrap();
|
||||
|
@ -1917,8 +1915,7 @@ mod tests {
|
|||
let last_compacted_write = compacted_rb_chunk.time_of_last_write();
|
||||
assert_eq!(first_old_rb_write, first_compacted_write);
|
||||
assert_ne!(last_old_rb_write, last_compacted_write);
|
||||
assert!(time2 < last_compacted_write);
|
||||
assert!(last_compacted_write < time3);
|
||||
assert_eq!(last_compacted_write, t_write2);
|
||||
|
||||
// data should be readable
|
||||
let expected = vec![
|
||||
|
@ -1935,7 +1932,7 @@ mod tests {
|
|||
|
||||
async fn collect_read_filter(chunk: &DbChunk) -> Vec<RecordBatch> {
|
||||
chunk
|
||||
.read_filter(&Default::default(), Selection::All)
|
||||
.read_filter(&Default::default(), Selection::All, &[])
|
||||
.unwrap()
|
||||
.collect::<Vec<_>>()
|
@ -2483,12 +2480,16 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn partition_chunk_summaries_timestamp() {
|
||||
let db = Arc::new(make_db().await.db);
|
||||
let start = Utc::now();
|
||||
write_lp(&db, "cpu bar=1 1").await;
|
||||
let after_first_write = Utc::now();
|
||||
write_lp(&db, "cpu bar=2 2").await;
|
||||
|
||||
let t_first_write = Utc::now();
|
||||
write_lp_with_time(&db, "cpu bar=1 1", t_first_write).await;
|
||||
|
||||
let t_second_write = Utc::now();
|
||||
write_lp_with_time(&db, "cpu bar=2 2", t_second_write).await;
|
||||
|
||||
let t_close_before = Utc::now();
|
||||
db.rollover_partition("cpu", "1970-01-01T00").await.unwrap();
|
||||
let after_close = Utc::now();
|
||||
let t_close_after = Utc::now();
|
||||
|
||||
let mut chunk_summaries = db.chunk_summaries().unwrap();
|
||||
|
||||
|
@ -2496,59 +2497,18 @@ mod tests {
|
|||
|
||||
let summary = &chunk_summaries[0];
|
||||
assert_eq!(summary.id, 0, "summary; {:#?}", summary);
|
||||
assert!(
|
||||
summary.time_of_first_write > start,
|
||||
"summary; {:#?}",
|
||||
summary
|
||||
);
|
||||
assert!(
|
||||
summary.time_of_first_write < after_close,
|
||||
"summary; {:#?}",
|
||||
summary
|
||||
);
|
||||
|
||||
assert!(
|
||||
summary.time_of_last_write > after_first_write,
|
||||
"summary; {:#?}",
|
||||
summary
|
||||
);
|
||||
assert!(
|
||||
summary.time_of_last_write < after_close,
|
||||
"summary; {:#?}",
|
||||
summary
|
||||
);
|
||||
|
||||
assert!(
|
||||
summary.time_closed.unwrap() > after_first_write,
|
||||
"summary; {:#?}",
|
||||
summary
|
||||
);
|
||||
assert!(
|
||||
summary.time_closed.unwrap() < after_close,
|
||||
"summary; {:#?}",
|
||||
summary
|
||||
);
|
||||
assert_eq!(summary.time_of_first_write, t_first_write);
|
||||
assert_eq!(summary.time_of_last_write, t_second_write);
|
||||
assert!(t_close_before <= summary.time_closed.unwrap());
|
||||
assert!(summary.time_closed.unwrap() <= t_close_after);
|
||||
}
|
||||
|
||||
fn assert_first_last_times_eq(chunk_summary: &ChunkSummary) {
|
||||
fn assert_first_last_times_eq(chunk_summary: &ChunkSummary, expected: DateTime<Utc>) {
|
||||
let first_write = chunk_summary.time_of_first_write;
|
||||
let last_write = chunk_summary.time_of_last_write;
|
||||
|
||||
assert_eq!(first_write, last_write);
|
||||
}
|
||||
|
||||
fn assert_first_last_times_between(
|
||||
chunk_summary: &ChunkSummary,
|
||||
before: DateTime<Utc>,
|
||||
after: DateTime<Utc>,
|
||||
) {
|
||||
let first_write = chunk_summary.time_of_first_write;
|
||||
let last_write = chunk_summary.time_of_last_write;
|
||||
|
||||
assert!(before < first_write);
|
||||
assert!(before < last_write);
|
||||
assert!(first_write < after);
|
||||
assert!(last_write < after);
|
||||
assert_eq!(first_write, expected);
|
||||
}
|
||||
|
||||
fn assert_chunks_times_ordered(before: &ChunkSummary, after: &ChunkSummary) {
|
||||
|
@ -2581,21 +2541,17 @@ mod tests {
|
|||
let db = make_db().await.db;
|
||||
|
||||
// get three chunks: one open, one closed in mb and one closed in rb
|
||||
// TIME 0 ---------------------------------------------------------------------------------
|
||||
let time0 = Utc::now();
|
||||
// In open chunk, will end up in rb/os
|
||||
write_lp(&db, "cpu bar=1 1").await;
|
||||
// TIME 1 ---------------------------------------------------------------------------------
|
||||
let time1 = Utc::now();
|
||||
let t_write1 = Utc::now();
|
||||
write_lp_with_time(&db, "cpu bar=1 1", t_write1).await;
|
||||
|
||||
// Move open chunk to closed
|
||||
db.rollover_partition("cpu", "1970-01-01T00").await.unwrap();
|
||||
// TIME 2 ---------------------------------------------------------------------------------
|
||||
let time2 = Utc::now();
|
||||
|
||||
// New open chunk in mb
|
||||
// This point will end up in rb/os
|
||||
write_lp(&db, "cpu bar=1,baz=2 2").await;
|
||||
// TIME 3 ---------------------------------------------------------------------------------
|
||||
let time3 = Utc::now();
|
||||
let t_write2 = Utc::now();
|
||||
write_lp_with_time(&db, "cpu bar=1,baz=2 2", t_write2).await;
|
||||
|
||||
// Check first/last write times on the chunks at this point
|
||||
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
|
@ -2604,19 +2560,15 @@ mod tests {
|
|||
// Each chunk has one write, so both chunks should have first write == last write
|
||||
let closed_mb_t3 = chunk_summaries[0].clone();
|
||||
assert_eq!(closed_mb_t3.storage, ChunkStorage::ClosedMutableBuffer);
|
||||
assert_first_last_times_eq(&closed_mb_t3);
|
||||
assert_first_last_times_between(&closed_mb_t3, time0, time1);
|
||||
assert_first_last_times_eq(&closed_mb_t3, t_write1);
|
||||
let open_mb_t3 = chunk_summaries[1].clone();
|
||||
assert_eq!(open_mb_t3.storage, ChunkStorage::OpenMutableBuffer);
|
||||
assert_first_last_times_eq(&open_mb_t3);
|
||||
assert_first_last_times_between(&open_mb_t3, time2, time3);
|
||||
assert_first_last_times_eq(&open_mb_t3, t_write2);
|
||||
assert_chunks_times_ordered(&closed_mb_t3, &open_mb_t3);
|
||||
|
||||
// This point makes a new open mb chunk and will end up in the closed mb chunk
|
||||
write_lp(&db, "cpu bar=1,baz=2,frob=3 400000000000000").await;
|
||||
// TIME 4 ---------------------------------------------------------------------------------
|
||||
// we don't need to check this value with anything because no timestamps
|
||||
// should be between time3 and time4
|
||||
let t_write3 = Utc::now();
|
||||
write_lp_with_time(&db, "cpu bar=1,baz=2,frob=3 400000000000000", t_write3).await;
|
||||
|
||||
// Check first/last write times on the chunks at this point
|
||||
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
|
@ -2639,9 +2591,6 @@ mod tests {
|
|||
db.move_chunk_to_read_buffer("cpu", "1970-01-01T00", 0)
|
||||
.await
|
||||
.unwrap();
|
||||
// TIME 5 ---------------------------------------------------------------------------------
|
||||
// we don't need to check this value with anything because no timestamps
|
||||
// should be between time4 and time5
|
||||
|
||||
// Check first/last write times on the chunks at this point
|
||||
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
|
@ -2668,9 +2617,6 @@ mod tests {
|
|||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// TIME 6 ---------------------------------------------------------------------------------
|
||||
// we don't need to check this value with anything because no timestamps
|
||||
// should be between time5 and time6
|
||||
|
||||
// Check first/last write times on the chunks at this point
|
||||
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
|
@ -2692,8 +2638,6 @@ mod tests {
|
|||
|
||||
// Move open chunk to closed
|
||||
db.rollover_partition("cpu", "1970-01-05T15").await.unwrap();
|
||||
// TIME 7 ---------------------------------------------------------------------------------
|
||||
let time7 = Utc::now();
|
||||
|
||||
// Check first/last write times on the chunks at this point
|
||||
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
|
@ -2710,9 +2654,8 @@ mod tests {
|
|||
|
||||
// New open chunk in mb
|
||||
// This point will stay in this open mb chunk
|
||||
write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000").await;
|
||||
// TIME 8 ---------------------------------------------------------------------------------
|
||||
let time8 = Utc::now();
|
||||
let t_write4 = Utc::now();
|
||||
write_lp_with_time(&db, "cpu bar=1,baz=3,blargh=3 400000000000000", t_write4).await;
|
||||
|
||||
// Check first/last write times on the chunks at this point
|
||||
let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
|
@ -2730,8 +2673,7 @@ mod tests {
|
|||
// times should be the same
|
||||
let open_mb_t8 = chunk_summaries[2].clone();
|
||||
assert_eq!(open_mb_t8.storage, ChunkStorage::OpenMutableBuffer);
|
||||
assert_first_last_times_eq(&open_mb_t8);
|
||||
assert_first_last_times_between(&open_mb_t8, time7, time8);
|
||||
assert_first_last_times_eq(&open_mb_t8, t_write4);
|
||||
|
||||
let lifecycle_action = None;
@ -16,7 +16,7 @@ use internal_types::{access::AccessRecorder, schema::Schema};
|
|||
use mutable_buffer::chunk::{snapshot::ChunkSnapshot as MBChunkSnapshot, MBChunk};
|
||||
use observability_deps::tracing::debug;
|
||||
use parquet_file::chunk::ParquetChunk;
|
||||
use predicate::predicate::{Predicate, PredicateBuilder};
|
||||
use predicate::predicate::Predicate;
|
||||
use read_buffer::RBChunk;
|
||||
use tracker::{TaskRegistration, TaskTracker};
|
||||
|
||||
|
@ -80,7 +80,7 @@ pub struct ChunkMetadata {
|
|||
pub schema: Arc<Schema>,
|
||||
|
||||
/// Delete predicates of this chunk
|
||||
pub delete_predicates: Arc<Vec<Predicate>>,
|
||||
pub delete_predicates: Vec<Arc<Predicate>>,
|
||||
}
|
||||
|
||||
/// Different memory representations of a frozen chunk.
|
||||
|
@ -307,14 +307,14 @@ impl CatalogChunk {
|
|||
time_of_last_write: DateTime<Utc>,
|
||||
schema: Arc<Schema>,
|
||||
metrics: ChunkMetrics,
|
||||
delete_predicates: Arc<Vec<Predicate>>,
|
||||
delete_predicates: Vec<Arc<Predicate>>,
|
||||
order: ChunkOrder,
|
||||
) -> Self {
|
||||
let stage = ChunkStage::Frozen {
|
||||
meta: Arc::new(ChunkMetadata {
|
||||
table_summary: Arc::new(chunk.table_summary()),
|
||||
schema,
|
||||
delete_predicates: Arc::clone(&delete_predicates),
|
||||
delete_predicates,
|
||||
}),
|
||||
representation: ChunkStageFrozenRepr::ReadBuffer(Arc::new(chunk)),
|
||||
};
|
||||
|
@ -342,7 +342,7 @@ impl CatalogChunk {
|
|||
time_of_first_write: DateTime<Utc>,
|
||||
time_of_last_write: DateTime<Utc>,
|
||||
metrics: ChunkMetrics,
|
||||
delete_predicates: Arc<Vec<Predicate>>,
|
||||
delete_predicates: Vec<Arc<Predicate>>,
|
||||
order: ChunkOrder,
|
||||
) -> Self {
|
||||
assert_eq!(chunk.table_name(), addr.table_name.as_ref());
|
||||
|
@ -469,30 +469,24 @@ impl CatalogChunk {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn add_delete_predicate(&mut self, delete_predicate: &Predicate) -> Result<()> {
|
||||
pub fn add_delete_predicate(&mut self, delete_predicate: Arc<Predicate>) -> Result<()> {
|
||||
debug!(
|
||||
?delete_predicate,
|
||||
"Input delete predicate to CatalogChunk add_delete_predicate"
|
||||
);
|
||||
match &mut self.stage {
|
||||
ChunkStage::Open { mb_chunk: _ } => {
|
||||
// Freeze/close this chunk and add delete_predicate to its frozen one
|
||||
self.freeze_with_predicate(delete_predicate)?;
|
||||
}
|
||||
ChunkStage::Frozen { meta, .. } => {
|
||||
ChunkStage::Frozen { meta, .. } | ChunkStage::Persisted { meta, .. } => {
|
||||
// Add the delete_predicate into the chunk's metadata
|
||||
let mut del_preds: Vec<Predicate> = (*meta.delete_predicates).clone();
|
||||
del_preds.push(delete_predicate.clone());
|
||||
let mut del_preds = meta.delete_predicates.clone();
|
||||
del_preds.push(delete_predicate);
|
||||
*meta = Arc::new(ChunkMetadata {
|
||||
table_summary: Arc::clone(&meta.table_summary),
|
||||
schema: Arc::clone(&meta.schema),
|
||||
delete_predicates: Arc::new(del_preds),
|
||||
});
|
||||
}
|
||||
ChunkStage::Persisted { meta, .. } => {
|
||||
// Add the delete_predicate into the chunk's metadata
|
||||
let mut del_preds: Vec<Predicate> = (*meta.delete_predicates).clone();
|
||||
del_preds.push(delete_predicate.clone());
|
||||
*meta = Arc::new(ChunkMetadata {
|
||||
table_summary: Arc::clone(&meta.table_summary),
|
||||
schema: Arc::clone(&meta.schema),
|
||||
delete_predicates: Arc::new(del_preds),
|
||||
delete_predicates: del_preds,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
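Moving from `Arc<Vec<Predicate>>` to `Vec<Arc<Predicate>>` gives each chunk its own growable list while the predicates themselves stay shared: appending one more predicate clones pointers, not predicates. A minimal illustration with a stand-in `Predicate` type:

use std::sync::Arc;

/// Stand-in for the real delete predicate type.
#[derive(Debug, PartialEq)]
struct Predicate {
    expr: String,
}

#[derive(Debug, Default)]
struct ChunkMeta {
    delete_predicates: Vec<Arc<Predicate>>,
}

impl ChunkMeta {
    fn add_delete_predicate(&mut self, pred: Arc<Predicate>) {
        // Only the Arc (a pointer plus a refcount bump) is stored per chunk.
        self.delete_predicates.push(pred);
    }
}

fn main() {
    let pred = Arc::new(Predicate { expr: "cost != 15".to_string() });

    let mut frozen = ChunkMeta::default();
    let mut persisted = ChunkMeta::default();

    // The same predicate instance is shared by both chunks.
    frozen.add_delete_predicate(Arc::clone(&pred));
    persisted.add_delete_predicate(Arc::clone(&pred));

    assert_eq!(Arc::strong_count(&pred), 3); // `pred` plus the two chunks
    assert!(Arc::ptr_eq(&frozen.delete_predicates[0], &persisted.delete_predicates[0]));
}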
@ -500,14 +494,23 @@ impl CatalogChunk {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn delete_predicates(&mut self) -> Arc<Vec<Predicate>> {
|
||||
pub fn delete_predicates(&mut self) -> &[Arc<Predicate>] {
|
||||
match &self.stage {
|
||||
ChunkStage::Open { mb_chunk: _ } => {
|
||||
// no delete predicate for open chunk
|
||||
Arc::new(vec![])
|
||||
debug!("delete_predicates of Open chunk is empty");
|
||||
&[]
|
||||
}
|
||||
ChunkStage::Frozen { meta, .. } => {
|
||||
let preds = &meta.delete_predicates;
|
||||
debug!(?preds, "delete_predicates of Frozen chunk");
|
||||
preds
|
||||
}
|
||||
ChunkStage::Persisted { meta, .. } => {
|
||||
let preds = &meta.delete_predicates;
|
||||
debug!(?preds, "delete_predicates of Persisted chunk");
|
||||
preds
|
||||
}
|
||||
ChunkStage::Frozen { meta, .. } => Arc::clone(&meta.delete_predicates),
|
||||
ChunkStage::Persisted { meta, .. } => Arc::clone(&meta.delete_predicates),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -679,7 +682,14 @@ impl CatalogChunk {
|
|||
///
|
||||
/// This only works for chunks in the _open_ stage (chunk is converted) and the _frozen_ stage
|
||||
/// (no-op) and will fail for other stages.
|
||||
pub fn freeze_with_predicate(&mut self, delete_predicate: &Predicate) -> Result<()> {
|
||||
pub fn freeze_with_predicate(&mut self, delete_predicate: Arc<Predicate>) -> Result<()> {
|
||||
self.freeze_with_delete_predicates(vec![delete_predicate])
|
||||
}
|
||||
|
||||
fn freeze_with_delete_predicates(
|
||||
&mut self,
|
||||
delete_predicates: Vec<Arc<Predicate>>,
|
||||
) -> Result<()> {
|
||||
match &self.stage {
|
||||
ChunkStage::Open { mb_chunk, .. } => {
|
||||
debug!(%self.addr, row_count=mb_chunk.rows(), "freezing chunk");
|
||||
|
@ -692,7 +702,7 @@ impl CatalogChunk {
|
|||
let metadata = ChunkMetadata {
|
||||
table_summary: Arc::new(mb_chunk.table_summary()),
|
||||
schema: s.full_schema(),
|
||||
delete_predicates: Arc::new(vec![delete_predicate.clone()]),
|
||||
delete_predicates,
|
||||
};
|
||||
|
||||
self.stage = ChunkStage::Frozen {
|
||||
|
@ -714,7 +724,7 @@ impl CatalogChunk {
|
|||
}
|
||||
|
||||
pub fn freeze(&mut self) -> Result<()> {
|
||||
self.freeze_with_predicate(&PredicateBuilder::default().build())
|
||||
self.freeze_with_delete_predicates(vec![])
|
||||
}
|
||||
|
||||
/// Set the chunk to the Moving state, returning a handle to the underlying storage
|
||||
|
@ -776,7 +786,7 @@ impl CatalogChunk {
|
|||
*meta = Arc::new(ChunkMetadata {
|
||||
table_summary: Arc::clone(&meta.table_summary),
|
||||
schema,
|
||||
delete_predicates: Arc::clone(&meta.delete_predicates),
|
||||
delete_predicates: meta.delete_predicates.clone(),
|
||||
});
|
||||
|
||||
match &representation {
|
||||
|
@ -1151,7 +1161,7 @@ mod tests {
|
|||
expected_exprs1.push(e);
|
||||
|
||||
// Add a delete predicate into a chunk the open chunk = delete simulation for open chunk
|
||||
chunk.add_delete_predicate(&del_pred1).unwrap();
|
||||
chunk.add_delete_predicate(Arc::new(del_pred1)).unwrap();
|
||||
// chunk must be in frozen stage now
|
||||
assert_eq!(chunk.stage().name(), "Frozen");
|
||||
// chunk must have a delete predicate
|
||||
|
@ -1182,7 +1192,7 @@ mod tests {
|
|||
let mut expected_exprs2 = vec![];
|
||||
let e = col("cost").not_eq(lit(15));
|
||||
expected_exprs2.push(e);
|
||||
chunk.add_delete_predicate(&del_pred2).unwrap();
|
||||
chunk.add_delete_predicate(Arc::new(del_pred2)).unwrap();
|
||||
// chunk still must be in frozen stage now
|
||||
assert_eq!(chunk.stage().name(), "Frozen");
|
||||
// chunk must have 2 delete predicates
|
||||
|
@ -1248,7 +1258,7 @@ mod tests {
|
|||
now,
|
||||
now,
|
||||
ChunkMetrics::new_unregistered(),
|
||||
Arc::new(vec![] as Vec<Predicate>),
|
||||
vec![],
|
||||
ChunkOrder::new(6),
|
||||
)
|
||||
}
@ -176,7 +176,7 @@ impl Partition {
|
|||
time_of_first_write: DateTime<Utc>,
|
||||
time_of_last_write: DateTime<Utc>,
|
||||
schema: Arc<Schema>,
|
||||
delete_predicates: Arc<Vec<Predicate>>,
|
||||
delete_predicates: Vec<Arc<Predicate>>,
|
||||
chunk_order: ChunkOrder,
|
||||
) -> (u32, Arc<RwLock<CatalogChunk>>) {
|
||||
let chunk_id = Self::pick_next(&mut self.next_chunk_id, "Chunk ID Overflow");
|
||||
|
@ -231,7 +231,7 @@ impl Partition {
|
|||
chunk: Arc<parquet_file::chunk::ParquetChunk>,
|
||||
time_of_first_write: DateTime<Utc>,
|
||||
time_of_last_write: DateTime<Utc>,
|
||||
delete_predicates: Arc<Vec<Predicate>>,
|
||||
delete_predicates: Vec<Arc<Predicate>>,
|
||||
chunk_order: ChunkOrder,
|
||||
) -> Arc<RwLock<CatalogChunk>> {
|
||||
assert_eq!(chunk.table_name(), self.table_name());
|
||||
|
@ -246,7 +246,7 @@ impl Partition {
|
|||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
self.metrics.new_chunk_metrics(),
|
||||
Arc::clone(&delete_predicates),
|
||||
delete_predicates,
|
||||
chunk_order,
|
||||
)),
|
||||
);
@ -121,7 +121,7 @@ impl DbChunk {
|
|||
let meta = ChunkMetadata {
|
||||
table_summary: Arc::new(mb_chunk.table_summary()),
|
||||
schema: snapshot.full_schema(),
|
||||
delete_predicates: Arc::new(vec![]), //todo: consider to use the one of the given chunk if appropriate
|
||||
delete_predicates: vec![], // open chunk does not have delete predicate
|
||||
};
|
||||
(state, Arc::new(meta))
|
||||
}
|
||||
|
@ -224,6 +224,19 @@ impl DbChunk {
|
|||
pub fn time_of_last_write(&self) -> DateTime<Utc> {
|
||||
self.time_of_last_write
|
||||
}
|
||||
|
||||
pub fn to_rub_negated_predicates(
|
||||
delete_predicates: &[Arc<Predicate>],
|
||||
) -> Result<Vec<read_buffer::Predicate>> {
|
||||
let mut rub_preds: Vec<read_buffer::Predicate> = vec![];
|
||||
for pred in delete_predicates {
|
||||
let rub_pred = to_read_buffer_predicate(pred).context(PredicateConversion)?;
|
||||
rub_preds.push(rub_pred);
|
||||
}
|
||||
|
||||
debug!(?rub_preds, "RUB delete predicates");
|
||||
Ok(rub_preds)
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryChunk for DbChunk {
|
||||
|
@ -314,11 +327,11 @@ impl QueryChunk for DbChunk {
|
|||
Ok(pred_result)
|
||||
}
|
||||
|
||||
// NGA todo: add delete predicate here to eliminate data at query time
|
||||
fn read_filter(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
delete_predicates: &[Arc<Predicate>],
|
||||
) -> Result<SendableRecordBatchStream, Self::Error> {
|
||||
// Predicate is not required to be applied for correctness. We only pushed it down
|
||||
// when possible for performance gain
|
||||
|
@ -326,6 +339,24 @@ impl QueryChunk for DbChunk {
|
|||
debug!(?predicate, "Input Predicate to read_filter");
|
||||
self.access_recorder.record_access_now();
|
||||
|
||||
debug!(?delete_predicates, "Input Delete Predicates to read_filter");
|
||||
|
||||
// add negated deleted ranges to the predicate
|
||||
let mut pred_with_deleted_ranges = predicate.clone();
|
||||
pred_with_deleted_ranges.add_delete_ranges(delete_predicates);
|
||||
debug!(
|
||||
?pred_with_deleted_ranges,
|
||||
"Input Predicate plus deleted ranges"
|
||||
);
|
||||
|
||||
// add negated deleted predicates
|
||||
let mut pred_wth_deleted_exprs = pred_with_deleted_ranges.clone();
|
||||
pred_wth_deleted_exprs.add_delete_exprs(delete_predicates);
|
||||
debug!(
|
||||
?pred_wth_deleted_exprs,
|
||||
"Input Predicate plus deleted ranges and deleted predicates"
|
||||
);
|
||||
|
||||
match &self.state {
|
||||
State::MutableBuffer { chunk, .. } => {
|
||||
let batch = chunk.read_filter(selection).context(MutableBufferChunk)?;
|
||||
|
@ -339,12 +370,16 @@ impl QueryChunk for DbChunk {
|
|||
Ok(predicate) => predicate,
|
||||
Err(_) => read_buffer::Predicate::default(),
|
||||
};
|
||||
|
||||
debug!(?rb_predicate, "Predicate pushed down to RUB");
|
||||
|
||||
// TODO: add collection of delete predicates associated with
|
||||
// this chunk.
|
||||
let read_results = chunk.read_filter(rb_predicate, selection, vec![]);
|
||||
// combine all delete expressions to RUB's negated ones
|
||||
let negated_delete_exprs = Self::to_rub_negated_predicates(delete_predicates)?;
|
||||
debug!(
|
||||
?negated_delete_exprs,
|
||||
"Negated Predicate pushed down to RUB"
|
||||
);
|
||||
|
||||
let read_results = chunk.read_filter(rb_predicate, selection, negated_delete_exprs);
|
||||
let schema =
|
||||
chunk
|
||||
.read_filter_table_schema(selection)
|
||||
|
@ -357,13 +392,11 @@ impl QueryChunk for DbChunk {
|
|||
schema.into(),
|
||||
)))
|
||||
}
|
||||
State::ParquetFile { chunk, .. } => {
|
||||
chunk
|
||||
.read_filter(predicate, selection)
|
||||
.context(ParquetFileChunkError {
|
||||
chunk_id: self.id(),
|
||||
})
|
||||
}
|
||||
State::ParquetFile { chunk, .. } => chunk
|
||||
.read_filter(&pred_wth_deleted_exprs, selection)
|
||||
.context(ParquetFileChunkError {
|
||||
chunk_id: self.id(),
|
||||
}),
|
||||
}
|
||||
}
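Conceptually, `read_filter` now answers "rows matching the query predicate AND NOT matching any delete predicate". A rough sketch of that combination over an in-memory batch, using closures in place of the crate's `Predicate` type (purely illustrative):

/// A row predicate over (tag, value) pairs.
type RowPred = Box<dyn Fn(&str, f64) -> bool>;

/// Keep rows that satisfy the query predicate and none of the delete predicates.
fn read_filter(
    rows: &[(&'static str, f64)],
    query: &RowPred,
    deletes: &[RowPred],
) -> Vec<(&'static str, f64)> {
    rows.iter()
        .copied()
        .filter(|&(tag, val)| query(tag, val) && !deletes.iter().any(|d| d(tag, val)))
        .collect()
}

fn main() {
    let rows = [("cpu", 1.0), ("cpu", 2.0), ("mem", 2.0)];

    let query: RowPred = Box::new(|_tag, val| val >= 1.0);
    // a delete predicate: remove cpu rows written with value 2.0
    let deletes: Vec<RowPred> = vec![Box::new(|tag, val| tag == "cpu" && val == 2.0)];

    assert_eq!(read_filter(&rows, &query, &deletes), vec![("cpu", 1.0), ("mem", 2.0)]);
}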
@ -503,8 +536,11 @@ impl QueryChunkMeta for DbChunk {
|
|||
}
|
||||
|
||||
// return a reference to delete predicates of the chunk
|
||||
fn delete_predicates(&self) -> Arc<Vec<Predicate>> {
|
||||
Arc::clone(&self.meta.delete_predicates)
|
||||
fn delete_predicates(&self) -> &[Arc<Predicate>] {
|
||||
let pred = &self.meta.delete_predicates;
|
||||
debug!(?pred, "Delete predicate in DbChunk");
|
||||
|
||||
pred
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -514,7 +550,7 @@ mod tests {
|
|||
use crate::{
|
||||
db::{
|
||||
catalog::chunk::{CatalogChunk, ChunkStage},
|
||||
test_helpers::write_lp,
|
||||
test_helpers::{write_lp, write_lp_with_time},
|
||||
},
|
||||
utils::make_db,
|
||||
};
|
||||
|
@ -527,7 +563,7 @@ mod tests {
|
|||
let t2 = chunk.access_recorder().get_metrics();
|
||||
|
||||
snapshot
|
||||
.read_filter(&Default::default(), Selection::All)
|
||||
.read_filter(&Default::default(), Selection::All, &[])
|
||||
.unwrap();
|
||||
let t3 = chunk.access_recorder().get_metrics();
|
||||
|
||||
|
@ -608,9 +644,8 @@ mod tests {
|
|||
async fn parquet_records_access() {
|
||||
let db = make_db().await.db;
|
||||
|
||||
let before_creation = Utc::now();
|
||||
write_lp(&db, "cpu,tag=1 bar=1 1").await;
|
||||
let after_creation = Utc::now();
|
||||
let creation_time = Utc::now();
|
||||
write_lp_with_time(&db, "cpu,tag=1 bar=1 1", creation_time).await;
|
||||
|
||||
let id = db
|
||||
.persist_partition(
|
||||
|
@ -632,8 +667,7 @@ mod tests {
|
|||
let first_write = chunk.time_of_first_write();
|
||||
let last_write = chunk.time_of_last_write();
|
||||
assert_eq!(first_write, last_write);
|
||||
assert!(before_creation < first_write);
|
||||
assert!(last_write < after_creation);
|
||||
assert_eq!(first_write, creation_time);
|
||||
|
||||
test_chunk_access(&chunk).await
|
||||
}
@ -45,7 +45,7 @@ pub(crate) fn compact_chunks(
|
|||
let mut input_rows = 0;
|
||||
let mut time_of_first_write: Option<DateTime<Utc>> = None;
|
||||
let mut time_of_last_write: Option<DateTime<Utc>> = None;
|
||||
let mut delete_predicates: Vec<Predicate> = vec![];
|
||||
let mut delete_predicates: Vec<Arc<Predicate>> = vec![];
|
||||
let mut min_order = ChunkOrder::MAX;
|
||||
let query_chunks = chunks
|
||||
.into_iter()
|
||||
|
@ -66,8 +66,7 @@ pub(crate) fn compact_chunks(
|
|||
.map(|prev_last| prev_last.max(candidate_last))
|
||||
.or(Some(candidate_last));
|
||||
|
||||
let mut preds = (*chunk.delete_predicates()).clone();
|
||||
delete_predicates.append(&mut preds);
|
||||
delete_predicates.extend(chunk.delete_predicates().iter().cloned());
|
||||
|
||||
min_order = min_order.min(chunk.order());
|
||||
|
||||
|
@ -103,7 +102,7 @@ pub(crate) fn compact_chunks(
|
|||
ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?;
|
||||
|
||||
let physical_plan = ctx.prepare_plan(&plan)?;
|
||||
let stream = ctx.execute(physical_plan).await?;
|
||||
let stream = ctx.execute_stream(physical_plan).await?;
|
||||
let rb_chunk = collect_rub(stream, &addr, metric_registry.as_ref())
|
||||
.await?
|
||||
.expect("chunk has zero rows");
|
||||
|
@ -119,7 +118,7 @@ pub(crate) fn compact_chunks(
|
|||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
schema,
|
||||
Arc::new(delete_predicates),
|
||||
delete_predicates,
|
||||
min_order,
|
||||
)
|
||||
};
|
||||
|
@ -148,7 +147,7 @@ pub(crate) fn compact_chunks(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{db::test_helpers::write_lp, utils::make_db};
|
||||
use crate::{db::test_helpers::write_lp_with_time, utils::make_db};
|
||||
use data_types::chunk_metadata::ChunkStorage;
|
||||
use lifecycle::{LockableChunk, LockablePartition};
|
||||
use query::QueryDatabase;
|
||||
|
@ -158,15 +157,13 @@ mod tests {
|
|||
let test_db = make_db().await;
|
||||
let db = test_db.db;
|
||||
|
||||
let time0 = Utc::now();
|
||||
|
||||
write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await;
|
||||
write_lp(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20").await;
|
||||
write_lp(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10").await;
|
||||
write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20").await;
|
||||
write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10").await;
|
||||
|
||||
let time1 = Utc::now();
|
||||
let t_first_write = Utc::now();
|
||||
write_lp_with_time(db.as_ref(), "cpu,tag1=cupcakes bar=1 10", t_first_write).await;
|
||||
write_lp_with_time(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20", Utc::now()).await;
|
||||
write_lp_with_time(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10", Utc::now()).await;
|
||||
write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20", Utc::now()).await;
|
||||
let t_last_write = Utc::now();
|
||||
write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10", t_last_write).await;
|
||||
|
||||
let partition_keys = db.partition_keys().unwrap();
|
||||
assert_eq!(partition_keys.len(), 1);
|
||||
|
@ -182,9 +179,8 @@ mod tests {
|
|||
|
||||
let (_, fut) = compact_chunks(partition.upgrade(), vec![chunk.upgrade()]).unwrap();
|
||||
// NB: perform the write before spawning the background task that performs the compaction
|
||||
let time2 = Utc::now();
|
||||
write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40").await;
|
||||
let time3 = Utc::now();
|
||||
let t_later_write = Utc::now();
|
||||
write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40", t_later_write).await;
|
||||
tokio::spawn(fut).await.unwrap().unwrap().unwrap();
|
||||
|
||||
let mut chunk_summaries: Vec<_> = db_partition.read().chunk_summaries().collect();
|
||||
|
@ -194,16 +190,14 @@ mod tests {
|
|||
let mub_summary = &chunk_summaries[0];
|
||||
let first_mub_write = mub_summary.time_of_first_write;
|
||||
let last_mub_write = mub_summary.time_of_last_write;
|
||||
assert!(time2 < first_mub_write);
|
||||
assert_eq!(first_mub_write, last_mub_write);
|
||||
assert!(first_mub_write < time3);
|
||||
assert_eq!(first_mub_write, t_later_write);
|
||||
|
||||
let rub_summary = &chunk_summaries[1];
|
||||
let first_rub_write = rub_summary.time_of_first_write;
|
||||
let last_rub_write = rub_summary.time_of_last_write;
|
||||
assert!(time0 < first_rub_write);
|
||||
assert!(first_rub_write < last_rub_write);
|
||||
assert!(last_rub_write < time1);
|
||||
assert_eq!(first_rub_write, t_first_write);
|
||||
assert_eq!(last_rub_write, t_last_write);
|
||||
|
||||
let summaries: Vec<_> = chunk_summaries
|
||||
.iter()
|
||||
|
|
|
@ -49,7 +49,7 @@ pub enum Error {
|
|||
|
||||
#[snafu(display("Error while commiting transaction on preserved catalog: {}", source))]
|
||||
CommitError {
|
||||
source: parquet_file::catalog::api::Error,
|
||||
source: parquet_file::catalog::core::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Cannot write chunk: {}", addr))]
|
||||
|
|
|
@ -54,7 +54,7 @@ pub fn move_chunk_to_read_buffer(
|
|||
ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?;
|
||||
|
||||
let physical_plan = ctx.prepare_plan(&plan)?;
|
||||
let stream = ctx.execute(physical_plan).await?;
|
||||
let stream = ctx.execute_stream(physical_plan).await?;
|
||||
let rb_chunk = collect_rub(
|
||||
stream,
|
||||
&addr.clone().into_partition(),
|
||||
|
|
|
@ -52,7 +52,7 @@ pub fn persist_chunks(
|
|||
let mut time_of_first_write: Option<DateTime<Utc>> = None;
|
||||
let mut time_of_last_write: Option<DateTime<Utc>> = None;
|
||||
let mut query_chunks = vec![];
|
||||
let mut delete_predicates: Vec<Predicate> = vec![];
|
||||
let mut delete_predicates: Vec<Arc<Predicate>> = vec![];
|
||||
let mut min_order = ChunkOrder::MAX;
|
||||
for mut chunk in chunks {
|
||||
// Sanity-check
|
||||
|
@ -72,8 +72,7 @@ pub fn persist_chunks(
|
|||
.map(|prev_last| prev_last.max(candidate_last))
|
||||
.or(Some(candidate_last));
|
||||
|
||||
let mut preds = (*chunk.delete_predicates()).clone();
|
||||
delete_predicates.append(&mut preds);
|
||||
delete_predicates.extend(chunk.delete_predicates().iter().cloned());
|
||||
|
||||
min_order = min_order.min(chunk.order());
|
||||
|
||||
|
@ -112,8 +111,10 @@ pub fn persist_chunks(
|
|||
"Expected split plan to produce exactly 2 partitions"
|
||||
);
|
||||
|
||||
let to_persist_stream = ctx.execute_partition(Arc::clone(&physical_plan), 0).await?;
|
||||
let remainder_stream = ctx.execute_partition(physical_plan, 1).await?;
|
||||
let to_persist_stream = ctx
|
||||
.execute_stream_partitioned(Arc::clone(&physical_plan), 0)
|
||||
.await?;
|
||||
let remainder_stream = ctx.execute_stream_partitioned(physical_plan, 1).await?;
|
||||
|
||||
let (to_persist, remainder) = futures::future::try_join(
|
||||
collect_rub(to_persist_stream, &addr, metric_registry.as_ref()),
|
||||
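Reviewer note: the execution context now exposes stream-returning methods; for the split plan used here each output partition is pulled as its own stream and both are collected concurrently. A sketch of the call shape (plan construction as above; the second `collect_rub` call is assumed to mirror the first, which the hunk truncates):

let to_persist_stream = ctx
    .execute_stream_partitioned(Arc::clone(&physical_plan), 0)
    .await?;
let remainder_stream = ctx.execute_stream_partitioned(physical_plan, 1).await?;

let (to_persist, remainder) = futures::future::try_join(
    collect_rub(to_persist_stream, &addr, metric_registry.as_ref()),
    collect_rub(remainder_stream, &addr, metric_registry.as_ref()),
)
.await?;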
|
@ -131,7 +132,6 @@ pub fn persist_chunks(
|
|||
partition_write.force_drop_chunk(id)
|
||||
}
|
||||
|
||||
let del_preds = Arc::new(delete_predicates);
|
||||
// Upsert remainder to catalog
|
||||
if let Some(remainder) = remainder {
|
||||
partition_write.create_rub_chunk(
|
||||
|
@ -139,11 +139,13 @@ pub fn persist_chunks(
|
|||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
Arc::clone(&schema),
|
||||
Arc::clone(&del_preds),
|
||||
delete_predicates.clone(),
|
||||
min_order,
|
||||
);
|
||||
}
|
||||
|
||||
// NGA todo: we hit this error if there are rows but they are deleted
|
||||
// Need to think a way to handle this (https://github.com/influxdata/influxdb_iox/issues/2546)
|
||||
let to_persist = to_persist.expect("should be rows to persist");
|
||||
|
||||
let (new_chunk_id, new_chunk) = partition_write.create_rub_chunk(
|
||||
|
@ -151,7 +153,7 @@ pub fn persist_chunks(
|
|||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
schema,
|
||||
del_preds,
|
||||
delete_predicates,
|
||||
min_order,
|
||||
);
|
||||
let to_persist = LockableCatalogChunk {
|
||||
|
|
|
@ -17,7 +17,7 @@ use data_types::{chunk_metadata::ChunkLifecycleAction, job::Job};
|
|||
use internal_types::selection::Selection;
|
||||
use observability_deps::tracing::{debug, warn};
|
||||
use parquet_file::{
|
||||
catalog::api::CatalogParquetInfo,
|
||||
catalog::interface::CatalogParquetInfo,
|
||||
chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk},
|
||||
metadata::IoxMetadata,
|
||||
storage::Storage,
|
||||
|
@ -26,7 +26,7 @@ use persistence_windows::{
|
|||
checkpoint::{DatabaseCheckpoint, PartitionCheckpoint, PersistCheckpointBuilder},
|
||||
persistence_windows::FlushHandle,
|
||||
};
|
||||
use query::QueryChunk;
|
||||
use query::{QueryChunk, QueryChunkMeta};
|
||||
use snafu::ResultExt;
|
||||
use std::{future::Future, sync::Arc};
|
||||
use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt};
|
||||
|
@ -89,7 +89,11 @@ pub(super) fn write_chunk_to_object_store(
|
|||
|
||||
// Get RecordBatchStream of data from the read buffer chunk
|
||||
let stream = db_chunk
|
||||
.read_filter(&Default::default(), Selection::All)
|
||||
.read_filter(
|
||||
&Default::default(),
|
||||
Selection::All,
|
||||
db_chunk.delete_predicates(),
|
||||
)
|
||||
.expect("read filter should be infallible");
|
||||
|
||||
// check that the upcoming state change will very likely succeed
|
||||
|
|
|
@ -1,11 +1,17 @@
|
|||
//! Functionality to load a [`Catalog`](crate::db::catalog::Catalog) and other information from a
|
||||
//! [`PreservedCatalog`](parquet_file::catalog::api::PreservedCatalog).
|
||||
//! [`PreservedCatalog`](parquet_file::catalog::core::PreservedCatalog).
|
||||
|
||||
use super::catalog::{chunk::ChunkStage, table::TableSchemaUpsertHandle, Catalog};
|
||||
use iox_object_store::{IoxObjectStore, ParquetFilePath};
|
||||
use observability_deps::tracing::{error, info};
|
||||
use parquet_file::{
|
||||
catalog::api::{CatalogParquetInfo, CatalogState, ChunkCreationFailed, PreservedCatalog},
|
||||
catalog::{
|
||||
core::PreservedCatalog,
|
||||
interface::{
|
||||
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
|
||||
ChunkCreationFailed,
|
||||
},
|
||||
},
|
||||
chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk},
|
||||
};
|
||||
use persistence_windows::checkpoint::{ReplayPlan, ReplayPlanner};
|
||||
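Reviewer note: the former `parquet_file::catalog::api` module is split in two: the catalog engine itself now lives in `catalog::core` and the state/callback types in `catalog::interface`. Downstream imports change along these lines (a sketch mirroring the hunk above; exact re-exports may differ elsewhere):

use parquet_file::catalog::{
    core::PreservedCatalog,
    interface::{
        CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
        ChunkCreationFailed,
    },
};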
|
@ -22,17 +28,17 @@ pub enum Error {
|
|||
|
||||
#[snafu(display("Cannot create new empty preserved catalog: {}", source))]
|
||||
CannotCreateCatalog {
|
||||
source: parquet_file::catalog::api::Error,
|
||||
source: parquet_file::catalog::core::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Cannot load preserved catalog: {}", source))]
|
||||
CannotLoadCatalog {
|
||||
source: parquet_file::catalog::api::Error,
|
||||
source: parquet_file::catalog::core::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Cannot wipe preserved catalog: {}", source))]
|
||||
CannotWipeCatalog {
|
||||
source: parquet_file::catalog::api::Error,
|
||||
source: parquet_file::catalog::core::Error,
|
||||
},
|
||||
}
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -166,8 +172,10 @@ impl CatalogState for Loader {
|
|||
&mut self,
|
||||
iox_object_store: Arc<IoxObjectStore>,
|
||||
info: CatalogParquetInfo,
|
||||
) -> parquet_file::catalog::api::Result<()> {
|
||||
use parquet_file::catalog::api::{MetadataExtractFailed, ReplayPlanError, SchemaError};
|
||||
) -> Result<(), CatalogStateAddError> {
|
||||
use parquet_file::catalog::interface::{
|
||||
MetadataExtractFailed, ReplayPlanError, SchemaError,
|
||||
};
|
||||
|
||||
// extract relevant bits from parquet file metadata
|
||||
let iox_md = info
|
||||
|
@ -212,15 +220,13 @@ impl CatalogState for Loader {
|
|||
.get_or_create_partition(&iox_md.table_name, &iox_md.partition_key);
|
||||
let mut partition = partition.write();
|
||||
if partition.chunk(iox_md.chunk_id).is_some() {
|
||||
return Err(
|
||||
parquet_file::catalog::api::Error::ParquetFileAlreadyExists { path: info.path },
|
||||
);
|
||||
return Err(CatalogStateAddError::ParquetFileAlreadyExists { path: info.path });
|
||||
}
|
||||
let schema_handle = TableSchemaUpsertHandle::new(&table_schema, &parquet_chunk.schema())
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(SchemaError { path: info.path })?;
|
||||
|
||||
let delete_predicates: Arc<Vec<Predicate>> = Arc::new(vec![]); // NGA todo: After Marco save delete predicate into the catalog, it will need to extract into this variable
|
||||
let delete_predicates: Vec<Arc<Predicate>> = vec![]; // NGA todo: After Marco saves delete predicate into the catalog, it will need to get extracted into this variable
|
||||
partition.insert_object_store_only_chunk(
|
||||
iox_md.chunk_id,
|
||||
parquet_chunk,
|
||||
|
@ -234,7 +240,7 @@ impl CatalogState for Loader {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn remove(&mut self, path: &ParquetFilePath) -> parquet_file::catalog::api::Result<()> {
|
||||
fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> {
|
||||
let mut removed_any = false;
|
||||
|
||||
for partition in self.catalog.partitions() {
|
||||
|
@ -261,7 +267,7 @@ impl CatalogState for Loader {
|
|||
if removed_any {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(parquet_file::catalog::api::Error::ParquetFileDoesNotExist { path: path.clone() })
|
||||
Err(CatalogStateRemoveError::ParquetFileDoesNotExist { path: path.clone() })
|
||||
}
|
||||
}
|
||||
}
|
||||
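Reviewer note: `CatalogState` implementations now report failures with dedicated `CatalogStateAddError` / `CatalogStateRemoveError` enums instead of the former monolithic `catalog::api::Error` and its `Result` alias. A signature-only sketch of the two affected methods, bodies elided, as implemented by the `Loader` above:

fn add(
    &mut self,
    iox_object_store: Arc<IoxObjectStore>,
    info: CatalogParquetInfo,
) -> Result<(), CatalogStateAddError> {
    // ... extract metadata, build the ParquetChunk, register it with the catalog ...
    Ok(())
}

fn remove(&mut self, path: &ParquetFilePath) -> Result<(), CatalogStateRemoveError> {
    // ... drop the matching object-store-only chunks ...
    Ok(())
}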
|
@ -273,7 +279,7 @@ mod tests {
|
|||
use data_types::{server_id::ServerId, DatabaseName};
|
||||
use object_store::ObjectStore;
|
||||
use parquet_file::catalog::{
|
||||
api::CheckpointData,
|
||||
interface::CheckpointData,
|
||||
test_helpers::{assert_catalog_state_implementation, TestCatalogState},
|
||||
};
|
||||
use std::convert::TryFrom;
|
||||
|
|
|
@ -1195,7 +1195,7 @@ mod tests {
|
|||
path::{parsed::DirsAndFileName, ObjectStorePath},
|
||||
ObjectStore, ObjectStoreApi,
|
||||
};
|
||||
use parquet_file::catalog::{api::PreservedCatalog, test_helpers::TestCatalogState};
|
||||
use parquet_file::catalog::{core::PreservedCatalog, test_helpers::TestCatalogState};
|
||||
use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner, QueryDatabase};
|
||||
use std::{
|
||||
convert::{TryFrom, TryInto},
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
//! This module implements the `partition` CLI command
|
||||
use data_types::chunk_metadata::ChunkSummary;
|
||||
use data_types::job::Operation;
|
||||
use generated_types::google::FieldViolation;
|
||||
use influxdb_iox_client::{
|
||||
connection::Connection,
|
||||
|
@ -10,7 +9,7 @@ use influxdb_iox_client::{
|
|||
PersistPartitionError, UnloadPartitionChunkError,
|
||||
},
|
||||
};
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::convert::TryFrom;
|
||||
use structopt::StructOpt;
|
||||
use thiserror::Error;
|
||||
|
||||
|
@ -283,10 +282,9 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
|||
chunk_id,
|
||||
} = close_chunk;
|
||||
|
||||
let operation: Operation = client
|
||||
let operation = client
|
||||
.close_partition_chunk(db_name, table_name, partition_key, chunk_id)
|
||||
.await?
|
||||
.try_into()?;
|
||||
.await?;
|
||||
|
||||
serde_json::to_writer_pretty(std::io::stdout(), &operation)?;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
use std::convert::TryInto;
|
||||
|
||||
use data_types::job::Operation;
|
||||
use generated_types::google::FieldViolation;
|
||||
use influxdb_iox_client::{connection::Connection, management};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
@ -74,12 +71,10 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
|||
return Err(Error::NeedsTheForceError);
|
||||
}
|
||||
|
||||
let operation: Operation = client
|
||||
let operation = client
|
||||
.wipe_persisted_catalog(db_name)
|
||||
.await
|
||||
.context(WipeError)?
|
||||
.try_into()
|
||||
.context(InvalidResponse)?;
|
||||
.context(WipeError)?;
|
||||
|
||||
serde_json::to_writer_pretty(std::io::stdout(), &operation).context(WritingJson)?;
|
||||
}
|
||||
|
|
|
@ -1,11 +1,8 @@
|
|||
use data_types::job::Operation;
|
||||
use generated_types::google::FieldViolation;
|
||||
use influxdb_iox_client::{
|
||||
connection::Connection,
|
||||
management,
|
||||
operations::{self, Client},
|
||||
};
|
||||
use std::convert::TryInto;
|
||||
use structopt::StructOpt;
|
||||
use thiserror::Error;
|
||||
|
||||
|
@ -15,9 +12,6 @@ pub enum Error {
|
|||
#[error("Client error: {0}")]
|
||||
ClientError(#[from] operations::Error),
|
||||
|
||||
#[error("Received invalid response: {0}")]
|
||||
InvalidResponse(#[from] FieldViolation),
|
||||
|
||||
#[error("Failed to create dummy job: {0}")]
|
||||
CreateDummyJobError(#[from] management::CreateDummyJobError),
|
||||
|
||||
|
@ -68,29 +62,16 @@ enum Command {
|
|||
pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
||||
match config.command {
|
||||
Command::List => {
|
||||
let result: Result<Vec<Operation>, _> = Client::new(connection)
|
||||
.list_operations()
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|c| c.operation())
|
||||
.map(TryInto::try_into)
|
||||
.collect();
|
||||
let operations = result?;
|
||||
let operations = Client::new(connection).list_operations().await?;
|
||||
serde_json::to_writer_pretty(std::io::stdout(), &operations)?;
|
||||
}
|
||||
Command::Get { id } => {
|
||||
let operation: Operation = Client::new(connection)
|
||||
.get_operation(id)
|
||||
.await?
|
||||
.try_into()?;
|
||||
let operation = Client::new(connection).get_operation(id).await?;
|
||||
serde_json::to_writer_pretty(std::io::stdout(), &operation)?;
|
||||
}
|
||||
Command::Wait { id, nanos } => {
|
||||
let timeout = nanos.map(std::time::Duration::from_nanos);
|
||||
let operation: Operation = Client::new(connection)
|
||||
.wait_operation(id, timeout)
|
||||
.await?
|
||||
.try_into()?;
|
||||
let operation = Client::new(connection).wait_operation(id, timeout).await?;
|
||||
serde_json::to_writer_pretty(std::io::stdout(), &operation)?;
|
||||
}
|
||||
Command::Cancel { id } => {
|
||||
|
@ -98,10 +79,9 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
|||
println!("Ok");
|
||||
}
|
||||
Command::Test { nanos } => {
|
||||
let operation: Operation = management::Client::new(connection)
|
||||
let operation = management::Client::new(connection)
|
||||
.create_dummy_job(nanos)
|
||||
.await?
|
||||
.try_into()?;
|
||||
.await?;
|
||||
serde_json::to_writer_pretty(std::io::stdout(), &operation)?;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -753,9 +753,11 @@ mod tests {
|
|||
child(prepare_sql_span, "prepare_plan").unwrap();
|
||||
|
||||
let collect_span = child(ctx_span, "collect").unwrap();
|
||||
let execute_span = child(collect_span, "execute_stream_partitioned").unwrap();
|
||||
let coalesce_span = child(execute_span, "CoalescePartitionsEx").unwrap();
|
||||
|
||||
// validate spans from DataFusion ExecutionPlan are present
|
||||
child(collect_span, "ProjectionExec: expr").unwrap();
|
||||
child(coalesce_span, "ProjectionExec: expr").unwrap();
|
||||
|
||||
let database_not_found = root_spans[3];
|
||||
assert_eq!(database_not_found.status, SpanStatus::Err);
|
||||
|
|
|
@ -28,7 +28,7 @@ use data_types::{
|
|||
};
|
||||
use influxdb_iox_client::format::QueryOutputFormat;
|
||||
use influxdb_line_protocol::parse_lines;
|
||||
use query::{exec::ExecutionContextProvider, QueryDatabase};
|
||||
use query::exec::ExecutionContextProvider;
|
||||
use server::{ApplicationState, ConnectionManager, Error, Server as AppServer};
|
||||
|
||||
// External crates
|
||||
|
@ -392,7 +392,6 @@ where
|
|||
.get("/health", health::<M>)
|
||||
.get("/metrics", handle_metrics::<M>)
|
||||
.get("/iox/api/v1/databases/:name/query", query::<M>)
|
||||
.get("/api/v1/partitions", list_partitions::<M>)
|
||||
.get("/debug/pprof", pprof_home::<M>)
|
||||
.get("/debug/pprof/profile", pprof_profile::<M>)
|
||||
.get("/debug/pprof/allocs", pprof_heappy_profile::<M>)
|
||||
|
@ -644,43 +643,6 @@ async fn handle_metrics<M: ConnectionManager + Send + Sync + Debug + 'static>(
|
|||
Ok(Response::new(Body::from(body)))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
/// Arguments in the query string of the request to /partitions
|
||||
struct DatabaseInfo {
|
||||
org: String,
|
||||
bucket: String,
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug")]
|
||||
async fn list_partitions<M: ConnectionManager + Send + Sync + Debug + 'static>(
|
||||
req: Request<Body>,
|
||||
) -> Result<Response<Body>, ApplicationError> {
|
||||
let server = Arc::clone(&req.data::<Server<M>>().expect("server state").app_server);
|
||||
|
||||
let query = req.uri().query().context(ExpectedQueryString {})?;
|
||||
|
||||
let info: DatabaseInfo = serde_urlencoded::from_str(query).context(InvalidQueryString {
|
||||
query_string: query,
|
||||
})?;
|
||||
|
||||
let db_name =
|
||||
org_and_bucket_to_database(&info.org, &info.bucket).context(BucketMappingError)?;
|
||||
|
||||
let db = server.db(&db_name)?;
|
||||
|
||||
let partition_keys =
|
||||
db.partition_keys()
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(BucketByName {
|
||||
org: &info.org,
|
||||
bucket_name: &info.bucket,
|
||||
})?;
|
||||
|
||||
let result = serde_json::to_string(&partition_keys).context(JsonGenerationError)?;
|
||||
|
||||
Ok(Response::new(Body::from(result)))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
/// Arguments in the query string of the request to /snapshot
|
||||
struct SnapshotInfo {
|
||||
|
|
|
@ -631,7 +631,7 @@ where
|
|||
del_predicate.exprs.push(expr);
|
||||
}
|
||||
|
||||
db.delete(&table_name, &del_predicate)
|
||||
db.delete(&table_name, Arc::new(del_predicate))
|
||||
.await
|
||||
.map_err(default_db_error_handler)?;
|
||||
}
|
||||
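Reviewer note: `delete` now takes the predicate behind an `Arc`, so the same predicate can be shared with every chunk that records it without copying. A sketch of the call, assuming `del_predicate` was built and its `exprs` filled in exactly as in the handler above:

let del_predicate = Arc::new(del_predicate);
db.delete(&table_name, Arc::clone(&del_predicate))
    .await
    .map_err(default_db_error_handler)?;
// The same Arc can now be cloned cheaply wherever the predicate needs to be kept.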
|
|
|
@ -9,7 +9,6 @@ use generated_types::{
|
|||
};
|
||||
use influxdb_iox_client::{
|
||||
management::{Client, CreateDatabaseError},
|
||||
operations,
|
||||
write::WriteError,
|
||||
};
|
||||
|
||||
|
@ -880,20 +879,16 @@ async fn test_close_partition_chunk() {
|
|||
assert_eq!(chunks[0].storage, ChunkStorage::OpenMutableBuffer as i32);
|
||||
|
||||
// Move the chunk to read buffer
|
||||
let operation = management_client
|
||||
let iox_operation = management_client
|
||||
.close_partition_chunk(&db_name, table_name, partition_key, 0)
|
||||
.await
|
||||
.expect("new partition chunk");
|
||||
|
||||
println!("Operation response is {:?}", operation);
|
||||
let operation_id = operation.id();
|
||||
|
||||
let meta = operations::ClientOperation::try_new(operation)
|
||||
.unwrap()
|
||||
.metadata();
|
||||
println!("Operation response is {:?}", iox_operation);
|
||||
let operation_id = iox_operation.operation.id();
|
||||
|
||||
// ensure we got a legit job description back
|
||||
if let Some(Job::CloseChunk(close_chunk)) = meta.job {
|
||||
if let Some(Job::CloseChunk(close_chunk)) = iox_operation.metadata.job {
|
||||
assert_eq!(close_chunk.db_name, db_name);
|
||||
assert_eq!(close_chunk.partition_key, partition_key);
|
||||
assert_eq!(close_chunk.chunk_id, 0);
|
||||
|
@ -1020,20 +1015,16 @@ async fn test_wipe_preserved_catalog() {
|
|||
// Recover by wiping preserved catalog
|
||||
//
|
||||
|
||||
let operation = management_client
|
||||
let iox_operation = management_client
|
||||
.wipe_persisted_catalog(&db_name)
|
||||
.await
|
||||
.expect("wipe persisted catalog");
|
||||
|
||||
println!("Operation response is {:?}", operation);
|
||||
let operation_id = operation.id();
|
||||
|
||||
let meta = operations::ClientOperation::try_new(operation)
|
||||
.unwrap()
|
||||
.metadata();
|
||||
println!("Operation response is {:?}", iox_operation);
|
||||
let operation_id = iox_operation.operation.id();
|
||||
|
||||
// ensure we got a legit job description back
|
||||
if let Some(Job::WipePreservedCatalog(wipe_persisted_catalog)) = meta.job {
|
||||
if let Some(Job::WipePreservedCatalog(wipe_persisted_catalog)) = iox_operation.metadata.job {
|
||||
assert_eq!(wipe_persisted_catalog.db_name, db_name);
|
||||
} else {
|
||||
panic!("unexpected job returned")
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use assert_cmd::Command;
|
||||
use predicates::prelude::*;
|
||||
|
||||
use data_types::chunk_metadata::ChunkAddr;
|
||||
use data_types::{
|
||||
chunk_metadata::ChunkStorage,
|
||||
job::{Job, Operation},
|
||||
use data_types::chunk_metadata::ChunkStorage;
|
||||
use generated_types::google::longrunning::IoxOperation;
|
||||
use generated_types::influxdata::iox::management::v1::{
|
||||
operation_metadata::Job, CloseChunk, WipePreservedCatalog,
|
||||
};
|
||||
use test_helpers::make_temp_file;
|
||||
use write_buffer::maybe_skip_kafka_integration;
|
||||
|
@ -112,7 +111,7 @@ async fn test_create_database() {
|
|||
.and(predicate::str::contains(format!(r#""name": "{}"#, db)))
|
||||
// validate the defaults have been set reasonably
|
||||
.and(predicate::str::contains("%Y-%m-%d %H:00:00"))
|
||||
.and(predicate::str::contains(r#""bufferSizeHard": 104857600"#))
|
||||
.and(predicate::str::contains(r#""bufferSizeHard": "104857600""#))
|
||||
.and(predicate::str::contains("lifecycleRules")),
|
||||
);
|
||||
}
|
||||
|
@ -147,7 +146,7 @@ async fn test_create_database_size() {
|
|||
.assert()
|
||||
.success()
|
||||
.stdout(
|
||||
predicate::str::contains(r#""bufferSizeHard": 1000"#)
|
||||
predicate::str::contains(r#""bufferSizeHard": "1000""#)
|
||||
.and(predicate::str::contains("lifecycleRules")),
|
||||
);
|
||||
}
|
||||
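Reviewer note: the quoted values are expected because the proto3 JSON mapping renders 64-bit integers as decimal strings, and the new pbjson-generated serializers follow that mapping. A small stand-alone sketch of what a consumer of the CLI output now has to do with such a field (field name taken from the test above):

use serde_json::Value;

fn main() {
    // Fragment of the new-style response.
    let body = r#"{"bufferSizeHard": "1000"}"#;
    let v: Value = serde_json::from_str(body).unwrap();

    // 64-bit numeric fields arrive as strings and need an explicit parse.
    let size: u64 = v["bufferSizeHard"].as_str().unwrap().parse().unwrap();
    assert_eq!(size, 1000);
}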
|
@ -765,7 +764,7 @@ async fn test_close_partition_chunk() {
|
|||
let lp_data = vec!["cpu,region=west user=23.2 100"];
|
||||
load_lp(addr, &db_name, lp_data);
|
||||
|
||||
let stdout: Operation = serde_json::from_slice(
|
||||
let stdout: IoxOperation = serde_json::from_slice(
|
||||
&Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("database")
|
||||
|
@ -784,18 +783,16 @@ async fn test_close_partition_chunk() {
|
|||
)
|
||||
.expect("Expected JSON output");
|
||||
|
||||
let expected_job = Job::CompactChunk {
|
||||
chunk: ChunkAddr {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
table_name: Arc::from("cpu"),
|
||||
partition_key: Arc::from("cpu"),
|
||||
chunk_id: 0,
|
||||
},
|
||||
};
|
||||
let expected_job = Job::CloseChunk(CloseChunk {
|
||||
db_name,
|
||||
table_name: "cpu".to_string(),
|
||||
partition_key: "cpu".to_string(),
|
||||
chunk_id: 0,
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
Some(expected_job),
|
||||
stdout.job,
|
||||
stdout.metadata.job,
|
||||
"operation was {:#?}",
|
||||
stdout
|
||||
);
|
||||
|
@ -828,7 +825,7 @@ async fn test_wipe_persisted_catalog() {
|
|||
let server_fixture = fixture_broken_catalog(&db_name).await;
|
||||
let addr = server_fixture.grpc_base();
|
||||
|
||||
let stdout: Operation = serde_json::from_slice(
|
||||
let stdout: IoxOperation = serde_json::from_slice(
|
||||
&Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("database")
|
||||
|
@ -845,13 +842,11 @@ async fn test_wipe_persisted_catalog() {
|
|||
)
|
||||
.expect("Expected JSON output");
|
||||
|
||||
let expected_job = Job::WipePreservedCatalog {
|
||||
db_name: Arc::from(db_name.as_str()),
|
||||
};
|
||||
let expected_job = Job::WipePreservedCatalog(WipePreservedCatalog { db_name });
|
||||
|
||||
assert_eq!(
|
||||
Some(expected_job),
|
||||
stdout.job,
|
||||
stdout.metadata.job,
|
||||
"operation was {:#?}",
|
||||
stdout
|
||||
);
|
||||
|
|
|
@ -17,7 +17,7 @@ async fn test_operations() {
|
|||
|
||||
let nanos = vec![Duration::from_secs(20).as_nanos() as _, 1];
|
||||
|
||||
let operation = management_client
|
||||
let iox_operation = management_client
|
||||
.create_dummy_job(nanos.clone())
|
||||
.await
|
||||
.expect("create dummy job failed");
|
||||
|
@ -28,20 +28,15 @@ async fn test_operations() {
|
|||
.expect("list operations failed");
|
||||
|
||||
assert_eq!(running_ops.len(), 1);
|
||||
assert_eq!(running_ops[0].name(), operation.name);
|
||||
assert_eq!(running_ops[0].operation.name, iox_operation.operation.name);
|
||||
|
||||
let id = operation.name.parse().expect("not an integer");
|
||||
let id = iox_operation.operation.id();
|
||||
let iox_operation = operations_client.get_operation(id).await.unwrap();
|
||||
|
||||
let meta = operations_client
|
||||
.client_operation(id)
|
||||
.await
|
||||
.unwrap()
|
||||
.metadata();
|
||||
let job = iox_operation.metadata.job.expect("expected a job");
|
||||
|
||||
let job = meta.job.expect("expected a job");
|
||||
|
||||
assert_eq!(meta.total_count, 2);
|
||||
assert_eq!(meta.pending_count, 1);
|
||||
assert_eq!(iox_operation.metadata.total_count, 2);
|
||||
assert_eq!(iox_operation.metadata.pending_count, 1);
|
||||
assert_eq!(
|
||||
job,
|
||||
operation_metadata::Job::Dummy(Dummy {
|
||||
|
@ -51,14 +46,14 @@ async fn test_operations() {
|
|||
);
|
||||
|
||||
// Check wait times out correctly
|
||||
let fetched = operations_client
|
||||
let iox_operation = operations_client
|
||||
.wait_operation(id, Some(Duration::from_micros(10)))
|
||||
.await
|
||||
.expect("failed to wait operation");
|
||||
|
||||
assert!(!fetched.done);
|
||||
assert!(!iox_operation.operation.done);
|
||||
// Shouldn't specify wall_nanos as not complete
|
||||
assert_eq!(meta.wall_nanos, 0);
|
||||
assert_eq!(iox_operation.metadata.wall_nanos, 0);
|
||||
|
||||
let wait = tokio::spawn(async move {
|
||||
let mut operations_client = server_fixture.operations_client();
|
||||
|
@ -74,18 +69,15 @@ async fn test_operations() {
|
|||
.expect("failed to cancel operation");
|
||||
|
||||
let waited = wait.await.unwrap();
|
||||
let meta = operations::ClientOperation::try_new(waited.clone())
|
||||
.unwrap()
|
||||
.metadata();
|
||||
|
||||
assert!(waited.done);
|
||||
assert!(meta.wall_nanos > 0);
|
||||
assert!(meta.cpu_nanos > 0);
|
||||
assert_eq!(meta.pending_count, 0);
|
||||
assert_eq!(meta.total_count, 2);
|
||||
assert_eq!(meta.cancelled_count, 1);
|
||||
assert!(waited.operation.done);
|
||||
assert!(waited.metadata.wall_nanos > 0);
|
||||
assert!(waited.metadata.cpu_nanos > 0);
|
||||
assert_eq!(waited.metadata.pending_count, 0);
|
||||
assert_eq!(waited.metadata.total_count, 2);
|
||||
assert_eq!(waited.metadata.cancelled_count, 1);
|
||||
|
||||
match waited.result {
|
||||
match waited.operation.result {
|
||||
Some(operations::generated_types::operation::Result::Error(status)) => {
|
||||
assert_eq!(status.code, tonic::Code::Cancelled as i32)
|
||||
}
|
||||
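Reviewer note: `IoxOperation` pairs the raw `google.longrunning` operation with its decoded IOx metadata, so tests read `.operation` for protocol-level state and `.metadata` for the job counters. A sketch of polling one, with the client setup assumed to match the test above:

let iox_operation = operations_client
    .wait_operation(id, Some(Duration::from_secs(1)))
    .await
    .expect("failed to wait operation");

assert!(iox_operation.operation.done);               // longrunning protocol state
println!("job = {:?}", iox_operation.metadata.job);  // decoded OperationMetadata
println!("pending = {}", iox_operation.metadata.pending_count);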
|
|
|
@ -1,6 +1,7 @@
|
|||
use crate::common::server_fixture::ServerFixture;
|
||||
use assert_cmd::Command;
|
||||
use data_types::job::{Job, Operation, OperationStatus};
|
||||
use generated_types::google::longrunning::IoxOperation;
|
||||
use generated_types::influxdata::iox::management::v1::{operation_metadata::Job, Dummy};
|
||||
use predicates::prelude::*;
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -9,7 +10,7 @@ async fn test_start_stop() {
|
|||
let addr = server_fixture.grpc_base();
|
||||
let duration = std::time::Duration::from_secs(10).as_nanos() as u64;
|
||||
|
||||
let stdout: Operation = serde_json::from_slice(
|
||||
let stdout: IoxOperation = serde_json::from_slice(
|
||||
&Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("operation")
|
||||
|
@ -24,13 +25,13 @@ async fn test_start_stop() {
|
|||
)
|
||||
.expect("expected JSON output");
|
||||
|
||||
assert_eq!(stdout.total_count, 1);
|
||||
match stdout.job {
|
||||
Some(Job::Dummy { nanos, .. }) => assert_eq!(nanos, vec![duration]),
|
||||
_ => panic!("expected dummy job got {:?}", stdout.job),
|
||||
assert_eq!(stdout.metadata.total_count, 1);
|
||||
match stdout.metadata.job {
|
||||
Some(Job::Dummy(Dummy { nanos, .. })) => assert_eq!(nanos, vec![duration]),
|
||||
_ => panic!("expected dummy job got {:?}", stdout.metadata.job),
|
||||
}
|
||||
|
||||
let operations: Vec<Operation> = serde_json::from_slice(
|
||||
let operations: Vec<IoxOperation> = serde_json::from_slice(
|
||||
&Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("operation")
|
||||
|
@ -45,33 +46,33 @@ async fn test_start_stop() {
|
|||
.expect("expected JSON output");
|
||||
|
||||
assert_eq!(operations.len(), 1);
|
||||
match &operations[0].job {
|
||||
Some(Job::Dummy { nanos, .. }) => {
|
||||
match &operations[0].metadata.job {
|
||||
Some(Job::Dummy(Dummy { nanos, .. })) => {
|
||||
assert_eq!(nanos.len(), 1);
|
||||
assert_eq!(nanos[0], duration);
|
||||
}
|
||||
_ => panic!("expected dummy job got {:?}", &operations[0].job),
|
||||
_ => panic!("expected dummy job got {:?}", &operations[0].metadata.job),
|
||||
}
|
||||
|
||||
let id = operations[0].id;
|
||||
let name = &operations[0].operation.name;
|
||||
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("operation")
|
||||
.arg("cancel")
|
||||
.arg(id.to_string())
|
||||
.arg(name.clone())
|
||||
.arg("--host")
|
||||
.arg(addr)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(predicate::str::contains("Ok"));
|
||||
|
||||
let completed: Operation = serde_json::from_slice(
|
||||
let completed: IoxOperation = serde_json::from_slice(
|
||||
&Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("operation")
|
||||
.arg("wait")
|
||||
.arg(id.to_string())
|
||||
.arg(name.to_string())
|
||||
.arg("--host")
|
||||
.arg(addr)
|
||||
.assert()
|
||||
|
@ -81,9 +82,8 @@ async fn test_start_stop() {
|
|||
)
|
||||
.expect("expected JSON output");
|
||||
|
||||
assert_eq!(completed.pending_count, 0);
|
||||
assert_eq!(completed.total_count, 1);
|
||||
assert_eq!(completed.cancelled_count, 1);
|
||||
assert_eq!(completed.status, OperationStatus::Cancelled);
|
||||
assert_eq!(&completed.job, &operations[0].job)
|
||||
assert_eq!(completed.metadata.pending_count, 0);
|
||||
assert_eq!(completed.metadata.total_count, 1);
|
||||
assert_eq!(completed.metadata.cancelled_count, 1);
|
||||
assert_eq!(&completed.metadata.job, &operations[0].metadata.job)
|
||||
}
|
||||
|
|
|
@ -2,7 +2,6 @@ use itertools::Itertools;
|
|||
|
||||
use arrow_util::assert_batches_eq;
|
||||
use data_types::chunk_metadata::ChunkStorage;
|
||||
use influxdb_iox_client::operations;
|
||||
|
||||
use crate::{
|
||||
common::server_fixture::ServerFixture,
|
||||
|
@ -125,11 +124,11 @@ async fn test_full_lifecycle() {
|
|||
.await
|
||||
.unwrap()
|
||||
.iter()
|
||||
.any(|operation| match operation.metadata().job {
|
||||
.any(|operation| match &operation.metadata.job {
|
||||
Some(Job::CompactChunks(CompactChunks {
|
||||
db_name: operation_db_name,
|
||||
..
|
||||
})) => operation_db_name == db_name,
|
||||
})) => operation_db_name == &db_name,
|
||||
_ => false,
|
||||
});
|
||||
assert!(performed_compaction);
|
||||
|
@ -269,20 +268,16 @@ async fn create_readbuffer_chunk(fixture: &ServerFixture, db_name: &str) -> u32
|
|||
assert_eq!(chunks[0].storage, ChunkStorage::OpenMutableBuffer);
|
||||
|
||||
// Move the chunk to read buffer
|
||||
let operation = management_client
|
||||
let iox_operation = management_client
|
||||
.close_partition_chunk(db_name, table_name, partition_key, 0)
|
||||
.await
|
||||
.expect("new partition chunk");
|
||||
|
||||
println!("Operation response is {:?}", operation);
|
||||
let operation_id = operation.id();
|
||||
|
||||
let meta = operations::ClientOperation::try_new(operation)
|
||||
.unwrap()
|
||||
.metadata();
|
||||
println!("Operation response is {:?}", iox_operation);
|
||||
let operation_id = iox_operation.operation.id();
|
||||
|
||||
// ensure we got a legit job description back
|
||||
if let Some(Job::CloseChunk(close_chunk)) = meta.job {
|
||||
if let Some(Job::CloseChunk(close_chunk)) = iox_operation.metadata.job {
|
||||
assert_eq!(close_chunk.db_name, db_name);
|
||||
assert_eq!(close_chunk.partition_key, partition_key);
|
||||
assert_eq!(close_chunk.chunk_id, 0);
|
||||
|
|
|
@ -556,12 +556,8 @@ where
|
|||
}
|
||||
|
||||
if t_start.elapsed() >= wait_time {
|
||||
let operations = fixture.operations_client().list_operations().await.unwrap();
|
||||
let mut operations: Vec<_> = operations
|
||||
.into_iter()
|
||||
.map(|x| (x.name().parse::<usize>().unwrap(), x.metadata()))
|
||||
.collect();
|
||||
operations.sort_by_key(|x| x.0);
|
||||
let mut operations = fixture.operations_client().list_operations().await.unwrap();
|
||||
operations.sort_by(|a, b| a.operation.name.cmp(&b.operation.name));
|
||||
|
||||
panic!(
|
||||
"Could not find {} within {:?}.\nChunks were: {:#?}\nOperations were: {:#?}",
|
||||
|
|
|
@ -306,17 +306,20 @@ async fn test_sql_observer_operations() {
|
|||
let partition_key = "cpu";
|
||||
let table_name = "cpu";
|
||||
// Move the chunk to read buffer
|
||||
let operation = management_client
|
||||
let iox_operation = management_client
|
||||
.close_partition_chunk(&db_name, table_name, partition_key, 0)
|
||||
.await
|
||||
.expect("new partition chunk");
|
||||
|
||||
println!("Operation response is {:?}", operation);
|
||||
println!("Operation response is {:?}", iox_operation);
|
||||
|
||||
// wait for the job to be done
|
||||
fixture
|
||||
.operations_client()
|
||||
.wait_operation(operation.id(), Some(std::time::Duration::from_secs(1)))
|
||||
.wait_operation(
|
||||
iox_operation.operation.id(),
|
||||
Some(std::time::Duration::from_secs(1)),
|
||||
)
|
||||
.await
|
||||
.expect("failed to wait operation");
|
||||
|
||||
|
|
|
@ -27,12 +27,12 @@ async fn test_operations() {
|
|||
.expect("write succeded");
|
||||
|
||||
// Move the chunk to read buffer
|
||||
let operation = management_client
|
||||
let iox_operation = management_client
|
||||
.close_partition_chunk(&db_name1, table_name, partition_key, 0)
|
||||
.await
|
||||
.expect("new partition chunk");
|
||||
|
||||
let operation_id = operation.id();
|
||||
let operation_id = iox_operation.operation.id();
|
||||
operations_client
|
||||
.wait_operation(operation_id, Some(std::time::Duration::from_secs(1)))
|
||||
.await
|
||||
|
|
|
@ -55,6 +55,22 @@ pub struct SpanContext {
|
|||
}
|
||||
|
||||
impl SpanContext {
|
||||
/// Create a new root span context, sent to `collector`. The
|
||||
/// new span context has a random trace_id and span_id, and thus
|
||||
/// is not connected to any existing span or trace.
|
||||
pub fn new(collector: Arc<dyn TraceCollector>) -> Self {
|
||||
let mut rng = rand::thread_rng();
|
||||
let trace_id: u128 = rng.gen_range(1..u128::MAX);
|
||||
let span_id: u64 = rng.gen_range(1..u64::MAX);
|
||||
|
||||
Self {
|
||||
trace_id: TraceId(NonZeroU128::new(trace_id).unwrap()),
|
||||
parent_span_id: None,
|
||||
span_id: SpanId(NonZeroU64::new(span_id).unwrap()),
|
||||
collector: Some(collector),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new child of the Span described by this TraceContext
|
||||
pub fn child(&self, name: impl Into<Cow<'static, str>>) -> Span {
|
||||
Span {
|
||||
|
@ -73,3 +89,22 @@ impl SpanContext {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::RingBufferTraceCollector;
|
||||
|
||||
#[test]
|
||||
fn test_new() {
|
||||
// two newly created spans should not have duplicated trace or span ids
|
||||
let collector = Arc::new(RingBufferTraceCollector::new(5)) as _;
|
||||
|
||||
let ctx1 = SpanContext::new(Arc::clone(&collector));
|
||||
let ctx2 = SpanContext::new(collector);
|
||||
|
||||
assert_ne!(ctx1.trace_id, ctx2.trace_id);
|
||||
assert_ne!(ctx1.span_id, ctx2.span_id);
|
||||
}
|
||||
}
|
||||
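Reviewer note: `SpanContext::new` gives tests a one-liner for minting a root context with random trace and span ids; child spans are then derived from it instead of being built field by field. A sketch, assuming the crate's `RingBufferTraceCollector` as in the tests above:

let collector = Arc::new(RingBufferTraceCollector::new(5)) as Arc<dyn TraceCollector>;

let root_ctx = SpanContext::new(Arc::clone(&collector));
let span = root_ctx.child("my_span"); // same trace as root_ctx, fresh span_id

// The span's start/end/status fields start out unset and are filled in later,
// e.g. by a SpanRecorder.
assert_eq!(span.ctx.trace_id, root_ctx.trace_id);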
|
|
|
@ -201,29 +201,14 @@ impl<'a> Drop for SpanRecorder {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::num::{NonZeroU128, NonZeroU64};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::ctx::{SpanId, TraceId};
|
||||
use crate::{RingBufferTraceCollector, TraceCollector};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn make_span(collector: Arc<dyn TraceCollector>) -> Span {
|
||||
Span {
|
||||
name: "foo".into(),
|
||||
ctx: SpanContext {
|
||||
trace_id: TraceId(NonZeroU128::new(23948923).unwrap()),
|
||||
parent_span_id: None,
|
||||
span_id: SpanId(NonZeroU64::new(3498394).unwrap()),
|
||||
collector: Some(collector),
|
||||
},
|
||||
start: None,
|
||||
end: None,
|
||||
status: SpanStatus::Unknown,
|
||||
metadata: Default::default(),
|
||||
events: vec![],
|
||||
}
|
||||
SpanContext::new(collector).child("foo")
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|