Merge pull request #4522 from influxdata/cn/delete-more

fix: Delete even more
kodiakhq[bot] 2022-05-09 08:53:33 +00:00 committed by GitHub
commit f23d569054
215 changed files with 4375 additions and 8470 deletions

Cargo.lock generated

@ -685,7 +685,6 @@ name = "clap_blocks"
version = "0.1.0"
dependencies = [
"clap 3.1.12",
"data_types",
"futures",
"humantime",
"iox_catalog",
@ -730,7 +729,6 @@ name = "client_util"
version = "0.1.0"
dependencies = [
"http",
"prost",
"thiserror",
"tokio",
"tonic",
@ -814,17 +812,14 @@ dependencies = [
"backoff 0.1.0",
"bytes",
"data_types",
"data_types2",
"datafusion 0.1.0",
"futures",
"iox_catalog",
"iox_object_store",
"iox_tests",
"iox_time",
"metric",
"object_store",
"observability_deps",
"parking_lot 0.12.0",
"parquet_file",
"predicate",
"querier",
@ -1155,30 +1150,14 @@ dependencies = [
name = "data_types"
version = "0.1.0"
dependencies = [
"bytes",
"iox_time",
"num_cpus",
"influxdb_line_protocol",
"observability_deps",
"ordered-float 3.0.0",
"percent-encoding",
"regex",
"siphasher",
"snafu",
"test_helpers",
"uuid 0.8.2",
"workspace-hack",
]
[[package]]
name = "data_types2"
version = "0.1.0"
dependencies = [
"data_types",
"influxdb_line_protocol",
"mutable_batch",
"predicate",
"schema",
"snafu",
"sqlx",
"test_helpers",
"uuid 0.8.2",
"workspace-hack",
]
@ -1415,9 +1394,6 @@ dependencies = [
"hashbrown 0.12.0",
"iox_time",
"mutable_batch",
"mutable_batch_lp",
"ordered-float 3.0.0",
"regex",
"schema",
"trace",
"workspace-hack",
@ -1734,10 +1710,7 @@ version = "0.1.0"
dependencies = [
"bytes",
"data_types",
"data_types2",
"datafusion 0.1.0",
"iox_time",
"num_cpus",
"observability_deps",
"pbjson",
"pbjson-build",
@ -1746,7 +1719,6 @@ dependencies = [
"prost",
"prost-build",
"query_functions",
"regex",
"serde",
"tonic",
"tonic-build",
@ -1801,22 +1773,15 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
name = "grpc-router"
version = "0.1.0"
dependencies = [
"bytes",
"cache_loader_async",
"futures",
"grpc-router-test-gen",
"observability_deps",
"paste",
"prost",
"prost-build",
"prost-types",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-build",
"tonic-reflection",
]
[[package]]
@ -1825,7 +1790,6 @@ version = "0.1.0"
dependencies = [
"prost",
"prost-build",
"prost-types",
"tonic",
"tonic-build",
]
@ -2153,76 +2117,48 @@ version = "0.1.0"
dependencies = [
"ansi_term",
"arrow",
"arrow-flight",
"arrow_util",
"assert_cmd",
"async-trait",
"backtrace",
"byteorder",
"bytes",
"chrono",
"clap 3.1.12",
"clap_blocks",
"compactor",
"console-subscriber",
"csv",
"data_types",
"data_types2",
"datafusion 0.1.0",
"dml",
"dotenv",
"flate2",
"futures",
"generated_types",
"hashbrown 0.12.0",
"heappy",
"http",
"humantime",
"hyper",
"influxdb_iox_client",
"influxdb_line_protocol",
"influxdb_storage_client",
"influxrpc_parser",
"ingester",
"iox_catalog",
"iox_object_store",
"iox_time",
"ioxd_common",
"ioxd_compactor",
"ioxd_ingester",
"ioxd_querier",
"ioxd_router2",
"ioxd_router",
"ioxd_test",
"itertools",
"libc",
"log",
"logfmt",
"metric",
"metric_exporters",
"mutable_batch",
"mutable_batch_lp",
"mutable_batch_pb",
"num_cpus",
"object_store",
"observability_deps",
"once_cell",
"panic_logging",
"parking_lot 0.12.0",
"parquet",
"parquet_file",
"pin-project",
"predicate",
"predicates",
"prost",
"querier",
"query",
"read_buffer",
"router2",
"rustyline",
"schema",
"serde",
"serde_json",
"serde_urlencoded",
"snafu",
"tempfile",
"test_helpers",
@ -2234,17 +2170,10 @@ dependencies = [
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"tower",
"trace",
"trace_exporters",
"trace_http",
"tracker",
"trogging",
"uuid 0.8.2",
"workspace-hack",
"write_buffer",
]
[[package]]
@ -2259,17 +2188,13 @@ dependencies = [
"dml",
"futures-util",
"generated_types",
"mutable_batch",
"mutable_batch_lp",
"mutable_batch_pb",
"prost",
"rand",
"serde",
"serde_json",
"thiserror",
"tokio",
"tonic",
"uuid 0.8.2",
]
[[package]]
@ -2331,12 +2256,10 @@ dependencies = [
"assert_matches",
"async-trait",
"backoff 0.1.0",
"base64 0.13.0",
"bitflags",
"bytes",
"chrono",
"data_types",
"data_types2",
"datafusion 0.1.0",
"datafusion_util",
"dml",
@ -2344,7 +2267,6 @@ dependencies = [
"generated_types",
"hyper",
"iox_catalog",
"iox_object_store",
"iox_time",
"lazy_static",
"metric",
@ -2353,7 +2275,6 @@ dependencies = [
"object_store",
"observability_deps",
"parking_lot 0.12.0",
"parquet",
"parquet_file",
"paste",
"pin-project",
@ -2409,7 +2330,7 @@ version = "0.1.0"
dependencies = [
"assert_matches",
"async-trait",
"data_types2",
"data_types",
"dotenv",
"futures",
"iox_time",
@ -2434,18 +2355,13 @@ dependencies = [
name = "iox_catalog_service"
version = "0.1.0"
dependencies = [
"async-trait",
"data_types2",
"data_types",
"generated_types",
"iox_catalog",
"iox_time",
"metric",
"observability_deps",
"serde",
"serde_urlencoded",
"tokio",
"tonic",
"trace",
"uuid 0.8.2",
"workspace-hack",
]
@ -2458,12 +2374,10 @@ dependencies = [
"chrono-english",
"clap 3.1.12",
"criterion",
"data_types",
"futures",
"handlebars",
"humantime",
"influxdb2_client",
"influxdb_iox_client",
"itertools",
"rand",
"regex",
@ -2488,65 +2402,36 @@ dependencies = [
"clap 3.1.12",
"dotenv",
"futures",
"glob",
"k8s-openapi",
"kube",
"kube-derive",
"kube-runtime",
"parking_lot 0.11.2",
"pbjson-build",
"prost",
"schemars",
"serde",
"serde_json",
"thiserror",
"tokio",
"tonic",
"tonic-build",
"tracing",
"trogging",
"workspace-hack",
]
[[package]]
name = "iox_object_store"
version = "0.1.0"
dependencies = [
"bytes",
"data_types",
"data_types2",
"futures",
"object_store",
"observability_deps",
"snafu",
"test_helpers",
"tokio",
"tokio-stream",
"uuid 0.8.2",
"workspace-hack",
]
[[package]]
name = "iox_object_store_service"
version = "0.1.0"
dependencies = [
"async-trait",
"bytes",
"data_types2",
"data_types",
"futures",
"generated_types",
"iox_catalog",
"iox_object_store",
"iox_time",
"metric",
"object_store",
"observability_deps",
"serde",
"serde_urlencoded",
"parquet_file",
"tokio",
"tokio-stream",
"tonic",
"trace",
"uuid 0.8.2",
"workspace-hack",
]
@ -2557,10 +2442,9 @@ version = "0.1.0"
dependencies = [
"arrow",
"bytes",
"data_types2",
"data_types",
"datafusion 0.1.0",
"iox_catalog",
"iox_object_store",
"iox_time",
"metric",
"mutable_batch_lp",
@ -2594,6 +2478,7 @@ dependencies = [
"dml",
"flate2",
"futures",
"generated_types",
"hashbrown 0.12.0",
"http",
"hyper",
@ -2605,17 +2490,18 @@ dependencies = [
"parking_lot 0.12.0",
"pprof",
"predicate",
"prost",
"reqwest",
"serde",
"serde_json",
"serde_urlencoded",
"service_grpc_testing",
"snafu",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"tower",
"trace",
"trace_exporters",
@ -2630,8 +2516,7 @@ dependencies = [
"async-trait",
"clap_blocks",
"compactor",
"data_types2",
"generated_types",
"data_types",
"hyper",
"iox_catalog",
"iox_time",
@ -2639,16 +2524,8 @@ dependencies = [
"metric",
"object_store",
"query",
"service_grpc_testing",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"trace",
"trace_http",
"workspace-hack",
]
@ -2658,26 +2535,16 @@ version = "0.1.0"
dependencies = [
"async-trait",
"clap_blocks",
"data_types2",
"generated_types",
"data_types",
"hyper",
"ingester",
"iox_catalog",
"iox_time",
"ioxd_common",
"metric",
"object_store",
"query",
"service_grpc_testing",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"trace",
"trace_http",
"workspace-hack",
"write_buffer",
]
@ -2688,7 +2555,7 @@ version = "0.1.0"
dependencies = [
"arrow-flight",
"async-trait",
"data_types2",
"data_types",
"generated_types",
"hyper",
"iox_catalog",
@ -2699,50 +2566,34 @@ dependencies = [
"object_store",
"querier",
"query",
"service_common",
"service_grpc_flight",
"service_grpc_influxrpc",
"service_grpc_testing",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"trace",
"trace_http",
"workspace-hack",
]
[[package]]
name = "ioxd_router2"
name = "ioxd_router"
version = "0.1.0"
dependencies = [
"arrow-flight",
"async-trait",
"clap_blocks",
"data_types2",
"generated_types",
"data_types",
"hashbrown 0.12.0",
"hyper",
"iox_catalog",
"iox_time",
"ioxd_common",
"metric",
"mutable_batch",
"object_store",
"observability_deps",
"router2",
"service_grpc_testing",
"router",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"trace",
"trace_http",
"workspace-hack",
"write_buffer",
"write_summary",
@ -2754,20 +2605,12 @@ version = "0.1.0"
dependencies = [
"async-trait",
"clap 3.1.12",
"generated_types",
"hyper",
"ioxd_common",
"metric",
"service_grpc_testing",
"snafu",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"tonic",
"tonic-health",
"tonic-reflection",
"trace",
"trace_http",
"workspace-hack",
]
@ -3324,6 +3167,7 @@ dependencies = [
"chrono",
"data_types",
"hashbrown 0.12.0",
"iox_time",
"itertools",
"rand",
"schema",
@ -3603,7 +3447,6 @@ dependencies = [
"futures-test",
"hyper",
"hyper-rustls",
"indexmap",
"iox_time",
"itertools",
"metric",
@ -3712,12 +3555,10 @@ dependencies = [
"arrow",
"criterion",
"influxdb_tsm",
"observability_deps",
"parquet",
"rand",
"schema",
"snafu",
"test_helpers",
"workspace-hack",
]
@ -3814,16 +3655,13 @@ name = "parquet_file"
version = "0.1.0"
dependencies = [
"arrow",
"arrow_util",
"base64 0.13.0",
"bytes",
"data_types",
"data_types2",
"datafusion 0.1.0",
"datafusion_util",
"futures",
"generated_types",
"iox_object_store",
"iox_time",
"metric",
"object_store",
@ -3837,10 +3675,8 @@ dependencies = [
"schema",
"snafu",
"tempfile",
"test_helpers",
"thrift",
"tokio",
"tokio-stream",
"uuid 0.8.2",
"workspace-hack",
"zstd",
@ -4082,7 +3918,6 @@ dependencies = [
"datafusion_util",
"itertools",
"observability_deps",
"ordered-float 3.0.0",
"query_functions",
"schema",
"serde_json",
@ -4277,24 +4112,19 @@ name = "querier"
version = "0.1.0"
dependencies = [
"arrow",
"arrow-flight",
"arrow_util",
"assert_matches",
"async-trait",
"backoff 0.1.0",
"bytes",
"client_util",
"criterion",
"data_types",
"data_types2",
"datafusion 0.1.0",
"datafusion_util",
"futures",
"generated_types",
"hyper",
"influxdb_iox_client",
"iox_catalog",
"iox_object_store",
"iox_tests",
"iox_time",
"metric",
@ -4306,7 +4136,6 @@ dependencies = [
"pin-project",
"predicate",
"proptest",
"prost",
"query",
"rand",
"schema",
@ -4332,7 +4161,6 @@ dependencies = [
"chrono",
"croaring",
"data_types",
"data_types2",
"datafusion 0.1.0",
"datafusion_util",
"executor",
@ -4341,16 +4169,13 @@ dependencies = [
"itertools",
"observability_deps",
"parking_lot 0.12.0",
"pin-project",
"predicate",
"query_functions",
"regex",
"schema",
"snafu",
"test_helpers",
"tokio",
"tokio-stream",
"tokio-util 0.7.1",
"trace",
"workspace-hack",
]
@ -4383,7 +4208,6 @@ dependencies = [
"async-trait",
"backoff 0.1.0",
"data_types",
"data_types2",
"datafusion 0.1.0",
"dml",
"futures",
@ -4393,10 +4217,8 @@ dependencies = [
"iox_catalog",
"iox_tests",
"itertools",
"metric",
"mutable_batch",
"mutable_batch_lp",
"object_store",
"once_cell",
"predicate",
"querier",
@ -4406,7 +4228,6 @@ dependencies = [
"tempfile",
"test_helpers",
"tokio",
"trace",
"workspace-hack",
]
@ -4556,7 +4377,6 @@ dependencies = [
"rand_distr",
"schema",
"snafu",
"test_helpers",
"workspace-hack",
]
@ -4679,21 +4499,20 @@ dependencies = [
]
[[package]]
name = "router2"
name = "router"
version = "0.1.0"
dependencies = [
"assert_matches",
"async-trait",
"bytes",
"criterion",
"data_types2",
"data_types",
"dml",
"flate2",
"futures",
"generated_types",
"hashbrown 0.12.0",
"hyper",
"influxdb_line_protocol",
"iox_catalog",
"iox_catalog_service",
"iox_object_store_service",
@ -5530,7 +5349,6 @@ dependencies = [
"rand",
"sqlx",
"tokio",
"tokio-stream",
"workspace-hack",
]
@ -6843,7 +6661,6 @@ version = "0.1.0"
dependencies = [
"base64 0.13.0",
"data_types",
"data_types2",
"dml",
"generated_types",
"iox_time",


@ -7,7 +7,6 @@ members = [
"client_util",
"compactor",
"data_types",
"data_types2",
"datafusion",
"datafusion_util",
"dml",
@ -27,7 +26,6 @@ members = [
"iox_catalog_service",
"iox_data_generator",
"iox_gitops_adapter",
"iox_object_store",
"iox_object_store_service",
"iox_tests",
"iox_time",
@ -35,7 +33,7 @@ members = [
"ioxd_compactor",
"ioxd_ingester",
"ioxd_querier",
"ioxd_router2",
"ioxd_router",
"ioxd_test",
"logfmt",
"metric",
@ -55,7 +53,7 @@ members = [
"query_functions",
"query_tests",
"read_buffer",
"router2",
"router",
"schema",
"service_common",
"service_grpc_influxrpc",


@ -5,7 +5,6 @@ edition = "2021"
[dependencies]
clap = { version = "3", features = ["derive", "env"] }
data_types = { path = "../data_types" }
futures = "0.3"
humantime = "2.1.0"
iox_catalog = { path = "../iox_catalog" }


@ -7,6 +7,5 @@ pub mod ingester;
pub mod object_store;
pub mod querier;
pub mod run_config;
pub mod server_id;
pub mod socket_addr;
pub mod write_buffer;


@ -1,7 +1,7 @@
use trace_exporters::TracingConfig;
use trogging::cli::LoggingConfig;
use crate::{object_store::ObjectStoreConfig, server_id::ServerIdConfig, socket_addr::SocketAddr};
use crate::{object_store::ObjectStoreConfig, socket_addr::SocketAddr};
/// The default bind address for the HTTP API.
pub const DEFAULT_API_BIND_ADDR: &str = "127.0.0.1:8080";
@ -20,10 +20,6 @@ pub struct RunConfig {
#[clap(flatten)]
pub(crate) tracing_config: TracingConfig,
/// object store config
#[clap(flatten)]
pub(crate) server_id_config: ServerIdConfig,
/// The address on which IOx will serve HTTP API requests.
#[clap(
long = "--api-bind",
@ -69,21 +65,11 @@ impl RunConfig {
&mut self.tracing_config
}
/// Get a reference to the run config's server id config.
pub fn server_id_config(&self) -> &ServerIdConfig {
&self.server_id_config
}
/// Get a reference to the run config's logging config.
pub fn logging_config(&self) -> &LoggingConfig {
&self.logging_config
}
/// Get a mutable reference to the run config's server id config.
pub fn server_id_config_mut(&mut self) -> &mut ServerIdConfig {
&mut self.server_id_config
}
/// set the http bind address
pub fn with_http_bind_address(mut self, http_bind_address: SocketAddr) -> Self {
self.http_bind_address = http_bind_address;
@ -108,9 +94,6 @@ impl RunConfig {
Self {
logging_config,
tracing_config,
// TODO: server_id isn't used in NG; this field should be removed when OG is removed
// https://github.com/influxdata/influxdb_iox/issues/4451
server_id_config: ServerIdConfig { server_id: None },
http_bind_address,
grpc_bind_address,
max_http_request_size,


@ -1,14 +0,0 @@
use data_types::server_id::ServerId;
/// CLI config for server ID.
#[derive(Debug, Clone, clap::Parser)]
pub struct ServerIdConfig {
/// The identifier for the server.
///
/// Used for writing to object storage and as an identifier that is added to
/// replicated writes, write buffer segments, and Chunks. Must be unique in
/// a group of connected or semi-connected IOx servers. Must be a nonzero
/// number that can be represented by a 32-bit unsigned integer.
#[clap(long = "--server-id", env = "INFLUXDB_IOX_ID")]
pub server_id: Option<ServerId>,
}


@ -1,11 +1,10 @@
use data_types::write_buffer::{WriteBufferConnection, WriteBufferCreationConfig};
use iox_time::SystemProvider;
use observability_deps::tracing::*;
use std::{collections::BTreeMap, num::NonZeroU32, path::PathBuf, sync::Arc};
use tempfile::TempDir;
use trace::TraceCollector;
use write_buffer::{
config::WriteBufferConfigFactory,
config::{WriteBufferConfigFactory, WriteBufferConnection, WriteBufferCreationConfig},
core::{WriteBufferError, WriteBufferReading, WriteBufferWriting},
};


@ -7,7 +7,6 @@ edition = "2021"
[dependencies]
http = "0.2.7"
prost = "0.10"
thiserror = "1.0.31"
tonic = { version = "0.7" }
tower = "0.4"


@ -10,15 +10,12 @@ async-trait = "0.1.53"
backoff = { path = "../backoff" }
bytes = "1.0"
data_types = { path = "../data_types" }
data_types2 = { path = "../data_types2" }
datafusion = { path = "../datafusion" }
futures = "0.3"
iox_catalog = { path = "../iox_catalog" }
iox_object_store = { path = "../iox_object_store" }
metric = { path = "../metric" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }
query = { path = "../query" }


@ -1,30 +1,34 @@
//! Data Points for the lifecycle of the Compactor
use crate::handler::CompactorConfig;
use crate::utils::GroupWithMinTimeAndSize;
use crate::{
handler::CompactorConfig,
query::QueryableParquetChunk,
utils::{CatalogUpdate, CompactedData, GroupWithTombstones, ParquetFileWithTombstone},
utils::{
CatalogUpdate, CompactedData, GroupWithMinTimeAndSize, GroupWithTombstones,
ParquetFileWithTombstone,
},
};
use arrow::record_batch::RecordBatch;
use backoff::{Backoff, BackoffConfig};
use bytes::Bytes;
use data_types2::{
use data_types::{
ParquetFile, ParquetFileId, ParquetFileWithMetadata, PartitionId, SequencerId, TableId,
TablePartition, Timestamp, Tombstone, TombstoneId,
};
use datafusion::error::DataFusionError;
use iox_catalog::interface::{Catalog, Transaction};
use iox_object_store::ParquetFilePath;
use iox_time::{Time, TimeProvider};
use metric::{Attributes, Metric, U64Counter, U64Gauge, U64Histogram, U64HistogramOptions};
use object_store::DynObjectStore;
use observability_deps::tracing::{debug, info, warn};
use parquet_file::metadata::{IoxMetadata, IoxParquetMetaData};
use query::provider::overlap::group_potential_duplicates;
use parquet_file::{
metadata::{IoxMetadata, IoxParquetMetaData},
ParquetFilePath,
};
use query::{
exec::{Executor, ExecutorType},
frontend::reorg::ReorgPlanner,
provider::overlap::group_potential_duplicates,
util::compute_timenanosecond_min_max,
QueryChunk,
};
@ -33,7 +37,7 @@ use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::{
cmp::{max, min, Ordering},
collections::{BTreeMap, HashSet},
ops::DerefMut,
ops::{Deref, DerefMut},
sync::Arc,
};
use uuid::Uuid;
@ -782,16 +786,7 @@ impl Compactor {
.expect("record_batches.is_empty was just checked")
.schema();
// Make a fake IOx object store to conform to the parquet file
// interface, but note this isn't actually used to find parquet
// paths to write to
use iox_object_store::IoxObjectStore;
let iox_object_store = Arc::new(IoxObjectStore::existing(
Arc::clone(&object_store),
IoxObjectStore::root_path_for(&*object_store, uuid::Uuid::new_v4()),
));
let data = parquet_file::storage::Storage::new(Arc::clone(&iox_object_store))
let data = parquet_file::storage::Storage::new(Arc::clone(&object_store))
.parquet_bytes(record_batches, schema, metadata)
.await
.context(ConvertingToBytesSnafu)?;
@ -810,16 +805,17 @@ impl Compactor {
let file_size = data.len();
let bytes = Bytes::from(data);
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
metadata.namespace_id,
metadata.table_id,
metadata.sequencer_id,
metadata.partition_id,
metadata.object_store_id,
);
let path = path.object_store_path(object_store.deref());
iox_object_store
.put_parquet_file(&path, bytes)
object_store
.put(&path, bytes)
.await
.context(WritingToObjectStoreSnafu)?;
@ -1091,7 +1087,7 @@ pub struct PartitionCompactionCandidate {
mod tests {
use super::*;
use arrow_util::assert_batches_sorted_eq;
use data_types2::{ChunkId, KafkaPartition, NamespaceId, ParquetFileParams, SequenceNumber};
use data_types::{ChunkId, KafkaPartition, NamespaceId, ParquetFileParams, SequenceNumber};
use iox_catalog::interface::INITIAL_COMPACTION_LEVEL;
use iox_tests::util::TestCatalog;
use iox_time::SystemProvider;
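To make the shape of this change easier to see: the compactor no longer wraps the store in a throwaway IoxObjectStore. It builds the logical ParquetFilePath from catalog IDs and hands the resulting path straight to the object store. The sketch below is pieced together from this hunk only; the wrapper function, its parameters, and the expect-based error handling are illustrative assumptions, not code from this commit.

use std::ops::Deref;
use std::sync::Arc;

use bytes::Bytes;
use object_store::DynObjectStore;
use parquet_file::{metadata::IoxMetadata, ParquetFilePath};

// Hypothetical helper showing the new upload path.
async fn upload_parquet(object_store: Arc<DynObjectStore>, meta: &IoxMetadata, bytes: Bytes) {
    // The logical location is derived purely from catalog IDs...
    let path = ParquetFilePath::new(
        meta.namespace_id,
        meta.table_id,
        meta.sequencer_id,
        meta.partition_id,
        meta.object_store_id,
    );
    // ...and only mapped to a concrete object store path right before the write.
    let path = path.object_store_path(object_store.deref());
    object_store
        .put(&path, bytes)
        .await
        .expect("writing parquet bytes to object storage failed");
}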


@ -2,13 +2,13 @@
//! no longer needed because they've been compacted and they're old enough to no longer be used by
//! any queriers.
use data_types2::Timestamp;
use data_types::Timestamp;
use iox_catalog::interface::Catalog;
use iox_object_store::ParquetFilePath;
use iox_time::TimeProvider;
use object_store::DynObjectStore;
use parquet_file::ParquetFilePath;
use snafu::{ResultExt, Snafu};
use std::sync::Arc;
use std::{ops::Deref, sync::Arc};
#[derive(Debug, Snafu)]
#[allow(missing_copy_implementations, missing_docs)]
@ -53,15 +53,6 @@ impl GarbageCollector {
/// associated object store files.
/// Meant to be invoked in a background loop.
pub async fn cleanup(&self, older_than: Timestamp) -> Result<()> {
// Make a fake IOx object store to conform to the parquet file
// interface, but note this isn't actually used to find parquet
// paths to write to
use iox_object_store::IoxObjectStore;
let iox_object_store = Arc::new(IoxObjectStore::existing(
Arc::clone(&self.object_store),
IoxObjectStore::root_path_for(&*self.object_store, uuid::Uuid::new_v4()),
));
let deleted_catalog_records = self
.catalog
.repositories()
@ -74,15 +65,16 @@ impl GarbageCollector {
let mut object_store_errors = Vec::with_capacity(deleted_catalog_records.len());
for catalog_record in deleted_catalog_records {
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
catalog_record.namespace_id,
catalog_record.table_id,
catalog_record.sequencer_id,
catalog_record.partition_id,
catalog_record.object_store_id,
);
let path = path.object_store_path(self.object_store.deref());
if let Err(e) = iox_object_store.delete_parquet_file(&path).await {
if let Err(e) = self.object_store.delete(&path).await {
object_store_errors.push(e);
}
}
@ -101,9 +93,8 @@ impl GarbageCollector {
#[cfg(test)]
mod tests {
use super::*;
use data_types2::{KafkaPartition, ParquetFile, ParquetFileParams, SequenceNumber};
use data_types::{KafkaPartition, ParquetFile, ParquetFileParams, SequenceNumber};
use iox_catalog::interface::INITIAL_COMPACTION_LEVEL;
use iox_object_store::ParquetFilePath;
use iox_tests::util::TestCatalog;
use object_store::ObjectStoreTestConvenience;
use std::time::Duration;
@ -118,27 +109,16 @@ mod tests {
) {
let bytes = "arbitrary".into();
// Make a fake IOx object store to conform to the parquet file
// interface, but note this isn't actually used to find parquet
// paths to write to
use iox_object_store::IoxObjectStore;
let iox_object_store = Arc::new(IoxObjectStore::existing(
Arc::clone(&object_store),
IoxObjectStore::root_path_for(&*object_store, uuid::Uuid::new_v4()),
));
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
catalog_record.namespace_id,
catalog_record.table_id,
catalog_record.sequencer_id,
catalog_record.partition_id,
catalog_record.object_store_id,
);
let path = path.object_store_path(object_store.deref());
iox_object_store
.put_parquet_file(&path, bytes)
.await
.unwrap();
object_store.put(&path, bytes).await.unwrap();
}
#[tokio::test]


@ -2,7 +2,7 @@
use async_trait::async_trait;
use backoff::{Backoff, BackoffConfig};
use data_types2::SequencerId;
use data_types::SequencerId;
use futures::{
future::{BoxFuture, Shared},
select, FutureExt, TryFutureExt,


@ -1,22 +1,20 @@
//! Queryable Compactor Data
use std::sync::Arc;
use data_types::timestamp::TimestampMinMax;
use data_types2::{
tombstones_to_delete_predicates, ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId,
SequenceNumber, TableSummary, Timestamp, Tombstone,
use data_types::{
ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary,
Timestamp, TimestampMinMax, Tombstone,
};
use datafusion::physical_plan::SendableRecordBatchStream;
use observability_deps::tracing::trace;
use parquet_file::chunk::ParquetChunk;
use predicate::{Predicate, PredicateMatch};
use predicate::{delete_predicate::tombstones_to_delete_predicates, Predicate, PredicateMatch};
use query::{
exec::{stringset::StringSet, IOxSessionContext},
QueryChunk, QueryChunkError, QueryChunkMeta,
};
use schema::{merge::SchemaMerger, selection::Selection, sort::SortKey, Schema};
use snafu::{ResultExt, Snafu};
use std::sync::Arc;
#[derive(Debug, Snafu)]
#[allow(missing_copy_implementations, missing_docs)]


@ -2,10 +2,9 @@
use crate::query::QueryableParquetChunk;
use arrow::record_batch::RecordBatch;
use data_types2::{
use data_types::{
ParquetFileId, ParquetFileParams, ParquetFileWithMetadata, Timestamp, Tombstone, TombstoneId,
};
use iox_object_store::IoxObjectStore;
use object_store::DynObjectStore;
use observability_deps::tracing::*;
use parquet_file::{
@ -94,12 +93,11 @@ impl ParquetFileWithTombstone {
partition_sort_key: Option<SortKey>,
) -> QueryableParquetChunk {
let decoded_parquet_file = DecodedParquetFile::new((*self.data).clone());
let root_path = IoxObjectStore::root_path_for(&*object_store, self.data.object_store_id);
let iox_object_store = IoxObjectStore::existing(object_store, root_path);
let parquet_chunk = new_parquet_chunk(
&decoded_parquet_file,
ChunkMetrics::new_unregistered(), // TODO: need to add metrics
Arc::new(iox_object_store),
object_store,
);
debug!(


@ -1,21 +1,17 @@
[package]
name = "data_types"
version = "0.1.0"
authors = ["pauldix <paul@pauldix.net>"]
description = "InfluxDB IOx data_types, shared between IOx instances and IOx clients"
edition = "2021"
readme = "README.md"
description = "Shared data types in the IOx NG architecture"
[dependencies] # In alphabetical order
bytes = "1.0"
num_cpus = "1.13.0"
[dependencies]
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
observability_deps = { path = "../observability_deps" }
ordered-float = "3"
percent-encoding = "2.1.0"
regex = "1"
siphasher = "0.3"
schema = { path = "../schema" }
snafu = "0.7"
iox_time = { path = "../iox_time" }
sqlx = { version = "0.5", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}


@ -1,5 +0,0 @@
# Data Types
This crate contains types that are designed for external consumption (in `influxdb_iox_client` and other "client" facing uses).
*Client facing* in this case means exposed via management API or CLI and where changing the structs may require additional coordination / organization with clients.


@ -1,46 +0,0 @@
/// Boolean flag that works with environment variables.
#[derive(Debug, Clone, Copy)]
pub enum BooleanFlag {
True,
False,
}
impl std::str::FromStr for BooleanFlag {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_lowercase().as_str() {
"yes" | "y" | "true" | "t" | "1" => Ok(Self::True),
"no" | "n" | "false" | "f" | "0" => Ok(Self::False),
_ => Err(format!(
"Invalid boolean flag '{}'. Valid options: yes, no, y, n, true, false, t, f, 1, 0",
s
)),
}
}
}
impl From<BooleanFlag> for bool {
fn from(yes_no: BooleanFlag) -> Self {
matches!(yes_no, BooleanFlag::True)
}
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use super::*;
#[test]
fn test_parsing() {
assert!(bool::from(BooleanFlag::from_str("yes").unwrap()));
assert!(bool::from(BooleanFlag::from_str("Yes").unwrap()));
assert!(bool::from(BooleanFlag::from_str("YES").unwrap()));
assert!(!bool::from(BooleanFlag::from_str("No").unwrap()));
assert!(!bool::from(BooleanFlag::from_str("FaLse").unwrap()));
BooleanFlag::from_str("foo").unwrap_err();
}
}
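For reference, the removed flag composed std::str::FromStr with Into<bool>, so call sites could parse an environment variable directly. A minimal sketch using only the API above (the variable name INFLUXDB_IOX_EXAMPLE_FLAG is made up for illustration):

use std::str::FromStr;

fn read_flag_from_env() -> bool {
    // Fall back to "no" when the hypothetical variable is unset.
    let raw = std::env::var("INFLUXDB_IOX_EXAMPLE_FLAG").unwrap_or_else(|_| "no".to_string());
    // from_str accepts yes/no, y/n, true/false, t/f and 1/0 in any casing.
    let flag = BooleanFlag::from_str(&raw).expect("invalid boolean flag");
    flag.into()
}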


@ -1,382 +0,0 @@
//! Module contains a representation of chunk metadata
use std::{convert::TryFrom, num::NonZeroU32, str::FromStr, sync::Arc};
use bytes::Bytes;
use iox_time::Time;
use snafu::{ResultExt, Snafu};
use uuid::Uuid;
use crate::partition_metadata::PartitionAddr;
/// Address of the chunk within the catalog
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct ChunkAddr {
/// Database name
pub db_name: Arc<str>,
/// What table does the chunk belong to?
pub table_name: Arc<str>,
/// What partition does the chunk belong to?
pub partition_key: Arc<str>,
/// The ID of the chunk
pub chunk_id: ChunkId,
}
impl ChunkAddr {
pub fn new(partition: &PartitionAddr, chunk_id: ChunkId) -> Self {
Self {
db_name: Arc::clone(&partition.db_name),
table_name: Arc::clone(&partition.table_name),
partition_key: Arc::clone(&partition.partition_key),
chunk_id,
}
}
pub fn into_partition(self) -> PartitionAddr {
PartitionAddr {
db_name: self.db_name,
table_name: self.table_name,
partition_key: self.partition_key,
}
}
}
impl std::fmt::Display for ChunkAddr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Chunk('{}':'{}':'{}':{})",
self.db_name,
self.table_name,
self.partition_key,
self.chunk_id.get()
)
}
}
/// Which storage system is a chunk located in?
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum ChunkStorage {
/// The chunk is still open for new writes, in the Mutable Buffer
OpenMutableBuffer,
/// The chunk is no longer open for writes, in the Mutable Buffer
ClosedMutableBuffer,
/// The chunk is in the Read Buffer (where it can not be mutated)
ReadBuffer,
/// The chunk is both in ReadBuffer and Object Store
ReadBufferAndObjectStore,
/// The chunk is stored in Object Storage (where it can not be mutated)
ObjectStoreOnly,
}
impl ChunkStorage {
/// Return a str representation of this storage state
pub fn as_str(&self) -> &'static str {
match self {
Self::OpenMutableBuffer => "OpenMutableBuffer",
Self::ClosedMutableBuffer => "ClosedMutableBuffer",
Self::ReadBuffer => "ReadBuffer",
Self::ReadBufferAndObjectStore => "ReadBufferAndObjectStore",
Self::ObjectStoreOnly => "ObjectStoreOnly",
}
}
/// Returns true if this chunk has data in the mutable buffer
pub fn has_mutable_buffer(&self) -> bool {
matches!(self, Self::OpenMutableBuffer | Self::ClosedMutableBuffer)
}
/// Returns true if this chunk has data in the read buffer
pub fn has_read_buffer(&self) -> bool {
matches!(self, Self::ReadBuffer | Self::ReadBufferAndObjectStore)
}
/// Returns true if this chunk has data in object storage
pub fn has_object_store(&self) -> bool {
matches!(self, Self::ReadBufferAndObjectStore | Self::ObjectStoreOnly)
}
}
/// Any lifecycle action currently in progress for this chunk
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum ChunkLifecycleAction {
/// Chunk is in the process of being written to object storage
Persisting,
/// Chunk is in the process of being compacted
Compacting,
/// Object Store Chunk is in the process of being compacted
CompactingObjectStore,
/// Chunk is about to be dropped from memory and (if persisted) from object store
Dropping,
/// Chunk is in the process of being loaded back into the RUB
LoadingReadBuffer,
}
impl std::fmt::Display for ChunkLifecycleAction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.name())
}
}
impl ChunkLifecycleAction {
pub fn name(&self) -> &'static str {
match self {
Self::Persisting => "Persisting to Object Storage",
Self::Compacting => "Compacting",
Self::CompactingObjectStore => "Compacting Object Store",
Self::Dropping => "Dropping",
Self::LoadingReadBuffer => "Loading to Read Buffer",
}
}
}
/// Represents metadata about the physical storage of a chunk in a
/// database.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct ChunkSummary {
/// The partition key of this chunk
pub partition_key: Arc<str>,
/// The table of this chunk
pub table_name: Arc<str>,
/// Order of this chunk relative to other overlapping chunks.
pub order: ChunkOrder,
/// The id of this chunk
pub id: ChunkId,
/// How is this chunk stored?
pub storage: ChunkStorage,
/// Is there any outstanding lifecycle action for this chunk?
pub lifecycle_action: Option<ChunkLifecycleAction>,
/// The number of bytes used to store this chunk in memory
pub memory_bytes: usize,
/// The number of bytes used to store this chunk in object storage
pub object_store_bytes: usize,
/// The total number of rows in this chunk
pub row_count: usize,
/// The time at which the chunk data was accessed, by a query or a write
pub time_of_last_access: Option<Time>,
/// The earliest time at which data contained within this chunk was written
/// into IOx. Note due to the compaction, etc... this may not be the chunk
/// that data was originally written into
pub time_of_first_write: Time,
/// The latest time at which data contained within this chunk was written
/// into IOx. Note due to the compaction, etc... this may not be the chunk
/// that data was originally written into
pub time_of_last_write: Time,
}
/// Represents metadata about the physical storage of a column in a chunk
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ChunkColumnSummary {
/// Column name
pub name: Arc<str>,
/// Estimated size, in bytes, consumed by this column.
pub memory_bytes: usize,
}
/// Contains additional per-column details about physical storage of a chunk
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DetailedChunkSummary {
/// Overall chunk statistic
pub inner: ChunkSummary,
/// Per column breakdown
pub columns: Vec<ChunkColumnSummary>,
}
impl ChunkSummary {
pub fn equal_without_timestamps_and_ids(&self, other: &Self) -> bool {
self.partition_key == other.partition_key
&& self.table_name == other.table_name
&& self.storage == other.storage
&& self.lifecycle_action == other.lifecycle_action
&& self.memory_bytes == other.memory_bytes
&& self.object_store_bytes == other.object_store_bytes
&& self.row_count == other.row_count
}
}
/// ID of a chunk.
///
/// This ID is unique within a single partition.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ChunkId(Uuid);
impl ChunkId {
/// Create new, random ID.
#[allow(clippy::new_without_default)] // `new` creates non-deterministic result
pub fn new() -> Self {
Self(Uuid::new_v4())
}
/// **TESTING ONLY:** Create new ID from integer.
///
/// Since this can easily lead to ID collisions (which in turn can lead to panics), this must only be used for
/// testing purposes!
pub fn new_test(id: u128) -> Self {
Self(Uuid::from_u128(id))
}
/// NG's chunk ID is only meaningful when the chunk's order is the same as
/// another chunk's. Hence collisions are safe in that context
pub fn new_id_for_ng(id: u128) -> Self {
Self(Uuid::from_u128(id))
}
/// Get inner UUID.
pub fn get(&self) -> Uuid {
self.0
}
}
impl std::fmt::Debug for ChunkId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
<Self as std::fmt::Display>::fmt(self, f)
}
}
impl std::fmt::Display for ChunkId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if (self.0.get_variant() == Some(uuid::Variant::RFC4122))
&& (self.0.get_version() == Some(uuid::Version::Random))
{
f.debug_tuple("ChunkId").field(&self.0).finish()
} else {
f.debug_tuple("ChunkId").field(&self.0.as_u128()).finish()
}
}
}
impl From<ChunkId> for Bytes {
fn from(id: ChunkId) -> Self {
id.get().as_bytes().to_vec().into()
}
}
#[derive(Debug, Snafu)]
pub enum ChunkIdConversionError {
#[snafu(display("Cannot convert bytes to chunk ID: {}", source))]
CannotConvertBytes { source: uuid::Error },
#[snafu(display("Cannot convert UUID text to chunk ID: {}", source))]
CannotConvertUUIDText { source: uuid::Error },
}
impl TryFrom<Bytes> for ChunkId {
type Error = ChunkIdConversionError;
fn try_from(value: Bytes) -> Result<Self, Self::Error> {
Ok(Self(
Uuid::from_slice(&value).context(CannotConvertBytesSnafu)?,
))
}
}
impl From<Uuid> for ChunkId {
fn from(uuid: Uuid) -> Self {
Self(uuid)
}
}
/// Implements conversion from the canonical textual representation of a UUID
/// into a `ChunkId`.
impl FromStr for ChunkId {
type Err = ChunkIdConversionError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let uuid = Uuid::parse_str(s).context(CannotConvertUUIDTextSnafu)?;
Ok(Self::from(uuid))
}
}
/// Order of a chunk.
///
/// This is used for:
/// 1. **upsert order:** chunks with higher order overwrite data in chunks with lower order
/// 2. **locking order:** chunks must be locked in consistent (ascending) order
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ChunkOrder(NonZeroU32);
impl ChunkOrder {
// TODO: remove `unsafe` once https://github.com/rust-lang/rust/issues/51999 is fixed
pub const MIN: Self = Self(unsafe { NonZeroU32::new_unchecked(1) });
pub const MAX: Self = Self(unsafe { NonZeroU32::new_unchecked(u32::MAX) });
pub fn new(order: u32) -> Option<Self> {
NonZeroU32::new(order).map(Self)
}
pub fn get(&self) -> u32 {
self.0.get()
}
/// Get next chunk order.
///
/// # Panic
/// Panics if `self` is already [max](Self::MAX).
pub fn next(&self) -> Self {
Self(
NonZeroU32::new(self.0.get().checked_add(1).expect("chunk order overflow"))
.expect("did not overflow, so cannot be zero"),
)
}
}
impl std::fmt::Display for ChunkOrder {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("ChunkOrder").field(&self.0.get()).finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_chunk_id_new() {
// `ChunkId::new()` create new random ID
assert_ne!(ChunkId::new(), ChunkId::new());
}
#[test]
fn test_chunk_id_new_test() {
// `ChunkId::new_test(...)` creates deterministic ID
assert_eq!(ChunkId::new_test(1), ChunkId::new_test(1));
assert_ne!(ChunkId::new_test(1), ChunkId::new_test(2));
}
#[test]
fn test_chunk_id_debug_and_display() {
// Random chunk IDs use UUID-format
let id_random = ChunkId::new();
let inner: Uuid = id_random.get();
assert_eq!(format!("{:?}", id_random), format!("ChunkId({})", inner));
assert_eq!(format!("{}", id_random), format!("ChunkId({})", inner));
// Deterministic IDs use integer format
let id_test = ChunkId::new_test(42);
assert_eq!(format!("{:?}", id_test), "ChunkId(42)");
assert_eq!(format!("{}", id_test), "ChunkId(42)");
}
}


@ -1,176 +0,0 @@
use std::hash::{Hash, Hasher};
use siphasher::sip::SipHasher13;
/// A ConsistentHasher implements a simple consistent hashing mechanism
/// that maps a point to the nearest "node" N.
///
/// It has the property that the addition or removal of one node in the ring
/// in the worst case only changes the mapping of points that were assigned
/// to the node adjacent to the node that gets inserted/removed (on average half
/// of them).
///
/// e.g. you can use it to find the ShardID in a vector of ShardIds
/// that is closest to a given hash value.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct ConsistentHasher<T>
where
T: Copy + Hash,
{
ring: Vec<(u64, T)>,
}
impl<T> ConsistentHasher<T>
where
T: Copy + Hash,
{
pub fn new(nodes: &[T]) -> Self {
let mut ring: Vec<_> = nodes.iter().map(|node| (Self::hash(node), *node)).collect();
ring.sort_by_key(|(hash, _)| *hash);
Self { ring }
}
pub fn find<H: Hash>(&self, point: H) -> Option<T> {
let point_hash = Self::hash(point);
self.ring
.iter()
.find(|(node_hash, _)| node_hash > &point_hash)
.or_else(|| self.ring.first())
.map(|(_, node)| *node)
}
pub fn is_empty(&self) -> bool {
self.ring.is_empty()
}
pub fn len(&self) -> usize {
self.ring.len()
}
fn hash<H: Hash>(h: H) -> u64 {
let mut hasher = SipHasher13::new();
h.hash(&mut hasher);
hasher.finish()
}
}
impl<T> Default for ConsistentHasher<T>
where
T: Copy + Hash,
{
fn default() -> Self {
Self {
ring: Default::default(),
}
}
}
impl<T> From<ConsistentHasher<T>> for Vec<T>
where
T: Copy + Hash,
{
fn from(hasher: ConsistentHasher<T>) -> Self {
hasher.ring.into_iter().map(|(_, node)| node).collect()
}
}
impl<T> From<Vec<T>> for ConsistentHasher<T>
where
T: Copy + Hash,
{
fn from(vec: Vec<T>) -> Self {
Self::new(&vec)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_consistent_hasher() {
let ch = ConsistentHasher::new(&[10, 20, 30, 40]);
// test points found with:
/*
for needle in (10..=40).step_by(10) {
let mut found = 0;
for point in 0..100 {
if ch.find(point) == Some(needle) {
found += 1;
println!(r#"assert_eq!(ch.find({}), Some({}));"#, point, needle);
}
if found >= 16 {
break;
}
}
println!();
}
*/
assert_eq!(ch.find(1), Some(10));
assert_eq!(ch.find(6), Some(10));
assert_eq!(ch.find(16), Some(10));
assert_eq!(ch.find(25), Some(10));
assert_eq!(ch.find(8), Some(20));
assert_eq!(ch.find(9), Some(20));
assert_eq!(ch.find(11), Some(20));
assert_eq!(ch.find(13), Some(20));
assert_eq!(ch.find(3), Some(30));
assert_eq!(ch.find(12), Some(30));
assert_eq!(ch.find(15), Some(30));
assert_eq!(ch.find(20), Some(30));
assert_eq!(ch.find(7), Some(40));
assert_eq!(ch.find(10), Some(40));
assert_eq!(ch.find(14), Some(40));
assert_eq!(ch.find(18), Some(40));
let ch = ConsistentHasher::new(&[10, 20, 30, 40, 50]);
assert_eq!(ch.find(1), Some(10));
assert_eq!(ch.find(6), Some(10));
assert_eq!(ch.find(16), Some(10));
assert_eq!(ch.find(25), Some(10));
assert_eq!(ch.find(8), Some(20));
assert_eq!(ch.find(9), Some(20));
assert_eq!(ch.find(11), Some(50)); // <-- moved to node 50
assert_eq!(ch.find(13), Some(50)); // <-- moved to node 50
assert_eq!(ch.find(3), Some(30));
assert_eq!(ch.find(12), Some(30));
assert_eq!(ch.find(15), Some(30));
assert_eq!(ch.find(20), Some(30));
assert_eq!(ch.find(7), Some(40));
assert_eq!(ch.find(10), Some(40));
assert_eq!(ch.find(14), Some(40));
assert_eq!(ch.find(18), Some(40));
let ch = ConsistentHasher::new(&[10, 20, 30]);
assert_eq!(ch.find(1), Some(10));
assert_eq!(ch.find(6), Some(10));
assert_eq!(ch.find(16), Some(10));
assert_eq!(ch.find(25), Some(10));
assert_eq!(ch.find(8), Some(20));
assert_eq!(ch.find(9), Some(20));
assert_eq!(ch.find(11), Some(20));
assert_eq!(ch.find(13), Some(20));
assert_eq!(ch.find(3), Some(30));
assert_eq!(ch.find(12), Some(30));
assert_eq!(ch.find(15), Some(30));
assert_eq!(ch.find(20), Some(30));
// all points that used to map to shard 40 go to shard 20
assert_eq!(ch.find(7), Some(20));
assert_eq!(ch.find(10), Some(20));
assert_eq!(ch.find(14), Some(20));
assert_eq!(ch.find(18), Some(20));
}
}
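The ShardID use case mentioned in the doc comment above boils down to a single find call. A minimal sketch using only the API shown in this file (the shard IDs and the lookup key are made-up values):

fn main() {
    let shard_ids: Vec<u32> = vec![1, 2, 3, 4];
    let hasher = ConsistentHasher::new(&shard_ids);
    // Any Hash-able value can serve as the point; a table name is a natural key.
    let shard = hasher.find("cpu,host=server-a");
    println!("mapped to shard {:?}", shard);
}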


@ -1,187 +0,0 @@
use snafu::Snafu;
use std::{borrow::Cow, ops::RangeInclusive};
/// Length constraints for a database name.
///
/// A `RangeInclusive` is a closed interval, covering [1, 64]
const LENGTH_CONSTRAINT: RangeInclusive<usize> = 1..=64;
/// Database name validation errors.
#[derive(Debug, Snafu)]
pub enum DatabaseNameError {
#[snafu(display(
"Database name {} length must be between {} and {} characters",
name,
LENGTH_CONSTRAINT.start(),
LENGTH_CONSTRAINT.end()
))]
LengthConstraint { name: String },
#[snafu(display(
"Database name '{}' contains invalid character. Character number {} is a control which is not allowed.", name, bad_char_offset
))]
BadChars {
bad_char_offset: usize,
name: String,
},
}
/// A correctly formed database name.
///
/// Using this wrapper type allows the consuming code to enforce the invariant
/// that only valid names are provided.
///
/// This type derefs to a `str` and therefore can be used in place of anything
/// that is expecting a `str`:
///
/// ```rust
/// # use data_types::DatabaseName;
/// fn print_database(s: &str) {
/// println!("database name: {}", s);
/// }
///
/// let db = DatabaseName::new("data").unwrap();
/// print_database(&db);
/// ```
///
/// But this is not reciprocal - functions that wish to accept only
/// pre-validated names can use `DatabaseName` as a parameter.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct DatabaseName<'a>(Cow<'a, str>);
impl<'a> DatabaseName<'a> {
pub fn new<T: Into<Cow<'a, str>>>(name: T) -> Result<Self, DatabaseNameError> {
let name: Cow<'a, str> = name.into();
if !LENGTH_CONSTRAINT.contains(&name.len()) {
return Err(DatabaseNameError::LengthConstraint {
name: name.to_string(),
});
}
// Validate the name contains only valid characters.
//
// NOTE: If changing these characters, please update the error message
// above.
if let Some(bad_char_offset) = name.chars().position(|c| c.is_control()) {
return BadCharsSnafu {
bad_char_offset,
name,
}
.fail();
};
Ok(Self(name))
}
pub fn as_str(&self) -> &str {
self.0.as_ref()
}
}
impl<'a> std::convert::From<DatabaseName<'a>> for String {
fn from(name: DatabaseName<'a>) -> Self {
name.0.to_string()
}
}
impl<'a> std::convert::From<&DatabaseName<'a>> for String {
fn from(name: &DatabaseName<'a>) -> Self {
name.to_string()
}
}
impl<'a> std::convert::TryFrom<&'a str> for DatabaseName<'a> {
type Error = DatabaseNameError;
fn try_from(v: &'a str) -> Result<Self, Self::Error> {
Self::new(v)
}
}
impl<'a> std::convert::TryFrom<String> for DatabaseName<'a> {
type Error = DatabaseNameError;
fn try_from(v: String) -> Result<Self, Self::Error> {
Self::new(v)
}
}
impl<'a> std::ops::Deref for DatabaseName<'a> {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl<'a> std::fmt::Display for DatabaseName<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::convert::TryFrom;
use test_helpers::assert_contains;
#[test]
fn test_deref() {
let db = DatabaseName::new("my_example_name").unwrap();
assert_eq!(&*db, "my_example_name");
}
#[test]
fn test_too_short() {
let name = "".to_string();
let got = DatabaseName::try_from(name).unwrap_err();
assert!(matches!(
got,
DatabaseNameError::LengthConstraint { name: _n }
));
}
#[test]
fn test_too_long() {
let name = "my_example_name_that_is_quite_a_bit_longer_than_allowed_even_though_database_names_can_be_quite_long_bananas".to_string();
let got = DatabaseName::try_from(name).unwrap_err();
assert!(matches!(
got,
DatabaseNameError::LengthConstraint { name: _n }
));
}
#[test]
fn test_bad_chars_null() {
let got = DatabaseName::new("example\x00").unwrap_err();
assert_contains!(got.to_string() , "Database name 'example\x00' contains invalid character. Character number 7 is a control which is not allowed.");
}
#[test]
fn test_bad_chars_high_control() {
let got = DatabaseName::new("\u{007f}example").unwrap_err();
assert_contains!(got.to_string() , "Database name '\u{007f}example' contains invalid character. Character number 0 is a control which is not allowed.");
}
#[test]
fn test_bad_chars_tab() {
let got = DatabaseName::new("example\tdb").unwrap_err();
assert_contains!(got.to_string() , "Database name 'example\tdb' contains invalid character. Character number 7 is a control which is not allowed.");
}
#[test]
fn test_bad_chars_newline() {
let got = DatabaseName::new("my_example\ndb").unwrap_err();
assert_contains!(got.to_string() , "Database name 'my_example\ndb' contains invalid character. Character number 10 is a control which is not allowed.");
}
#[test]
fn test_ok_chars() {
let db = DatabaseName::new("my-example-db_with_underscores and spaces").unwrap();
assert_eq!(&*db, "my-example-db_with_underscores and spaces");
}
}


@ -1,276 +0,0 @@
use crate::{write_buffer::WriteBufferConnection, DatabaseName};
use snafu::Snafu;
use std::{
num::{NonZeroU32, NonZeroU64, NonZeroUsize},
time::Duration,
};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error in {}: {}", source_module, source))]
PassThrough {
source_module: &'static str,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
#[snafu(display("No sharding rule matches table: {}", table))]
NoShardingRuleMatches { table: String },
#[snafu(display("No shards defined"))]
NoShardsDefined,
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// `DatabaseRules` contains the rules for replicating data, sending data to
/// subscribers, and querying data for a single database. This information is
/// provided by and exposed to operators.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct DatabaseRules {
/// The name of the database
pub name: DatabaseName<'static>,
/// Template that generates a partition key for each row inserted into the
/// db
pub partition_template: PartitionTemplate,
/// Configure how data flows through the system
pub lifecycle_rules: LifecycleRules,
/// Duration for which the cleanup loop should sleep on average.
/// Defaults to 500 seconds.
pub worker_cleanup_avg_sleep: Duration,
/// An optional connection string to a write buffer for either writing or reading.
pub write_buffer_connection: Option<WriteBufferConnection>,
}
impl DatabaseRules {
pub fn new(name: DatabaseName<'static>) -> Self {
Self {
name,
partition_template: Default::default(),
lifecycle_rules: Default::default(),
worker_cleanup_avg_sleep: Duration::from_secs(500),
write_buffer_connection: None,
}
}
pub fn db_name(&self) -> &str {
self.name.as_str()
}
}
pub const DEFAULT_WORKER_BACKOFF_MILLIS: u64 = 1_000;
pub const DEFAULT_CATALOG_TRANSACTIONS_UNTIL_CHECKPOINT: u64 = 100;
pub const DEFAULT_CATALOG_TRANSACTION_PRUNE_AGE: Duration = Duration::from_secs(24 * 60 * 60);
pub const DEFAULT_MUB_ROW_THRESHOLD: usize = 100_000;
pub const DEFAULT_PERSIST_ROW_THRESHOLD: usize = 1_000_000;
pub const DEFAULT_PERSIST_AGE_THRESHOLD_SECONDS: u32 = 30 * 60;
pub const DEFAULT_LATE_ARRIVE_WINDOW_SECONDS: u32 = 5 * 60;
/// Configures how data automatically flows through the system
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct LifecycleRules {
/// Once the total amount of buffered data in memory reaches this size start
/// dropping data from memory
pub buffer_size_soft: Option<NonZeroUsize>,
/// Once the amount of data in memory reaches this size start
/// rejecting writes
pub buffer_size_hard: Option<NonZeroUsize>,
/// Persists chunks to object storage.
pub persist: bool,
/// Do not allow writing new data to this database
pub immutable: bool,
/// If the background worker doesn't find anything to do it
/// will sleep for this many milliseconds before looking again
pub worker_backoff_millis: NonZeroU64,
/// The maximum number of permitted concurrently executing compactions.
pub max_active_compactions: MaxActiveCompactions,
/// After how many transactions should IOx write a new checkpoint?
pub catalog_transactions_until_checkpoint: NonZeroU64,
/// Prune catalog transactions older than the given age.
///
/// Keeping old transaction can be useful for debugging.
pub catalog_transaction_prune_age: Duration,
/// Once a partition hasn't received a write for this period of time,
/// it will be compacted and, if set, persisted. Writers will generally
/// have this amount of time to send late arriving writes or this could
/// be their clock skew.
pub late_arrive_window_seconds: NonZeroU32,
/// Maximum number of rows before triggering persistence
pub persist_row_threshold: NonZeroUsize,
/// Maximum age of a write before triggering persistence
pub persist_age_threshold_seconds: NonZeroU32,
/// Maximum number of rows to buffer in a MUB chunk before compacting it
pub mub_row_threshold: NonZeroUsize,
/// Use up to this amount of space in bytes for caching Parquet files. None
/// will disable Parquet file caching.
pub parquet_cache_limit: Option<NonZeroU64>,
}
#[derive(Debug, PartialEq, Clone)]
pub enum MaxActiveCompactions {
/// The maximum number of permitted concurrently executing compactions.
/// It is not currently possible to set a limit that disables compactions
/// entirely, nor is it possible to set an "unlimited" value.
MaxActiveCompactions(NonZeroU32),
// The maximum number of concurrent active compactions that can run
// expressed as a fraction of the available cpus (rounded to the next smallest non-zero integer).
MaxActiveCompactionsCpuFraction {
fraction: f32,
effective: NonZeroU32,
},
}
impl MaxActiveCompactions {
pub fn new(fraction: f32) -> Self {
let cpus = num_cpus::get() as f32 * fraction;
let effective = (cpus as u32).saturating_sub(1) + 1;
let effective = NonZeroU32::new(effective).unwrap();
Self::MaxActiveCompactionsCpuFraction {
fraction,
effective,
}
}
pub fn get(&self) -> u32 {
match self {
Self::MaxActiveCompactions(effective) => effective,
Self::MaxActiveCompactionsCpuFraction { effective, .. } => effective,
}
.get()
}
}
// Defaults to number of CPUs.
impl Default for MaxActiveCompactions {
fn default() -> Self {
Self::new(1.0)
}
}
// Required because database rules must be Eq but cannot derive Eq for Self
// since f32 is not Eq.
impl Eq for MaxActiveCompactions {}
impl LifecycleRules {
/// The max timestamp skew across concurrent writers before persisted chunks might overlap
pub fn late_arrive_window(&self) -> Duration {
Duration::from_secs(self.late_arrive_window_seconds.get() as u64)
}
}
impl Default for LifecycleRules {
fn default() -> Self {
Self {
buffer_size_soft: None,
buffer_size_hard: None,
persist: false,
immutable: false,
worker_backoff_millis: NonZeroU64::new(DEFAULT_WORKER_BACKOFF_MILLIS).unwrap(),
max_active_compactions: Default::default(),
catalog_transactions_until_checkpoint: NonZeroU64::new(
DEFAULT_CATALOG_TRANSACTIONS_UNTIL_CHECKPOINT,
)
.unwrap(),
catalog_transaction_prune_age: DEFAULT_CATALOG_TRANSACTION_PRUNE_AGE,
late_arrive_window_seconds: NonZeroU32::new(DEFAULT_LATE_ARRIVE_WINDOW_SECONDS)
.unwrap(),
persist_row_threshold: NonZeroUsize::new(DEFAULT_PERSIST_ROW_THRESHOLD).unwrap(),
persist_age_threshold_seconds: NonZeroU32::new(DEFAULT_PERSIST_AGE_THRESHOLD_SECONDS)
.unwrap(),
mub_row_threshold: NonZeroUsize::new(DEFAULT_MUB_ROW_THRESHOLD).unwrap(),
parquet_cache_limit: None,
}
}
}
/// `PartitionTemplate` is used to compute the partition key of each row that
/// gets written. It can consist of the table name, a column name and its value,
/// a formatted time, or a string column and regex captures of its value. For
/// columns that do not appear in the input row, a blank value is output.
///
/// The key is constructed in order of the template parts; thus ordering changes
/// what partition key is generated.
#[derive(Debug, Default, Eq, PartialEq, Clone)]
pub struct PartitionTemplate {
pub parts: Vec<TemplatePart>,
}
/// `TemplatePart` specifies what part of a row should be used to compute this
/// part of a partition key.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum TemplatePart {
/// The name of a table
Table,
/// The value in a named column
Column(String),
/// Applies a `strftime` format to the "time" column.
///
/// For example, a time format of "%Y-%m-%d %H:%M:%S" will produce
/// partition key parts such as "2021-03-14 12:25:21" and
/// "2021-04-14 12:24:21"
TimeFormat(String),
/// Applies a regex to the value in a string column
RegexCapture(RegexCapture),
/// Applies a `strftime` pattern to some column other than "time"
StrftimeColumn(StrftimeColumn),
}
/// `RegexCapture` is for pulling parts of a string column into the partition
/// key.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct RegexCapture {
pub column: String,
pub regex: String,
}
/// [`StrftimeColumn`] is used to create a time based partition key off some
/// column other than the builtin `time` column.
///
/// The value of the named column is formatted using a `strftime`
/// style string.
///
/// For example, a time format of "%Y-%m-%d %H:%M:%S" will produce
/// partition key parts such as "2021-03-14 12:25:21" and
/// "2021-04-14 12:24:21"
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct StrftimeColumn {
pub column: String,
pub format: String,
}
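Putting the parts above together: a template is an ordered list of parts, and each part contributes one component of the partition key for a given row. A minimal construction sketch (the exact key text generated from these parts is produced elsewhere and is not shown here):

fn example_template() -> PartitionTemplate {
    // Partition first by table name, then by the day the row's time falls on.
    PartitionTemplate {
        parts: vec![
            TemplatePart::Table,
            TemplatePart::TimeFormat("%Y-%m-%d".to_string()),
        ],
    }
}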
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_max_active_compactions_cpu_fraction() {
let n = MaxActiveCompactions::new(1.0);
let cpus = n.get();
let n = MaxActiveCompactions::new(0.5);
let half_cpus = n.get();
assert_eq!(half_cpus, cpus / 2);
let n = MaxActiveCompactions::new(0.0);
let non_zero = n.get();
assert_eq!(non_zero, 1);
}
}


@ -1,357 +0,0 @@
use crate::timestamp::TimestampRange;
use std::{fmt::Write, num::FpCategory};
/// Represents a parsed delete predicate for evaluation by the InfluxDB IOx
/// query engine.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DeletePredicate {
/// Only rows within this range are included in
/// results. Other rows are excluded.
pub range: TimestampRange,
/// Optional arbitrary predicates, represented as a list of
/// expressions applied as a logical conjunction (i.e. they
/// are 'AND'ed together). Only rows that evaluate to TRUE for all
/// these expressions should be returned. Other rows are excluded
/// from the results.
pub exprs: Vec<DeleteExpr>,
}
impl DeletePredicate {
/// Format expr to SQL string.
pub fn expr_sql_string(&self) -> String {
let mut out = String::new();
for expr in &self.exprs {
if !out.is_empty() {
write!(&mut out, " AND ").expect("writing to a string shouldn't fail");
}
write!(&mut out, "{}", expr).expect("writing to a string shouldn't fail");
}
out
}
/// Return the approximate memory size of the predicate, in bytes.
///
/// This includes `Self`.
pub fn size(&self) -> usize {
std::mem::size_of::<Self>() + self.exprs.iter().map(|expr| expr.size()).sum::<usize>()
}
}
/// Single expression to be used as parts of a predicate.
///
/// Only very simple expressions of the type `<column> <op> <scalar>` are supported.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DeleteExpr {
/// Column (w/o table name).
pub column: String,
/// Operator.
pub op: Op,
/// Scalar value.
pub scalar: Scalar,
}
impl DeleteExpr {
/// Create a new [`DeleteExpr`]
pub fn new(column: String, op: Op, scalar: Scalar) -> Self {
Self { column, op, scalar }
}
/// Column (w/o table name).
pub fn column(&self) -> &str {
&self.column
}
/// Operator.
pub fn op(&self) -> Op {
self.op
}
/// Scalar value.
pub fn scalar(&self) -> &Scalar {
&self.scalar
}
/// Return the approximate memory size of the expression, in bytes.
///
/// This includes `Self`.
pub fn size(&self) -> usize {
std::mem::size_of::<Self>() + self.column.capacity() + self.scalar.size()
}
}
impl std::fmt::Display for DeleteExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
r#""{}"{}{}"#,
self.column().replace('\\', r#"\\"#).replace('"', r#"\""#),
self.op(),
self.scalar(),
)
}
}
/// Binary operator that can be evaluated on a column and a scalar value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Op {
/// Strict equality (`=`).
Eq,
/// Inequality (`!=`).
Ne,
}
impl std::fmt::Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Eq => write!(f, "="),
Self::Ne => write!(f, "!="),
}
}
}
/// Scalar value of a certain type.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(missing_docs)]
pub enum Scalar {
Bool(bool),
I64(i64),
F64(ordered_float::OrderedFloat<f64>),
String(String),
}
impl Scalar {
/// Return the approximate memory size of the scalar, in bytes.
///
/// This includes `Self`.
pub fn size(&self) -> usize {
std::mem::size_of::<Self>()
+ match &self {
Self::Bool(_) | Self::I64(_) | Self::F64(_) => 0,
Self::String(s) => s.capacity(),
}
}
}
impl std::fmt::Display for Scalar {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Scalar::Bool(value) => value.fmt(f),
Scalar::I64(value) => value.fmt(f),
Scalar::F64(value) => match value.classify() {
FpCategory::Nan => write!(f, "'NaN'"),
FpCategory::Infinite if *value.as_ref() < 0.0 => write!(f, "'-Infinity'"),
FpCategory::Infinite => write!(f, "'Infinity'"),
_ => write!(f, "{:?}", value.as_ref()),
},
Scalar::String(value) => {
write!(
f,
"'{}'",
value.replace('\\', r#"\\"#).replace('\'', r#"\'"#),
)
}
}
}
}
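// A minimal sketch (not in the original source): building a `DeletePredicate`
// and rendering its conjunctive SQL form. The column and value are
// illustrative.
fn example_delete_predicate_sql() -> String {
    let pred = DeletePredicate {
        range: TimestampRange::new(100, 200),
        exprs: vec![DeleteExpr::new(
            "host".to_string(),
            Op::Eq,
            Scalar::String("server01".to_string()),
        )],
    };
    // Yields: "host"='server01'
    pred.expr_sql_string()
}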
#[cfg(test)]
mod tests {
use ordered_float::OrderedFloat;
use super::*;
#[test]
fn test_expr_to_sql_no_expressions() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![],
};
assert_eq!(&pred.expr_sql_string(), "");
}
#[test]
fn test_expr_to_sql_operators() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![
DeleteExpr {
column: String::from("col1"),
op: Op::Eq,
scalar: Scalar::I64(1),
},
DeleteExpr {
column: String::from("col2"),
op: Op::Ne,
scalar: Scalar::I64(2),
},
],
};
assert_eq!(&pred.expr_sql_string(), r#""col1"=1 AND "col2"!=2"#);
}
#[test]
fn test_expr_to_sql_column_escape() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![
DeleteExpr {
column: String::from("col 1"),
op: Op::Eq,
scalar: Scalar::I64(1),
},
DeleteExpr {
column: String::from(r#"col\2"#),
op: Op::Eq,
scalar: Scalar::I64(2),
},
DeleteExpr {
column: String::from(r#"col"3"#),
op: Op::Eq,
scalar: Scalar::I64(3),
},
],
};
assert_eq!(
&pred.expr_sql_string(),
r#""col 1"=1 AND "col\\2"=2 AND "col\"3"=3"#
);
}
#[test]
fn test_expr_to_sql_bool() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![
DeleteExpr {
column: String::from("col1"),
op: Op::Eq,
scalar: Scalar::Bool(false),
},
DeleteExpr {
column: String::from("col2"),
op: Op::Eq,
scalar: Scalar::Bool(true),
},
],
};
assert_eq!(&pred.expr_sql_string(), r#""col1"=false AND "col2"=true"#);
}
#[test]
fn test_expr_to_sql_i64() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![
DeleteExpr {
column: String::from("col1"),
op: Op::Eq,
scalar: Scalar::I64(0),
},
DeleteExpr {
column: String::from("col2"),
op: Op::Eq,
scalar: Scalar::I64(-1),
},
DeleteExpr {
column: String::from("col3"),
op: Op::Eq,
scalar: Scalar::I64(1),
},
DeleteExpr {
column: String::from("col4"),
op: Op::Eq,
scalar: Scalar::I64(i64::MIN),
},
DeleteExpr {
column: String::from("col5"),
op: Op::Eq,
scalar: Scalar::I64(i64::MAX),
},
],
};
assert_eq!(
&pred.expr_sql_string(),
r#""col1"=0 AND "col2"=-1 AND "col3"=1 AND "col4"=-9223372036854775808 AND "col5"=9223372036854775807"#
);
}
#[test]
fn test_expr_to_sql_f64() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![
DeleteExpr {
column: String::from("col1"),
op: Op::Eq,
scalar: Scalar::F64(OrderedFloat::from(0.0)),
},
DeleteExpr {
column: String::from("col2"),
op: Op::Eq,
scalar: Scalar::F64(OrderedFloat::from(-0.0)),
},
DeleteExpr {
column: String::from("col3"),
op: Op::Eq,
scalar: Scalar::F64(OrderedFloat::from(1.0)),
},
DeleteExpr {
column: String::from("col4"),
op: Op::Eq,
scalar: Scalar::F64(OrderedFloat::from(f64::INFINITY)),
},
DeleteExpr {
column: String::from("col5"),
op: Op::Eq,
scalar: Scalar::F64(OrderedFloat::from(f64::NEG_INFINITY)),
},
DeleteExpr {
column: String::from("col6"),
op: Op::Eq,
scalar: Scalar::F64(OrderedFloat::from(f64::NAN)),
},
],
};
assert_eq!(
&pred.expr_sql_string(),
r#""col1"=0.0 AND "col2"=-0.0 AND "col3"=1.0 AND "col4"='Infinity' AND "col5"='-Infinity' AND "col6"='NaN'"#
);
}
#[test]
fn test_expr_to_sql_string() {
let pred = DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![
DeleteExpr {
column: String::from("col1"),
op: Op::Eq,
scalar: Scalar::String(String::from("")),
},
DeleteExpr {
column: String::from("col2"),
op: Op::Eq,
scalar: Scalar::String(String::from("foo")),
},
DeleteExpr {
column: String::from("col3"),
op: Op::Eq,
scalar: Scalar::String(String::from(r#"fo\o"#)),
},
DeleteExpr {
column: String::from("col4"),
op: Op::Eq,
scalar: Scalar::String(String::from(r#"fo'o"#)),
},
],
};
assert_eq!(
&pred.expr_sql_string(),
r#""col1"='' AND "col2"='foo' AND "col3"='fo\\o' AND "col4"='fo\'o'"#
);
}
}


@ -1,30 +0,0 @@
//! Common error utilities
use std::fmt::Debug;
use observability_deps::tracing::error;
/// Adds the ability for `Result`s to log error messages via `error!` logs.
/// This is useful when using async tasks that may not have any code
/// checking their return values.
pub trait ErrorLogger {
/// Log the contents of self with a string of context. The context
/// should appear in a message such as
///
/// "Error <context>: <formatted error message>"
fn log_if_error(self, context: &str) -> Self;
/// Provided method to log an error via the `error!` macro
fn log_error<E: Debug>(context: &str, e: E) {
error!("Error {}: {:?}", context, e);
}
}
/// Implement logging for all results
impl<T, E: Debug> ErrorLogger for Result<T, E> {
fn log_if_error(self, context: &str) -> Self {
if let Err(e) = &self {
Self::log_error(context, e);
}
self
}
}
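// A hypothetical usage sketch: logging a failed `Result` without interrupting
// control flow. The error value and context string are illustrative.
fn example_log_if_error() {
    let result: Result<(), &str> = Err("disk full");
    // Emits an `error!` log along the lines of: Error writing chunk: "disk full"
    let _ = result.log_if_error("writing chunk");
}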


@ -1,162 +0,0 @@
use std::fmt::Formatter;
use std::sync::Arc;
use crate::{
chunk_metadata::{ChunkAddr, ChunkId},
partition_metadata::PartitionAddr,
};
/// Metadata associated with a set of background tasks
/// Used in combination with TrackerRegistry
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Job {
Dummy {
db_name: Option<Arc<str>>,
nanos: Vec<u64>,
},
/// Write a chunk from read buffer to object store
WriteChunk { chunk: ChunkAddr },
/// Compact a set of chunks
CompactChunks {
partition: PartitionAddr,
chunks: Vec<ChunkId>,
},
/// Compact a set of object store chunks
CompactObjectStoreChunks {
partition: PartitionAddr,
chunks: Vec<ChunkId>,
},
/// Split and persist a set of chunks
PersistChunks {
partition: PartitionAddr,
chunks: Vec<ChunkId>,
},
/// Drop chunk from memory and (if persisted) from object store.
DropChunk { chunk: ChunkAddr },
/// Drop partition from memory and (if persisted) from object store.
DropPartition { partition: PartitionAddr },
/// Wipe preserved catalog
WipePreservedCatalog { db_name: Arc<str> },
/// Load chunk to read buffer from object store
LoadReadBufferChunk { chunk: ChunkAddr },
/// Rebuild preserved catalog
RebuildPreservedCatalog { db_name: Arc<str> },
}
impl Job {
/// Returns the database name associated with this job, if any
pub fn db_name(&self) -> Option<&Arc<str>> {
match self {
Self::Dummy { db_name, .. } => db_name.as_ref(),
Self::WriteChunk { chunk, .. } => Some(&chunk.db_name),
Self::CompactChunks { partition, .. } => Some(&partition.db_name),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.db_name),
Self::PersistChunks { partition, .. } => Some(&partition.db_name),
Self::DropChunk { chunk, .. } => Some(&chunk.db_name),
Self::DropPartition { partition, .. } => Some(&partition.db_name),
Self::WipePreservedCatalog { db_name, .. } => Some(db_name),
Self::LoadReadBufferChunk { chunk } => Some(&chunk.db_name),
Self::RebuildPreservedCatalog { db_name } => Some(db_name),
}
}
/// Returns the partition name associated with this job, if any
pub fn partition_key(&self) -> Option<&Arc<str>> {
match self {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(&chunk.partition_key),
Self::CompactChunks { partition, .. } => Some(&partition.partition_key),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.partition_key),
Self::PersistChunks { partition, .. } => Some(&partition.partition_key),
Self::DropChunk { chunk, .. } => Some(&chunk.partition_key),
Self::DropPartition { partition, .. } => Some(&partition.partition_key),
Self::WipePreservedCatalog { .. } => None,
Self::LoadReadBufferChunk { chunk } => Some(&chunk.partition_key),
Self::RebuildPreservedCatalog { .. } => None,
}
}
/// Returns the table name associated with this job, if any
pub fn table_name(&self) -> Option<&Arc<str>> {
match self {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(&chunk.table_name),
Self::CompactChunks { partition, .. } => Some(&partition.table_name),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.table_name),
Self::PersistChunks { partition, .. } => Some(&partition.table_name),
Self::DropChunk { chunk, .. } => Some(&chunk.table_name),
Self::DropPartition { partition, .. } => Some(&partition.table_name),
Self::WipePreservedCatalog { .. } => None,
Self::LoadReadBufferChunk { chunk } => Some(&chunk.table_name),
Self::RebuildPreservedCatalog { .. } => None,
}
}
/// Returns the chunk_ids associated with this job, if any
pub fn chunk_ids(&self) -> Option<Vec<ChunkId>> {
match self {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(vec![chunk.chunk_id]),
Self::CompactChunks { chunks, .. } => Some(chunks.clone()),
Self::CompactObjectStoreChunks { chunks, .. } => Some(chunks.clone()),
Self::PersistChunks { chunks, .. } => Some(chunks.clone()),
Self::DropChunk { chunk, .. } => Some(vec![chunk.chunk_id]),
Self::DropPartition { .. } => None,
Self::WipePreservedCatalog { .. } => None,
Self::LoadReadBufferChunk { chunk } => Some(vec![chunk.chunk_id]),
Self::RebuildPreservedCatalog { .. } => None,
}
}
/// Returns a human readable description associated with this job, if any
pub fn description(&self) -> &'static str {
match self {
Self::Dummy { .. } => "Dummy Job, for testing",
Self::WriteChunk { .. } => "Writing chunk to Object Storage",
Self::CompactChunks { .. } => "Compacting chunks to ReadBuffer",
Self::CompactObjectStoreChunks { .. } => {
"Compacting Object Store chunks to an Object Store chunk"
}
Self::PersistChunks { .. } => "Persisting chunks to object storage",
Self::DropChunk { .. } => "Drop chunk from memory and (if persisted) from object store",
Self::DropPartition { .. } => {
"Drop partition from memory and (if persisted) from object store"
}
Self::WipePreservedCatalog { .. } => "Wipe preserved catalog",
Self::LoadReadBufferChunk { .. } => "Loading chunk to read buffer",
Self::RebuildPreservedCatalog { .. } => "Rebuild preserved catalog",
}
}
}
impl std::fmt::Display for Job {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Job::Dummy { .. } => write!(f, "Job::Dummy"),
Job::WriteChunk { chunk } => write!(f, "Job::WriteChunk({})", chunk),
Job::CompactChunks { partition, .. } => write!(f, "Job::CompactChunks({})", partition),
Job::CompactObjectStoreChunks { partition, .. } => {
write!(f, "Job::CompactObjectStoreChunks({})", partition)
}
Job::PersistChunks { partition, .. } => write!(f, "Job::PersistChunks({})", partition),
Job::DropChunk { chunk } => write!(f, "Job::DropChunk({})", chunk),
Job::DropPartition { partition } => write!(f, "Job::DropPartition({})", partition),
Job::WipePreservedCatalog { db_name } => {
write!(f, "Job::WipePreservedCatalog({})", db_name)
}
Job::LoadReadBufferChunk { chunk } => write!(f, "Job::LoadReadBufferChunk({})", chunk),
Job::RebuildPreservedCatalog { db_name } => {
write!(f, "Job::RebuildPreservedCatalog({})", db_name)
}
}
}
}
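// A small sketch (not in the original file): inspecting the metadata exposed
// by a job. The database name is illustrative.
fn example_job_metadata() {
    let job = Job::WipePreservedCatalog {
        db_name: Arc::from("my_db"),
    };
    assert_eq!(job.description(), "Wipe preserved catalog");
    assert!(job.db_name().is_some());
    assert!(job.partition_key().is_none());
    assert!(job.chunk_ids().is_none());
}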

File diff suppressed because it is too large


@ -1,90 +0,0 @@
use std::borrow::Cow;
use crate::{DatabaseName, DatabaseNameError};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use snafu::{ResultExt, Snafu};
#[derive(Debug, Snafu)]
pub enum OrgBucketMappingError {
#[snafu(display("Invalid database name: {}", source))]
InvalidDatabaseName { source: DatabaseNameError },
#[snafu(display("missing org/bucket value"))]
NotSpecified,
}
/// Map an InfluxDB 2.X org & bucket into an IOx DatabaseName.
///
/// This function ensures the mapping is unambiguous by percent-encoding any
/// non-alphanumeric characters (including `_`) in both `org` and `bucket`
/// before joining them with `_`, in addition to the [`DatabaseName`] validation.
pub fn org_and_bucket_to_database<'a, O: AsRef<str>, B: AsRef<str>>(
org: O,
bucket: B,
) -> Result<DatabaseName<'a>, OrgBucketMappingError> {
const SEPARATOR: char = '_';
let org: Cow<'_, str> = utf8_percent_encode(org.as_ref(), NON_ALPHANUMERIC).into();
let bucket: Cow<'_, str> = utf8_percent_encode(bucket.as_ref(), NON_ALPHANUMERIC).into();
// An empty org or bucket is not acceptable.
if org.is_empty() || bucket.is_empty() {
return Err(OrgBucketMappingError::NotSpecified);
}
let db_name = format!("{}{}{}", org.as_ref(), SEPARATOR, bucket.as_ref());
DatabaseName::new(db_name).context(InvalidDatabaseNameSnafu)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_org_bucket_map_db_ok() {
let got = org_and_bucket_to_database("org", "bucket").expect("failed on valid DB mapping");
assert_eq!(got.as_str(), "org_bucket");
}
#[test]
fn test_org_bucket_map_db_contains_underscore() {
let got = org_and_bucket_to_database("my_org", "bucket").unwrap();
assert_eq!(got.as_str(), "my%5Forg_bucket");
let got = org_and_bucket_to_database("org", "my_bucket").unwrap();
assert_eq!(got.as_str(), "org_my%5Fbucket");
let got = org_and_bucket_to_database("org", "my__bucket").unwrap();
assert_eq!(got.as_str(), "org_my%5F%5Fbucket");
let got = org_and_bucket_to_database("my_org", "my_bucket").unwrap();
assert_eq!(got.as_str(), "my%5Forg_my%5Fbucket");
}
#[test]
fn test_org_bucket_map_db_contains_underscore_and_percent() {
let got = org_and_bucket_to_database("my%5Forg", "bucket").unwrap();
assert_eq!(got.as_str(), "my%255Forg_bucket");
let got = org_and_bucket_to_database("my%5Forg_", "bucket").unwrap();
assert_eq!(got.as_str(), "my%255Forg%5F_bucket");
}
#[test]
fn test_bad_database_name_is_encoded() {
let got = org_and_bucket_to_database("org", "bucket?").unwrap();
assert_eq!(got.as_str(), "org_bucket%3F");
let got = org_and_bucket_to_database("org!", "bucket").unwrap();
assert_eq!(got.as_str(), "org%21_bucket");
}
#[test]
fn test_empty_org_bucket() {
let err = org_and_bucket_to_database("", "")
.expect_err("should fail with empty org/bucket values");
assert!(matches!(err, OrgBucketMappingError::NotSpecified));
}
}


@ -1,29 +0,0 @@
use std::ops::Deref;
/// A string that cannot be empty
///
/// This is particularly useful for types that map to/from protobuf, where string fields
/// are not nullable; that is, they default to an empty string if not specified.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NonEmptyString(Box<str>);
impl NonEmptyString {
/// Create a new `NonEmptyString` from the provided `String`
///
/// Returns None if empty
pub fn new(s: impl Into<String>) -> Option<Self> {
let s = s.into();
match s.is_empty() {
true => None,
false => Some(Self(s.into_boxed_str())),
}
}
}
impl Deref for NonEmptyString {
type Target = str;
fn deref(&self) -> &Self::Target {
self.0.as_ref()
}
}
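// A minimal sketch: constructing a `NonEmptyString` and rejecting the empty
// case.
fn example_non_empty_string() {
    assert!(NonEmptyString::new("").is_none());
    let name = NonEmptyString::new("cpu").expect("non-empty input");
    assert_eq!(&*name, "cpu");
}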

File diff suppressed because it is too large


@ -1,141 +0,0 @@
use std::collections::BTreeMap;
use regex::Regex;
use crate::{
consistent_hasher::ConsistentHasher, server_id::ServerId, write_buffer::WriteBufferConnection,
};
#[derive(Debug, Eq, PartialEq, Hash, PartialOrd, Ord, Clone, Copy)]
pub struct ShardId(u32);
impl ShardId {
pub fn new(id: u32) -> Self {
Self(id)
}
pub fn get(&self) -> u32 {
self.0
}
}
impl std::fmt::Display for ShardId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "ShardId({})", self.get())
}
}
/// ShardConfig defines rules for assigning a line/row to an individual
/// host or a group of hosts. A shard is a logical concept: it is used to
/// split data into mutually exclusive areas. The rough order of
/// organization is: database -> shard -> partition -> chunk. For example,
/// you could shard based on table name and assign to 1 of 10 shards.
/// Within each shard you would have partitions, which would likely be
/// based off time. This makes it possible to horizontally scale out writes.
#[derive(Debug, Eq, PartialEq, Clone, Default)]
pub struct ShardConfig {
/// Each matcher, if any, is evaluated in order.
/// If there is a match, the row is routed to the given shard;
/// otherwise the hash ring is consulted.
/// This is useful for overriding the hash ring on a hot spot. For
/// example, suppose you use the table name as the input to the hash
/// function and your ring has 4 slots. If two tables that are very hot
/// get assigned to the same slot, you can override that by putting in a
/// specific matcher to pull one of those tables over to a different node.
pub specific_targets: Vec<MatcherToShard>,
/// An optional default hasher which will route to one in a collection of
/// nodes.
pub hash_ring: Option<HashRing>,
}
/// Maps a matcher to a specific shard. If the line/row matches,
/// it is sent to that shard.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct MatcherToShard {
pub matcher: Matcher,
pub shard: ShardId,
}
/// HashRing is a rule for creating a hash key for a row and mapping that to
/// an individual node on a ring.
#[derive(Debug, Eq, PartialEq, Clone, Default)]
pub struct HashRing {
/// ring of shard ids
pub shards: ConsistentHasher<ShardId>,
}
/// A matcher is used to match routing rules or subscriptions on a row-by-row
/// (or line) basis.
#[derive(Debug, Clone, Default)]
pub struct Matcher {
/// if provided, match if the table name matches against the regex
pub table_name_regex: Option<Regex>,
}
impl PartialEq for Matcher {
fn eq(&self, other: &Self) -> bool {
// this is kind of janky, but it's only used during tests and should get the job
// done
format!("{:?}", self.table_name_regex) == format!("{:?}", other.table_name_regex)
}
}
impl Eq for Matcher {}
/// Sinks for query requests.
///
/// Queries are sent to one of these sinks and the resulting data is received from it.
///
/// Note that the query results flow in the opposite direction (aka a query sink is a result source).
#[derive(Debug, Eq, PartialEq, Clone, Default)]
pub struct QuerySinks {
pub grpc_remotes: Vec<ServerId>,
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum WriteSinkVariant {
/// gRPC-based remote, addressed by its server ID.
GrpcRemote(ServerId),
/// Write buffer connection.
WriteBuffer(WriteBufferConnection),
}
/// Sink of write requests aka new data.
///
/// Data is sent to this sink and a status is received from it.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct WriteSink {
pub sink: WriteSinkVariant,
/// If set, errors during writing to this sink are ignored and do NOT lead to an overall failure.
pub ignore_errors: bool,
}
/// Set of write sinks.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct WriteSinkSet {
/// Sinks within the set.
pub sinks: Vec<WriteSink>,
}
/// Router for writes and queries.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Router {
/// Router name.
///
/// The name corresponds to the database name on the database node.
///
/// The router name is unique for this router node.
pub name: String,
/// Write sharder.
pub write_sharder: ShardConfig,
/// Sinks for write requests.
pub write_sinks: BTreeMap<ShardId, WriteSinkSet>,
/// Sinks for query requests.
pub query_sinks: QuerySinks,
}
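// An illustrative sketch (not in the original file): routing one hot table to
// a dedicated shard while everything else would fall through to the hash ring
// (left unset here). The table name and regex are illustrative.
fn example_shard_config() -> ShardConfig {
    ShardConfig {
        specific_targets: vec![MatcherToShard {
            matcher: Matcher {
                table_name_regex: Some(Regex::new("^cpu$").expect("valid regex")),
            },
            shard: ShardId::new(1),
        }],
        hash_ring: None,
    }
}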


@ -1,16 +0,0 @@
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Sequence {
/// The sequencer id (kafka partition id)
pub sequencer_id: u32,
/// The sequence number (kafka offset)
pub sequence_number: u64,
}
impl Sequence {
pub fn new(sequencer_id: u32, sequence_number: u64) -> Self {
Self {
sequencer_id,
sequence_number,
}
}
}


@ -1,106 +0,0 @@
use snafu::{OptionExt, ResultExt, Snafu};
use std::{
convert::TryFrom,
fmt,
num::{NonZeroU32, ParseIntError},
str::FromStr,
};
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct ServerId(NonZeroU32);
impl ServerId {
pub fn new(id: NonZeroU32) -> Self {
Self(id)
}
pub fn get(&self) -> NonZeroU32 {
self.0
}
pub fn get_u32(&self) -> u32 {
self.0.get()
}
}
impl FromStr for ServerId {
type Err = Error;
fn from_str(value: &str) -> Result<Self, Self::Err> {
let value: u32 = value.parse().context(UnableToParseSnafu { value })?;
Self::try_from(value)
}
}
impl TryFrom<u32> for ServerId {
type Error = Error;
fn try_from(value: u32) -> Result<Self, Self::Error> {
NonZeroU32::new(value)
.map(Self)
.context(ValueMayNotBeZeroSnafu)
.map_err(Into::into)
}
}
impl fmt::Display for ServerId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
#[derive(Debug, Snafu)]
pub struct Error(InnerError);
#[derive(Debug, Snafu)]
enum InnerError {
#[snafu(display("The server ID may not be zero"))]
ValueMayNotBeZero,
#[snafu(display("Could not parse {} as a non-zero 32-bit unsigned number", value))]
UnableToParse {
source: ParseIntError,
value: String,
},
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn cannot_be_zero() {
assert!(matches!(
ServerId::try_from(0),
Err(Error(InnerError::ValueMayNotBeZero))
));
}
#[test]
fn can_be_nonzero() {
let value = 2;
let server_id = ServerId::try_from(value).unwrap();
assert_eq!(server_id.get_u32(), value);
}
#[test]
fn can_be_parsed_from_a_string() {
assert!(matches!(
"0".parse::<ServerId>(),
Err(Error(InnerError::ValueMayNotBeZero)),
));
assert!(matches!(
"moo".parse::<ServerId>(),
Err(Error(InnerError::UnableToParse { source: _, value })) if value == "moo",
));
let server_id = "1337".parse::<ServerId>().unwrap();
assert_eq!(server_id.get_u32(), 1337);
}
#[test]
fn can_be_displayed() {
let server_id = ServerId::try_from(42).unwrap();
assert_eq!("42", format!("{}", server_id));
}
}


@ -1,196 +0,0 @@
/// minimum time that can be represented.
///
/// 1677-09-21 00:12:43.145224194 +0000 UTC
///
/// The two lowest minimum integers are used as sentinel values. The
/// minimum value needs to be used as a value lower than any other value for
/// comparisons and another separate value is needed to act as a sentinel
/// default value that is unusable by the user, but usable internally.
/// Because these two values need to be used for a special purpose, we do
/// not allow users to write points at these two times.
///
/// Source: [influxdb](https://github.com/influxdata/influxdb/blob/540bb66e1381a48a6d1ede4fc3e49c75a7d9f4af/models/time.go#L12-L34)
pub const MIN_NANO_TIME: i64 = i64::MIN + 2;
/// maximum time that can be represented.
///
/// 2262-04-11 23:47:16.854775806 +0000 UTC
///
/// The highest time represented by a nanosecond needs to be used for an
/// exclusive range in the shard group, so the maximum time needs to be one
/// less than the possible maximum number of nanoseconds representable by an
/// int64 so that we don't lose a point at that one time.
/// Source: [influxdb](https://github.com/influxdata/influxdb/blob/540bb66e1381a48a6d1ede4fc3e49c75a7d9f4af/models/time.go#L12-L34)
pub const MAX_NANO_TIME: i64 = i64::MAX - 1;
/// Specifies a continuous range of nanosecond timestamps. Timestamp
/// predicates are so common and critical to performance of timeseries
/// databases in general, and IOx in particular, that they are handled
/// specially
///
/// Timestamp ranges are defined such that a value `v` is within the
/// range iff:
///
/// ```text
/// range.start <= v < range.end
/// ```
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Copy, Debug, Hash)]
pub struct TimestampRange {
/// Start defines the inclusive lower bound. Minimum value is [MIN_NANO_TIME]
start: i64,
/// End defines the exclusive upper bound. Maximum value is [MAX_NANO_TIME]
end: i64,
}
impl TimestampRange {
pub fn new(start: i64, end: i64) -> Self {
debug_assert!(end >= start);
let start = start.max(MIN_NANO_TIME);
let end = end.min(MAX_NANO_TIME);
Self { start, end }
}
#[inline]
/// Returns true if this range contains the value v
pub fn contains(&self, v: i64) -> bool {
self.start <= v && v < self.end
}
#[inline]
/// Returns true if this range contains the value v, if present; `None` is never contained
pub fn contains_opt(&self, v: Option<i64>) -> bool {
Some(true) == v.map(|ts| self.contains(ts))
}
/// Return the timestamp range's end.
pub fn end(&self) -> i64 {
self.end
}
/// Return the timestamp range's start.
pub fn start(&self) -> i64 {
self.start
}
}
/// Specifies a min/max timestamp value.
///
/// Note this differs subtly (but critically) from a
/// `TimestampRange`, as the minimum and maximum values are both included
#[derive(Clone, Debug, Copy)]
pub struct TimestampMinMax {
/// The minimum timestamp value
pub min: i64,
/// the maximum timestamp value
pub max: i64,
}
impl TimestampMinMax {
pub fn new(min: i64, max: i64) -> Self {
assert!(min <= max, "expected min ({}) <= max ({})", min, max);
Self { min, max }
}
#[inline]
/// Returns true if any of the values between min / max
/// (inclusive) are contained within the specified timestamp range
pub fn overlaps(&self, range: TimestampRange) -> bool {
range.contains(self.min)
|| range.contains(self.max)
|| (self.min <= range.start && self.max >= range.end)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_timestamp_nano_min_max() {
let cases = vec![
(
"MIN/MAX Nanos",
TimestampRange::new(MIN_NANO_TIME, MAX_NANO_TIME),
),
("MIN/MAX i64", TimestampRange::new(i64::MIN, i64::MAX)),
];
for (name, range) in cases {
println!("case: {}", name);
assert!(!range.contains(i64::MIN));
assert!(range.contains(MIN_NANO_TIME));
assert!(range.contains(MIN_NANO_TIME + 1));
assert!(range.contains(MAX_NANO_TIME - 1));
assert!(!range.contains(MAX_NANO_TIME));
assert!(!range.contains(i64::MAX));
}
}
#[test]
fn test_timestamp_i64_min_max_offset() {
let range = TimestampRange::new(MIN_NANO_TIME + 1, MAX_NANO_TIME - 1);
assert!(!range.contains(i64::MIN));
assert!(!range.contains(MIN_NANO_TIME));
assert!(range.contains(MIN_NANO_TIME + 1));
assert!(range.contains(MAX_NANO_TIME - 2));
assert!(!range.contains(MAX_NANO_TIME - 1));
assert!(!range.contains(MAX_NANO_TIME));
assert!(!range.contains(i64::MAX));
}
#[test]
fn test_timestamp_range_contains() {
let range = TimestampRange::new(100, 200);
assert!(!range.contains(99));
assert!(range.contains(100));
assert!(range.contains(101));
assert!(range.contains(199));
assert!(!range.contains(200));
assert!(!range.contains(201));
}
#[test]
fn test_timestamp_range_contains_opt() {
let range = TimestampRange::new(100, 200);
assert!(!range.contains_opt(Some(99)));
assert!(range.contains_opt(Some(100)));
assert!(range.contains_opt(Some(101)));
assert!(range.contains_opt(Some(199)));
assert!(!range.contains_opt(Some(200)));
assert!(!range.contains_opt(Some(201)));
assert!(!range.contains_opt(None));
}
#[test]
fn test_timestamp_range_overlaps() {
let range = TimestampRange::new(100, 200);
assert!(!TimestampMinMax::new(0, 99).overlaps(range));
assert!(TimestampMinMax::new(0, 100).overlaps(range));
assert!(TimestampMinMax::new(0, 101).overlaps(range));
assert!(TimestampMinMax::new(0, 200).overlaps(range));
assert!(TimestampMinMax::new(0, 201).overlaps(range));
assert!(TimestampMinMax::new(0, 300).overlaps(range));
assert!(TimestampMinMax::new(100, 101).overlaps(range));
assert!(TimestampMinMax::new(100, 200).overlaps(range));
assert!(TimestampMinMax::new(100, 201).overlaps(range));
assert!(TimestampMinMax::new(101, 101).overlaps(range));
assert!(TimestampMinMax::new(101, 200).overlaps(range));
assert!(TimestampMinMax::new(101, 201).overlaps(range));
assert!(!TimestampMinMax::new(200, 200).overlaps(range));
assert!(!TimestampMinMax::new(200, 201).overlaps(range));
assert!(!TimestampMinMax::new(201, 300).overlaps(range));
}
#[test]
#[should_panic(expected = "expected min (2) <= max (1)")]
fn test_timestamp_min_max_invalid() {
TimestampMinMax::new(2, 1);
}
}


@ -1,64 +0,0 @@
use std::{collections::BTreeMap, num::NonZeroU32};
pub const DEFAULT_N_SEQUENCERS: u32 = 1;
/// Configures the use of a write buffer.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub struct WriteBufferConnection {
/// Which type should be used (e.g. "kafka", "mock")
pub type_: String,
/// Connection string, depends on [`type_`](Self::type_).
pub connection: String,
/// Special configs to be applied when establishing the connection.
///
/// This depends on [`type_`](Self::type_) and can configure aspects like timeouts.
///
/// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
pub connection_config: BTreeMap<String, String>,
/// Specifies if the sequencers (e.g. for Kafka in the form of a topic) should be automatically created if they do
/// not exist prior to reading or writing.
pub creation_config: Option<WriteBufferCreationConfig>,
}
impl Default for WriteBufferConnection {
fn default() -> Self {
Self {
type_: "unspecified".to_string(),
connection: Default::default(),
connection_config: Default::default(),
creation_config: Default::default(),
}
}
}
/// Configures sequencer auto-creation for write buffers.
///
/// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/
/// [`n_sequencers`](Self::n_sequencers) partitions.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub struct WriteBufferCreationConfig {
/// Number of sequencers.
///
/// How they are implemented depends on [type](WriteBufferConnection::type_), e.g. for Kafka this is mapped to the
/// number of partitions.
pub n_sequencers: NonZeroU32,
/// Special configs to be applied when sequencers are created.
///
/// This depends on [type](WriteBufferConnection::type_) and can set up parameters like the retention policy.
///
/// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
pub options: BTreeMap<String, String>,
}
impl Default for WriteBufferCreationConfig {
fn default() -> Self {
Self {
n_sequencers: NonZeroU32::try_from(DEFAULT_N_SEQUENCERS).unwrap(),
options: Default::default(),
}
}
}
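// A sketch (not in the original file): a Kafka-backed write buffer connection
// that auto-creates its topic with the default sequencer count. The broker
// address is illustrative.
fn example_write_buffer_connection() -> WriteBufferConnection {
    WriteBufferConnection {
        type_: "kafka".to_string(),
        connection: "my-kafka:9092".to_string(),
        connection_config: BTreeMap::new(),
        creation_config: Some(WriteBufferCreationConfig::default()),
    }
}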


@ -1,74 +0,0 @@
use crate::partition_metadata::StatValues;
use iox_time::Time;
/// A description of a set of writes
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct WriteSummary {
/// The wall clock timestamp of the first write in this summary
pub time_of_first_write: Time,
/// The wall clock timestamp of the last write in this summary
pub time_of_last_write: Time,
/// The minimum row timestamp for data in this summary
pub min_timestamp: Time,
/// The maximum row timestamp value for data in this summary
pub max_timestamp: Time,
/// The number of rows in this summary
pub row_count: usize,
}
/// A description of the distribution of timestamps in a
/// set of writes, bucketed based on minute within the hour
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TimestampSummary {
/// Stores the count of how many rows in the set of writes have a timestamp
/// with a minute matching a given index
///
/// E.g. a row with timestamp 12:31:12 would store a count at index 31
pub counts: [u32; 60],
/// Standard timestamp statistics
pub stats: StatValues<i64>,
}
impl Default for TimestampSummary {
fn default() -> Self {
Self {
counts: [0; 60],
stats: Default::default(),
}
}
}
impl TimestampSummary {
/// Returns an iterator returning cumulative counts suitable for exposing
/// as a cumulative histogram
pub fn cumulative_counts(&self) -> impl Iterator<Item = (usize, u64)> + '_ {
let mut acc = 0_u64;
self.counts.iter().enumerate().map(move |(idx, count)| {
acc += *count as u64;
(idx, acc)
})
}
/// Merges the counts from the provided summary into this
pub fn merge(&mut self, other: &Self) {
for (a, b) in self.counts.iter_mut().zip(&other.counts) {
*a += *b
}
}
/// Records a timestamp value
pub fn record(&mut self, timestamp: Time) {
self.counts[timestamp.minute() as usize] += 1;
self.stats.update(&timestamp.timestamp_nanos())
}
/// Records a timestamp value from nanos
pub fn record_nanos(&mut self, timestamp_nanos: i64) {
self.record(Time::from_timestamp_nanos(timestamp_nanos))
}
}
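// A sketch: recording two timestamps and reading back the per-minute counts
// plus the cumulative histogram. The timestamp values are illustrative.
fn example_timestamp_summary() {
    let mut summary = TimestampSummary::default();
    // 3600 s after the epoch is 01:00:00 UTC, i.e. minute 0 of the hour.
    summary.record_nanos(3_600 * 1_000_000_000);
    // 3660 s after the epoch is 01:01:00 UTC, i.e. minute 1 of the hour.
    summary.record_nanos(3_660 * 1_000_000_000);
    assert_eq!(summary.counts[0], 1);
    assert_eq!(summary.counts[1], 1);
    let (_, total) = summary.cumulative_counts().last().expect("60 buckets");
    assert_eq!(total, 2);
}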


@ -1,15 +0,0 @@
[package]
name = "data_types2"
version = "0.1.0"
edition = "2021"
description = "Shared data types in the Iox NG architecture"
[dependencies]
data_types = { path = "../data_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
mutable_batch = { path = "../mutable_batch" }
predicate = { path = "../predicate" }
schema = { path = "../schema" }
sqlx = { version = "0.5", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}


@ -1,955 +0,0 @@
//! Shared data types in the IOx NG architecture
#![warn(
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
clippy::explicit_iter_loop,
clippy::future_not_send,
clippy::use_self,
clippy::clone_on_ref_ptr
)]
use influxdb_line_protocol::FieldValue;
use predicate::{delete_predicate::parse_delete_predicate, Predicate};
use schema::{builder::SchemaBuilder, sort::SortKey, InfluxColumnType, InfluxFieldType, Schema};
use std::{
collections::BTreeMap,
convert::TryFrom,
fmt::{Debug, Formatter},
ops::{Add, Sub},
sync::Arc,
};
use uuid::Uuid;
pub use data_types::{
chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder, ChunkSummary},
database_rules::{PartitionTemplate, TemplatePart},
delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar},
names::{org_and_bucket_to_database, OrgBucketMappingError},
non_empty::NonEmptyString,
partition_metadata::{
ColumnSummary, InfluxDbType, PartitionAddr, StatValues, Statistics, TableSummary,
},
sequence::Sequence,
timestamp::TimestampRange,
DatabaseName,
};
/// Unique ID for a `Namespace`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct NamespaceId(i64);
#[allow(missing_docs)]
impl NamespaceId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for NamespaceId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// Unique ID for a `KafkaTopic`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct KafkaTopicId(i64);
#[allow(missing_docs)]
impl KafkaTopicId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for KafkaTopicId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// Unique ID for a `QueryPool`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct QueryPoolId(i64);
#[allow(missing_docs)]
impl QueryPoolId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
/// Unique ID for a `Table`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct TableId(i64);
#[allow(missing_docs)]
impl TableId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for TableId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// Unique ID for a `Column`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct ColumnId(i64);
#[allow(missing_docs)]
impl ColumnId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
/// Unique ID for a `Sequencer`. Note this is NOT the same as the
/// "sequencer_number" in the `write_buffer` which currently means
/// "kafka partition".
///
/// <https://github.com/influxdata/influxdb_iox/issues/4237>
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct SequencerId(i64);
#[allow(missing_docs)]
impl SequencerId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for SequencerId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// The kafka partition identifier. This is in the actual Kafka cluster.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct KafkaPartition(i32);
#[allow(missing_docs)]
impl KafkaPartition {
pub fn new(v: i32) -> Self {
Self(v)
}
pub fn get(&self) -> i32 {
self.0
}
}
impl std::fmt::Display for KafkaPartition {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// Unique ID for a `Partition`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct PartitionId(i64);
#[allow(missing_docs)]
impl PartitionId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for PartitionId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// Combination of Sequencer ID, Table ID, and Partition ID useful for identifying groups of
/// Parquet files to be compacted together.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
pub struct TablePartition {
/// The sequencer ID
pub sequencer_id: SequencerId,
/// The table ID
pub table_id: TableId,
/// The partition ID
pub partition_id: PartitionId,
}
impl TablePartition {
/// Combine the relevant parts
pub fn new(sequencer_id: SequencerId, table_id: TableId, partition_id: PartitionId) -> Self {
Self {
sequencer_id,
table_id,
partition_id,
}
}
}
/// Unique ID for a `Tombstone`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct TombstoneId(i64);
#[allow(missing_docs)]
impl TombstoneId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for TombstoneId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// A sequence number from a `Sequencer` (kafka partition)
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct SequenceNumber(i64);
#[allow(missing_docs)]
impl SequenceNumber {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl Add<i64> for SequenceNumber {
type Output = Self;
fn add(self, other: i64) -> Self {
Self(self.0 + other)
}
}
impl Sub<i64> for SequenceNumber {
type Output = Self;
fn sub(self, other: i64) -> Self {
Self(self.0 - other)
}
}
/// A time in nanoseconds from epoch
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct Timestamp(i64);
#[allow(missing_docs)]
impl Timestamp {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl Add<i64> for Timestamp {
type Output = Self;
fn add(self, other: i64) -> Self {
Self(self.0 + other)
}
}
impl Sub<i64> for Timestamp {
type Output = Self;
fn sub(self, other: i64) -> Self {
Self(self.0 - other)
}
}
/// Unique ID for a `ParquetFile`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct ParquetFileId(i64);
#[allow(missing_docs)]
impl ParquetFileId {
pub fn new(v: i64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
self.0
}
}
impl std::fmt::Display for ParquetFileId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
// Delegate to the inner i64's Display implementation.
write!(f, "{}", self.0)
}
}
/// Data object for a kafka topic
#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)]
pub struct KafkaTopic {
/// The id of the topic
pub id: KafkaTopicId,
/// The unique name of the topic
pub name: String,
}
/// Data object for a query pool
#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)]
pub struct QueryPool {
/// The id of the pool
pub id: QueryPoolId,
/// The unique name of the pool
pub name: String,
}
/// Data object for a namespace
#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)]
pub struct Namespace {
/// The id of the namespace
pub id: NamespaceId,
/// The unique name of the namespace
pub name: String,
/// The retention duration as a string. 'inf' or not present represents infinite duration (i.e. never drop data).
#[sqlx(default)]
pub retention_duration: Option<String>,
/// The kafka topic that writes to this namespace will land in
pub kafka_topic_id: KafkaTopicId,
/// The query pool assigned to answer queries for this namespace
pub query_pool_id: QueryPoolId,
/// The maximum number of tables that can exist in this namespace
pub max_tables: i32,
/// The maximum number of columns per table in this namespace
pub max_columns_per_table: i32,
}
/// Schema collection for a namespace. This is an in-memory object useful for a schema
/// cache.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NamespaceSchema {
/// the namespace id
pub id: NamespaceId,
/// the kafka topic this namespace gets data written to
pub kafka_topic_id: KafkaTopicId,
/// the query pool assigned to answer queries for this namespace
pub query_pool_id: QueryPoolId,
/// the tables in the namespace by name
pub tables: BTreeMap<String, TableSchema>,
}
impl NamespaceSchema {
/// Create a new `NamespaceSchema`
pub fn new(id: NamespaceId, kafka_topic_id: KafkaTopicId, query_pool_id: QueryPoolId) -> Self {
Self {
id,
tables: BTreeMap::new(),
kafka_topic_id,
query_pool_id,
}
}
}
/// Data object for a table
#[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)]
pub struct Table {
/// The id of the table
pub id: TableId,
/// The namespace id that the table is in
pub namespace_id: NamespaceId,
/// The name of the table, which is unique within the associated namespace
pub name: String,
}
/// Column definitions for a table
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TableSchema {
/// the table id
pub id: TableId,
/// the table's columns by their name
pub columns: BTreeMap<String, ColumnSchema>,
}
impl TableSchema {
/// Initialize new `TableSchema`
pub fn new(id: TableId) -> Self {
Self {
id,
columns: BTreeMap::new(),
}
}
/// Add `col` to this table schema.
///
/// # Panics
///
/// This method panics if a column of the same name already exists in
/// `self`, or the provided [`Column`] cannot be converted into a valid
/// [`ColumnSchema`].
pub fn add_column(&mut self, col: &Column) {
let old = self.columns.insert(
col.name.clone(),
ColumnSchema::try_from(col).expect("column is invalid"),
);
assert!(old.is_none());
}
}
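// A sketch (not part of the original source): registering a tag column on a
// table schema. The ids and the column name are illustrative.
fn example_table_schema() {
    let mut schema = TableSchema::new(TableId::new(1));
    schema.add_column(&Column {
        id: ColumnId::new(10),
        table_id: TableId::new(1),
        name: "host".to_string(),
        column_type: ColumnType::Tag as i16,
    });
    assert!(schema.columns.get("host").expect("column was added").is_tag());
}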
/// Data object for a column
#[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)]
pub struct Column {
/// the column id
pub id: ColumnId,
/// the table id the column is in
pub table_id: TableId,
/// the name of the column, which is unique in the table
pub name: String,
/// the logical type of the column
pub column_type: i16,
}
impl Column {
/// returns true if the column type is a tag
pub fn is_tag(&self) -> bool {
self.column_type == ColumnType::Tag as i16
}
/// returns true if the column type matches the line protocol field value type
pub fn matches_field_type(&self, field_value: &FieldValue) -> bool {
match field_value {
FieldValue::I64(_) => self.column_type == ColumnType::I64 as i16,
FieldValue::U64(_) => self.column_type == ColumnType::U64 as i16,
FieldValue::F64(_) => self.column_type == ColumnType::F64 as i16,
FieldValue::String(_) => self.column_type == ColumnType::String as i16,
FieldValue::Boolean(_) => self.column_type == ColumnType::Bool as i16,
}
}
}
/// The column id and its type for a column
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct ColumnSchema {
/// the column id
pub id: ColumnId,
/// the column type
pub column_type: ColumnType,
}
impl ColumnSchema {
/// returns true if the column is a tag
pub fn is_tag(&self) -> bool {
self.column_type == ColumnType::Tag
}
/// returns true if the column matches the line protocol field value type
pub fn matches_field_type(&self, field_value: &FieldValue) -> bool {
matches!(
(field_value, self.column_type),
(FieldValue::I64(_), ColumnType::I64)
| (FieldValue::U64(_), ColumnType::U64)
| (FieldValue::F64(_), ColumnType::F64)
| (FieldValue::String(_), ColumnType::String)
| (FieldValue::Boolean(_), ColumnType::Bool)
)
}
/// Returns true if `mb_column` is of the same type as `self`.
pub fn matches_type(&self, mb_column: &mutable_batch::column::Column) -> bool {
self.column_type == mb_column.influx_type()
}
}
impl TryFrom<&Column> for ColumnSchema {
type Error = Box<dyn std::error::Error>;
fn try_from(c: &Column) -> Result<Self, Self::Error> {
Ok(Self {
id: c.id,
column_type: ColumnType::try_from(c.column_type)?,
})
}
}
/// The column data type
#[allow(missing_docs)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColumnType {
I64 = 1,
U64 = 2,
F64 = 3,
Bool = 4,
String = 5,
Time = 6,
Tag = 7,
}
impl ColumnType {
/// the short string description of the type
pub fn as_str(&self) -> &'static str {
match self {
Self::I64 => "i64",
Self::U64 => "u64",
Self::F64 => "f64",
Self::Bool => "bool",
Self::String => "string",
Self::Time => "time",
Self::Tag => "tag",
}
}
}
impl std::fmt::Display for ColumnType {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let s = self.as_str();
write!(f, "{}", s)
}
}
impl TryFrom<i16> for ColumnType {
type Error = Box<dyn std::error::Error>;
fn try_from(value: i16) -> Result<Self, Self::Error> {
match value {
x if x == Self::I64 as i16 => Ok(Self::I64),
x if x == Self::U64 as i16 => Ok(Self::U64),
x if x == Self::F64 as i16 => Ok(Self::F64),
x if x == Self::Bool as i16 => Ok(Self::Bool),
x if x == Self::String as i16 => Ok(Self::String),
x if x == Self::Time as i16 => Ok(Self::Time),
x if x == Self::Tag as i16 => Ok(Self::Tag),
_ => Err("invalid column value".into()),
}
}
}
impl From<InfluxColumnType> for ColumnType {
fn from(value: InfluxColumnType) -> Self {
match value {
InfluxColumnType::Tag => Self::Tag,
InfluxColumnType::Field(InfluxFieldType::Float) => Self::F64,
InfluxColumnType::Field(InfluxFieldType::Integer) => Self::I64,
InfluxColumnType::Field(InfluxFieldType::UInteger) => Self::U64,
InfluxColumnType::Field(InfluxFieldType::String) => Self::String,
InfluxColumnType::Field(InfluxFieldType::Boolean) => Self::Bool,
InfluxColumnType::Timestamp => Self::Time,
}
}
}
impl From<ColumnType> for InfluxColumnType {
fn from(value: ColumnType) -> Self {
match value {
ColumnType::I64 => Self::Field(InfluxFieldType::Integer),
ColumnType::U64 => Self::Field(InfluxFieldType::UInteger),
ColumnType::F64 => Self::Field(InfluxFieldType::Float),
ColumnType::Bool => Self::Field(InfluxFieldType::Boolean),
ColumnType::String => Self::Field(InfluxFieldType::String),
ColumnType::Time => Self::Timestamp,
ColumnType::Tag => Self::Tag,
}
}
}
impl TryFrom<TableSchema> for Schema {
type Error = schema::builder::Error;
fn try_from(value: TableSchema) -> Result<Self, Self::Error> {
let mut builder = SchemaBuilder::new();
for (column_name, column_schema) in &value.columns {
let t = InfluxColumnType::from(column_schema.column_type);
builder.influx_column(column_name, t);
}
builder.build()
}
}
impl PartialEq<InfluxColumnType> for ColumnType {
fn eq(&self, got: &InfluxColumnType) -> bool {
match self {
Self::I64 => matches!(got, InfluxColumnType::Field(InfluxFieldType::Integer)),
Self::U64 => matches!(got, InfluxColumnType::Field(InfluxFieldType::UInteger)),
Self::F64 => matches!(got, InfluxColumnType::Field(InfluxFieldType::Float)),
Self::Bool => matches!(got, InfluxColumnType::Field(InfluxFieldType::Boolean)),
Self::String => matches!(got, InfluxColumnType::Field(InfluxFieldType::String)),
Self::Time => matches!(got, InfluxColumnType::Timestamp),
Self::Tag => matches!(got, InfluxColumnType::Tag),
}
}
}
/// Returns the `ColumnType` for the passed in line protocol `FieldValue` type
pub fn column_type_from_field(field_value: &FieldValue) -> ColumnType {
match field_value {
FieldValue::I64(_) => ColumnType::I64,
FieldValue::U64(_) => ColumnType::U64,
FieldValue::F64(_) => ColumnType::F64,
FieldValue::String(_) => ColumnType::String,
FieldValue::Boolean(_) => ColumnType::Bool,
}
}
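// A sketch: mapping line protocol field values onto catalog column types.
fn example_column_type_from_field() {
    assert_eq!(
        column_type_from_field(&FieldValue::Boolean(true)),
        ColumnType::Bool
    );
    assert_eq!(
        column_type_from_field(&FieldValue::F64(1.5)),
        ColumnType::F64
    );
}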
/// Data object for a sequencer. Only one sequencer record can exist for a given
/// kafka topic and partition (enforced via uniqueness constraint).
#[derive(Debug, Copy, Clone, PartialEq, sqlx::FromRow)]
pub struct Sequencer {
/// the id of the sequencer
pub id: SequencerId,
/// the topic the sequencer is reading from
pub kafka_topic_id: KafkaTopicId,
/// the kafka partition the sequencer is reading from
pub kafka_partition: KafkaPartition,
/// The minimum unpersisted sequence number. Because different tables
/// can be persisted at different times, it is possible some data has been persisted
/// with a higher sequence number than this. However, all data with a sequence number
/// lower than this must have been persisted to Parquet.
pub min_unpersisted_sequence_number: i64,
}
/// Data object for a partition. The combination of sequencer, table and key is unique (i.e. only
/// one record can exist for each combo)
#[derive(Debug, Clone, PartialEq, sqlx::FromRow)]
pub struct Partition {
/// the id of the partition
pub id: PartitionId,
/// the sequencer the data in the partition arrived from
pub sequencer_id: SequencerId,
/// the table the partition is under
pub table_id: TableId,
/// the string key of the partition
pub partition_key: String,
/// The sort key for the partition. Should be computed on the first persist operation for
/// this partition and updated if new tag columns are added.
pub sort_key: Option<String>,
}
impl Partition {
/// The sort key for the partition, if present, structured as a `SortKey`
pub fn sort_key(&self) -> Option<SortKey> {
self.sort_key
.as_ref()
.map(|s| SortKey::from_columns(s.split(',')))
}
}
/// Information for a partition from the catalog.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct PartitionInfo {
pub partition: Partition,
pub namespace_name: String,
pub table_name: String,
}
/// Data object for a tombstone.
#[derive(Debug, Clone, PartialEq, PartialOrd, sqlx::FromRow)]
pub struct Tombstone {
/// the id of the tombstone
pub id: TombstoneId,
/// the table the tombstone is associated with
pub table_id: TableId,
/// the sequencer the tombstone was sent through
pub sequencer_id: SequencerId,
/// the sequence number assigned to the tombstone from the sequencer
pub sequence_number: SequenceNumber,
/// the min time (inclusive) that the delete applies to
pub min_time: Timestamp,
/// the max time (exclusive) that the delete applies to
pub max_time: Timestamp,
/// the full delete predicate
pub serialized_predicate: String,
}
/// Convert tombstones to delete predicates
pub fn tombstones_to_delete_predicates(tombstones: &[Tombstone]) -> Vec<Arc<DeletePredicate>> {
tombstones_to_delete_predicates_iter(tombstones).collect()
}
/// Return Iterator of delete predicates
pub fn tombstones_to_delete_predicates_iter(
tombstones: &[Tombstone],
) -> impl Iterator<Item = Arc<DeletePredicate>> + '_ {
tombstones.iter().map(|tombstone| {
Arc::new(
parse_delete_predicate(
&tombstone.min_time.get().to_string(),
&tombstone.max_time.get().to_string(),
&tombstone.serialized_predicate,
)
.expect("Error building delete predicate"),
)
})
}
/// Data for a parquet file reference that has been inserted in the catalog.
#[derive(Debug, Clone, Copy, PartialEq, sqlx::FromRow)]
pub struct ParquetFile {
/// the id of the file in the catalog
pub id: ParquetFileId,
/// the sequencer that sequenced writes that went into this file
pub sequencer_id: SequencerId,
/// the namespace
pub namespace_id: NamespaceId,
/// the table
pub table_id: TableId,
/// the partition
pub partition_id: PartitionId,
/// the uuid used in the object store path for this file
pub object_store_id: Uuid,
/// the minimum sequence number from a record in this file
pub min_sequence_number: SequenceNumber,
/// the maximum sequence number from a record in this file
pub max_sequence_number: SequenceNumber,
/// the min timestamp of data in this file
pub min_time: Timestamp,
/// the max timestamp of data in this file
pub max_time: Timestamp,
/// When this file was marked for deletion
pub to_delete: Option<Timestamp>,
/// file size in bytes
pub file_size_bytes: i64,
/// the number of rows of data in this file
pub row_count: i64,
/// the compaction level of the file
pub compaction_level: i16,
/// the creation time of the parquet file
pub created_at: Timestamp,
}
/// Data for a parquet file reference that has been inserted in the catalog, including the
/// `parquet_metadata` field that can be expensive to fetch.
#[derive(Debug, Clone, PartialEq, sqlx::FromRow)]
pub struct ParquetFileWithMetadata {
/// the id of the file in the catalog
pub id: ParquetFileId,
/// the sequencer that sequenced writes that went into this file
pub sequencer_id: SequencerId,
/// the namespace
pub namespace_id: NamespaceId,
/// the table
pub table_id: TableId,
/// the partition
pub partition_id: PartitionId,
/// the uuid used in the object store path for this file
pub object_store_id: Uuid,
/// the minimum sequence number from a record in this file
pub min_sequence_number: SequenceNumber,
/// the maximum sequence number from a record in this file
pub max_sequence_number: SequenceNumber,
/// the min timestamp of data in this file
pub min_time: Timestamp,
/// the max timestamp of data in this file
pub max_time: Timestamp,
/// When this file was marked for deletion
pub to_delete: Option<Timestamp>,
/// file size in bytes
pub file_size_bytes: i64,
/// thrift-encoded parquet metadata
pub parquet_metadata: Vec<u8>,
/// the number of rows of data in this file
pub row_count: i64,
/// the compaction level of the file
pub compaction_level: i16,
/// the creation time of the parquet file
pub created_at: Timestamp,
}
impl ParquetFileWithMetadata {
/// Create an instance from an instance of ParquetFile and metadata bytes fetched from the
/// catalog.
pub fn new(parquet_file: ParquetFile, parquet_metadata: Vec<u8>) -> Self {
let ParquetFile {
id,
sequencer_id,
namespace_id,
table_id,
partition_id,
object_store_id,
min_sequence_number,
max_sequence_number,
min_time,
max_time,
to_delete,
file_size_bytes,
row_count,
compaction_level,
created_at,
} = parquet_file;
Self {
id,
sequencer_id,
namespace_id,
table_id,
partition_id,
object_store_id,
min_sequence_number,
max_sequence_number,
min_time,
max_time,
to_delete,
file_size_bytes,
parquet_metadata,
row_count,
compaction_level,
created_at,
}
}
/// Split the parquet_metadata off, leaving a regular ParquetFile and the bytes to transfer
/// ownership separately.
pub fn split_off_metadata(self) -> (ParquetFile, Vec<u8>) {
let Self {
id,
sequencer_id,
namespace_id,
table_id,
partition_id,
object_store_id,
min_sequence_number,
max_sequence_number,
min_time,
max_time,
to_delete,
file_size_bytes,
parquet_metadata,
row_count,
compaction_level,
created_at,
} = self;
(
ParquetFile {
id,
sequencer_id,
namespace_id,
table_id,
partition_id,
object_store_id,
min_sequence_number,
max_sequence_number,
min_time,
max_time,
to_delete,
file_size_bytes,
row_count,
compaction_level,
created_at,
},
parquet_metadata,
)
}
}
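// A sketch: splitting the (potentially large) metadata off a decorated file
// and recombining it. Takes an existing value rather than constructing one,
// since the struct has many fields.
fn example_metadata_round_trip(file: ParquetFileWithMetadata) -> ParquetFileWithMetadata {
    let (parquet_file, metadata) = file.split_off_metadata();
    // ... the plain `ParquetFile` could be passed around cheaply here ...
    ParquetFileWithMetadata::new(parquet_file, metadata)
}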
/// Data for a parquet file to be inserted into the catalog.
#[derive(Debug, Clone, PartialEq)]
pub struct ParquetFileParams {
/// the sequencer that sequenced writes that went into this file
pub sequencer_id: SequencerId,
/// the namespace
pub namespace_id: NamespaceId,
/// the table
pub table_id: TableId,
/// the partition
pub partition_id: PartitionId,
/// the uuid used in the object store path for this file
pub object_store_id: Uuid,
/// the minimum sequence number from a record in this file
pub min_sequence_number: SequenceNumber,
/// the maximum sequence number from a record in this file
pub max_sequence_number: SequenceNumber,
/// the min timestamp of data in this file
pub min_time: Timestamp,
/// the max timestamp of data in this file
pub max_time: Timestamp,
/// file size in bytes
pub file_size_bytes: i64,
/// thrift-encoded parquet metadata
pub parquet_metadata: Vec<u8>,
/// the number of rows of data in this file
pub row_count: i64,
/// the compaction level of the file
pub compaction_level: i16,
/// the creation time of the parquet file
pub created_at: Timestamp,
}
/// Data for a processed tombstone reference in the catalog.
#[derive(Debug, Copy, Clone, PartialEq, sqlx::FromRow)]
pub struct ProcessedTombstone {
/// the id of the tombstone applied to the parquet file
pub tombstone_id: TombstoneId,
/// the id of the parquet file the tombstone was applied to
pub parquet_file_id: ParquetFileId,
}
/// Request from the querier service to the ingester service
#[derive(Debug, PartialEq, Clone)]
pub struct IngesterQueryRequest {
/// namespace to search
pub namespace: String,
/// Table to search
pub table: String,
/// Columns the query service is interested in
pub columns: Vec<String>,
/// Predicate for filtering
pub predicate: Option<Predicate>,
}
impl IngesterQueryRequest {
/// Make a request to return data for a specified table from
/// all sequencers an ingester is responsible for
pub fn new(
namespace: String,
table: String,
columns: Vec<String>,
predicate: Option<Predicate>,
) -> Self {
Self {
namespace,
table,
columns,
predicate,
}
}
}
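As a quick illustration of the constructor above (hypothetical namespace, table, and column names; `None` means no predicate, so all rows for the table are requested). Note that later hunks in this diff place the type in `generated_types::ingester`:

use generated_types::ingester::IngesterQueryRequest;

fn example_request() -> IngesterQueryRequest {
    IngesterQueryRequest::new(
        "my_namespace".to_string(),
        "cpu".to_string(),
        vec!["time".to_string(), "usage_user".to_string()],
        None, // no predicate: return all rows of the table
    )
}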

View File

@ -9,12 +9,7 @@ arrow_util = { path = "../arrow_util" }
data_types = { path = "../data_types" }
hashbrown = "0.12"
mutable_batch = { path = "../mutable_batch" }
ordered-float = "3"
schema = { path = "../schema" }
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
mutable_batch_lp = { path = "../mutable_batch_lp" }
regex = "1"

View File

@ -11,15 +11,8 @@
clippy::clone_on_ref_ptr
)]
use std::collections::{BTreeMap, HashSet};
use data_types::router::{ShardConfig, ShardId};
use data_types::{DeletePredicate, NonEmptyString, Sequence, StatValues, Statistics};
use hashbrown::HashMap;
use data_types::delete_predicate::DeletePredicate;
use data_types::non_empty::NonEmptyString;
use data_types::partition_metadata::{StatValues, Statistics};
use data_types::sequence::Sequence;
use iox_time::Time;
use mutable_batch::MutableBatch;
use trace::ctx::SpanContext;
@ -131,22 +124,6 @@ impl DmlOperation {
}
}
/// Shards this [`DmlOperation`]
pub fn shard(self, config: &ShardConfig) -> BTreeMap<ShardId, Self> {
match self {
DmlOperation::Write(write) => write
.shard(config)
.into_iter()
.map(|(shard, write)| (shard, Self::Write(write)))
.collect(),
DmlOperation::Delete(delete) => delete
.shard(config)
.into_iter()
.map(|(shard, delete)| (shard, Self::Delete(delete)))
.collect(),
}
}
/// Return the approximate memory size of the operation, in bytes.
///
/// This includes `Self`.
@ -283,31 +260,6 @@ impl DmlWrite {
self.max_timestamp
}
/// Shards this [`DmlWrite`]
pub fn shard(self, config: &ShardConfig) -> BTreeMap<ShardId, Self> {
let mut batches: HashMap<ShardId, HashMap<String, MutableBatch>> = HashMap::new();
for (table, batch) in self.tables {
if let Some(shard_id) = shard_table(&table, config) {
assert!(batches
.entry(shard_id)
.or_default()
.insert(table, batch.clone())
.is_none());
}
}
batches
.into_iter()
.map(|(shard_id, tables)| {
(
shard_id,
Self::new(&self.namespace, tables, self.meta.clone()),
)
})
.collect()
}
/// Return the approximate memory size of the write, in bytes.
///
/// This includes `Self`.
@ -373,32 +325,6 @@ impl DmlDelete {
self.meta = meta
}
/// Shards this [`DmlDelete`]
pub fn shard(self, config: &ShardConfig) -> BTreeMap<ShardId, Self> {
if let Some(table) = self.table_name() {
if let Some(shard_id) = shard_table(table, config) {
BTreeMap::from([(shard_id, self)])
} else {
BTreeMap::default()
}
} else {
let shards: HashSet<ShardId> =
config
.specific_targets
.iter()
.map(|matcher2shard| matcher2shard.shard)
.chain(config.hash_ring.iter().flat_map(|hashring| {
Vec::<ShardId>::from(hashring.shards.clone()).into_iter()
}))
.collect();
shards
.into_iter()
.map(|shard| (shard, self.clone()))
.collect()
}
}
/// Return the approximate memory size of the delete, in bytes.
///
/// This includes `Self`.
@ -414,25 +340,6 @@ impl DmlDelete {
}
}
/// Shard only based on table name
fn shard_table(table: &str, config: &ShardConfig) -> Option<ShardId> {
for matcher2shard in &config.specific_targets {
if let Some(regex) = &matcher2shard.matcher.table_name_regex {
if regex.is_match(table) {
return Some(matcher2shard.shard);
}
}
}
if let Some(hash_ring) = &config.hash_ring {
if let Some(id) = hash_ring.shards.find(table) {
return Some(id);
}
}
None
}
/// Test utilities
pub mod test_util {
use arrow_util::display::pretty_format_batches;
@ -484,221 +391,3 @@ pub mod test_util {
}
}
}
#[cfg(test)]
mod tests {
use data_types::{
consistent_hasher::ConsistentHasher,
delete_predicate::DeletePredicate,
non_empty::NonEmptyString,
router::{HashRing, Matcher, MatcherToShard},
timestamp::TimestampRange,
};
use mutable_batch_lp::lines_to_batches;
use regex::Regex;
use crate::test_util::assert_writes_eq;
use super::*;
#[test]
fn test_write_sharding() {
let config = ShardConfig {
specific_targets: vec![
MatcherToShard {
matcher: Matcher {
table_name_regex: None,
},
shard: ShardId::new(1),
},
MatcherToShard {
matcher: Matcher {
table_name_regex: Some(Regex::new("some_foo").unwrap()),
},
shard: ShardId::new(2),
},
MatcherToShard {
matcher: Matcher {
table_name_regex: Some(Regex::new("other").unwrap()),
},
shard: ShardId::new(3),
},
MatcherToShard {
matcher: Matcher {
table_name_regex: Some(Regex::new("some_.*").unwrap()),
},
shard: ShardId::new(4),
},
MatcherToShard {
matcher: Matcher {
table_name_regex: Some(Regex::new("baz").unwrap()),
},
shard: ShardId::new(2),
},
],
hash_ring: Some(HashRing {
shards: ConsistentHasher::new(&[
ShardId::new(11),
ShardId::new(12),
ShardId::new(13),
]),
}),
};
let meta = DmlMeta::unsequenced(None);
let write = db_write(
&[
"some_foo x=1 10",
"some_foo x=2 20",
"some_bar y=3 30",
"other z=4 40",
"rnd1 r=5 50",
"rnd2 r=6 60",
"rnd3 r=7 70",
"baz b=8 80",
],
&meta,
);
let actual = write.shard(&config);
let expected = BTreeMap::from([
(
ShardId::new(2),
db_write(&["some_foo x=1 10", "some_foo x=2 20", "baz b=8 80"], &meta),
),
(ShardId::new(3), db_write(&["other z=4 40"], &meta)),
(ShardId::new(4), db_write(&["some_bar y=3 30"], &meta)),
(ShardId::new(11), db_write(&["rnd1 r=5 50"], &meta)),
(ShardId::new(12), db_write(&["rnd3 r=7 70"], &meta)),
(ShardId::new(13), db_write(&["rnd2 r=6 60"], &meta)),
]);
let actual_shard_ids: Vec<_> = actual.keys().cloned().collect();
let expected_shard_ids: Vec<_> = expected.keys().cloned().collect();
assert_eq!(actual_shard_ids, expected_shard_ids);
for (actual_write, expected_write) in actual.values().zip(expected.values()) {
assert_writes_eq(actual_write, expected_write);
}
}
#[test]
fn test_write_no_match() {
let config = ShardConfig::default();
let meta = DmlMeta::default();
let write = db_write(&["foo x=1 10"], &meta);
let actual = write.shard(&config);
assert!(actual.is_empty());
}
#[test]
fn test_delete_sharding() {
let config = ShardConfig {
specific_targets: vec![
MatcherToShard {
matcher: Matcher {
table_name_regex: None,
},
shard: ShardId::new(1),
},
MatcherToShard {
matcher: Matcher {
table_name_regex: Some(Regex::new("some_foo").unwrap()),
},
shard: ShardId::new(2),
},
MatcherToShard {
matcher: Matcher {
table_name_regex: Some(Regex::new("some_.*").unwrap()),
},
shard: ShardId::new(3),
},
],
hash_ring: Some(HashRing {
shards: ConsistentHasher::new(&[
ShardId::new(11),
ShardId::new(12),
ShardId::new(13),
]),
}),
};
// Deletes w/o table name go to all shards
let meta = DmlMeta::unsequenced(None);
let delete = DmlDelete::new(
"test_db",
DeletePredicate {
range: TimestampRange::new(1, 2),
exprs: vec![],
},
None,
meta,
);
let actual = delete.clone().shard(&config);
let expected = BTreeMap::from([
(ShardId::new(1), delete.clone()),
(ShardId::new(2), delete.clone()),
(ShardId::new(3), delete.clone()),
(ShardId::new(11), delete.clone()),
(ShardId::new(12), delete.clone()),
(ShardId::new(13), delete),
]);
assert_sharded_deletes_eq(&actual, &expected);
// Deletes are matched by table name regex
let meta = DmlMeta::unsequenced(None);
let delete = DmlDelete::new(
"test_db",
DeletePredicate {
range: TimestampRange::new(3, 4),
exprs: vec![],
},
Some(NonEmptyString::new("some_foo").unwrap()),
meta,
);
let actual = delete.clone().shard(&config);
let expected = BTreeMap::from([(ShardId::new(2), delete)]);
assert_sharded_deletes_eq(&actual, &expected);
// Deletes can be matched by hash-ring
let meta = DmlMeta::unsequenced(None);
let delete = DmlDelete::new(
"test_db",
DeletePredicate {
range: TimestampRange::new(5, 6),
exprs: vec![],
},
Some(NonEmptyString::new("bar").unwrap()),
meta,
);
let actual = delete.clone().shard(&config);
let expected = BTreeMap::from([(ShardId::new(13), delete)]);
assert_sharded_deletes_eq(&actual, &expected);
}
fn db_write(lines: &[&str], meta: &DmlMeta) -> DmlWrite {
DmlWrite::new(
"test_db",
lines_to_batches(&lines.join("\n"), 0).unwrap(),
meta.clone(),
)
}
fn assert_sharded_deletes_eq(
actual: &BTreeMap<ShardId, DmlDelete>,
expected: &BTreeMap<ShardId, DmlDelete>,
) {
let actual_shard_ids: Vec<_> = actual.keys().cloned().collect();
let expected_shard_ids: Vec<_> = expected.keys().cloned().collect();
assert_eq!(actual_shard_ids, expected_shard_ids);
for (actual_delete, expected_delete) in actual.values().zip(expected.values()) {
assert_eq!(actual_delete, expected_delete);
}
}
}
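For reference, the `shard()` / `shard_table()` logic deleted above resolved a table to a shard in two stages: the table name was first matched against the `specific_targets` regex matchers in order, and only then fell back to the consistent hash ring. A standalone sketch of that precedence, with simplified types and a trivial stand-in hash instead of the removed `ConsistentHasher`:

use regex::Regex;

struct MatcherToShard {
    table_name_regex: Option<Regex>,
    shard: u32,
}

/// First matching regex wins; otherwise the table name is hashed onto the ring.
fn shard_for_table(table: &str, targets: &[MatcherToShard], ring: &[u32]) -> Option<u32> {
    for target in targets {
        if let Some(regex) = &target.table_name_regex {
            if regex.is_match(table) {
                return Some(target.shard);
            }
        }
    }
    if ring.is_empty() {
        return None;
    }
    // Simplified placeholder for ConsistentHasher::find(); not the real hash.
    let hash = table
        .bytes()
        .fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64));
    Some(ring[(hash % ring.len() as u64) as usize])
}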

View File

@ -7,7 +7,6 @@ edition = "2021"
[dependencies] # In alphabetical order
bytes = "1.0"
data_types = { path = "../data_types", optional = true }
data_types2 = { path = "../data_types2", optional = true }
datafusion = { path = "../datafusion", optional = true }
observability_deps = { path = "../observability_deps" }
pbjson = "0.3"
@ -15,16 +14,10 @@ pbjson-types = "0.3"
predicate = { path = "../predicate", optional = true }
prost = "0.10"
query_functions = { path = "../query_functions" }
regex = "1"
serde = { version = "1.0", features = ["derive"] }
tonic = "0.7"
iox_time = { path = "../iox_time" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
data_types = { path = "../data_types" }
num_cpus = "1.13.0"
[build-dependencies] # In alphabetical order
tonic-build = "0.7"
prost-build = "0.10"
@ -32,4 +25,4 @@ pbjson-build = "0.3"
[features]
default = ["data_types_conversions"]
data_types_conversions = ["data_types", "data_types2", "datafusion", "predicate"]
data_types_conversions = ["data_types", "datafusion", "predicate"]

View File

@ -1,8 +1,9 @@
//! Code to serialize and deserialize certain expressions.
//!
//! Note that [Ballista] also provides a serialization using [Protocol Buffers 3]. However the protocol is meant as a
//! communication channel between workers and clients of Ballista, not for long term preservation. For IOx we need a
//! more stable solution. Luckily we only need to support a very small subset of expression.
//! Note that [Ballista] also provides a serialization using [Protocol Buffers 3]. However, the
//! protocol is meant as a communication channel between workers and clients of Ballista, not for
//! long-term preservation. For IOx we need a more stable solution; luckily we only need to
//! support a very small subset of expressions.
//!
//! [Ballista]: https://github.com/apache/arrow-datafusion/blob/22fcb3d7a68a56afbe12eab9e7d98f7b8de33703/ballista/rust/core/proto/ballista.proto
//! [Protocol Buffers 3]: https://developers.google.com/protocol-buffers/docs/proto3
@ -11,10 +12,7 @@ use crate::google::{FieldViolation, FromOptionalField, FromRepeatedField, Option
use crate::influxdata::iox::predicate::v1 as proto;
use crate::influxdata::iox::predicate::v1::scalar::Value;
use crate::influxdata::iox::predicate::v1::{Expr, Predicate};
use data_types::{
delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar},
timestamp::TimestampRange,
};
use data_types::{DeleteExpr, DeletePredicate, Op, Scalar, TimestampRange};
impl From<DeletePredicate> for proto::Predicate {
fn from(predicate: DeletePredicate) -> Self {

View File

@ -233,7 +233,6 @@ pub enum ResourceType {
DatabaseUuid,
Job,
Router,
ServerId,
Unknown(String),
}
@ -247,7 +246,6 @@ impl ResourceType {
Self::Chunk => "chunk",
Self::Job => "job",
Self::Router => "router",
Self::ServerId => "server_id",
Self::Unknown(unknown) => unknown,
}
}
@ -263,7 +261,6 @@ impl From<String> for ResourceType {
"chunk" => Self::Chunk,
"job" => Self::Job,
"router" => Self::Router,
"server_id" => Self::ServerId,
_ => Self::Unknown(s),
}
}
@ -417,8 +414,6 @@ pub fn decode_not_found(status: &tonic::Status) -> impl Iterator<Item = NotFound
/// prevents performing the requested operation
#[derive(Debug, Clone, PartialEq)]
pub enum PreconditionViolation {
/// Server ID not set
ServerIdNotSet,
/// Database is not mutable
DatabaseImmutable,
/// Server not in required state for operation
@ -444,7 +439,6 @@ pub enum PreconditionViolation {
impl PreconditionViolation {
fn description(&self) -> String {
match self {
Self::ServerIdNotSet => "server id must be set".to_string(),
Self::DatabaseImmutable => "database must be mutable".to_string(),
Self::ServerInvalidState(description) => description.clone(),
Self::DatabaseInvalidState(description) => description.clone(),
@ -460,11 +454,6 @@ impl PreconditionViolation {
impl From<PreconditionViolation> for rpc::precondition_failure::Violation {
fn from(v: PreconditionViolation) -> Self {
match v {
PreconditionViolation::ServerIdNotSet => Self {
r#type: "server_id".to_string(),
subject: "influxdata.com/iox".to_string(),
description: v.description(),
},
PreconditionViolation::ServerInvalidState(_) => Self {
r#type: "state".to_string(),
subject: "influxdata.com/iox".to_string(),
@ -516,7 +505,6 @@ impl From<PreconditionViolation> for rpc::precondition_failure::Violation {
impl From<rpc::precondition_failure::Violation> for PreconditionViolation {
fn from(v: rpc::precondition_failure::Violation) -> Self {
match (v.r#type.as_str(), v.subject.as_str()) {
("server_id", "influxdata.com/iox") => PreconditionViolation::ServerIdNotSet,
("state", "influxdata.com/iox") => {
PreconditionViolation::ServerInvalidState(v.description)
}

View File

@ -1,6 +1,5 @@
use crate::{google::FieldViolation, influxdata::iox::ingester::v1 as proto};
use data_types::timestamp::TimestampRange;
use data_types2::IngesterQueryRequest;
use data_types::TimestampRange;
use datafusion::{
common::DataFusionError, datafusion_proto::bytes::Serializeable, logical_plan::Expr,
};
@ -20,6 +19,37 @@ fn expr_from_bytes_violation(field: impl Into<String>, e: DataFusionError) -> Fi
}
}
/// Request from the querier service to the ingester service
#[derive(Debug, PartialEq, Clone)]
pub struct IngesterQueryRequest {
/// namespace to search
pub namespace: String,
/// Table to search
pub table: String,
/// Columns the query service is interested in
pub columns: Vec<String>,
/// Predicate for filtering
pub predicate: Option<Predicate>,
}
impl IngesterQueryRequest {
/// Make a request to return data for a specified table from
/// all sequencers an ingester is responsible for
pub fn new(
namespace: String,
table: String,
columns: Vec<String>,
predicate: Option<Predicate>,
) -> Self {
Self {
namespace,
table,
columns,
predicate,
}
}
}
impl TryFrom<proto::IngesterQueryRequest> for IngesterQueryRequest {
type Error = FieldViolation;

View File

@ -8,7 +8,6 @@ description = "Protobuf used in test for the grpc-router crate; need to be in a
[dependencies]
tonic = "0.7"
prost = "0.10"
prost-types = "0.10"
[build-dependencies]
tonic-build = "0.7"

View File

@ -5,24 +5,14 @@ authors = ["Marko Mikulicic <mkm@influxdata.com>"]
edition = "2021"
[dependencies]
bytes = "1.0"
cache_loader_async = {version = "0.2.0", features = ["ttl-cache"] }
futures = "0.3"
observability_deps = { path = "../observability_deps" }
paste = "1.0.7"
prost = "0.10"
prost-types = "0.10"
thiserror = "1.0.31"
tokio = { version = "1.18", features = ["macros", "parking_lot", "rt-multi-thread"] }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.1" }
tonic = "0.7"
tonic-reflection = "0.4.0"
[build-dependencies]
paste = "1.0.7"
prost-build = "0.10"
tonic-build = "0.7"
[dev-dependencies]
grpc-router-test-gen = { path = "../grpc-router-test-gen" }

View File

@ -8,81 +8,48 @@ default-run = "influxdb_iox"
[dependencies]
# Workspace dependencies, in alphabetical order
clap_blocks = { path = "../clap_blocks" }
compactor = { path = "../compactor" }
data_types = { path = "../data_types" }
data_types2 = { path = "../data_types2" }
datafusion = { path = "../datafusion" }
dml = { path = "../dml" }
generated_types = { path = "../generated_types" }
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format", "write_lp"] }
influxdb_storage_client = { path = "../influxdb_storage_client" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
ingester = { path = "../ingester" }
influxrpc_parser = { path = "../influxrpc_parser"}
iox_catalog = { path = "../iox_catalog" }
iox_object_store = { path = "../iox_object_store" }
ioxd_common = { path = "../ioxd_common"}
ioxd_compactor = { path = "../ioxd_compactor"}
ioxd_ingester = { path = "../ioxd_ingester"}
ioxd_router2 = { path = "../ioxd_router2"}
ioxd_router = { path = "../ioxd_router"}
ioxd_querier = { path = "../ioxd_querier"}
ioxd_test = { path = "../ioxd_test"}
logfmt = { path = "../logfmt" }
metric = { path = "../metric" }
metric_exporters = { path = "../metric_exporters" }
mutable_batch = { path = "../mutable_batch" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
mutable_batch_pb = { path = "../mutable_batch_pb" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
panic_logging = { path = "../panic_logging" }
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }
querier = { path = "../querier" }
query = { path = "../query" }
read_buffer = { path = "../read_buffer" }
router2 = { path = "../router2" }
schema = { path = "../schema" }
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
trace_exporters = { path = "../trace_exporters" }
trace_http = { path = "../trace_http" }
tracker = { path = "../tracker" }
trogging = { path = "../trogging", default-features = false, features = ["clap"] }
write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
ansi_term = "0.12"
arrow = { version = "13", features = ["prettyprint"] }
arrow-flight = "13"
async-trait = "0.1"
backtrace = "0.3"
byteorder = "1.3.4"
bytes = "1.0"
chrono = { version = "0.4", default-features = false }
clap = { version = "3", features = ["derive", "env"] }
console-subscriber = { version = "0.1.5", optional = true, features = ["parking_lot"] }
csv = "1.1"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3"
hashbrown = "0.12"
http = "0.2.7"
humantime = "2.1.0"
hyper = "0.14"
itertools = "0.10.1"
libc = { version = "0.2" }
log = "0.4"
num_cpus = "1.13.0"
once_cell = { version = "1.10.0", features = ["parking_lot"] }
parking_lot = "0.12"
parquet = "13"
pin-project = "1.0"
prost = "0.10"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.81"
serde_urlencoded = "0.7.0"
snafu = "0.7"
thiserror = "1.0.31"
tikv-jemalloc-ctl = { version = "0.4.0", optional = true }
@ -90,9 +57,6 @@ tokio = { version = "1.18", features = ["macros", "net", "parking_lot", "rt-mult
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.1" }
tonic = "0.7"
tonic-health = "0.6.0"
tonic-reflection = "0.4.0"
tower = "0.4"
uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
@ -103,6 +67,7 @@ workspace-hack = { path = "../workspace-hack"}
# In alphabetical order
arrow_util = { path = "../arrow_util" }
assert_cmd = "2.0.2"
predicate = { path = "../predicate" }
predicates = "2.1.0"
tempfile = "3.1.0"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }

View File

@ -2,7 +2,7 @@
use bytes::Bytes;
use clap_blocks::{catalog_dsn::CatalogDsnConfig, object_store::ObjectStoreConfig};
use data_types2::{
use data_types::{
ColumnType, KafkaPartition, NamespaceId, NamespaceSchema as CatalogNamespaceSchema,
ParquetFile as CatalogParquetFile, ParquetFileParams, PartitionId, SequenceNumber, SequencerId,
TableId, Timestamp,
@ -18,9 +18,9 @@ use influxdb_iox_client::{
store,
};
use iox_catalog::interface::{get_schema_by_name, Catalog};
use iox_object_store::ParquetFilePath;
use object_store::{DynObjectStore, ObjectStoreImpl};
use std::sync::Arc;
use parquet_file::ParquetFilePath;
use std::{ops::Deref, sync::Arc};
use thiserror::Error;
use tokio_stream::StreamExt;
use uuid::Uuid;
@ -150,15 +150,14 @@ pub async fn command(connection: Connection, config: Config) -> Result<(), Error
let mut handles = vec![];
let store_client = store::Client::new(connection);
for parquet_file in parquet_files {
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
parquet_file.namespace_id,
parquet_file.table_id,
parquet_file.sequencer_id,
parquet_file.partition_id,
parquet_file.object_store_id,
)
.absolute_dirs_and_file_name();
let path = object_store.path_from_dirs_and_filename(path);
);
let path = path.object_store_path(object_store.deref());
match object_store.get(&path).await {
Ok(_) => {
println!(
@ -371,7 +370,7 @@ struct PartitionMapping {
#[cfg(test)]
mod tests {
use super::*;
use data_types2::{ColumnType, ParquetFileId};
use data_types::{ColumnType, ParquetFileId};
use influxdb_iox_client::schema::generated_types::*;
use iox_catalog::mem::MemCatalog;
use std::collections::HashMap;
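The path construction in this file (and in the ingester's persist code further down) uses the relocated `parquet_file::ParquetFilePath`, replacing `iox_object_store::ParquetFilePath::new_new_gen` plus `path_from_dirs_and_filename`. A hedged sketch of the new shape, assuming `get` keeps the `object_store` crate signature used at the other call sites in this diff (the function name is illustrative):

use data_types::ParquetFile;
use object_store::DynObjectStore;
use parquet_file::ParquetFilePath;
use std::{ops::Deref, sync::Arc};

/// Returns true if the parquet file's object exists in the store.
async fn exists_in_object_store(file: ParquetFile, object_store: Arc<DynObjectStore>) -> bool {
    let path = ParquetFilePath::new(
        file.namespace_id,
        file.table_id,
        file.sequencer_id,
        file.partition_id,
        file.object_store_id,
    )
    .object_store_path(object_store.deref());
    object_store.get(&path).await.is_ok()
}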

View File

@ -14,7 +14,7 @@ use ioxd_common::{
use ioxd_compactor::create_compactor_server_type;
use ioxd_ingester::create_ingester_server_type;
use ioxd_querier::create_querier_server_type;
use ioxd_router2::create_router2_server_type;
use ioxd_router::create_router_server_type;
use object_store::{DynObjectStore, ObjectStoreImpl};
use observability_deps::tracing::*;
use query::exec::Executor;
@ -55,8 +55,8 @@ pub enum Error {
#[error("Cannot parse object store config: {0}")]
ObjectStoreParsing(#[from] clap_blocks::object_store::ParseError),
#[error("Router2 error: {0}")]
Router2(#[from] ioxd_router2::Error),
#[error("Router error: {0}")]
Router(#[from] ioxd_router::Error),
#[error("Ingester error: {0}")]
Ingester(#[from] ioxd_ingester::Error),
@ -421,8 +421,8 @@ pub async fn command(config: Config) -> Result<()> {
info!(%num_threads, "Creating shared query executor");
let exec = Arc::new(Executor::new(num_threads));
info!("starting router2");
let router2 = create_router2_server_type(
info!("starting router");
let router = create_router_server_type(
&common_state,
Arc::clone(&metrics),
Arc::clone(&catalog),
@ -473,7 +473,7 @@ pub async fn command(config: Config) -> Result<()> {
info!("starting all in one server");
let services = vec![
Service::create(router2, &router_run_config),
Service::create(router, &router_run_config),
Service::create_grpc_only(ingester, &ingester_run_config),
Service::create_grpc_only(compactor, &compactor_run_config),
Service::create_grpc_only(querier, &querier_run_config),

View File

@ -6,7 +6,7 @@ mod compactor;
mod ingester;
mod main;
mod querier;
mod router2;
mod router;
mod test;
#[derive(Debug, Snafu)]
@ -18,8 +18,8 @@ pub enum Error {
#[snafu(display("Error in querier subcommand: {}", source))]
QuerierError { source: querier::Error },
#[snafu(display("Error in router2 subcommand: {}", source))]
Router2Error { source: router2::Error },
#[snafu(display("Error in router subcommand: {}", source))]
RouterError { source: router::Error },
#[snafu(display("Error in ingester subcommand: {}", source))]
IngesterError { source: ingester::Error },
@ -49,7 +49,7 @@ impl Config {
None => &self.all_in_one_config.logging_config,
Some(Command::Compactor(config)) => config.run_config.logging_config(),
Some(Command::Querier(config)) => config.run_config.logging_config(),
Some(Command::Router2(config)) => config.run_config.logging_config(),
Some(Command::Router(config)) => config.run_config.logging_config(),
Some(Command::Ingester(config)) => config.run_config.logging_config(),
Some(Command::AllInOne(config)) => &config.logging_config,
Some(Command::Test(config)) => config.run_config.logging_config(),
@ -65,8 +65,8 @@ enum Command {
/// Run the server in querier mode
Querier(querier::Config),
/// Run the server in router2 mode
Router2(router2::Config),
/// Run the server in router mode
Router(router::Config),
/// Run the server in ingester mode
Ingester(ingester::Config),
@ -87,7 +87,7 @@ pub async fn command(config: Config) -> Result<()> {
compactor::command(config).await.context(CompactorSnafu)
}
Some(Command::Querier(config)) => querier::command(config).await.context(QuerierSnafu),
Some(Command::Router2(config)) => router2::command(config).await.context(Router2Snafu),
Some(Command::Router(config)) => router::command(config).await.context(RouterSnafu),
Some(Command::Ingester(config)) => ingester::command(config).await.context(IngesterSnafu),
Some(Command::AllInOne(config)) => all_in_one::command(config).await.context(AllInOneSnafu),
Some(Command::Test(config)) => test::command(config).await.context(TestSnafu),

View File

@ -1,20 +1,19 @@
//! Implementation of command line option for running router2
use std::sync::Arc;
//! Implementation of command line option for running router
use super::main;
use clap_blocks::{
catalog_dsn::CatalogDsnConfig, run_config::RunConfig, write_buffer::WriteBufferConfig,
};
use ioxd_common::server_type::{CommonServerState, CommonServerStateError};
use ioxd_common::Service;
use ioxd_router2::create_router2_server_type;
use ioxd_common::{
server_type::{CommonServerState, CommonServerStateError},
Service,
};
use ioxd_router::create_router_server_type;
use object_store::{instrumentation::ObjectStoreMetrics, DynObjectStore, ObjectStoreImpl};
use observability_deps::tracing::*;
use std::sync::Arc;
use thiserror::Error;
use super::main;
#[derive(Debug, Error)]
pub enum Error {
#[error("Run: {0}")]
@ -27,7 +26,7 @@ pub enum Error {
ObjectStoreParsing(#[from] clap_blocks::object_store::ParseError),
#[error("Creating router: {0}")]
Router(#[from] ioxd_router2::Error),
Router(#[from] ioxd_router::Error),
#[error("Catalog DSN error: {0}")]
CatalogDsn(#[from] clap_blocks::catalog_dsn::Error),
@ -38,8 +37,8 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, clap::Parser)]
#[clap(
name = "run",
about = "Runs in router2 mode",
long_about = "Run the IOx router2 server.\n\nThe configuration options below can be \
about = "Runs in router mode",
long_about = "Run the IOx router server.\n\nThe configuration options below can be \
set either with the command line flags or with the specified environment \
variable. If there is a file named '.env' in the current working directory, \
it is sourced before loading the configuration.
@ -91,7 +90,7 @@ pub async fn command(config: Config) -> Result<()> {
let catalog = config
.catalog_dsn
.get_catalog("router2", Arc::clone(&metrics))
.get_catalog("router", Arc::clone(&metrics))
.await?;
let object_store = ObjectStoreImpl::try_from(config.run_config.object_store_config())
@ -100,7 +99,7 @@ pub async fn command(config: Config) -> Result<()> {
let object_store: Arc<DynObjectStore> =
Arc::new(ObjectStoreMetrics::new(object_store, &*metrics));
let server_type = create_router2_server_type(
let server_type = create_router_server_type(
&common_state,
Arc::clone(&metrics),
catalog,
@ -111,7 +110,7 @@ pub async fn command(config: Config) -> Result<()> {
)
.await?;
info!("starting router2");
info!("starting router");
let services = vec![Service::create(server_type, common_state.run_config())];
Ok(main::main(common_state, services, metrics).await?)
}

View File

@ -54,7 +54,7 @@ async fn remote_partition_and_get_from_store_and_pull() {
// Run the 'remote partition' command
Step::Custom(Box::new(|state: &mut StepTestState| {
async {
let router_addr = state.cluster().router2().router_grpc_base().to_string();
let router_addr = state.cluster().router().router_grpc_base().to_string();
let namespace = state.cluster().namespace().to_string();
// Validate the output of the remote partition CLI command

View File

@ -1,14 +1,13 @@
use std::collections::BTreeMap;
use generated_types::influxdata::iox::ingester::v1::PartitionStatus;
use arrow_util::assert_batches_sorted_eq;
use generated_types::{
influxdata::iox::ingester::v1::PartitionStatus, ingester::IngesterQueryRequest,
};
use http::StatusCode;
use std::collections::BTreeMap;
use test_helpers_end_to_end_ng::{
get_write_token, maybe_skip_integration, wait_for_readable, MiniCluster,
};
use arrow_util::assert_batches_sorted_eq;
use data_types2::IngesterQueryRequest;
#[tokio::test]
async fn ingester_flight_api() {
test_helpers::maybe_start_logging();

View File

@ -9,13 +9,13 @@ async fn querier_namespace_client() {
let table_name = "the_table";
let router2_config = TestConfig::new_router2(&database_url);
let ingester_config = TestConfig::new_ingester(&router2_config);
let router_config = TestConfig::new_router(&database_url);
let ingester_config = TestConfig::new_ingester(&router_config);
let querier_config = TestConfig::new_querier(&ingester_config);
// Set up the cluster ====================================
let cluster = MiniCluster::new()
.with_router2(router2_config)
.with_router(router_config)
.await
.with_ingester(ingester_config)
.await

View File

@ -82,16 +82,16 @@ async fn basic_no_ingester_connection() {
let table_name = "the_table";
let router2_config = TestConfig::new_router2(&database_url);
let router_config = TestConfig::new_router(&database_url);
// fast parquet
let ingester_config = TestConfig::new_ingester(&router2_config);
let ingester_config = TestConfig::new_ingester(&router_config);
// specially create a querier config that is NOT connected to the ingester
let querier_config = TestConfig::new_querier_without_ingester(&ingester_config);
// Set up the cluster ====================================
let mut cluster = MiniCluster::new()
.with_router2(router2_config)
.with_router(router_config)
.await
.with_ingester(ingester_config)
.await

View File

@ -1,10 +1,9 @@
use std::time::Duration;
use arrow::{array::as_primitive_array, datatypes::Int64Type, record_batch::RecordBatch};
use futures::FutureExt;
use influxdb_iox_client::write_info::generated_types::{
GetWriteInfoResponse, KafkaPartitionStatus,
};
use std::time::Duration;
use test_helpers::timeout::FutureTimeout;
use test_helpers_end_to_end_ng::{
all_readable, combined_token_info, maybe_skip_integration, MiniCluster, Step, StepTest,
@ -17,12 +16,12 @@ async fn basic_multi_ingesters() {
let database_url = maybe_skip_integration!();
// write into two different kafka partitions: 0 and 1
let router2_config =
TestConfig::new_router2(&database_url).with_new_write_buffer_kafka_partitions(2);
let router_config =
TestConfig::new_router(&database_url).with_new_write_buffer_kafka_partitions(2);
// ingester gets partition 0
let ingester_config = TestConfig::new_ingester(&router2_config).with_kafka_partition(0);
let ingester2_config = TestConfig::new_ingester(&router2_config).with_kafka_partition(1);
let ingester_config = TestConfig::new_ingester(&router_config).with_kafka_partition(0);
let ingester2_config = TestConfig::new_ingester(&router_config).with_kafka_partition(1);
let querier_config = TestConfig::new_querier_without_ingester(&ingester_config)
// Configure to talk with both the ingesters
@ -33,7 +32,7 @@ async fn basic_multi_ingesters() {
// Set up the cluster ====================================
let mut cluster = MiniCluster::new()
.with_router2(router2_config)
.with_router(router_config)
.await
.with_ingester(ingester_config)
.await

View File

@ -1,11 +1,10 @@
use assert_cmd::Command;
use futures::FutureExt;
use predicates::prelude::*;
use test_helpers_end_to_end_ng::{
maybe_skip_integration, MiniCluster, Step, StepTest, StepTestState,
};
use assert_cmd::Command;
use predicates::prelude::*;
/// Test the schema client
#[tokio::test]
async fn ingester_schema_client() {
@ -23,7 +22,7 @@ async fn ingester_schema_client() {
Step::Custom(Box::new(|state: &mut StepTestState| {
async {
let mut client = influxdb_iox_client::schema::Client::new(
state.cluster().router2().router_grpc_connection(),
state.cluster().router().router_grpc_connection(),
);
let response = client
.get_schema(state.cluster().namespace())
@ -69,7 +68,7 @@ async fn ingester_schema_cli() {
)),
Step::Custom(Box::new(|state: &mut StepTestState| {
async {
let router_addr = state.cluster().router2().router_grpc_base().to_string();
let router_addr = state.cluster().router().router_grpc_base().to_string();
// Validate the output of the schema CLI command
Command::cargo_bin("influxdb_iox")

View File

@ -6,33 +6,28 @@ edition = "2021"
[features]
default = ["flight", "format", "write_lp"]
flight = ["arrow", "arrow-flight", "arrow_util", "serde/derive", "serde_json", "futures-util"]
flight = ["arrow", "arrow-flight", "arrow_util", "futures-util"]
format = ["arrow", "arrow_util"]
write_lp = ["dml", "mutable_batch", "mutable_batch_lp", "mutable_batch_pb"]
write_lp = ["dml", "mutable_batch_lp", "mutable_batch_pb"]
[dependencies]
# Workspace dependencies, in alphabetical order
arrow_util = { path = "../arrow_util", optional = true }
client_util = { path = "../client_util" }
dml = { path = "../dml", optional = true }
generated_types = { path = "../generated_types", default-features = false }
mutable_batch_lp = { path = "../mutable_batch_lp", optional = true }
mutable_batch_pb = { path = "../mutable_batch_pb", optional = true }
# Crates.io dependencies, in alphabetical order
arrow = { version = "13", optional = true }
arrow-flight = { version = "13", optional = true }
bytes = "1.0"
futures-util = { version = "0.3", optional = true }
dml = { path = "../dml", optional = true }
mutable_batch = { path = "../mutable_batch", optional = true }
mutable_batch_lp = { path = "../mutable_batch_lp", optional = true }
mutable_batch_pb = { path = "../mutable_batch_pb", optional = true }
prost = "0.10"
rand = "0.8.3"
serde = "1.0.137"
serde_json = { version = "1.0.81", optional = true }
thiserror = "1.0.31"
tonic = { version = "0.7" }
uuid = { version = "0.8", features = ["v4"] }
[dev-dependencies] # In alphabetical order
serde_json = "1.0"
tokio = { version = "1.18", features = ["macros", "parking_lot", "rt-multi-thread"] }

View File

@ -10,26 +10,22 @@ arrow-flight = "13"
arrow_util = { path = "../arrow_util" }
async-trait = "0.1.53"
backoff = { path = "../backoff" }
base64 = "0.13"
bytes = "1.0"
datafusion = { path = "../datafusion" }
datafusion_util = { path = "../datafusion_util" }
data_types = { path = "../data_types" }
data_types2 = { path = "../data_types2" }
futures = "0.3"
generated_types = { path = "../generated_types" }
chrono = { version = "0.4", default-features = false }
dml = { path = "../dml" }
hyper = "0.14"
iox_catalog = { path = "../iox_catalog" }
iox_object_store = { path = "../iox_object_store" }
metric = { path = "../metric" }
mutable_batch = { path = "../mutable_batch"}
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet = "13"
parquet_file = { path = "../parquet_file" }
pin-project = "1.0"
predicate = { path = "../predicate" }

View File

@ -2,7 +2,7 @@
use crate::data::{PersistingBatch, QueryableBatch};
use arrow::record_batch::RecordBatch;
use data_types2::{NamespaceId, PartitionInfo};
use data_types::{NamespaceId, PartitionInfo};
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
use iox_catalog::interface::INITIAL_COMPACTION_LEVEL;
use iox_time::{Time, TimeProvider};
@ -178,7 +178,7 @@ mod tests {
make_persisting_batch, make_queryable_batch, make_queryable_batch_with_deletes,
};
use arrow_util::assert_batches_eq;
use data_types2::{Partition, PartitionId, SequencerId, TableId};
use data_types::{Partition, PartitionId, SequencerId, TableId};
use iox_time::SystemProvider;
use mutable_batch_lp::lines_to_batches;
use schema::selection::Selection;

View File

@ -10,7 +10,7 @@ use crate::{
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
use backoff::{Backoff, BackoffConfig};
use data_types2::{
use data_types::{
DeletePredicate, KafkaPartition, NamespaceId, PartitionId, PartitionInfo, SequenceNumber,
SequencerId, TableId, Timestamp, Tombstone,
};
@ -312,7 +312,11 @@ impl Persister for IngesterData {
// save the compacted data to a parquet file in object storage
let file_size_and_md = Backoff::new(&self.backoff_config)
.retry_all_errors("persist to object store", || {
persist(&iox_meta, record_batches.to_vec(), &self.object_store)
persist(
&iox_meta,
record_batches.to_vec(),
Arc::clone(&self.object_store),
)
})
.await
.expect("retry forever");
@ -1515,7 +1519,7 @@ mod tests {
};
use arrow_util::assert_batches_sorted_eq;
use assert_matches::assert_matches;
use data_types2::{
use data_types::{
NamespaceSchema, NonEmptyString, ParquetFileParams, Sequence, TimestampRange,
};
use dml::{DmlDelete, DmlMeta, DmlWrite};

View File

@ -13,13 +13,13 @@ use crate::{
};
use async_trait::async_trait;
use backoff::BackoffConfig;
use data_types2::{IngesterQueryRequest, KafkaPartition, KafkaTopic, Sequencer};
use data_types::{KafkaPartition, KafkaTopic, Sequencer};
use futures::{
future::{BoxFuture, Shared},
stream::FuturesUnordered,
FutureExt, StreamExt, TryFutureExt,
};
use generated_types::ingester::IngesterQueryRequest;
use iox_catalog::interface::Catalog;
use iox_time::SystemProvider;
use object_store::DynObjectStore;
@ -305,7 +305,7 @@ impl Drop for IngestHandlerImpl {
#[cfg(test)]
mod tests {
use super::*;
use data_types2::{Namespace, NamespaceSchema, QueryPool, Sequence, SequenceNumber};
use data_types::{Namespace, NamespaceSchema, QueryPool, Sequence, SequenceNumber};
use dml::{DmlMeta, DmlWrite};
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
use iox_time::Time;

View File

@ -1,4 +1,4 @@
use data_types2::PartitionId;
use data_types::PartitionId;
use iox_time::TimeProvider;
use parking_lot::Mutex;
use std::sync::Arc;

View File

@ -10,7 +10,7 @@ use crate::{
job::{Job, JobRegistry},
poison::{PoisonCabinet, PoisonPill},
};
use data_types2::{PartitionId, SequenceNumber, SequencerId};
use data_types::{PartitionId, SequenceNumber, SequencerId};
use iox_time::{Time, TimeProvider};
use metric::{Metric, U64Counter};
use observability_deps::tracing::{error, info};

View File

@ -2,11 +2,13 @@
use arrow::record_batch::RecordBatch;
use bytes::Bytes;
use iox_object_store::ParquetFilePath;
use object_store::DynObjectStore;
use parquet_file::metadata::{IoxMetadata, IoxParquetMetaData};
use parquet_file::{
metadata::{IoxMetadata, IoxParquetMetaData},
ParquetFilePath,
};
use snafu::{ResultExt, Snafu};
use std::sync::Arc;
use std::{ops::Deref, sync::Arc};
#[derive(Debug, Snafu)]
#[allow(missing_docs)]
@ -29,7 +31,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
pub async fn persist(
metadata: &IoxMetadata,
record_batches: Vec<RecordBatch>,
object_store: &Arc<DynObjectStore>,
object_store: Arc<DynObjectStore>,
) -> Result<Option<(usize, IoxParquetMetaData)>> {
if record_batches.is_empty() {
return Ok(None);
@ -39,16 +41,7 @@ pub async fn persist(
.expect("record_batches.is_empty was just checked")
.schema();
// Make a fake IOx object store to conform to the parquet file
// interface, but note this isn't actually used to find parquet
// paths to write to
use iox_object_store::IoxObjectStore;
let iox_object_store = Arc::new(IoxObjectStore::existing(
Arc::clone(object_store),
IoxObjectStore::root_path_for(&**object_store, uuid::Uuid::new_v4()),
));
let data = parquet_file::storage::Storage::new(Arc::clone(&iox_object_store))
let data = parquet_file::storage::Storage::new(Arc::clone(&object_store))
.parquet_bytes(record_batches, schema, metadata)
.await
.context(ConvertingToBytesSnafu)?;
@ -67,7 +60,7 @@ pub async fn persist(
let file_size = data.len();
let bytes = Bytes::from(data);
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
metadata.namespace_id,
metadata.table_id,
metadata.sequencer_id,
@ -75,8 +68,10 @@ pub async fn persist(
metadata.object_store_id,
);
iox_object_store
.put_parquet_file(&path, bytes)
let path = path.object_store_path(object_store.deref());
object_store
.put(&path, bytes)
.await
.context(WritingToObjectStoreSnafu)?;
@ -86,7 +81,7 @@ pub async fn persist(
#[cfg(test)]
mod tests {
use super::*;
use data_types2::{NamespaceId, PartitionId, SequenceNumber, SequencerId, TableId};
use data_types::{NamespaceId, PartitionId, SequenceNumber, SequencerId, TableId};
use iox_catalog::interface::INITIAL_COMPACTION_LEVEL;
use iox_time::Time;
use object_store::{ObjectStoreImpl, ObjectStoreTestConvenience};
@ -124,7 +119,9 @@ mod tests {
};
let object_store = object_store();
persist(&metadata, vec![], &object_store).await.unwrap();
persist(&metadata, vec![], Arc::clone(&object_store))
.await
.unwrap();
assert!(object_store.list_all().await.unwrap().is_empty());
}
@ -163,7 +160,9 @@ mod tests {
let object_store = object_store();
persist(&metadata, batches, &object_store).await.unwrap();
persist(&metadata, batches, Arc::clone(&object_store))
.await
.unwrap();
let obj_store_paths = object_store.list_all().await.unwrap();
assert_eq!(obj_store_paths.len(), 1);

View File

@ -1,4 +1,4 @@
use data_types2::KafkaPartition;
use data_types::KafkaPartition;
use futures::Future;
use parking_lot::{RwLock, RwLockUpgradableReadGuard};
use pin_project::pin_project;

View File

@ -5,7 +5,6 @@ use crate::data::{
};
use arrow::record_batch::RecordBatch;
use arrow_util::util::merge_record_batches;
use data_types2::IngesterQueryRequest;
use datafusion::{
error::DataFusionError,
physical_plan::{
@ -14,6 +13,7 @@ use datafusion::{
SendableRecordBatchStream,
},
};
use generated_types::ingester::IngesterQueryRequest;
use predicate::Predicate;
use query::{
exec::{Executor, ExecutorType},
@ -322,7 +322,7 @@ mod tests {
};
use arrow_util::{assert_batches_eq, assert_batches_sorted_eq};
use assert_matches::assert_matches;
use data_types2::PartitionId;
use data_types::PartitionId;
use datafusion::logical_plan::{col, lit};
use predicate::PredicateBuilder;

View File

@ -3,10 +3,9 @@
use crate::data::{QueryableBatch, SnapshotBatch};
use arrow::record_batch::RecordBatch;
use arrow_util::util::merge_record_batches;
use data_types::timestamp::TimestampMinMax;
use data_types2::{
tombstones_to_delete_predicates, tombstones_to_delete_predicates_iter, ChunkAddr, ChunkId,
ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary, Tombstone,
use data_types::{
ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary,
TimestampMinMax, Tombstone,
};
use datafusion::{
logical_plan::ExprRewritable,
@ -18,7 +17,10 @@ use datafusion::{
};
use datafusion_util::batch_filter;
use observability_deps::tracing::{debug, trace};
use predicate::{Predicate, PredicateMatch};
use predicate::{
delete_predicate::{tombstones_to_delete_predicates, tombstones_to_delete_predicates_iter},
Predicate, PredicateMatch,
};
use query::{
exec::{stringset::StringSet, IOxSessionContext},
util::{df_physical_expr_from_schema_and_expr, MissingColumnsToNull},
@ -311,7 +313,7 @@ mod tests {
datatypes::{DataType, Int32Type, TimeUnit},
};
use arrow_util::assert_batches_eq;
use data_types2::{DeleteExpr, Op, Scalar, TimestampRange};
use data_types::{DeleteExpr, Op, Scalar, TimestampRange};
use datafusion::logical_plan::{col, lit};
use predicate::PredicateBuilder;

View File

@ -5,7 +5,7 @@ use hyper::{Body, Request, Response, StatusCode};
use std::sync::Arc;
use thiserror::Error;
/// Errors returned by the `router2` HTTP request handler.
/// Errors returned by the `router` HTTP request handler.
#[derive(Debug, Error, Copy, Clone)]
pub enum Error {
/// The requested path has no registered handler.

View File

@ -1,18 +1,15 @@
use std::{fmt::Debug, time::Duration};
use data_types2::KafkaPartition;
use super::DmlSink;
use crate::lifecycle::{LifecycleHandle, LifecycleHandleImpl};
use data_types::KafkaPartition;
use dml::DmlOperation;
use futures::{pin_mut, FutureExt, Stream, StreamExt};
use iox_time::{SystemProvider, TimeProvider};
use metric::{Attributes, U64Counter, U64Gauge};
use observability_deps::tracing::*;
use std::{fmt::Debug, time::Duration};
use tokio_util::sync::CancellationToken;
use write_buffer::core::{WriteBufferError, WriteBufferErrorKind};
use crate::lifecycle::{LifecycleHandle, LifecycleHandleImpl};
use super::DmlSink;
/// When the [`LifecycleManager`] indicates that ingest should be paused because
/// of memory pressure, the sequencer will loop, sleeping this long between
/// calls to [`LifecycleHandle::can_resume_ingest()`] with the manager if it
@ -378,7 +375,7 @@ mod tests {
stream_handler::mock_sink::MockDmlSink,
};
use assert_matches::assert_matches;
use data_types2::{DeletePredicate, Sequence, TimestampRange};
use data_types::{DeletePredicate, Sequence, TimestampRange};
use dml::{DmlDelete, DmlMeta, DmlWrite};
use futures::stream;
use iox_time::{SystemProvider, Time};
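The doc comment in this file describes the back-pressure behaviour: when the lifecycle manager pauses ingest, the sequencer loops and sleeps between calls to `can_resume_ingest()`. The real handler is async; this self-contained sketch only illustrates that control flow, with a stand-in trait rather than the crate's `LifecycleHandle`:

use std::time::Duration;

/// Stand-in for the lifecycle manager's pause/resume signal.
trait ResumeCheck {
    fn can_resume_ingest(&self) -> bool;
}

/// Block until ingest may resume, sleeping `backoff` between polls.
fn wait_until_resumable(handle: &impl ResumeCheck, backoff: Duration) {
    while !handle.can_resume_ingest() {
        std::thread::sleep(backoff);
    }
}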

View File

@ -1,3 +1,7 @@
use super::sink_instrumentation::WatermarkFetcher;
use data_types::KafkaPartition;
use metric::U64Counter;
use observability_deps::tracing::*;
use std::{
sync::{
atomic::{AtomicU64, Ordering},
@ -5,15 +9,9 @@ use std::{
},
time::{Duration, Instant},
};
use data_types2::KafkaPartition;
use metric::U64Counter;
use observability_deps::tracing::*;
use tokio::task::JoinHandle;
use write_buffer::core::WriteBufferReading;
use super::sink_instrumentation::WatermarkFetcher;
/// Periodically fetch and cache the maximum known write buffer offset
/// (watermark) from the write buffer for a given sequencer.
///
@ -187,7 +185,7 @@ impl WatermarkFetcher for PeriodicWatermarkFetcher {
#[cfg(test)]
mod tests {
use data_types2::Sequence;
use data_types::Sequence;
use metric::{Attributes, Metric};
use test_helpers::timeout::FutureTimeout;
use write_buffer::mock::{

View File

@ -1,14 +1,11 @@
//! Compatibility layer providing a [`DmlSink`] impl for [`IngesterData`].
use std::sync::Arc;
use async_trait::async_trait;
use data_types2::SequencerId;
use dml::DmlOperation;
use crate::{data::IngesterData, lifecycle::LifecycleHandleImpl};
use super::DmlSink;
use crate::{data::IngesterData, lifecycle::LifecycleHandleImpl};
use async_trait::async_trait;
use data_types::SequencerId;
use dml::DmlOperation;
use std::sync::Arc;
/// Provides a [`DmlSink`] implementation for a [`IngesterData`] instance.
#[derive(Debug)]

View File

@ -1,15 +1,14 @@
//! Instrumentation for [`DmlSink`] implementations.
use std::fmt::Debug;
use super::DmlSink;
use async_trait::async_trait;
use data_types2::KafkaPartition;
use data_types::KafkaPartition;
use dml::DmlOperation;
use iox_time::{SystemProvider, TimeProvider};
use metric::{Attributes, U64Counter, U64Gauge, U64Histogram, U64HistogramOptions};
use std::fmt::Debug;
use trace::span::SpanRecorder;
use super::DmlSink;
/// A [`WatermarkFetcher`] abstracts a source of the write buffer high watermark
/// (max known offset).
///
@ -255,7 +254,7 @@ mod tests {
use std::sync::Arc;
use assert_matches::assert_matches;
use data_types2::Sequence;
use data_types::Sequence;
use dml::{DmlMeta, DmlWrite};
use iox_time::Time;
use metric::{Metric, MetricObserver, Observation};

View File

@ -1,4 +1,5 @@
//! Test setups and data for the ingester crate
#![allow(missing_docs)]
use crate::{
@ -11,7 +12,7 @@ use crate::{
use arrow::record_batch::RecordBatch;
use arrow_util::assert_batches_eq;
use bitflags::bitflags;
use data_types2::{
use data_types::{
KafkaPartition, NamespaceId, PartitionId, SequenceNumber, SequencerId, TableId, Timestamp,
Tombstone, TombstoneId,
};

View File

@ -5,22 +5,22 @@ authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2021"
[dependencies] # In alphabetical order
assert_matches = "1.5.0"
async-trait = "0.1.53"
data_types2 = { path = "../data_types2" }
data_types = { path = "../data_types" }
futures = "0.3"
iox_time = { version = "0.1.0", path = "../iox_time" }
metric = { version = "0.1.0", path = "../metric" }
mutable_batch = { path = "../mutable_batch" }
observability_deps = { path = "../observability_deps" }
snafu = "0.7"
sqlx = { version = "0.5", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
iox_time = { version = "0.1.0", path = "../iox_time" }
tokio = { version = "1.18", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
assert_matches = "1.5.0"
dotenv = "0.15.0"
mutable_batch_lp = { path = "../mutable_batch_lp" }
paste = "1.0.7"
@ -28,5 +28,3 @@ pretty_assertions = "1.2.1"
rand = "0.8"
tempfile = "3"
test_helpers = { path = "../test_helpers" }
[features]

View File

@ -1,7 +1,7 @@
//! This module contains the traits and data objects for the Catalog API.
use async_trait::async_trait;
use data_types2::{
use data_types::{
Column, ColumnSchema, ColumnType, KafkaPartition, KafkaTopic, KafkaTopicId, Namespace,
NamespaceId, NamespaceSchema, ParquetFile, ParquetFileId, ParquetFileParams,
ParquetFileWithMetadata, Partition, PartitionId, PartitionInfo, ProcessedTombstone, QueryPool,
@ -773,7 +773,7 @@ pub(crate) mod test_helpers {
use super::*;
use ::test_helpers::{assert_contains, tracing::TracingCapture};
use data_types2::ColumnId;
use data_types::ColumnId;
use metric::{Attributes, Metric, U64Histogram};
use std::{
ops::{Add, DerefMut},

View File

@ -11,12 +11,11 @@
clippy::clone_on_ref_ptr
)]
use crate::interface::{Error, Result, Transaction};
use data_types2::{
use crate::interface::{ColumnUpsertRequest, Error, RepoCollection, Result, Transaction};
use data_types::{
ColumnType, KafkaPartition, KafkaTopic, NamespaceSchema, QueryPool, Sequencer, SequencerId,
TableSchema,
};
use interface::{ColumnUpsertRequest, RepoCollection};
use mutable_batch::MutableBatch;
use std::{borrow::Cow, collections::BTreeMap};
@ -125,7 +124,7 @@ where
// If it does, validate it. If it does not exist, create it and insert
// it into the cached schema.
match table.columns.get(name.as_str()) {
Some(existing) if existing.matches_type(col) => {
Some(existing) if existing.matches_type(col.influx_type()) => {
// No action is needed as the column matches the existing column
// schema.
}

View File

@ -11,7 +11,7 @@ use crate::{
metrics::MetricDecorator,
};
use async_trait::async_trait;
use data_types2::{
use data_types::{
Column, ColumnId, ColumnType, KafkaPartition, KafkaTopic, KafkaTopicId, Namespace, NamespaceId,
ParquetFile, ParquetFileId, ParquetFileParams, ParquetFileWithMetadata, Partition, PartitionId,
PartitionInfo, ProcessedTombstone, QueryPool, QueryPoolId, SequenceNumber, Sequencer,

View File

@ -6,7 +6,7 @@ use crate::interface::{
SequencerRepo, TablePersistInfo, TableRepo, TombstoneRepo,
};
use async_trait::async_trait;
use data_types2::{
use data_types::{
Column, ColumnType, KafkaPartition, KafkaTopic, KafkaTopicId, Namespace, NamespaceId,
ParquetFile, ParquetFileId, ParquetFileParams, ParquetFileWithMetadata, Partition, PartitionId,
PartitionInfo, ProcessedTombstone, QueryPool, QueryPoolId, SequenceNumber, Sequencer,

View File

@ -10,7 +10,7 @@ use crate::{
metrics::MetricDecorator,
};
use async_trait::async_trait;
use data_types2::{
use data_types::{
Column, ColumnType, KafkaPartition, KafkaTopic, KafkaTopicId, Namespace, NamespaceId,
ParquetFile, ParquetFileId, ParquetFileParams, ParquetFileWithMetadata, Partition, PartitionId,
PartitionInfo, ProcessedTombstone, QueryPool, QueryPoolId, SequenceNumber, Sequencer,
@ -18,8 +18,9 @@ use data_types2::{
};
use iox_time::{SystemProvider, TimeProvider};
use observability_deps::tracing::{info, warn};
use sqlx::types::Uuid;
use sqlx::{migrate::Migrator, postgres::PgPoolOptions, Acquire, Executor, Postgres, Row};
use sqlx::{
migrate::Migrator, postgres::PgPoolOptions, types::Uuid, Acquire, Executor, Postgres, Row,
};
use sqlx_hotswap_pool::HotSwapPool;
use std::{sync::Arc, time::Duration};

View File

@ -3,20 +3,13 @@ name = "iox_catalog_service"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "0.1"
data_types2 = { path = "../data_types2" }
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }
iox_catalog = { path = "../iox_catalog" }
observability_deps = { path = "../observability_deps" }
serde = "1.0"
serde_urlencoded = "0.7"
iox_time = { path = "../iox_time" }
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
tonic = "0.7"
trace = { path = "../trace/" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]

View File

@ -1,4 +1,4 @@
//! gRPC service for the Catalog. Used in router2, but can be included in any gRPC server.
//! gRPC service for the Catalog. Used in router, but can be included in any gRPC server.
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
#![warn(
@ -11,7 +11,7 @@
clippy::clone_on_ref_ptr
)]
use data_types2::{PartitionId, TableId};
use data_types::{PartitionId, TableId};
use generated_types::influxdata::iox::catalog::v1::*;
use iox_catalog::interface::Catalog;
use observability_deps::tracing::*;
@ -81,7 +81,7 @@ impl catalog_service_server::CatalogService for CatalogService {
}
// converts the catalog ParquetFile to protobuf
fn to_parquet_file(p: data_types2::ParquetFile) -> ParquetFile {
fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile {
ParquetFile {
id: p.id.get(),
sequencer_id: p.sequencer_id.get(),
@ -102,7 +102,7 @@ fn to_parquet_file(p: data_types2::ParquetFile) -> ParquetFile {
}
// converts the catalog Partition to protobuf
fn to_partition(p: data_types2::Partition) -> Partition {
fn to_partition(p: data_types::Partition) -> Partition {
Partition {
id: p.id.get(),
sequencer_id: p.sequencer_id.get(),
@ -115,7 +115,7 @@ fn to_partition(p: data_types2::Partition) -> Partition {
#[cfg(test)]
mod tests {
use super::*;
use data_types2::{KafkaPartition, ParquetFileParams, SequenceNumber, Timestamp};
use data_types::{KafkaPartition, ParquetFileParams, SequenceNumber, Timestamp};
use generated_types::influxdata::iox::catalog::v1::catalog_service_server::CatalogService;
use iox_catalog::mem::MemCatalog;
use uuid::Uuid;

View File

@ -12,9 +12,7 @@ clap = { version = "3", features = ["cargo"] }
futures = "0.3"
handlebars = "4.2.2"
humantime = "2.1.0"
data_types = { path = "../data_types" }
influxdb2_client = { path = "../influxdb2_client" }
influxdb_iox_client = { path = "../influxdb_iox_client" }
itertools = "0.10.0"
rand = { version = "0.8.3", features = ["small_rng"] }
regex = "1.5"

View File

@ -23,22 +23,15 @@ k8s-openapi = { version = "0.14", features = ["v1_19", "schemars"], default-feat
kube = { version = "0.71", default-features = false, features = ["client", "rustls-tls", "derive"] }
kube-derive = { version = "0.71", default-features = false } # only needed to opt out of schema
kube-runtime = "0.71"
prost = "0.10"
schemars = { version = "0.8.8", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
tokio = { version = "1.18", features = ["rt-multi-thread", "macros", "parking_lot"] }
tonic = "0.7"
tracing = { version = "0.1" }
workspace-hack = { path = "../workspace-hack"}
trogging = { path = "../trogging", default-features = false, features = ["clap"] }
[build-dependencies]
glob = "0.3.0"
pbjson-build = "0.3"
tonic-build = "0.7"
[dev-dependencies]
assert_matches = "1.5"
parking_lot = { version = "0.11.1" }

View File

@ -1,21 +0,0 @@
[package]
name = "iox_object_store"
version = "0.1.0"
edition = "2021"
description = "IOx-specific semantics wrapping the general-purpose object store crate"
[dependencies]
bytes = "1.0"
data_types = { path = "../data_types" }
data_types2 = { path = "../data_types2" }
futures = "0.3"
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
snafu = "0.7"
tokio = { version = "1.18", features = ["macros", "parking_lot", "sync", "rt"] }
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }

View File

@ -1,859 +0,0 @@
//! Wraps the object_store crate with IOx-specific semantics. The main responsibility of this crate
//! is to be the single source of truth for the paths of files in object storage. There is a
//! specific path type for each IOx-specific reason an object storage file exists. Content of the
//! files is managed outside of this crate.
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
clippy::explicit_iter_loop,
clippy::future_not_send,
clippy::use_self,
clippy::clone_on_ref_ptr
)]
use bytes::Bytes;
use data_types::server_id::ServerId;
use futures::{stream::BoxStream, StreamExt, TryStreamExt};
use object_store::{path::Path, DynObjectStore, GetResult, Result};
use observability_deps::tracing::warn;
use snafu::{ensure, ResultExt, Snafu};
use std::{ops::Deref, sync::Arc};
use tokio::sync::mpsc::channel;
use tokio_stream::wrappers::ReceiverStream;
use uuid::Uuid;
mod paths;
pub use paths::{
parquet_file::{ParquetFilePath, ParquetFilePathParseError},
transaction_file::TransactionFilePath,
};
use paths::{DataPath, RootPath, TransactionsPath};
#[derive(Debug, Snafu)]
#[allow(missing_docs)]
pub enum IoxObjectStoreError {
#[snafu(display("{}", source))]
UnderlyingObjectStoreError { source: object_store::Error },
#[snafu(display("Cannot create database with UUID `{}`; it already exists", uuid))]
DatabaseAlreadyExists { uuid: Uuid },
#[snafu(display("No rules found to load at {}", root_path))]
NoRulesFound { root_path: RootPath },
}
/// Handles persistence of data for a particular database. Writes within its directory/prefix.
///
/// This wrapper on top of an `ObjectStore` maps IOx-specific concepts to `ObjectStore` locations.
#[derive(Debug)]
pub struct IoxObjectStore {
inner: Arc<DynObjectStore>,
root_path: RootPath,
data_path: DataPath,
transactions_path: TransactionsPath,
}
impl IoxObjectStore {
/// Get the data for the server config to determine the names and locations of the databases
/// that this server owns.
///
/// TEMPORARY: Server config used to be at the top level instead of beneath `/nodes/`. Until
/// all deployments have transitioned, check both locations before reporting that the server
/// config is not found.
pub async fn get_server_config_file(
inner: &DynObjectStore,
server_id: ServerId,
) -> Result<Bytes> {
let path = paths::server_config_path(inner, server_id);
let result = match inner.get(&path).await {
Err(object_store::Error::NotFound { .. }) => {
use object_store::path::ObjectStorePath;
let mut legacy_path = inner.new_path();
legacy_path.push_dir(server_id.to_string());
legacy_path.set_file_name(paths::SERVER_CONFIG_FILE_NAME);
inner.get(&legacy_path).await
}
other => other,
}?;
Ok(result.bytes().await?.into())
}
/// Store the data for the server config with the names and locations of the databases
/// that this server owns.
pub async fn put_server_config_file(
inner: &DynObjectStore,
server_id: ServerId,
bytes: Bytes,
) -> Result<()> {
let path = paths::server_config_path(inner, server_id);
inner.put(&path, bytes).await
}
/// Return the path to the server config file to be used in database ownership information to
/// identify the current server that a database thinks is its owner.
pub fn server_config_path(inner: &DynObjectStore, server_id: ServerId) -> Path {
paths::server_config_path(inner, server_id)
}
/// Returns what the root path would be for a given database. Does not check existence or
/// validity of the path in object storage.
pub fn root_path_for(inner: &DynObjectStore, uuid: Uuid) -> RootPath {
RootPath::new(inner, uuid)
}
/// Create a database-specific wrapper. Takes all the information needed to create a new
/// root directory of a database. Checks that there isn't already anything in this database's
/// directory in object storage.
///
/// Caller *MUST* ensure there is at most 1 concurrent call of this function with the same
/// parameters; this function does *NOT* do any locking.
pub async fn create(
inner: Arc<DynObjectStore>,
uuid: Uuid,
) -> Result<Self, IoxObjectStoreError> {
let root_path = Self::root_path_for(&*inner, uuid);
let list_result = inner
.list_with_delimiter(&root_path.inner)
.await
.context(UnderlyingObjectStoreSnafu)?;
ensure!(
list_result.objects.is_empty(),
DatabaseAlreadyExistsSnafu { uuid }
);
Ok(Self::existing(inner, root_path))
}
/// Look in object storage for an existing, active database with this UUID.
pub async fn load(inner: Arc<DynObjectStore>, uuid: Uuid) -> Result<Self, IoxObjectStoreError> {
let root_path = Self::root_path_for(&*inner, uuid);
Self::find(inner, root_path).await
}
/// Look in object storage for an existing database with this name and the given root path
/// that was retrieved from a server config
pub async fn load_at_root_path(
inner: Arc<DynObjectStore>,
root_path_str: &str,
) -> Result<Self, IoxObjectStoreError> {
let root_path = RootPath::from_str(&*inner, root_path_str);
Self::find(inner, root_path).await
}
async fn find(
inner: Arc<DynObjectStore>,
root_path: RootPath,
) -> Result<Self, IoxObjectStoreError> {
let list_result = inner
.list_with_delimiter(&root_path.inner)
.await
.context(UnderlyingObjectStoreSnafu)?;
let rules_file = root_path.rules_path();
let rules_exists = list_result
.objects
.iter()
.any(|object| object.location == rules_file.inner);
ensure!(rules_exists, NoRulesFoundSnafu { root_path });
Ok(Self::existing(inner, root_path))
}
/// Access the database-specific object storage files for an existing database that has
/// already been located and verified to be active. Does not check object storage.
pub fn existing(inner: Arc<DynObjectStore>, root_path: RootPath) -> Self {
let data_path = root_path.data_path();
let transactions_path = root_path.transactions_path();
Self {
inner,
root_path,
data_path,
transactions_path,
}
}
/// In the database's root directory, write out a file pointing to the server's config. This
/// data can serve as an extra check on which server owns this database.
pub async fn put_owner_file(&self, bytes: Bytes) -> Result<()> {
let owner_path = self.root_path.owner_path();
self.inner.put(&owner_path, bytes).await
}
/// Return the contents of the owner file in the database's root directory that provides
/// information on the server that owns this database.
pub async fn get_owner_file(&self) -> Result<Bytes> {
let owner_path = self.root_path.owner_path();
Ok(self.inner.get(&owner_path).await?.bytes().await?.into())
}
/// Delete owner file for testing
pub async fn delete_owner_file_for_testing(&self) -> Result<()> {
let owner_path = self.root_path.owner_path();
self.inner.delete(&owner_path).await
}
/// The location in object storage for all files for this database, suitable for logging or
/// debugging purposes only. Do not parse this, as its format is subject to change!
pub fn debug_database_path(&self) -> String {
self.root_path.inner.to_string()
}
/// The possibly valid location in object storage for this database. Suitable for serializing
/// for use during initial database load, but not for parsing into semantic components, as its
/// format is subject to change!
pub fn root_path(&self) -> String {
self.root_path.to_string()
}
// Catalog transaction file methods ===========================================================
/// List all the catalog transaction files in object storage for this database.
pub async fn catalog_transaction_files(
&self,
) -> Result<BoxStream<'static, Result<Vec<TransactionFilePath>>>> {
Ok(self
.list(Some(&self.transactions_path.inner))
.await?
.map_ok(move |list| {
list.into_iter()
// This `flat_map` ignores any filename in the transactions_path we couldn't
// parse as a TransactionFilePath
.flat_map(TransactionFilePath::from_absolute)
.collect()
})
.boxed())
}
/// Get the catalog transaction data in this relative path in this database's object store.
pub async fn get_catalog_transaction_file(
&self,
location: &TransactionFilePath,
) -> Result<GetResult<object_store::Error>> {
let full_path = self.transactions_path.join(location);
self.inner.get(&full_path).await
}
/// Store the catalog transaction data in this relative path in this database's object store.
pub async fn put_catalog_transaction_file(
&self,
location: &TransactionFilePath,
bytes: Bytes,
) -> Result<()> {
let full_path = self.transactions_path.join(location);
self.inner.put(&full_path, bytes).await
}
/// Delete all catalog transaction files for this database.
pub async fn wipe_catalog(&self) -> Result<()> {
let mut stream = self.catalog_transaction_files().await?;
while let Some(transaction_file_list) = stream.try_next().await? {
for transaction_file_path in &transaction_file_list {
self.delete_catalog_transaction_file(transaction_file_path)
.await?;
}
}
Ok(())
}
/// Remove the data for this catalog transaction file from this database's object store
pub async fn delete_catalog_transaction_file(
&self,
location: &TransactionFilePath,
) -> Result<()> {
let full_path = self.transactions_path.join(location);
self.inner.delete(&full_path).await
}
// Parquet file methods =======================================================================
/// List all parquet file paths in object storage for this database.
pub async fn parquet_files(&self) -> Result<BoxStream<'static, Result<Vec<ParquetFilePath>>>> {
Ok(self
.list(Some(&self.data_path.inner))
.await?
.map_ok(move |list| {
list.into_iter()
// This `flat_map` ignores any filename in the data_path we couldn't parse as
// a ParquetFilePath
.flat_map(ParquetFilePath::from_absolute)
.collect()
})
.boxed())
}
/// Get the parquet file data in this relative path in this database's object store.
pub async fn get_parquet_file(
&self,
location: &ParquetFilePath,
) -> Result<GetResult<object_store::Error>> {
self.inner.get(&self.full_parquet_path(location)).await
}
/// Store the data for this parquet file in this database's object store.
pub async fn put_parquet_file(&self, location: &ParquetFilePath, bytes: Bytes) -> Result<()> {
self.inner
.put(&self.full_parquet_path(location), bytes)
.await
}
/// Remove the data for this parquet file from this database's object store
pub async fn delete_parquet_file(&self, location: &ParquetFilePath) -> Result<()> {
self.inner.delete(&self.full_parquet_path(location)).await
}
fn full_parquet_path(&self, location: &ParquetFilePath) -> Path {
if location.is_new_gen() {
self.inner
.deref()
.path_from_dirs_and_filename(location.absolute_dirs_and_file_name())
} else {
self.data_path.join(location)
}
}
// Database rule file methods =================================================================
// Deliberately private; this should not leak outside this crate
// so assumptions about the object store organization are confined
// (and can be changed) in this crate
fn db_rules_path(&self) -> Path {
self.root_path.rules_path().inner
}
/// Get the data for the database rules
pub async fn get_database_rules_file(&self) -> Result<Bytes> {
let path = &self.db_rules_path();
Ok(self.inner.get(path).await?.bytes().await?.into())
}
/// Return the database rules file content without creating an IoxObjectStore instance. Useful
/// when restoring a database given a UUID to check existence of the specified database and
/// get information such as the database name from the rules before proceeding with restoring
/// and initializing the database.
pub async fn load_database_rules(inner: Arc<DynObjectStore>, uuid: Uuid) -> Result<Bytes> {
let root_path = Self::root_path_for(&*inner, uuid);
let db_rules_path = root_path.rules_path().inner;
Ok(inner.get(&db_rules_path).await?.bytes().await?.into())
}
/// Store the data for the database rules
pub async fn put_database_rules_file(&self, bytes: Bytes) -> Result<()> {
self.inner.put(&self.db_rules_path(), bytes).await
}
/// Delete the data for the database rules
pub async fn delete_database_rules_file(&self) -> Result<()> {
self.inner.delete(&self.db_rules_path()).await
}
/// List the relative paths in this database's object store.
///
/// All outside calls should go to one of the more specific listing methods.
// Deliberately private; this should not leak outside this crate
// so assumptions about the object store organization are confined
// (and can be changed) in this crate
async fn list(&self, prefix: Option<&Path>) -> Result<BoxStream<'static, Result<Vec<Path>>>> {
let (tx, rx) = channel(4);
let inner = Arc::clone(&self.inner);
let prefix = prefix.cloned();
// This indirection is necessary because of the lifetime restrictions on the ObjectStoreApi
// trait's methods. Those restrictions might not actually be necessary, but removing them
// involves longer-term changes to the cloud_storage crate.
tokio::spawn(async move {
match inner.list(prefix.as_ref()).await {
Err(e) => {
let _ = tx.send(Err(e)).await;
}
Ok(mut stream) => {
while let Some(list) = stream.next().await {
let _ = tx.send(list).await;
}
}
}
});
Ok(ReceiverStream::new(rx).boxed())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::paths::ALL_DATABASES_DIRECTORY;
use data_types::chunk_metadata::{ChunkAddr, ChunkId};
use data_types2::{NamespaceId, PartitionId, SequencerId, TableId};
use object_store::{parsed_path, path::ObjectStorePath, ObjectStoreImpl};
use test_helpers::assert_error;
use uuid::Uuid;
/// Creates a new in-memory object store
fn make_object_store() -> Arc<DynObjectStore> {
Arc::new(ObjectStoreImpl::new_in_memory())
}
async fn add_file(object_store: &DynObjectStore, location: &Path) {
let data = Bytes::from("arbitrary data");
object_store.put(location, data).await.unwrap();
}
async fn parquet_files(iox_object_store: &IoxObjectStore) -> Vec<ParquetFilePath> {
iox_object_store
.parquet_files()
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap()
.into_iter()
.flatten()
.collect()
}
async fn add_parquet_file(iox_object_store: &IoxObjectStore, location: &ParquetFilePath) {
let data = Bytes::from("arbitrary data");
iox_object_store
.put_parquet_file(location, data)
.await
.unwrap();
}
#[tokio::test]
async fn only_lists_relevant_parquet_files() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let uuid_string = uuid.to_string();
let uuid_str = uuid_string.as_str();
let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
.await
.unwrap();
let parquet_uuid = Uuid::new_v4();
let good_filename = format!("111.{}.parquet", parquet_uuid);
let good_filename_str = good_filename.as_str();
// Put a non-database file in
let path = object_store.path_from_dirs_and_filename(parsed_path!(["foo"]));
add_file(&*object_store, &path).await;
// Put a file for some other server in
let path = object_store.path_from_dirs_and_filename(parsed_path!(["12345"]));
add_file(&*object_store, &path).await;
// Put a file for some other database in
let other_db_uuid = Uuid::new_v4().to_string();
let path = object_store.path_from_dirs_and_filename(parsed_path!([
ALL_DATABASES_DIRECTORY,
other_db_uuid.as_str()
]));
add_file(&*object_store, &path).await;
// Put a file in the database dir but not the data dir
let path = object_store.path_from_dirs_and_filename(parsed_path!(
[ALL_DATABASES_DIRECTORY, uuid_str],
good_filename_str
));
add_file(&*object_store, &path).await;
// Put files in the data dir whose names are in the wrong format
let mut path = object_store.path_from_dirs_and_filename(parsed_path!(
[ALL_DATABASES_DIRECTORY, uuid_str, "data"],
"111.parquet"
));
add_file(&*object_store, &path).await;
path.set_file_name(&format!("111.{}.xls", parquet_uuid));
add_file(&*object_store, &path).await;
// Parquet files should be empty
let pf = parquet_files(&iox_object_store).await;
assert!(pf.is_empty(), "{:?}", pf);
// Add a real parquet file
let chunk_addr = ChunkAddr {
db_name: "clouds".into(),
table_name: "my_table".into(),
partition_key: "my_partition".into(),
chunk_id: ChunkId::new_test(13),
};
let p1 = ParquetFilePath::new_old_gen(&chunk_addr);
add_parquet_file(&iox_object_store, &p1).await;
// Only the real file should be returned
let pf = parquet_files(&iox_object_store).await;
assert_eq!(&pf, &[p1]);
}
async fn catalog_transaction_files(
iox_object_store: &IoxObjectStore,
) -> Vec<TransactionFilePath> {
iox_object_store
.catalog_transaction_files()
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap()
.into_iter()
.flatten()
.collect()
}
async fn add_catalog_transaction_file(
iox_object_store: &IoxObjectStore,
location: &TransactionFilePath,
) {
let data = Bytes::from("arbitrary data");
iox_object_store
.put_catalog_transaction_file(location, data)
.await
.unwrap();
}
#[tokio::test]
async fn only_lists_relevant_catalog_transaction_files() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let uuid_string = uuid.to_string();
let uuid_str = uuid_string.as_str();
let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
.await
.unwrap();
let txn_uuid = Uuid::new_v4();
let good_txn_filename = format!("{}.txn", txn_uuid);
let good_txn_filename_str = good_txn_filename.as_str();
// Put a non-database file in
let path = object_store.path_from_dirs_and_filename(parsed_path!(["foo"]));
add_file(&*object_store, &path).await;
// Put a file in a directory other than the databases directory
let path = object_store.path_from_dirs_and_filename(parsed_path!(["12345"]));
add_file(&*object_store, &path).await;
// Put a file for some other database in
let other_db_uuid = Uuid::new_v4().to_string();
let path = object_store.path_from_dirs_and_filename(parsed_path!([
ALL_DATABASES_DIRECTORY,
other_db_uuid.as_str()
]));
add_file(&*object_store, &path).await;
// Put a file in the database dir but not the transactions dir
let path = object_store.path_from_dirs_and_filename(parsed_path!(
[ALL_DATABASES_DIRECTORY, uuid_str],
good_txn_filename_str
));
add_file(&*object_store, &path).await;
// Put files in the transactions dir whose names are in the wrong format
let mut path = object_store.path_from_dirs_and_filename(parsed_path!(
[ALL_DATABASES_DIRECTORY, uuid_str],
"111.parquet"
));
add_file(&*object_store, &path).await;
path.set_file_name(&format!("{}.xls", txn_uuid));
add_file(&*object_store, &path).await;
// Catalog transaction files should be empty
let ctf = catalog_transaction_files(&iox_object_store).await;
assert!(ctf.is_empty(), "{:?}", ctf);
// Add a real transaction file
let t1 = TransactionFilePath::new_transaction(123, txn_uuid);
add_catalog_transaction_file(&iox_object_store, &t1).await;
// Add a real checkpoint file
let t2 = TransactionFilePath::new_checkpoint(123, txn_uuid);
add_catalog_transaction_file(&iox_object_store, &t2).await;
// Only the real files should be returned
let ctf = catalog_transaction_files(&iox_object_store).await;
assert_eq!(ctf.len(), 2);
assert!(ctf.contains(&t1));
assert!(ctf.contains(&t2));
}
fn make_db_rules_path(object_store: &DynObjectStore, uuid: Uuid) -> Path {
let mut p = object_store.new_path();
p.push_all_dirs(&[ALL_DATABASES_DIRECTORY, uuid.to_string().as_str()]);
p.set_file_name("rules.pb");
p
}
#[tokio::test]
async fn db_rules_should_be_a_file() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let rules_path = make_db_rules_path(&*object_store, uuid);
let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
.await
.unwrap();
// PUT
let original_file_content = Bytes::from("hello world");
iox_object_store
.put_database_rules_file(original_file_content.clone())
.await
.unwrap();
let actual_content = object_store
.get(&rules_path)
.await
.unwrap()
.bytes()
.await
.unwrap();
assert_eq!(original_file_content, actual_content);
// GET
let updated_file_content = Bytes::from("goodbye moon");
let expected_content = updated_file_content.clone();
object_store
.put(&rules_path, updated_file_content)
.await
.unwrap();
let actual_content = iox_object_store.get_database_rules_file().await.unwrap();
assert_eq!(expected_content, actual_content);
// DELETE
iox_object_store.delete_database_rules_file().await.unwrap();
let file_count = object_store
.list(None)
.await
.unwrap()
.try_fold(0, |a, paths| async move { Ok(a + paths.len()) })
.await
.unwrap();
assert_eq!(file_count, 0);
}
fn make_owner_path(object_store: &DynObjectStore, uuid: Uuid) -> Path {
let mut p = object_store.new_path();
p.push_all_dirs(&[ALL_DATABASES_DIRECTORY, uuid.to_string().as_str()]);
p.set_file_name("owner.pb");
p
}
#[tokio::test]
async fn owner_should_be_a_file() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let owner_path = make_owner_path(&*object_store, uuid);
let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
.await
.unwrap();
// PUT
let original_file_content = Bytes::from("hello world");
iox_object_store
.put_owner_file(original_file_content.clone())
.await
.unwrap();
let actual_content = object_store
.get(&owner_path)
.await
.unwrap()
.bytes()
.await
.unwrap();
assert_eq!(original_file_content, actual_content);
// GET
let updated_file_content = Bytes::from("goodbye moon");
let expected_content = updated_file_content.clone();
object_store
.put(&owner_path, updated_file_content)
.await
.unwrap();
let actual_content = iox_object_store.get_owner_file().await.unwrap();
assert_eq!(expected_content, actual_content);
}
#[tokio::test]
async fn create_new_with_same_uuid_errors() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
.await
.unwrap();
iox_object_store
.put_database_rules_file(Bytes::new())
.await
.unwrap();
assert_error!(
IoxObjectStore::create(Arc::clone(&object_store), uuid).await,
IoxObjectStoreError::DatabaseAlreadyExists { uuid: err_uuid } if err_uuid == uuid,
);
}
#[tokio::test]
async fn create_new_with_any_files_under_uuid_errors() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let mut not_rules_path = object_store.new_path();
not_rules_path.push_all_dirs(&[ALL_DATABASES_DIRECTORY, uuid.to_string().as_str()]);
not_rules_path.set_file_name("not_rules.txt");
object_store
.put(&not_rules_path, Bytes::new())
.await
.unwrap();
assert_error!(
IoxObjectStore::create(Arc::clone(&object_store), uuid).await,
IoxObjectStoreError::DatabaseAlreadyExists { uuid: err_uuid } if err_uuid == uuid,
);
}
async fn create_database(object_store: Arc<DynObjectStore>, uuid: Uuid) -> IoxObjectStore {
let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
.await
.unwrap();
iox_object_store
.put_database_rules_file(Bytes::new())
.await
.unwrap();
iox_object_store
}
#[tokio::test]
async fn cant_read_rules_if_no_rules_exist() {
let object_store = make_object_store();
// Create a uuid but don't create a corresponding database
let db = Uuid::new_v4();
// This fails, there are no rules to read
assert_error!(
IoxObjectStore::load_database_rules(object_store, db).await,
object_store::Error::NotFound { .. },
);
}
#[tokio::test]
async fn test_load() {
let object_store = make_object_store();
// Load can't find nonexistent database
let nonexistent = Uuid::new_v4();
assert_error!(
IoxObjectStore::load(Arc::clone(&object_store), nonexistent).await,
IoxObjectStoreError::NoRulesFound { .. },
);
// Create a database
let db = Uuid::new_v4();
create_database(Arc::clone(&object_store), db).await;
// Load should return that database
let returned = IoxObjectStore::load(Arc::clone(&object_store), db)
.await
.unwrap();
assert_eq!(
returned.root_path(),
format!("{}/{}/", ALL_DATABASES_DIRECTORY, db)
);
}
#[tokio::test]
async fn round_trip_through_object_store_root_path() {
let object_store = make_object_store();
// Create a new iox object store that doesn't exist yet
let uuid = Uuid::new_v4();
let db_iox_store = create_database(Arc::clone(&object_store), uuid).await;
// Save its root path as the server config would
let saved_root_path = db_iox_store.root_path();
// Simulate server restarting and reading the server config to construct iox object stores,
// the database files in object storage should be found in the same root
let restarted_iox_store =
IoxObjectStore::load_at_root_path(Arc::clone(&object_store), &saved_root_path)
.await
.unwrap();
assert_eq!(db_iox_store.root_path(), restarted_iox_store.root_path());
// This should also equal root_path_for, which can be constructed even if a database
// hasn't been fully initialized yet
let alternate = IoxObjectStore::root_path_for(&*object_store, uuid).to_string();
assert_eq!(alternate, saved_root_path);
}
#[tokio::test]
async fn test_ng_parquet_io() {
let object_store = make_object_store();
let iox_object_store = Arc::new(IoxObjectStore::existing(
Arc::clone(&object_store),
IoxObjectStore::root_path_for(&*object_store, uuid::Uuid::new_v4()),
));
let pfp = ParquetFilePath::new_new_gen(
NamespaceId::new(1),
TableId::new(2),
SequencerId::new(3),
PartitionId::new(4),
Uuid::nil(),
);
// file does not exist yet
iox_object_store.get_parquet_file(&pfp).await.unwrap_err();
// create file
let content = Bytes::from(b"foo".to_vec());
iox_object_store
.put_parquet_file(&pfp, content.clone())
.await
.unwrap();
let actual = iox_object_store
.get_parquet_file(&pfp)
.await
.unwrap()
.bytes()
.await
.unwrap();
assert_eq!(content.to_vec(), actual);
// delete file
iox_object_store.delete_parquet_file(&pfp).await.unwrap();
iox_object_store.get_parquet_file(&pfp).await.unwrap_err();
}
}
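For orientation, here is a minimal, test-style sketch of how the `IoxObjectStore` API deleted above was typically driven, based only on the code and tests shown; the in-memory store, fresh UUID, and file contents are illustrative:

use std::sync::Arc;
use bytes::Bytes;
use futures::TryStreamExt;
use iox_object_store::IoxObjectStore;
use object_store::{DynObjectStore, ObjectStoreImpl};
use uuid::Uuid;

#[tokio::main]
async fn main() {
    // In-memory store and a fresh database UUID, as in the tests above.
    let object_store: Arc<DynObjectStore> = Arc::new(ObjectStoreImpl::new_in_memory());
    let uuid = Uuid::new_v4();

    // Create the per-database wrapper; errors if anything already exists under this UUID.
    let iox_object_store = IoxObjectStore::create(Arc::clone(&object_store), uuid)
        .await
        .unwrap();

    // Rules (and owner) files live directly under the database root.
    iox_object_store
        .put_database_rules_file(Bytes::from_static(b"rules bytes"))
        .await
        .unwrap();

    // Parquet (and transaction) files are listed as streams of path batches.
    let parquet_paths: Vec<_> = iox_object_store
        .parquet_files()
        .await
        .unwrap()
        .try_collect::<Vec<_>>()
        .await
        .unwrap()
        .into_iter()
        .flatten()
        .collect();
    assert!(parquet_paths.is_empty());
}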

View File

@ -1,233 +0,0 @@
//! Paths for specific types of files within a database's object storage.
use data_types::server_id::ServerId;
use object_store::{
path::{ObjectStorePath, Path},
DynObjectStore,
};
use std::fmt;
use uuid::Uuid;
pub mod parquet_file;
use parquet_file::ParquetFilePath;
pub mod transaction_file;
use transaction_file::TransactionFilePath;
pub(crate) const ALL_DATABASES_DIRECTORY: &str = "dbs";
const ALL_SERVERS_DIRECTORY: &str = "nodes";
pub(crate) const SERVER_CONFIG_FILE_NAME: &str = "config.pb";
const DATABASE_OWNER_FILE_NAME: &str = "owner.pb";
/// The path to the server file containing the list of databases this server owns.
// TODO: this is in the process of replacing all_databases_path for the floating databases design
pub(crate) fn server_config_path(object_store: &DynObjectStore, server_id: ServerId) -> Path {
let mut path = object_store.new_path();
path.push_dir(ALL_SERVERS_DIRECTORY);
path.push_dir(server_id.to_string());
path.set_file_name(SERVER_CONFIG_FILE_NAME);
path
}
/// A database-specific object store path that all `IoxObjectStore` `Path`s should be within.
/// This can be serialized to facilitate initial loading of a database from object storage, but
/// the path should not be parsed into its component parts as the format might change.
#[derive(Debug, Clone, PartialEq)]
pub struct RootPath {
pub(crate) inner: Path,
}
impl RootPath {
/// How the root of a database is defined in object storage.
pub(crate) fn new(object_store: &DynObjectStore, uuid: Uuid) -> Self {
let mut inner = object_store.new_path();
inner.push_dir(ALL_DATABASES_DIRECTORY);
inner.push_dir(uuid.to_string());
Self { inner }
}
pub(crate) fn from_str(object_store: &DynObjectStore, raw: &str) -> Self {
Self {
inner: object_store.path_from_raw(raw),
}
}
fn join(&self, dir: &str) -> Path {
let mut result = self.inner.clone();
result.push_dir(dir);
result
}
pub(crate) fn owner_path(&self) -> Path {
let mut result = self.inner.clone();
result.set_file_name(DATABASE_OWNER_FILE_NAME);
result
}
pub(crate) fn rules_path(&self) -> RulesPath {
RulesPath::new(self)
}
pub(crate) fn data_path(&self) -> DataPath {
DataPath::new(self)
}
pub(crate) fn transactions_path(&self) -> TransactionsPath {
TransactionsPath::new(self)
}
}
impl fmt::Display for RootPath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.inner.to_raw())
}
}
#[derive(Debug, Clone)]
pub(crate) struct RulesPath {
pub(crate) inner: Path,
}
impl RulesPath {
const DB_RULES_FILE_NAME: &'static str = "rules.pb";
/// How the rules path of a database is defined in object storage in terms of the
/// root path.
pub(crate) fn new(root_path: &RootPath) -> Self {
Self::new_from_object_store_path(&root_path.inner)
}
/// Create a potential rules file location given an object storage path received from
/// an object storage list operation.
pub(crate) fn new_from_object_store_path(path: &Path) -> Self {
let mut inner = path.clone();
inner.set_file_name(Self::DB_RULES_FILE_NAME);
Self { inner }
}
}
/// A database-specific object store path for all catalog transaction files. This should not be
/// leaked outside this crate.
#[derive(Debug, Clone)]
pub(crate) struct TransactionsPath {
pub(crate) inner: Path,
}
impl TransactionsPath {
/// How the transactions path of a database is defined in object storage in terms of the
/// root path.
pub(crate) fn new(root_path: &RootPath) -> Self {
Self {
inner: root_path.join("transactions"),
}
}
pub(crate) fn join(&self, transaction_file_path: &TransactionFilePath) -> Path {
let mut result = self.inner.clone();
let relative = transaction_file_path.relative_dirs_and_file_name();
for part in relative.directories {
result.push_dir(part.to_string());
}
result.set_file_name(
relative
.file_name
.expect("Transaction file paths have filenames")
.to_string(),
);
result
}
}
/// A database-specific object store path for all data files. This should not be leaked outside
/// this crate.
#[derive(Debug, Clone)]
pub(crate) struct DataPath {
pub(crate) inner: Path,
}
impl DataPath {
/// How the data path of a database is defined in object storage in terms of the root path.
pub(crate) fn new(root_path: &RootPath) -> Self {
Self {
inner: root_path.join("data"),
}
}
pub(crate) fn join(&self, parquet_file_path: &ParquetFilePath) -> Path {
let mut result = self.inner.clone();
let relative = parquet_file_path.relative_dirs_and_file_name();
for part in relative.directories {
result.push_dir(part.to_string());
}
result.set_file_name(
relative
.file_name
.expect("Parquet file paths have filenames")
.to_string(),
);
result
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::IoxObjectStore;
use object_store::ObjectStoreImpl;
use std::sync::Arc;
/// Creates a new in-memory object store. These tests rely on the `Path`s being of type
/// `DirsAndFileName` and thus using object_store::path::DELIMITER as the separator
fn make_object_store() -> Arc<DynObjectStore> {
Arc::new(ObjectStoreImpl::new_in_memory())
}
#[test]
fn root_path_contains_dbs_and_db_uuid() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let root_path = RootPath::new(&*object_store, uuid);
assert_eq!(
root_path.inner.to_string(),
format!("mem:{}/{}/", ALL_DATABASES_DIRECTORY, uuid)
);
}
#[test]
fn root_path_join_concatenates() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let root_path = RootPath::new(&*object_store, uuid);
let path = root_path.join("foo");
assert_eq!(
path.to_string(),
format!("mem:{}/{}/foo/", ALL_DATABASES_DIRECTORY, uuid)
);
}
#[test]
fn transactions_path_is_relative_to_root_path() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let root_path = RootPath::new(&*object_store, uuid);
let iox_object_store = IoxObjectStore::existing(Arc::clone(&object_store), root_path);
assert_eq!(
iox_object_store.transactions_path.inner.to_string(),
format!("mem:{}/{}/transactions/", ALL_DATABASES_DIRECTORY, uuid)
);
}
#[test]
fn data_path_is_relative_to_root_path() {
let object_store = make_object_store();
let uuid = Uuid::new_v4();
let root_path = RootPath::new(&*object_store, uuid);
let iox_object_store = IoxObjectStore::existing(Arc::clone(&object_store), root_path);
assert_eq!(
iox_object_store.data_path.inner.to_string(),
format!("mem:{}/{}/data/", ALL_DATABASES_DIRECTORY, uuid)
);
}
}
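Taken together with the parquet and transaction file path types that follow, the per-database layout these deleted path helpers produced looked roughly like this (placeholders in angle brackets):

dbs/<database uuid>/rules.pb
dbs/<database uuid>/owner.pb
dbs/<database uuid>/data/<table name>/<partition key>/<chunk id>.parquet
dbs/<database uuid>/transactions/<revision, zero-padded to 20 digits>/<transaction uuid>.txn (or .ckpt for checkpoints)
nodes/<server id>/config.pb (server-level config, outside any database root)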

View File

@ -1,460 +0,0 @@
use data_types::chunk_metadata::{ChunkAddr, ChunkId};
use data_types2::{NamespaceId, PartitionId, SequencerId, TableId};
use object_store::{
path::{parsed::DirsAndFileName, ObjectStorePath, Path as ObjStoPath},
Result,
};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::sync::Arc;
use uuid::Uuid;
#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
enum Variant {
Old {
table_name: Arc<str>,
partition_key: Arc<str>,
chunk_id: ChunkId,
},
New {
namespace_id: NamespaceId,
table_id: TableId,
sequencer_id: SequencerId,
partition_id: PartitionId,
object_store_id: Uuid,
},
}
/// Location of a Parquet file within a database's object store.
/// The exact format is an implementation detail and is subject to change.
#[derive(Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct ParquetFilePath(Variant);
impl std::fmt::Debug for ParquetFilePath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let path = match self.0 {
Variant::Old { .. } => self.relative_dirs_and_file_name().to_string(),
Variant::New { .. } => self.absolute_dirs_and_file_name().to_string(),
};
f.debug_struct("ParquetFilePath")
.field("inner", &self.0)
.field("resolved_path", &path)
.finish()
}
}
impl ParquetFilePath {
/// Create a location for this chunk's parquet file. The path is derived entirely from the
/// `ChunkAddr`, so calling this twice on the same address returns the same path.
pub fn new_old_gen(chunk_addr: &ChunkAddr) -> Self {
Self(Variant::Old {
table_name: Arc::clone(&chunk_addr.table_name),
partition_key: Arc::clone(&chunk_addr.partition_key),
chunk_id: chunk_addr.chunk_id,
})
}
/// Create parquet file path relevant for the NG storage layout.
pub fn new_new_gen(
namespace_id: NamespaceId,
table_id: TableId,
sequencer_id: SequencerId,
partition_id: PartitionId,
object_store_id: Uuid,
) -> Self {
Self(Variant::New {
namespace_id,
table_id,
sequencer_id,
partition_id,
object_store_id,
})
}
/// Checks if this is an NG-style path.
pub fn is_new_gen(&self) -> bool {
matches!(self.0, Variant::New { .. })
}
/// Turn this into directories and file names to be added to a root path or to be serialized
/// in protobuf.
///
/// # Panic
/// Panics if this is an NG-style path.
pub fn relative_dirs_and_file_name(&self) -> DirsAndFileName {
match &self.0 {
Variant::Old {
table_name,
partition_key,
chunk_id,
} => {
let mut result = DirsAndFileName::default();
result.push_all_dirs(&[table_name.as_ref(), partition_key.as_ref()]);
result.set_file_name(format!("{}.parquet", chunk_id.get()));
result
}
Variant::New { .. } => {
panic!("relative dirs don't apply to new-gen parquet file paths")
}
}
}
/// Get absolute storage location.
///
/// # Panic
/// Panics if this is an old-style path.
pub fn absolute_dirs_and_file_name(&self) -> DirsAndFileName {
match &self.0 {
Variant::Old { .. } => {
panic!("absolute dirs don't apply to old-gen parquet file paths")
}
Variant::New {
namespace_id,
table_id,
sequencer_id,
partition_id,
object_store_id,
} => {
let mut result = DirsAndFileName::default();
result.push_all_dirs(&[
namespace_id.to_string().as_str(),
table_id.to_string().as_str(),
sequencer_id.to_string().as_str(),
partition_id.to_string().as_str(),
]);
result.set_file_name(format!("{}.parquet", object_store_id));
result
}
}
}
/// Create from serialized protobuf strings.
pub fn from_relative_dirs_and_file_name(
dirs_and_file_name: &DirsAndFileName,
) -> Result<Self, ParquetFilePathParseError> {
let mut directories = dirs_and_file_name.directories.iter();
let table_name = directories
.next()
.context(MissingTableNameSnafu)?
.to_string()
.into();
let partition_key = directories
.next()
.context(MissingPartitionKeySnafu)?
.to_string()
.into();
ensure!(directories.next().is_none(), UnexpectedDirectorySnafu);
let file_name = dirs_and_file_name
.file_name
.as_ref()
.context(MissingChunkIdSnafu)?
.to_string();
let mut parts = file_name.split('.');
let chunk_id = parts
.next()
.context(MissingChunkIdSnafu)?
.parse::<Uuid>()
.context(InvalidChunkIdSnafu)?
.into();
let ext = parts.next().context(MissingExtensionSnafu)?;
ensure!(ext == "parquet", InvalidExtensionSnafu { ext });
ensure!(parts.next().is_none(), UnexpectedExtensionSnafu);
Ok(Self(Variant::Old {
table_name,
partition_key,
chunk_id,
}))
}
// Deliberately pub(crate); this transformation should only happen within this crate
pub(crate) fn from_absolute(
absolute_path: ObjStoPath,
) -> Result<Self, ParquetFilePathParseError> {
let absolute_path: DirsAndFileName = absolute_path.into();
let mut absolute_dirs = absolute_path.directories.into_iter().fuse();
// The number of `next`s here needs to match the total number of directories in
// iox_object_store's data_path
absolute_dirs.next(); // "dbs"
absolute_dirs.next(); // database uuid
absolute_dirs.next(); // "data"
let remaining = DirsAndFileName {
directories: absolute_dirs.collect(),
file_name: absolute_path.file_name,
};
Self::from_relative_dirs_and_file_name(&remaining)
}
}
impl From<&Self> for ParquetFilePath {
fn from(borrowed: &Self) -> Self {
borrowed.clone()
}
}
#[derive(Snafu, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum ParquetFilePathParseError {
#[snafu(display("Could not find required table name"))]
MissingTableName,
#[snafu(display("Could not find required partition key"))]
MissingPartitionKey,
#[snafu(display("Too many directories found"))]
UnexpectedDirectory,
#[snafu(display("Could not find required chunk id"))]
MissingChunkId,
#[snafu(display("Could not parse chunk id: {}", source))]
InvalidChunkId { source: uuid::Error },
#[snafu(display("Could not find required file extension"))]
MissingExtension,
#[snafu(display("Extension should have been `parquet`, instead found `{}`", ext))]
InvalidExtension { ext: String },
#[snafu(display("Too many extensions found"))]
UnexpectedExtension,
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{paths::ALL_DATABASES_DIRECTORY, IoxObjectStore, RootPath};
use object_store::{DynObjectStore, ObjectStoreImpl};
use test_helpers::assert_error;
/// Creates a new in-memory object store. These tests rely on the `Path`s being of type
/// `DirsAndFileName` and thus using object_store::path::DELIMITER as the separator
fn make_object_store() -> Arc<DynObjectStore> {
Arc::new(ObjectStoreImpl::new_in_memory())
}
#[test]
fn test_parquet_file_path_deserialization() {
// Error cases
use ParquetFilePathParseError::*;
let mut df = DirsAndFileName::default();
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(matches!(result, Err(MissingTableName)), "got {:?}", result);
df.push_dir("foo");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(MissingPartitionKey)),
"got {:?}",
result
);
df.push_dir("bar");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(MissingChunkId { .. })),
"got {:?}",
result
);
let mut extra = df.clone();
extra.push_dir("nope");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&extra);
assert!(
matches!(result, Err(UnexpectedDirectory)),
"got {:?}",
result
);
df.set_file_name("bleh");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(InvalidChunkId { .. })),
"got {:?}",
result
);
df.set_file_name("00000000-0000-0000-0000-00000000000a");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(matches!(result, Err(MissingExtension)), "got {:?}", result);
df.set_file_name("00000000-0000-0000-0000-00000000000a.exe");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(InvalidExtension { .. })),
"got {:?}",
result
);
df.set_file_name("00000000-0000-0000-0000-00000000000a.parquet.v6");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(UnexpectedExtension)),
"got {:?}",
result
);
// Success case
df.set_file_name("00000000-0000-0000-0000-00000000000a.parquet");
let result = ParquetFilePath::from_relative_dirs_and_file_name(&df).unwrap();
assert_eq!(
result,
ParquetFilePath(Variant::Old {
table_name: "foo".into(),
partition_key: "bar".into(),
chunk_id: ChunkId::new_test(10),
})
);
let round_trip = result.relative_dirs_and_file_name();
assert_eq!(round_trip, df);
}
#[test]
fn parquet_file_from_absolute() {
let object_store = make_object_store();
// Success case
let mut path = object_store.new_path();
path.push_all_dirs(&["server", "uuid", "data", "}*", "aoeu"]);
path.set_file_name("00000000-0000-0000-0000-00000000000a.parquet");
let result = ParquetFilePath::from_absolute(path);
assert_eq!(
result.unwrap(),
ParquetFilePath(Variant::Old {
table_name: "}*".into(),
partition_key: "aoeu".into(),
chunk_id: ChunkId::new_test(10),
})
);
// Error cases
use ParquetFilePathParseError::*;
let mut path = object_store.new_path();
// incorrect directories are fine; we're assuming that `list(data_path)` was already scoped
// to the right directories, so we don't check again on the way out
path.push_all_dirs(&["server", "uuid", "data", "}*", "aoeu"]);
// but this file name doesn't contain a chunk id
path.set_file_name("rules.pb");
assert_error!(ParquetFilePath::from_absolute(path), InvalidChunkId { .. });
let mut path = object_store.new_path();
path.push_all_dirs(&["server", "uuid", "data", "}*", "aoeu"]);
// missing file name
assert_error!(ParquetFilePath::from_absolute(path), MissingChunkId);
}
#[test]
fn parquet_file_relative_dirs_and_file_path() {
let pfp = ParquetFilePath(Variant::Old {
table_name: "}*".into(),
partition_key: "aoeu".into(),
chunk_id: ChunkId::new_test(10),
});
let dirs_and_file_name = pfp.relative_dirs_and_file_name();
assert_eq!(
dirs_and_file_name.to_string(),
"%7D%2A/aoeu/00000000-0000-0000-0000-00000000000a.parquet".to_string(),
);
let round_trip =
ParquetFilePath::from_relative_dirs_and_file_name(&dirs_and_file_name).unwrap();
assert_eq!(pfp, round_trip);
assert_eq!(format!("{:?}", pfp), "ParquetFilePath { inner: Old { table_name: \"}*\", partition_key: \"aoeu\", chunk_id: ChunkId(10) }, resolved_path: \"%7D%2A/aoeu/00000000-0000-0000-0000-00000000000a.parquet\" }");
}
#[test]
#[should_panic(expected = "relative dirs don't apply to new-gen parquet file paths")]
fn parquet_file_relative_dirs_and_file_path_new_gen() {
let pfp = ParquetFilePath(Variant::New {
namespace_id: NamespaceId::new(1),
table_id: TableId::new(2),
sequencer_id: SequencerId::new(3),
partition_id: PartitionId::new(4),
object_store_id: Uuid::nil(),
});
pfp.relative_dirs_and_file_name();
}
#[test]
fn parquet_file_absolute_dirs_and_file_path() {
let pfp = ParquetFilePath(Variant::New {
namespace_id: NamespaceId::new(1),
table_id: TableId::new(2),
sequencer_id: SequencerId::new(3),
partition_id: PartitionId::new(4),
object_store_id: Uuid::nil(),
});
let dirs_and_file_name = pfp.absolute_dirs_and_file_name();
assert_eq!(
dirs_and_file_name.to_string(),
"1/2/3/4/00000000-0000-0000-0000-000000000000.parquet".to_string(),
);
assert_eq!(format!("{:?}", pfp), "ParquetFilePath { inner: New { namespace_id: NamespaceId(1), table_id: TableId(2), sequencer_id: SequencerId(3), partition_id: PartitionId(4), object_store_id: 00000000-0000-0000-0000-000000000000 }, resolved_path: \"1/2/3/4/00000000-0000-0000-0000-000000000000.parquet\" }");
}
#[test]
#[should_panic(expected = "absolute dirs don't apply to old-gen parquet file paths")]
fn parquet_file_absolute_dirs_and_file_path_old_gen() {
let pfp = ParquetFilePath(Variant::Old {
table_name: "}*".into(),
partition_key: "aoeu".into(),
chunk_id: ChunkId::new_test(10),
});
pfp.absolute_dirs_and_file_name();
}
#[test]
fn parquet_file_is_new_gen() {
let pfp = ParquetFilePath(Variant::Old {
table_name: "}*".into(),
partition_key: "aoeu".into(),
chunk_id: ChunkId::new_test(10),
});
assert!(!pfp.is_new_gen());
let pfp = ParquetFilePath(Variant::New {
namespace_id: NamespaceId::new(1),
table_id: TableId::new(2),
sequencer_id: SequencerId::new(3),
partition_id: PartitionId::new(4),
object_store_id: Uuid::nil(),
});
assert!(pfp.is_new_gen());
}
#[test]
fn data_path_join_with_parquet_file_path() {
let db_uuid = Uuid::new_v4();
let object_store = make_object_store();
let root_path = RootPath::new(&*object_store, db_uuid);
let iox_object_store = IoxObjectStore::existing(Arc::clone(&object_store), root_path);
let pfp = ParquetFilePath(Variant::Old {
table_name: "}*".into(),
partition_key: "aoeu".into(),
chunk_id: ChunkId::new_test(10),
});
let path = iox_object_store.data_path.join(&pfp);
let mut expected_path = object_store.new_path();
expected_path.push_all_dirs(&[
ALL_DATABASES_DIRECTORY,
&db_uuid.to_string(),
"data",
"}*",
"aoeu",
]);
expected_path.set_file_name("00000000-0000-0000-0000-00000000000a.parquet");
assert_eq!(path, expected_path);
}
}
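For contrast, a small sketch of the two path flavors defined in the deleted file above, using only its constructors and conversions; the IDs, names, and chunk id are illustrative:

use data_types::chunk_metadata::{ChunkAddr, ChunkId};
use data_types2::{NamespaceId, PartitionId, SequencerId, TableId};
use iox_object_store::ParquetFilePath;
use uuid::Uuid;

fn main() {
    // Old-gen: relative to a database's `data/` directory.
    let old = ParquetFilePath::new_old_gen(&ChunkAddr {
        db_name: "clouds".into(),
        table_name: "my_table".into(),
        partition_key: "my_partition".into(),
        chunk_id: ChunkId::new_test(13),
    });
    assert!(!old.is_new_gen());
    // e.g. "my_table/my_partition/<chunk uuid>.parquet"
    let relative = old.relative_dirs_and_file_name().to_string();
    println!("{relative}");

    // New-gen (NG): an absolute layout keyed by catalog IDs.
    let new = ParquetFilePath::new_new_gen(
        NamespaceId::new(1),
        TableId::new(2),
        SequencerId::new(3),
        PartitionId::new(4),
        Uuid::nil(),
    );
    assert!(new.is_new_gen());
    // "1/2/3/4/00000000-0000-0000-0000-000000000000.parquet"
    let absolute = new.absolute_dirs_and_file_name().to_string();
    println!("{absolute}");
}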

View File

@ -1,384 +0,0 @@
use object_store::{
path::{parsed::DirsAndFileName, ObjectStorePath, Path as ObjStoPath},
Result,
};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::str::FromStr;
use uuid::Uuid;
/// File suffix for transaction files in object store.
const TRANSACTION_FILE_SUFFIX: &str = "txn";
/// File suffix for checkpoint files in object store.
const CHECKPOINT_FILE_SUFFIX: &str = "ckpt";
/// Location of a catalog transaction file within a database's object store.
/// The exact format is an implementation detail and is subject to change.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct TransactionFilePath {
/// Transaction revision
pub revision_counter: u64,
/// Transaction identifier
pub uuid: Uuid,
suffix: TransactionFileSuffix,
}
impl TransactionFilePath {
/// Create a new file path to store transaction info.
pub fn new_transaction(revision_counter: u64, uuid: Uuid) -> Self {
Self {
revision_counter,
uuid,
suffix: TransactionFileSuffix::Transaction,
}
}
/// Create a new file path to store checkpoint info.
pub fn new_checkpoint(revision_counter: u64, uuid: Uuid) -> Self {
Self {
revision_counter,
uuid,
suffix: TransactionFileSuffix::Checkpoint,
}
}
/// Returns true if this path is to a checkpoint file; false otherwise.
pub fn is_checkpoint(&self) -> bool {
self.suffix == TransactionFileSuffix::Checkpoint
}
/// Turn this into directories and file names to be added to a root path
pub fn relative_dirs_and_file_name(&self) -> DirsAndFileName {
let mut result = DirsAndFileName::default();
// pad number: `u64::MAX.to_string().len()` is 20
result.push_dir(format!("{:0>20}", self.revision_counter));
let file_name = format!("{}.{}", self.uuid, self.suffix.as_str());
result.set_file_name(file_name);
result
}
/// Create from serialized protobuf strings.
pub fn from_relative_dirs_and_file_name(
dirs_and_file_name: &DirsAndFileName,
) -> Result<Self, TransactionFilePathParseError> {
let mut directories = dirs_and_file_name.directories.iter();
let revision_counter = directories
.next()
.context(MissingRevisionCounterSnafu)?
.to_string()
.parse()
.context(InvalidRevisionCounterSnafu)?;
ensure!(directories.next().is_none(), UnexpectedDirectorySnafu);
let file_name = dirs_and_file_name
.file_name
.as_ref()
.context(MissingFileNameSnafu)?
.to_string();
let mut parts = file_name.split('.');
let uuid = parts
.next()
.context(MissingUuidSnafu)?
.parse()
.context(InvalidUuidSnafu)?;
let suffix = parts
.next()
.context(MissingSuffixSnafu)?
.parse()
.context(InvalidSuffixSnafu)?;
ensure!(parts.next().is_none(), UnexpectedExtensionSnafu);
Ok(Self {
revision_counter,
uuid,
suffix,
})
}
// Deliberately pub(crate); this transformation should only happen within this crate
pub(crate) fn from_absolute(
absolute_path: ObjStoPath,
) -> Result<Self, TransactionFilePathParseError> {
let absolute_path: DirsAndFileName = absolute_path.into();
let mut absolute_dirs = absolute_path.directories.into_iter().fuse();
// The number of `next`s here needs to match the total number of directories in
// iox_object_store's transactions_path
absolute_dirs.next(); // "dbs"
absolute_dirs.next(); // database uuid
absolute_dirs.next(); // "transactions"
let remaining = DirsAndFileName {
directories: absolute_dirs.collect(),
file_name: absolute_path.file_name,
};
Self::from_relative_dirs_and_file_name(&remaining)
}
}
#[derive(Snafu, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum TransactionFilePathParseError {
#[snafu(display("Could not find required revision counter"))]
MissingRevisionCounter,
#[snafu(display("Could not parse revision counter: {}", source))]
InvalidRevisionCounter { source: std::num::ParseIntError },
#[snafu(display("Too many directories found"))]
UnexpectedDirectory,
#[snafu(display("Could not find required file name"))]
MissingFileName,
#[snafu(display("Could not find required UUID"))]
MissingUuid,
#[snafu(display("Could not parse UUID: {}", source))]
InvalidUuid { source: uuid::Error },
#[snafu(display("Could not find required suffix"))]
MissingSuffix,
#[snafu(display("Invalid suffix: {}", source))]
InvalidSuffix {
source: TransactionFileSuffixParseError,
},
#[snafu(display("Too many extensions found"))]
UnexpectedExtension,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum TransactionFileSuffix {
Transaction,
Checkpoint,
}
impl TransactionFileSuffix {
fn as_str(&self) -> &'static str {
match self {
Self::Transaction => TRANSACTION_FILE_SUFFIX,
Self::Checkpoint => CHECKPOINT_FILE_SUFFIX,
}
}
}
#[derive(Snafu, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum TransactionFileSuffixParseError {
#[snafu(display("Unknown suffix: {}", suffix))]
UnknownSuffix { suffix: String },
}
impl FromStr for TransactionFileSuffix {
type Err = TransactionFileSuffixParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
TRANSACTION_FILE_SUFFIX => Ok(Self::Transaction),
CHECKPOINT_FILE_SUFFIX => Ok(Self::Checkpoint),
suffix => UnknownSuffixSnafu { suffix }.fail(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{paths::ALL_DATABASES_DIRECTORY, IoxObjectStore, RootPath};
use object_store::{DynObjectStore, ObjectStoreImpl};
use std::sync::Arc;
use test_helpers::assert_error;
/// Creates a new in-memory object store. These tests rely on the `Path`s being of type
/// `DirsAndFileName` and thus using object_store::path::DELIMITER as the separator
fn make_object_store() -> Arc<DynObjectStore> {
Arc::new(ObjectStoreImpl::new_in_memory())
}
#[test]
fn is_checkpoint_works() {
let uuid = Uuid::new_v4();
let transaction = TransactionFilePath::new_transaction(0, uuid);
assert!(!transaction.is_checkpoint());
let checkpoint = TransactionFilePath::new_checkpoint(0, uuid);
assert!(checkpoint.is_checkpoint());
}
#[test]
fn test_transaction_file_path_deserialization() {
// Error cases
use TransactionFilePathParseError::*;
let mut df = DirsAndFileName::default();
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(MissingRevisionCounter)),
"got {:?}",
result
);
df.push_dir("foo");
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(InvalidRevisionCounter { .. })),
"got {:?}",
result
);
let mut df = DirsAndFileName::default();
df.push_dir("00000000000000000123");
df.push_dir("foo");
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(UnexpectedDirectory)),
"got {:?}",
result
);
let mut df = DirsAndFileName::default();
df.push_dir("00000000000000000123");
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(matches!(result, Err(MissingFileName)), "got {:?}", result);
df.set_file_name("foo");
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(InvalidUuid { .. })),
"got {:?}",
result
);
let uuid = Uuid::new_v4();
df.set_file_name(&format!("{}", uuid));
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(matches!(result, Err(MissingSuffix)), "got {:?}", result);
df.set_file_name(&format!("{}.exe", uuid));
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(InvalidSuffix { .. })),
"got {:?}",
result
);
df.set_file_name(&format!("{}.{}.foo", uuid, TRANSACTION_FILE_SUFFIX));
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df);
assert!(
matches!(result, Err(UnexpectedExtension)),
"got {:?}",
result
);
// Success case
df.set_file_name(&format!("{}.{}", uuid, TRANSACTION_FILE_SUFFIX));
let result = TransactionFilePath::from_relative_dirs_and_file_name(&df).unwrap();
assert_eq!(
result,
TransactionFilePath {
revision_counter: 123,
uuid,
suffix: TransactionFileSuffix::Transaction,
}
);
let round_trip = result.relative_dirs_and_file_name();
assert_eq!(round_trip, df);
}
#[test]
fn transaction_file_from_absolute() {
let object_store = make_object_store();
// Success case
let uuid = Uuid::new_v4();
let mut path = object_store.new_path();
path.push_all_dirs(&["dbs", "uuid", "data", "00000000000000000123"]);
path.set_file_name(&format!("{}.{}", uuid, CHECKPOINT_FILE_SUFFIX));
let result = TransactionFilePath::from_absolute(path);
assert_eq!(
result.unwrap(),
TransactionFilePath {
revision_counter: 123,
uuid,
suffix: TransactionFileSuffix::Checkpoint,
}
);
// Error cases
use TransactionFilePathParseError::*;
let mut path = object_store.new_path();
// incorrect directories are fine; we're assuming that `list(transactions_path)` was already
// scoped to the right directories, so we don't check again on the way out
path.push_all_dirs(&["foo", "bar", "baz", "}*", "aoeu", "blah"]);
path.set_file_name("rules.pb");
assert_error!(
TransactionFilePath::from_absolute(path),
InvalidRevisionCounter { .. },
);
let mut path = object_store.new_path();
path.push_all_dirs(&["dbs", "uuid", "data", "00000000000000000123"]);
// missing file name
assert_error!(TransactionFilePath::from_absolute(path), MissingFileName,);
}
#[test]
fn transaction_file_relative_dirs_and_file_path() {
let uuid = Uuid::new_v4();
let tfp = TransactionFilePath {
revision_counter: 555,
uuid,
suffix: TransactionFileSuffix::Transaction,
};
let dirs_and_file_name = tfp.relative_dirs_and_file_name();
assert_eq!(
dirs_and_file_name.to_string(),
format!("00000000000000000555/{}.{}", uuid, TRANSACTION_FILE_SUFFIX)
);
let round_trip =
TransactionFilePath::from_relative_dirs_and_file_name(&dirs_and_file_name).unwrap();
assert_eq!(tfp, round_trip);
}
#[test]
fn transactions_path_join_with_parquet_file_path() {
let db_uuid = Uuid::new_v4();
let object_store = make_object_store();
let root_path = RootPath::new(&*object_store, db_uuid);
let iox_object_store = IoxObjectStore::existing(Arc::clone(&object_store), root_path);
let uuid = Uuid::new_v4();
let tfp = TransactionFilePath {
revision_counter: 555,
uuid,
suffix: TransactionFileSuffix::Checkpoint,
};
let path = iox_object_store.transactions_path.join(&tfp);
let mut expected_path = object_store.new_path();
expected_path.push_all_dirs(&[
ALL_DATABASES_DIRECTORY,
&db_uuid.to_string(),
"transactions",
"00000000000000000555",
]);
expected_path.set_file_name(&format!("{}.{}", uuid, CHECKPOINT_FILE_SUFFIX));
assert_eq!(path, expected_path);
}
}
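Similarly, a brief sketch of the transaction file paths defined above; the revision counter and UUID are illustrative:

use iox_object_store::TransactionFilePath;
use uuid::Uuid;

fn main() {
    let uuid = Uuid::new_v4();

    // Transaction file: "00000000000000000555/<uuid>.txn" (revision zero-padded to 20 digits).
    let txn = TransactionFilePath::new_transaction(555, uuid);
    assert!(!txn.is_checkpoint());
    let relative = txn.relative_dirs_and_file_name().to_string();
    println!("{relative}");

    // Checkpoint file: same layout, ".ckpt" suffix.
    let ckpt = TransactionFilePath::new_checkpoint(555, uuid);
    assert!(ckpt.is_checkpoint());
}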

View File

@ -3,24 +3,16 @@ name = "iox_object_store_service"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "0.1"
data_types2 = { path = "../data_types2" }
data_types = { path = "../data_types" }
futures = "0.3"
generated_types = { path = "../generated_types" }
iox_catalog = { path = "../iox_catalog" }
iox_object_store = { path = "../iox_object_store" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
serde = "1.0"
serde_urlencoded = "0.7"
iox_time = { path = "../iox_time" }
parquet_file = { path = "../parquet_file" }
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
tokio-stream = "0.1"
tonic = "0.7"
trace = { path = "../trace/" }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}

View File

@ -1,5 +1,5 @@
//! gRPC service for getting files from the object store a remote IOx service is connected to. Used
//! in router2, but can be included in any gRPC server.
//! in router, but can be included in any gRPC server.
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
#![warn(
@ -12,14 +12,13 @@
clippy::clone_on_ref_ptr
)]
use futures::stream::BoxStream;
use futures::StreamExt;
use futures::{stream::BoxStream, StreamExt};
use generated_types::influxdata::iox::object_store::v1::*;
use iox_catalog::interface::Catalog;
use iox_object_store::ParquetFilePath;
use object_store::DynObjectStore;
use observability_deps::tracing::*;
use std::sync::Arc;
use parquet_file::ParquetFilePath;
use std::{ops::Deref, sync::Arc};
use tonic::{Request, Response, Status};
use uuid::Uuid;
@ -66,15 +65,14 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService {
})?
.ok_or_else(|| Status::not_found(req.uuid))?;
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
parquet_file.namespace_id,
parquet_file.table_id,
parquet_file.sequencer_id,
parquet_file.partition_id,
parquet_file.object_store_id,
)
.absolute_dirs_and_file_name();
let path = self.object_store.path_from_dirs_and_filename(path);
);
let path = path.object_store_path(self.object_store.deref());
let res = self
.object_store
@ -97,7 +95,7 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService {
mod tests {
use super::*;
use bytes::Bytes;
use data_types2::{KafkaPartition, ParquetFileParams, SequenceNumber, Timestamp};
use data_types::{KafkaPartition, ParquetFileParams, SequenceNumber, Timestamp};
use generated_types::influxdata::iox::object_store::v1::object_store_service_server::ObjectStoreService;
use iox_catalog::mem::MemCatalog;
use object_store::{ObjectStoreApi, ObjectStoreImpl};
@ -164,15 +162,14 @@ mod tests {
let object_store = Arc::new(ObjectStoreImpl::new_in_memory());
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
p1.namespace_id,
p1.table_id,
p1.sequencer_id,
p1.partition_id,
p1.object_store_id,
)
.absolute_dirs_and_file_name();
let path = object_store.path_from_dirs_and_filename(path);
);
let path = path.object_store_path(object_store.deref());
let data = Bytes::from_static(b"some data");

View File

@ -8,16 +8,15 @@ description = "IOx NG test utils and tests"
[dependencies]
arrow = "13"
bytes = "1.0"
data_types2 = { path = "../data_types2" }
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }
iox_catalog = { path = "../iox_catalog" }
iox_object_store = { path = "../iox_object_store" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = { path = "../object_store" }
parquet_file = { path = "../parquet_file" }
query = { path = "../query" }
schema = { path = "../schema" }
iox_time = { path = "../iox_time" }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}

View File

@ -5,7 +5,7 @@ use arrow::{
record_batch::RecordBatch,
};
use bytes::Bytes;
use data_types2::{
use data_types::{
Column, ColumnType, KafkaPartition, KafkaTopic, Namespace, ParquetFile, ParquetFileId,
ParquetFileParams, ParquetFileWithMetadata, Partition, PartitionId, QueryPool, SequenceNumber,
Sequencer, SequencerId, Table, TableId, Timestamp, Tombstone, TombstoneId,
@ -15,18 +15,20 @@ use iox_catalog::{
interface::{Catalog, PartitionRepo, INITIAL_COMPACTION_LEVEL},
mem::MemCatalog,
};
use iox_object_store::{IoxObjectStore, ParquetFilePath};
use iox_time::{MockProvider, Time, TimeProvider};
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use object_store::{DynObjectStore, ObjectStoreImpl};
use parquet_file::metadata::{IoxMetadata, IoxParquetMetaData};
use parquet_file::{
metadata::{IoxMetadata, IoxParquetMetaData},
ParquetFilePath,
};
use query::{exec::Executor, provider::RecordBatchDeduplicator, util::arrow_sort_key_exprs};
use schema::{
selection::Selection,
sort::{adjust_sort_key_columns, SortKey, SortKeyBuilder},
Schema,
};
use std::sync::Arc;
use std::{ops::Deref, sync::Arc};
use uuid::Uuid;
/// Catalog for tests
@ -527,8 +529,12 @@ impl TestPartition {
compaction_level: INITIAL_COMPACTION_LEVEL,
sort_key: Some(sort_key.clone()),
};
let (parquet_metadata_bin, real_file_size_bytes) =
create_parquet_file(&self.catalog.object_store, &metadata, record_batch).await;
let (parquet_metadata_bin, real_file_size_bytes) = create_parquet_file(
Arc::clone(&self.catalog.object_store),
&metadata,
record_batch,
)
.await;
let parquet_file_params = ParquetFileParams {
sequencer_id: self.sequencer.sequencer.id,
@ -640,18 +646,13 @@ async fn update_catalog_sort_key_if_needed(
/// Create a parquet file and return thrift-encoded and zstd-compressed parquet metadata as well as the file size.
async fn create_parquet_file(
object_store: &Arc<DynObjectStore>,
object_store: Arc<DynObjectStore>,
metadata: &IoxMetadata,
record_batch: RecordBatch,
) -> (Vec<u8>, usize) {
let iox_object_store = Arc::new(IoxObjectStore::existing(
Arc::clone(object_store),
IoxObjectStore::root_path_for(&**object_store, uuid::Uuid::new_v4()),
));
let schema = record_batch.schema();
let data = parquet_file::storage::Storage::new(Arc::clone(&iox_object_store))
let data = parquet_file::storage::Storage::new(Arc::clone(&object_store))
.parquet_bytes(vec![record_batch], schema, metadata)
.await
.unwrap();
@ -665,18 +666,16 @@ async fn create_parquet_file(
let file_size = data.len();
let bytes = Bytes::from(data);
let path = ParquetFilePath::new_new_gen(
let path = ParquetFilePath::new(
metadata.namespace_id,
metadata.table_id,
metadata.sequencer_id,
metadata.partition_id,
metadata.object_store_id,
);
let path = path.object_store_path(object_store.deref());
iox_object_store
.put_parquet_file(&path, bytes)
.await
.unwrap();
object_store.put(&path, bytes).await.unwrap();
(parquet_md, file_size)
}
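
`create_parquet_file` now takes the store by value (`Arc<DynObjectStore>` instead of `&Arc<DynObjectStore>`), so the caller clones the handle explicitly, as in the `Arc::clone(&self.catalog.object_store)` call above. A self-contained sketch of that calling convention, with placeholder types standing in for the IOx ones:

use std::sync::Arc;

// Placeholder for the shared handle (`Arc<DynObjectStore>` in the diff).
struct Store;

// Owning the Arc lets the helper keep or move its reference without
// borrowing from the caller across the whole `await`.
async fn write_file(store: Arc<Store>) {
    let _ = store; // ... build bytes and write them via the store here ...
}

async fn caller(shared: Arc<Store>) {
    // The clone is explicit and cheap: it only bumps the reference count.
    write_file(Arc::clone(&shared)).await;
}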

View File

@ -3,8 +3,6 @@ name = "ioxd_common"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
# Optional feature 'pprof' enables http://localhost:8080/debug/pprof/profile support
[dependencies]
@ -12,12 +10,14 @@ edition = "2021"
clap_blocks = { path = "../clap_blocks" }
data_types = { path = "../data_types" }
dml = { path = "../dml" }
generated_types = { path = "../generated_types" }
metric = { path = "../metric" }
observability_deps = { path = "../observability_deps" }
predicate = { path = "../predicate" }
pprof = { version = "0.8", default-features = false, features = ["flamegraph", "prost-codec"], optional = true }
metric_exporters = { path = "../metric_exporters" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
observability_deps = { path = "../observability_deps" }
pprof = { version = "0.8", default-features = false, features = ["flamegraph", "prost-codec"], optional = true }
predicate = { path = "../predicate" }
service_grpc_testing = { path = "../service_grpc_testing" }
trace = { path = "../trace" }
trace_exporters = { path = "../trace_exporters" }
trace_http = { path = "../trace_http" }
@ -34,17 +34,17 @@ http = "0.2.7"
hyper = "0.14"
log = "0.4"
parking_lot = "0.12"
prost = "0.10"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.81"
serde_urlencoded = "0.7.0"
snafu = "0.7"
tokio = { version = "1.18", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = { version = "0.7.0" }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.0" }
tonic = "0.7"
tonic-health = "0.6.0"
tonic-reflection = "0.4.0"
tower = "0.4"
workspace-hack = { path = "../workspace-hack"}

View File

@ -1,27 +1,19 @@
use std::sync::Arc;
use async_trait::async_trait;
use chrono::Utc;
use hyper::{Body, Method, Request, Response, StatusCode};
use serde::Deserialize;
use snafu::{OptionExt, ResultExt, Snafu};
use data_types::{
names::{org_and_bucket_to_database, OrgBucketMappingError},
non_empty::NonEmptyString,
DatabaseName,
};
use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
use mutable_batch_lp::LinesConverter;
use observability_deps::tracing::debug;
use predicate::delete_predicate::{parse_delete_predicate, parse_http_delete_request};
use crate::{http::utils::parse_body, server_type::ServerType};
use super::{
error::{HttpApiError, HttpApiErrorExt, HttpApiErrorSource},
metrics::LineProtocolMetrics,
};
use crate::{http::utils::parse_body, server_type::ServerType};
use async_trait::async_trait;
use chrono::Utc;
use data_types::{org_and_bucket_to_database, DatabaseName, NonEmptyString, OrgBucketMappingError};
use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
use hyper::{Body, Method, Request, Response, StatusCode};
use mutable_batch_lp::LinesConverter;
use observability_deps::tracing::debug;
use predicate::delete_predicate::{parse_delete_predicate, parse_http_delete_request};
use serde::Deserialize;
use snafu::{OptionExt, ResultExt, Snafu};
use std::sync::Arc;
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Snafu)]
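
Aside from rustfmt-style regrouping, the one substantive change in this import block is that these `data_types` items moved from the `names`/`non_empty` submodules to the crate root, consistent with the `data_types2` → `data_types` consolidation running through this diff. In short:

// Before: nested module paths
//   use data_types::{
//       names::{org_and_bucket_to_database, OrgBucketMappingError},
//       non_empty::NonEmptyString,
//       DatabaseName,
//   };
// After: the same items re-exported at the crate root
use data_types::{org_and_bucket_to_database, DatabaseName, NonEmptyString, OrgBucketMappingError};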

View File

@ -3,6 +3,18 @@ pub mod rpc;
pub mod server_type;
mod service;
// These crates are used by the macros we export; provide a stable
// path to use them from in downstream crates.
pub mod reexport {
pub use generated_types;
pub use service_grpc_testing;
pub use tokio_stream;
pub use tonic;
pub use tonic_health;
pub use tonic_reflection;
pub use trace_http;
}
pub use service::Service;
use crate::server_type::{CommonServerState, ServerType};
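
The `reexport` module added here is what backs the `$crate::reexport::…` paths used by the macros in the next file: downstream `ioxd_*` crates can expand those macros without declaring `tonic`, `tonic-health`, `tonic-reflection`, `tokio-stream`, and friends as their own dependencies (hence the trimmed Cargo.toml files later in this diff). A generic, self-contained sketch of the pattern, using `serde_json` as a stand-in dependency rather than the crates actually re-exported here:

// In the exporting crate (the role ioxd_common plays in this PR):
pub mod reexport {
    // Re-exported so macro expansions can reach the crate through a stable
    // `$crate::` path even when the macro's caller does not depend on it.
    pub use serde_json;
}

#[macro_export]
macro_rules! to_json {
    ($value:expr) => {
        // `$crate` always names the crate that defines the macro, so this
        // resolves in the caller's crate without a direct serde_json dep.
        $crate::reexport::serde_json::to_string(&$value).expect("serialize")
    };
}

// In a downstream crate that depends only on the exporting crate:
//   let s = exporting_crate::to_json!(vec![1, 2, 3]);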

View File

@ -49,7 +49,7 @@ macro_rules! add_service {
} = $builder;
let service = $svc;
let status = tonic_health::ServingStatus::Serving;
let status = $crate::reexport::tonic_health::ServingStatus::Serving;
health_reporter
.set_service_status(service_name(&service), status)
.await;
@ -75,7 +75,7 @@ macro_rules! add_service {
macro_rules! setup_builder {
($input:ident, $server_type:ident) => {{
#[allow(unused_imports)]
use ioxd_common::{add_service, rpc::RpcBuilder, server_type::ServerType};
use $crate::{add_service, rpc::RpcBuilder, server_type::ServerType};
let RpcBuilderInput {
socket,
@ -83,14 +83,17 @@ macro_rules! setup_builder {
shutdown,
} = $input;
let (health_reporter, health_service) = tonic_health::server::health_reporter();
let reflection_service = tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(generated_types::FILE_DESCRIPTOR_SET)
let (health_reporter, health_service) =
$crate::reexport::tonic_health::server::health_reporter();
let reflection_service = $crate::reexport::tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(
$crate::reexport::generated_types::FILE_DESCRIPTOR_SET,
)
.build()
.expect("gRPC reflection data broken");
let builder = tonic::transport::Server::builder();
let builder = builder.layer(trace_http::tower::TraceLayer::new(
let builder = $crate::reexport::tonic::transport::Server::builder();
let builder = builder.layer($crate::reexport::trace_http::tower::TraceLayer::new(
trace_header_parser,
$server_type.metric_registry(),
$server_type.trace_collector(),
@ -106,7 +109,10 @@ macro_rules! setup_builder {
add_service!(builder, health_service);
add_service!(builder, reflection_service);
add_service!(builder, service_grpc_testing::make_server());
add_service!(
builder,
$crate::reexport::service_grpc_testing::make_server()
);
builder
}};
@ -116,7 +122,7 @@ macro_rules! setup_builder {
#[macro_export]
macro_rules! serve_builder {
($builder:ident) => {{
use tokio_stream::wrappers::TcpListenerStream;
use $crate::reexport::tokio_stream::wrappers::TcpListenerStream;
use $crate::rpc::RpcBuilder;
let RpcBuilder {
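
For orientation, the macros above are invoked from the `ioxd_*` server crates roughly as below. This is only a call-shape sketch inferred from the macro definitions shown here, not code from the diff; `builder_input` is an `RpcBuilderInput` value (the macro destructures its `socket`, `trace_header_parser`, and `shutdown` fields), `server_type` is a value implementing `ServerType` (typically an `Arc<…>` of the concrete server type), and `my_grpc_service` is an illustrative tonic service:

// Wire up health, reflection, tracing, and the test service, then add
// the server-specific gRPC service and start serving on the socket.
let builder = setup_builder!(builder_input, server_type);
add_service!(builder, my_grpc_service);
serve_builder!(builder);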

View File

@ -9,26 +9,17 @@ edition = "2021"
# Workspace dependencies, in alphabetical order
clap_blocks = { path = "../clap_blocks" }
compactor = { path = "../compactor" }
data_types2 = { path = "../data_types2" }
generated_types = { path = "../generated_types" }
data_types = { path = "../data_types" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
query = { path = "../query" }
object_store = { path = "../object_store" }
service_grpc_testing = { path = "../service_grpc_testing" }
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
trace_http = { path = "../trace_http" }
# Crates.io dependencies, in alphabetical order
async-trait = "0.1"
hyper = "0.14"
thiserror = "1.0.31"
tokio = { version = "1.18", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.1" }
tonic = "0.7"
tonic-health = "0.6.0"
tonic-reflection = "0.4.0"
workspace-hack = { path = "../workspace-hack"}

View File

@ -1,23 +1,13 @@
use std::{
fmt::{Debug, Display},
sync::Arc,
};
use async_trait::async_trait;
use clap_blocks::compactor::CompactorConfig;
use compactor::{
handler::{CompactorHandler, CompactorHandlerImpl},
server::CompactorServer,
};
use data_types2::KafkaPartition;
use data_types::KafkaPartition;
use hyper::{Body, Request, Response};
use iox_catalog::interface::Catalog;
use iox_time::TimeProvider;
use metric::Registry;
use object_store::DynObjectStore;
use query::exec::Executor;
use trace::TraceCollector;
use clap_blocks::compactor::CompactorConfig;
use ioxd_common::{
add_service,
http::error::{HttpApiError, HttpApiErrorCode, HttpApiErrorSource},
@ -26,7 +16,15 @@ use ioxd_common::{
server_type::{CommonServerState, RpcError, ServerType},
setup_builder,
};
use metric::Registry;
use object_store::DynObjectStore;
use query::exec::Executor;
use std::{
fmt::{Debug, Display},
sync::Arc,
};
use thiserror::Error;
use trace::TraceCollector;
#[derive(Debug, Error)]
pub enum Error {

View File

@ -3,33 +3,21 @@ name = "ioxd_ingester"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Workspace dependencies, in alphabetical order
clap_blocks = { path = "../clap_blocks" }
data_types2 = { path = "../data_types2" }
generated_types = { path = "../generated_types" }
data_types = { path = "../data_types" }
ingester = { path = "../ingester" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
object_store = { path = "../object_store" }
query = { path = "../query" }
service_grpc_testing = { path = "../service_grpc_testing" }
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
trace_http = { path = "../trace_http" }
write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
async-trait = "0.1"
hyper = "0.14"
thiserror = "1.0.31"
tokio = { version = "1.18", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.1" }
tonic = "0.7"
tonic-health = "0.6.0"
tonic-reflection = "0.4.0"
workspace-hack = { path = "../workspace-hack"}

View File

@ -1,26 +1,13 @@
use std::{
collections::BTreeMap,
fmt::{Debug, Display},
sync::Arc,
time::Duration,
};
use async_trait::async_trait;
use clap_blocks::{ingester::IngesterConfig, write_buffer::WriteBufferConfig};
use data_types2::KafkaPartition;
use data_types::KafkaPartition;
use hyper::{Body, Request, Response};
use ingester::{
handler::IngestHandlerImpl,
handler::{IngestHandler, IngestHandlerImpl},
lifecycle::LifecycleConfig,
server::{grpc::GrpcDelegate, http::HttpDelegate, IngesterServer},
};
use iox_catalog::interface::Catalog;
use metric::Registry;
use object_store::DynObjectStore;
use query::exec::Executor;
use trace::TraceCollector;
use ingester::handler::IngestHandler;
use ioxd_common::{
add_service,
http::error::{HttpApiError, HttpApiErrorCode, HttpApiErrorSource},
@ -29,7 +16,17 @@ use ioxd_common::{
server_type::{CommonServerState, RpcError, ServerType},
setup_builder,
};
use metric::Registry;
use object_store::DynObjectStore;
use query::exec::Executor;
use std::{
collections::BTreeMap,
fmt::{Debug, Display},
sync::Arc,
time::Duration,
};
use thiserror::Error;
use trace::TraceCollector;
#[derive(Debug, Error)]
pub enum Error {

View File

@ -7,7 +7,7 @@ edition = "2021"
[dependencies]
# Workspace dependencies, in alphabetical order
data_types2 = { path = "../data_types2" }
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
@ -15,24 +15,17 @@ metric = { path = "../metric" }
object_store = { path = "../object_store" }
querier = { path = "../querier" }
query = { path = "../query" }
service_common = { path = "../service_common" }
service_grpc_flight = { path = "../service_grpc_flight" }
service_grpc_influxrpc = { path = "../service_grpc_influxrpc" }
service_grpc_testing = { path = "../service_grpc_testing" }
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
trace_http = { path = "../trace_http" }
# Crates.io dependencies, in alphabetical order
arrow-flight = "13"
async-trait = "0.1"
hyper = "0.14"
tokio = { version = "1.18", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.1" }
tonic = "0.7"
tonic-health = "0.6.0"
tonic-reflection = "0.4.0"
workspace-hack = { path = "../workspace-hack"}

Some files were not shown because too many files have changed in this diff.