Merge branch 'main' into crepererum/dml_shard_new_types

pull/24376/head
kodiakhq[bot] 2021-11-29 10:33:49 +00:00 committed by GitHub
commit 068b44334d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 142 additions and 83 deletions

44
Cargo.lock generated
View File

@ -453,9 +453,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.71"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd"
checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee"
dependencies = [
"jobserver",
]
@ -641,9 +641,9 @@ dependencies = [
[[package]]
name = "crc32fast"
version = "1.2.1"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a"
checksum = "3825b1e8580894917dc4468cb634a1b4e9745fddc854edad72d9c04644c0319f"
dependencies = [
"cfg-if",
]
@ -1111,9 +1111,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.17"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888"
checksum = "7fc8cd39e3dbf865f7340dce6a2d401d24fd37c6fe6c4f0ee0de8bfca2252d27"
dependencies = [
"futures-core",
"futures-sink",
@ -1121,9 +1121,9 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.17"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d"
checksum = "629316e42fe7c2a0b9a65b47d159ceaa5453ab14e8f0a3c5eedbb8cd55b4a445"
[[package]]
name = "futures-executor"
@ -1138,9 +1138,9 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.17"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377"
checksum = "e481354db6b5c353246ccf6a728b0c5511d752c08da7260546fc0933869daa11"
[[package]]
name = "futures-macro"
@ -1157,15 +1157,15 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.17"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11"
checksum = "996c6442437b62d21a32cd9906f9c41e7dc1e19a9579843fad948696769305af"
[[package]]
name = "futures-task"
version = "0.3.17"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99"
checksum = "dabf1872aaab32c886832f2276d2f5399887e2bd613698a02359e4ea83f8de12"
[[package]]
name = "futures-test"
@ -3053,9 +3053,9 @@ dependencies = [
[[package]]
name = "predicates"
version = "2.0.3"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6ce811d0b2e103743eec01db1c50612221f173084ce2f7941053e94b6bb474"
checksum = "95e5a7689e456ab905c22c2b48225bb921aba7c8dfa58440d68ba13f6222a715"
dependencies = [
"difflib",
"float-cmp",
@ -3372,9 +3372,9 @@ dependencies = [
[[package]]
name = "rdkafka"
version = "0.27.0"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a22ce72c78b471baba6c75bda6e03511ef2ee1bae3729902d2bb38951db1048"
checksum = "1de127f294f2dba488ed46760b129d5ecbeabbd337ccbf3739cb29d50db2161c"
dependencies = [
"futures",
"libc",
@ -3389,9 +3389,9 @@ dependencies = [
[[package]]
name = "rdkafka-sys"
version = "4.1.0+1.7.0"
version = "4.2.0+1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e212ccf56a2d4e0b9f874a1cad295495769e0cdbabe60e02b4f654d369a2d6a1"
checksum = "9e542c6863b04ce0fa0c5719bc6b7b348cf8dd21af1bb03c9db5f9805b2a6473"
dependencies = [
"libc",
"libz-sys",
@ -3838,9 +3838,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.71"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19"
checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527"
dependencies = [
"indexmap",
"itoa",

View File

@ -39,4 +39,7 @@ service Storage {
rpc MeasurementTagValues(MeasurementTagValuesRequest) returns (stream StringValuesResponse);
rpc MeasurementFields(MeasurementFieldsRequest) returns (stream MeasurementFieldsResponse);
// Offsets gets the partition offsets of the node
rpc Offsets (google.protobuf.Empty) returns (OffsetsResponse);
}

View File

@ -58,6 +58,21 @@ impl TryFrom<management::LifecycleRules> for LifecycleRules {
type Error = FieldViolation;
fn try_from(proto: management::LifecycleRules) -> Result<Self, Self::Error> {
let persist_age_threshold_seconds = NonZeroU32::new(proto.persist_age_threshold_seconds)
.unwrap_or_else(|| NonZeroU32::new(DEFAULT_PERSIST_AGE_THRESHOLD_SECONDS).unwrap());
let late_arrive_window_seconds = NonZeroU32::new(proto.late_arrive_window_seconds)
.unwrap_or_else(|| NonZeroU32::new(DEFAULT_LATE_ARRIVE_WINDOW_SECONDS).unwrap());
if persist_age_threshold_seconds < late_arrive_window_seconds {
return Err(FieldViolation {
field: "persist_age_threshold_seconds".to_string(),
description:
"persist_age_threshold_seconds must not be less than late_arrive_window_seconds"
.to_string(),
});
}
Ok(Self {
buffer_size_soft: (proto.buffer_size_soft as usize).try_into().ok(),
buffer_size_hard: (proto.buffer_size_hard as usize).try_into().ok(),
@ -79,14 +94,12 @@ impl TryFrom<management::LifecycleRules> for LifecycleRules {
Some(d) => d.try_into().scope("catalog_transaction_prune_age")?,
None => DEFAULT_CATALOG_TRANSACTION_PRUNE_AGE,
},
late_arrive_window_seconds: NonZeroU32::new(proto.late_arrive_window_seconds)
.unwrap_or_else(|| NonZeroU32::new(DEFAULT_LATE_ARRIVE_WINDOW_SECONDS).unwrap()),
late_arrive_window_seconds,
persist_row_threshold: NonZeroUsize::new(proto.persist_row_threshold as usize)
.unwrap_or_else(|| {
NonZeroUsize::new(DEFAULT_PERSIST_ROW_THRESHOLD as usize).unwrap()
}),
persist_age_threshold_seconds: NonZeroU32::new(proto.persist_age_threshold_seconds)
.unwrap_or_else(|| NonZeroU32::new(DEFAULT_PERSIST_AGE_THRESHOLD_SECONDS).unwrap()),
persist_age_threshold_seconds,
mub_row_threshold: NonZeroUsize::new(proto.mub_row_threshold as usize)
.unwrap_or_else(|| NonZeroUsize::new(DEFAULT_MUB_ROW_THRESHOLD).unwrap()),
parquet_cache_limit: NonZeroU64::new(proto.parquet_cache_limit),
@ -116,7 +129,7 @@ mod tests {
#[test]
fn lifecycle_rules() {
let protobuf = management::LifecycleRules {
let mut protobuf = management::LifecycleRules {
buffer_size_soft: 353,
buffer_size_hard: 232,
persist: true,
@ -132,7 +145,7 @@ mod tests {
}),
late_arrive_window_seconds: 23,
persist_row_threshold: 57,
persist_age_threshold_seconds: 23,
persist_age_threshold_seconds: 60,
mub_row_threshold: 3454,
parquet_cache_limit: 10,
};
@ -181,6 +194,12 @@ mod tests {
protobuf.parquet_cache_limit
);
assert_eq!(back.parquet_cache_limit, protobuf.parquet_cache_limit);
protobuf.late_arrive_window_seconds = 20;
protobuf.persist_age_threshold_seconds = 4;
let e = LifecycleRules::try_from(protobuf).unwrap_err().to_string();
assert_eq!(e, "Violation for field \"persist_age_threshold_seconds\": persist_age_threshold_seconds must not be less than late_arrive_window_seconds");
}
#[test]

View File

@ -9,7 +9,7 @@ bytes = "1.0"
futures = { version = "0.3", default-features = false }
reqwest = { version = "0.11", features = ["stream", "json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.71"
serde_json = "1.0.72"
snafu = "0.6.6"
url = "2.1.1"
workspace-hack = { path = "../workspace-hack"}

View File

@ -70,7 +70,7 @@ pprof = { version = "^0.5", default-features = false, features = ["flamegraph",
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.71"
serde_json = "1.0.72"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.25"
@ -105,7 +105,7 @@ write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
assert_cmd = "2.0.2"
hex = "0.4.2"
predicates = "2.0.3"
predicates = "2.1.0"
rand = "0.8.3"
reqwest = "0.11"
tempfile = "3.1.0"

View File

@ -11,12 +11,12 @@ use tonic::Status;
use data_types::{error::ErrorLogger, names::org_and_bucket_to_database, DatabaseName};
use generated_types::{
google::protobuf::Empty, storage_server::Storage, CapabilitiesResponse, Capability,
Int64ValuesResponse, MeasurementFieldsRequest, MeasurementFieldsResponse,
MeasurementNamesRequest, MeasurementTagKeysRequest, MeasurementTagValuesRequest, Predicate,
ReadFilterRequest, ReadGroupRequest, ReadResponse, ReadSeriesCardinalityRequest,
ReadWindowAggregateRequest, StringValuesResponse, TagKeyMetaNames, TagKeysRequest,
TagValuesRequest, TimestampRange,
google::protobuf::Empty, offsets_response::PartitionOffsetResponse, storage_server::Storage,
CapabilitiesResponse, Capability, Int64ValuesResponse, MeasurementFieldsRequest,
MeasurementFieldsResponse, MeasurementNamesRequest, MeasurementTagKeysRequest,
MeasurementTagValuesRequest, OffsetsResponse, Predicate, ReadFilterRequest, ReadGroupRequest,
ReadResponse, ReadSeriesCardinalityRequest, ReadWindowAggregateRequest, StringValuesResponse,
TagKeyMetaNames, TagKeysRequest, TagValuesRequest, TimestampRange,
};
use observability_deps::tracing::{error, info, trace};
use predicate::predicate::PredicateBuilder;
@ -674,6 +674,18 @@ where
Ok(tonic::Response::new(ReceiverStream::new(rx)))
}
/// Report Kafka-style partition offsets for this storage node.
///
/// IOx presents itself to the rest of IDPE as a single storage node
/// holding exactly one partition, so a constant response suffices.
async fn offsets(
    &self,
    _req: tonic::Request<Empty>,
) -> Result<tonic::Response<OffsetsResponse>, Status> {
    // Report offset 1 rather than 0, in case query nodes interpret
    // offset 0 as a special/uninitialized value.
    let partitions = vec![PartitionOffsetResponse { id: 0, offset: 1 }];
    Ok(tonic::Response::new(OffsetsResponse { partitions }))
}
}
trait SetRange {

View File

@ -7,12 +7,14 @@ use generated_types::{
google::protobuf::Empty,
measurement_fields_response::FieldType,
node::{Comparison, Type as NodeType, Value},
offsets_response::PartitionOffsetResponse,
read_group_request::Group,
read_response::{frame::Data, *},
storage_client::StorageClient,
Aggregate, MeasurementFieldsRequest, MeasurementNamesRequest, MeasurementTagKeysRequest,
MeasurementTagValuesRequest, Node, Predicate, ReadFilterRequest, ReadGroupRequest,
ReadWindowAggregateRequest, Tag, TagKeysRequest, TagValuesRequest, TimestampRange,
MeasurementTagValuesRequest, Node, OffsetsResponse, Predicate, ReadFilterRequest,
ReadGroupRequest, ReadWindowAggregateRequest, Tag, TagKeysRequest, TagValuesRequest,
TimestampRange,
};
use influxdb_iox_client::connection::Connection;
use influxdb_storage_client::tag_key_bytes_to_strings;
@ -30,7 +32,6 @@ pub async fn test() {
scenario.create_database(&mut management_client).await;
scenario.load_data(&influxdb2).await;
capabilities_endpoint(&mut storage_client).await;
read_filter_endpoint(&mut storage_client, &scenario).await;
tag_keys_endpoint(&mut storage_client, &scenario).await;
tag_values_endpoint(&mut storage_client, &scenario).await;
@ -41,9 +42,16 @@ pub async fn test() {
}
/// Validate that capabilities storage endpoint is hooked up
async fn capabilities_endpoint(storage_client: &mut StorageClient<Connection>) {
let capabilities_response = storage_client.capabilities(Empty {}).await.unwrap();
let capabilities_response = capabilities_response.into_inner();
#[tokio::test]
async fn capabilities_endpoint() {
let server_fixture = ServerFixture::create_shared(ServerType::Database).await;
let mut storage_client = StorageClient::new(server_fixture.grpc_channel());
let capabilities_response = storage_client
.capabilities(Empty {})
.await
.unwrap()
.into_inner();
assert_eq!(
capabilities_response.caps.len(),
2,
@ -52,6 +60,19 @@ async fn capabilities_endpoint(storage_client: &mut StorageClient<Connection>) {
);
}
/// Validate that storage offsets endpoint is hooked up (required by internal Influx cloud)
#[tokio::test]
async fn offsets_endpoint() {
    // Spin up (or reuse) a shared database server and connect a storage client.
    let fixture = ServerFixture::create_shared(ServerType::Database).await;
    let mut client = StorageClient::new(fixture.grpc_channel());

    let actual = client.offsets(Empty {}).await.unwrap().into_inner();

    // The server advertises exactly one partition, pinned at offset 1.
    let expected = OffsetsResponse {
        partitions: vec![PartitionOffsetResponse { id: 0, offset: 1 }],
    };
    assert_eq!(actual, expected);
}
async fn read_filter_endpoint(storage_client: &mut StorageClient<Connection>, scenario: &Scenario) {
let read_source = scenario.read_source();
let range = scenario.timestamp_range();

View File

@ -27,7 +27,7 @@ mutable_batch_pb = { path = "../mutable_batch_pb", optional = true }
prost = "0.8"
rand = "0.8.3"
serde = "1.0.128"
serde_json = { version = "1.0.71", optional = true }
serde_json = { version = "1.0.72", optional = true }
thiserror = "1.0.30"
tonic = { version = "0.5.0" }
uuid = { version = "0.8", features = ["v4"] }

View File

@ -19,7 +19,7 @@ influxdb_iox_client = { path = "../influxdb_iox_client" }
itertools = "0.10.0"
rand = { version = "0.8.3", features = ["small_rng"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.71"
serde_json = "1.0.72"
snafu = "0.6.8"
tokio = { version = "1.13", features = ["macros", "rt-multi-thread"] }
toml = "0.5.6"

View File

@ -13,7 +13,7 @@ schema = { path = "../schema" }
observability_deps = { path = "../observability_deps" }
ordered-float = "2"
regex = "1"
serde_json = "1.0.71"
serde_json = "1.0.72"
snafu = "0.6.9"
sqlparser = "0.12.0"
workspace-hack = { path = "../workspace-hack"}

View File

@ -165,7 +165,6 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync {
&self,
predicate: &Predicate,
selection: Selection<'_>,
delete_predicates: &[Arc<Predicate>],
) -> Result<SendableRecordBatchStream, Self::Error>;
/// Returns true if data of this chunk is sorted

View File

@ -118,22 +118,14 @@ impl<C: QueryChunk + 'static> ExecutionPlan for IOxReadFilterNode<C> {
let selection_cols = restrict_selection(selection_cols, &chunk_table_schema);
let selection = Selection::Some(&selection_cols);
let del_preds = chunk.delete_predicates();
let del_preds: Vec<Arc<Predicate>> = del_preds
.iter()
.map(|pred| Arc::new(pred.as_ref().clone().into()))
.collect();
let stream = chunk
.read_filter(&self.predicate, selection, &del_preds)
.map_err(|e| {
DataFusionError::Execution(format!(
"Error creating scan for table {} chunk {}: {}",
self.table_name,
chunk.id(),
e
))
})?;
let stream = chunk.read_filter(&self.predicate, selection).map_err(|e| {
DataFusionError::Execution(format!(
"Error creating scan for table {} chunk {}: {}",
self.table_name,
chunk.id(),
e
))
})?;
// all CPU time is now done, pass in baseline metrics to adapter
timer.done();

View File

@ -834,7 +834,6 @@ impl QueryChunk for TestChunk {
&self,
predicate: &Predicate,
_selection: Selection<'_>,
_delete_predicates: &[Arc<Predicate>],
) -> Result<SendableRecordBatchStream, Self::Error> {
self.check_error()?;
@ -938,9 +937,8 @@ pub async fn raw_data(chunks: &[Arc<TestChunk>]) -> Vec<RecordBatch> {
for c in chunks {
let pred = Predicate::default();
let selection = Selection::All;
let delete_predicates: Vec<Arc<Predicate>> = vec![];
let mut stream = c
.read_filter(&pred, selection, &delete_predicates)
.read_filter(&pred, selection)
.expect("Error in read_filter");
while let Some(b) = stream.next().await {
let b = b.expect("Error in stream");

17
scripts/parquet-meta-iox Executable file
View File

@ -0,0 +1,17 @@
#!/bin/bash
# Dump the IOx-specific metadata embedded in a parquet file's key/value
# metadata (the base64-encoded IOX:metadata entry), decoded into readable
# prototxt form via ./scripts/prototxt.
#
# Usage: ./scripts/parquet-meta-iox <file.parquet>

set -eu -o pipefail

# Require the `parquet` CLI tool. Bail out early with install hints rather
# than falling through and failing later with a confusing
# "parquet: command not found" from the middle of the pipeline.
# (Fix: the original printed the hints but forgot to exit.)
if ! command -v parquet &> /dev/null; then
    echo "parquet CLI tool required"
    echo "    brew install parquet-cli"
    echo
    echo "    (yes there is also linuxbrew; and probably you can find it somewhere else)"
    exit 1
fi

# Extract the IOX:metadata key/value pair, strip the key, base64-decode the
# value, and decode the resulting protobuf bytes as IoxMetadata.
parquet meta "$1" \
    | grep IOX:metadata: \
    | awk '{print $2}' \
    | base64 -d \
    | ./scripts/prototxt \
        decode influxdata.iox.preserved_catalog.v1.IoxMetadata

View File

@ -11,7 +11,7 @@ async-trait = "0.1"
bytes = "1.0"
chrono = "0.4"
cache_loader_async = { version = "0.1.2", features = ["ttl-cache"] }
crc32fast = "1.2.0"
crc32fast = "1.2.2"
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }
datafusion_util = { path = "../datafusion_util" }

View File

@ -1921,7 +1921,7 @@ mod tests {
async fn collect_read_filter(chunk: &DbChunk) -> Vec<RecordBatch> {
chunk
.read_filter(&Default::default(), Selection::All, &[])
.read_filter(&Default::default(), Selection::All)
.unwrap()
.collect::<Vec<_>>()
.await

View File

@ -224,7 +224,7 @@ impl DbChunk {
/// predicate depends on the schema of the chunk. Callers should validate
/// predicates against chunks they are to be executed against using
/// `read_buffer::Chunk::validate_predicate`
pub fn to_rub_negated_predicates(
fn to_rub_negated_predicates(
delete_predicates: &[Arc<Predicate>],
) -> Result<Vec<read_buffer::Predicate>> {
let mut rub_preds: Vec<read_buffer::Predicate> = vec![];
@ -346,7 +346,6 @@ impl QueryChunk for DbChunk {
&self,
predicate: &Predicate,
selection: Selection<'_>,
delete_predicates: &[Arc<Predicate>],
) -> Result<SendableRecordBatchStream, Self::Error> {
// Predicate is not required to be applied for correctness. We only pushed it down
// when possible for performance gain
@ -354,11 +353,16 @@ impl QueryChunk for DbChunk {
debug!(?predicate, "Input Predicate to read_filter");
self.access_recorder.record_access();
debug!(?delete_predicates, "Input Delete Predicates to read_filter");
let delete_predicates: Vec<_> = self
.meta
.delete_predicates
.iter()
.map(|pred| Arc::new(pred.as_ref().clone().into()))
.collect();
// merge the negated delete predicates into the select predicate
let mut pred_with_deleted_exprs = predicate.clone();
pred_with_deleted_exprs.merge_delete_predicates(delete_predicates);
pred_with_deleted_exprs.merge_delete_predicates(&delete_predicates);
debug!(
?pred_with_deleted_exprs,
"Input Predicate plus deleted ranges and deleted predicates"
@ -381,7 +385,7 @@ impl QueryChunk for DbChunk {
debug!(?rb_predicate, "Predicate pushed down to RUB");
// combine all delete expressions to RUB's negated ones
let negated_delete_exprs = Self::to_rub_negated_predicates(delete_predicates)?
let negated_delete_exprs = Self::to_rub_negated_predicates(&delete_predicates)?
.into_iter()
// Any delete predicates unsupported by the Read Buffer will be elided.
.filter_map(|p| chunk.validate_predicate(p).ok())
@ -581,7 +585,7 @@ mod tests {
let t1 = time.inc(Duration::from_secs(1));
snapshot
.read_filter(&Default::default(), Selection::All, &[])
.read_filter(&Default::default(), Selection::All)
.unwrap();
let m3 = chunk.access_recorder().get_metrics();

View File

@ -24,8 +24,7 @@ use persistence_windows::{
checkpoint::{DatabaseCheckpoint, PartitionCheckpoint, PersistCheckpointBuilder},
persistence_windows::FlushHandle,
};
use predicate::predicate::Predicate;
use query::{QueryChunk, QueryChunkMeta};
use query::QueryChunk;
use schema::selection::Selection;
use snafu::ResultExt;
use std::{future::Future, sync::Arc};
@ -95,13 +94,8 @@ pub(super) fn write_chunk_to_object_store(
collect_checkpoints(flush_handle.checkpoint(), &db.catalog);
// Get RecordBatchStream of data from the read buffer chunk
let del_preds: Vec<Arc<Predicate>> = db_chunk
.delete_predicates()
.iter()
.map(|pred| Arc::new(pred.as_ref().clone().into()))
.collect();
let stream = db_chunk
.read_filter(&Default::default(), Selection::All, &del_preds)
.read_filter(&Default::default(), Selection::All)
.expect("read filter should be infallible");
// check that the upcoming state change will very likely succeed

View File

@ -19,7 +19,7 @@ observability_deps = { path = "../observability_deps" }
parking_lot = "0.11.2"
pin-project = "1.0"
prost = "0.8"
rdkafka = "0.27.0"
rdkafka = "0.28.0"
time = { path = "../time" }
tokio = { version = "1.13", features = ["macros", "fs"] }
tokio-util = "0.6.9"