chore: update core deps (#25532)

* chore: update core deps

- arrow/parquet deps are patched (as in core)
- three specific code changes to cope with changes in core crates
  - TransitionPartitionId, use `from_parts` instead of `new`
  - arrow buffers can take &[u8] directly without `to_vec()`/`vec!`
    (used only in tests)
  - `schema` and `influxdb_line_protocol` crates need `v3` feature enabled

* chore: update deny.toml

* chore: formatting and deny toml changes

The Unicode-3.0 license is added to the allowed licenses list; without it
we end up with 19 errors (`zerovec`, `zerovec-derive`, etc.)

* chore: address PR feedback

- move enabling v3 feature to root Cargo.toml
- added a link to the upstream PR that moves datafusion-common away from `instant` (the reason RUSTSEC-2024-0384 is ignored)
pull/25541/head
praveen-influx 2024-11-12 16:07:31 +00:00 committed by GitHub
parent 35e29d1408
commit 814eb31309
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 901 additions and 699 deletions

1406
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -37,28 +37,28 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
anyhow = "1.0"
arrow = { version = "52.1.0", features = ["prettyprint", "chrono-tz"] }
arrow-array = "52.2.0"
arrow-buffer = "52.2.0"
arrow-csv = "52.2.0"
arrow-flight = { version = "52.2.0", features = ["flight-sql-experimental"] }
arrow-json = "52.2.0"
arrow-schema = "52.2.0"
arrow = { version = "53.0.0", features = ["prettyprint", "chrono-tz"] }
arrow-array = "53.0.0"
arrow-buffer = "53.0.0"
arrow-csv = "53.0.0"
arrow-flight = { version = "53.0.0", features = ["flight-sql-experimental"] }
arrow-json = "53.0.0"
arrow-schema = "53.0.0"
assert_cmd = "2.0.14"
async-trait = "0.1"
backtrace = "0.3"
base64 = "0.22.0"
bimap = "0.6.3"
byteorder = "1.3.4"
bytes = "1.5"
bytes = "1.8"
chrono = "0.4"
clap = { version = "4", features = ["derive", "env", "string"] }
clru = "0.6.2"
crc32fast = "1.2.0"
crossbeam-channel = "0.5.11"
csv = "1.3.0"
datafusion = { git = "https://github.com/influxdata/arrow-datafusion.git", rev = "5de0c3577fd30dcf9213f428222a29efae789807" }
datafusion-proto = { git = "https://github.com/influxdata/arrow-datafusion.git", rev = "5de0c3577fd30dcf9213f428222a29efae789807" }
datafusion = { git = "https://github.com/influxdata/arrow-datafusion.git", rev = "c27d5f2356a21ee6224c149ee971c89c2cc13e18" }
datafusion-proto = { git = "https://github.com/influxdata/arrow-datafusion.git", rev = "c27d5f2356a21ee6224c149ee971c89c2cc13e18" }
dashmap = "6.1.0"
dotenvy = "0.15.7"
flate2 = "1.0.27"
@ -76,9 +76,9 @@ mime = "0.3.17"
mockito = { version = "1.4.0", default-features = false }
mockall = { version = "0.13.0" }
num_cpus = "1.16.0"
object_store = "0.10.2"
object_store = "0.11.1"
parking_lot = "0.12.1"
parquet = { version = "52.2.0", features = ["object_store"] }
parquet = { version = "53.0.0", features = ["object_store"] }
pbjson = "0.6.0"
pbjson-build = "0.6.2"
pbjson-types = "0.6.0"
@ -117,36 +117,36 @@ uuid = { version = "1", features = ["v4"] }
num = { version = "0.4.3" }
# Core.git crates we depend on
arrow_util = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7"}
authz = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7", features = ["http"] }
clap_blocks = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
data_types = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
datafusion_util = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
influxdb-line-protocol = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7", features = ["v3"] }
influxdb_influxql_parser = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
influxdb_iox_client = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_catalog = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_http = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_query = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_query_params = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_query_influxql = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_system_tables = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
iox_time = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
metric = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
metric_exporters = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
observability_deps = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
panic_logging = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
parquet_file = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
schema = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7", features = ["v3"] }
service_common = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
service_grpc_flight = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
test_helpers = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
tokio_metrics_bridge = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
trace = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
trace_exporters = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
trace_http = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
tracker = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7" }
trogging = { git = "https://github.com/influxdata/influxdb3_core", rev = "1eaa4ed5ea147bc24db98d9686e457c124dfd5b7", default-features = true, features = ["clap"] }
arrow_util = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
authz = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
clap_blocks = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
data_types = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
datafusion_util = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
influxdb-line-protocol = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696", features = ["v3"] }
influxdb_influxql_parser = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
influxdb_iox_client = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_catalog = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_http = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_query = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_query_params = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_query_influxql = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_system_tables = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
iox_time = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
metric = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
metric_exporters = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
observability_deps = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
panic_logging = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
parquet_file = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
schema = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696", features = ["v3"] }
service_common = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
service_grpc_flight = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
test_helpers = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
tokio_metrics_bridge = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
trace = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
trace_exporters = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
trace_http = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
tracker = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
trogging = { git = "https://github.com/influxdata/influxdb3_core", rev = "6fcbb004232738d55655f32f4ad2385523d10696" }
[workspace.lints.rust]
missing_copy_implementations = "deny"
@ -212,3 +212,23 @@ opt-level = 3
# arrow-data = { path = "../arrow-rs/arrow-data" }
# arrow-buffer = { path = "../arrow-rs/arrow-buffer" }
# arrow-ipc = { path = "../arrow-rs/arrow-ipc" }
## NB: This is taken from Iox
## Use patch of arrow-rs with an older version of tonic
## until we have upgraded hyper: https://github.com/influxdata/influxdb_iox/issues/9340
## see https://github.com/influxdata/arrow-rs/pull/3
[patch.crates-io]
arrow = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-array = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-buffer = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-cast = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-csv = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-data = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-ipc = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-json = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-schema = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-select = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-string = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-ord = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
arrow-flight = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }
parquet = { git = "https://github.com/influxdata/arrow-rs.git", rev = "e38787d2177f2ebfa481bfac62d208eef8ea82fb" }

View File

@ -4,10 +4,12 @@
[advisories]
yanked = "deny"
ignore = [
"RUSTSEC-2024-0363",
# dependent on arrow-* upgrading dependencies on lexical-core
# see https://github.com/apache/arrow-rs/pull/6401
"RUSTSEC-2023-0086",
# dependent on datafusion-common moving away from instant
# https://github.com/apache/datafusion/pull/13355
"RUSTSEC-2024-0384",
]
git-fetch-with-cli = true
@ -20,6 +22,7 @@ allow = [
"CC0-1.0",
"ISC",
"MIT",
"Unicode-3.0",
"Zlib",
]

View File

@ -9,7 +9,7 @@ license.workspace = true
# Core Crates
influxdb-line-protocol.workspace = true
observability_deps.workspace = true
schema.workspace = true
schema = { workspace = true }
# Local deps
influxdb3_id = { path = "../influxdb3_id" }

View File

@ -345,16 +345,16 @@ mod tests {
assert_eq!(
batches[0]["host"].to_data().child_data()[0].buffers()[1],
Buffer::from([b'a'].to_vec())
Buffer::from([b'a'])
);
assert_eq!(
batches[0]["time"].to_data().buffers(),
&[Buffer::from(vec![123, 0, 0, 0, 0, 0, 0, 0])]
&[Buffer::from([123, 0, 0, 0, 0, 0, 0, 0])]
);
assert_eq!(
batches[0]["val"].to_data().buffers(),
&[Buffer::from(1_u64.to_le_bytes().to_vec())]
&[Buffer::from(1_u64.to_le_bytes())]
);
shutdown.cancel();

View File

@ -29,7 +29,7 @@ use iox_query::query_log::QueryLog;
use iox_query::query_log::QueryText;
use iox_query::query_log::StateReceived;
use iox_query::query_log::{QueryCompletedToken, QueryLogEntries};
use iox_query::{Extension, QueryDatabase};
use iox_query::QueryDatabase;
use iox_query::{QueryChunk, QueryNamespace};
use iox_query_influxql::frontend::planner::InfluxQLQueryPlanner;
use iox_query_params::StatementParams;
@ -405,24 +405,6 @@ impl Database {
#[async_trait]
impl QueryNamespace for Database {
async fn chunks(
&self,
table_name: &str,
filters: &[Expr],
projection: Option<&Vec<usize>>,
ctx: IOxSessionContext,
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError> {
let _span_recorder = SpanRecorder::new(ctx.child_span("QueryDatabase::chunks"));
debug!(%table_name, ?filters, "Database as QueryNamespace::chunks");
let Some(table) = self.query_table(table_name).await else {
debug!(%table_name, "No entry for table");
return Ok(vec![]);
};
table.chunks(&ctx.inner().state(), projection, filters, None)
}
fn retention_time_ns(&self) -> Option<i64> {
None
}
@ -474,7 +456,7 @@ impl QueryNamespace for Database {
fn new_extended_query_context(
&self,
_extension: Arc<dyn Extension>,
_extension: std::option::Option<std::sync::Arc<(dyn iox_query::Extension + 'static)>>,
_span_ctx: Option<SpanContext>,
_query_config: Option<&QueryConfig>,
) -> IOxSessionContext {
@ -750,10 +732,10 @@ mod tests {
"+------------+------------+-----------+----------+----------+",
"| table_name | size_bytes | row_count | min_time | max_time |",
"+------------+------------+-----------+----------+----------+",
"| cpu | 2142 | 2 | 0 | 10 |",
"| cpu | 2142 | 2 | 20 | 30 |",
"| cpu | 2142 | 2 | 40 | 50 |",
"| cpu | 2142 | 2 | 60 | 70 |",
"| cpu | 1940 | 2 | 0 | 10 |",
"| cpu | 1940 | 2 | 20 | 30 |",
"| cpu | 1940 | 2 | 40 | 50 |",
"| cpu | 1940 | 2 | 60 | 70 |",
"+------------+------------+-----------+----------+----------+",
],
},
@ -763,10 +745,10 @@ mod tests {
"+------------+------------+-----------+----------+----------+",
"| table_name | size_bytes | row_count | min_time | max_time |",
"+------------+------------+-----------+----------+----------+",
"| mem | 2142 | 2 | 0 | 10 |",
"| mem | 2142 | 2 | 20 | 30 |",
"| mem | 2142 | 2 | 40 | 50 |",
"| mem | 2142 | 2 | 60 | 70 |",
"| mem | 1940 | 2 | 0 | 10 |",
"| mem | 1940 | 2 | 20 | 30 |",
"| mem | 1940 | 2 | 40 | 50 |",
"| mem | 1940 | 2 | 60 | 70 |",
"+------------+------------+-----------+----------+----------+",
],
},

View File

@ -9,7 +9,7 @@ license.workspace = true
# Core crates
data_types.workspace = true
iox_time.workspace = true
influxdb-line-protocol.workspace = true
influxdb-line-protocol = { workspace = true }
observability_deps.workspace = true
schema.workspace = true

View File

@ -17,7 +17,10 @@ use crate::{
PersistedSnapshot, Precision, WriteBuffer, WriteLineError,
};
use async_trait::async_trait;
use data_types::{ChunkId, ChunkOrder, ColumnType, NamespaceName, NamespaceNameError};
use data_types::{
ChunkId, ChunkOrder, ColumnType, NamespaceName, NamespaceNameError, PartitionHashId,
PartitionId,
};
use datafusion::catalog::Session;
use datafusion::common::DataFusionError;
use datafusion::datasource::object_store::ObjectStoreUrl;
@ -352,9 +355,12 @@ pub fn parquet_chunk_from_file(
chunk_order: i64,
) -> ParquetChunk {
let partition_key = data_types::PartitionKey::from(parquet_file.chunk_time.to_string());
let partition_id = data_types::partition::TransitionPartitionId::new(
data_types::TableId::new(0),
&partition_key,
let partition_id = data_types::partition::TransitionPartitionId::from_parts(
PartitionId::new(0),
Some(PartitionHashId::new(
data_types::TableId::new(0),
&partition_key,
)),
);
let chunk_stats = create_chunk_statistics(

View File

@ -8,7 +8,10 @@ use crate::write_buffer::table_buffer::TableBuffer;
use crate::{ParquetFile, ParquetFileId, PersistedSnapshot};
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
use data_types::{ChunkId, ChunkOrder, PartitionKey, TimestampMinMax, TransitionPartitionId};
use data_types::{
ChunkId, ChunkOrder, PartitionHashId, PartitionId, PartitionKey, TimestampMinMax,
TransitionPartitionId,
};
use datafusion::catalog::Session;
use datafusion::common::DataFusionError;
use datafusion::logical_expr::Expr;
@ -111,9 +114,12 @@ impl QueryableBuffer {
batches,
schema: influx_schema.clone(),
stats: Arc::new(chunk_stats),
partition_id: TransitionPartitionId::new(
data_types::TableId::new(0),
&PartitionKey::from(gen_time.to_string()),
partition_id: TransitionPartitionId::from_parts(
PartitionId::new(0),
Some(PartitionHashId::new(
data_types::TableId::new(0),
&PartitionKey::from(gen_time.to_string()),
)),
),
sort_key: None,
id: ChunkId::new(),
@ -478,9 +484,12 @@ async fn sort_dedupe_persist(
batches: vec![persist_job.batch],
schema: persist_job.schema.clone(),
stats: Arc::new(chunk_stats),
partition_id: TransitionPartitionId::new(
data_types::TableId::new(0),
&PartitionKey::from(format!("{}", persist_job.chunk_time)),
partition_id: TransitionPartitionId::from_parts(
PartitionId::new(0),
Some(PartitionHashId::new(
data_types::TableId::new(0),
&PartitionKey::from(format!("{}", persist_job.chunk_time)),
)),
),
sort_key: Some(persist_job.sort_key.clone()),
id: ChunkId::new(),