fix: Move partition metadata types to data_types2

parent afdff2b1db
commit d2671355c3
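Every hunk below follows the same mechanical pattern: types that previously lived in the data_types::partition_metadata module are now imported from the root of the data_types2 crate. A minimal before/after sketch of the consumer-side change, assembled from the hunks for illustration:

    // before: partition metadata types lived in a submodule of data_types
    use data_types::partition_metadata::{StatValues, Statistics};

    // after: the same types come from the data_types2 crate root
    use data_types2::{StatValues, Statistics};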
@@ -1174,6 +1174,7 @@ version = "0.1.0"
 dependencies = [
+ "data_types",
  "influxdb_line_protocol",
  "observability_deps",
  "ordered-float 3.0.0",
  "percent-encoding",
  "schema",
@@ -4522,6 +4523,7 @@ dependencies = [
  "criterion",
  "croaring",
  "data_types",
+ "data_types2",
  "datafusion 0.1.0",
  "either",
  "hashbrown 0.12.0",
@@ -7,6 +7,7 @@ description = "Shared data types in the IOx NG architecture"
 [dependencies]
+data_types = { path = "../data_types" }
 influxdb_line_protocol = { path = "../influxdb_line_protocol" }
 observability_deps = { path = "../observability_deps" }
 ordered-float = "3"
 percent-encoding = "2.1.0"
 schema = { path = "../schema" }
File diff suppressed because it is too large
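The suppressed diff is where the moved types actually land. Since the Cargo.toml hunk above gives data_types2 a new dependency on data_types, one plausible shape for that file, offered only as a hedged sketch and not the verified contents, is a set of re-exports rather than fresh definitions:

    // hypothetical sketch of the suppressed data_types2 change: re-export the
    // partition metadata types so downstream crates import them from this root
    pub use data_types::partition_metadata::{
        ColumnSummary, InfluxDbType, IsNan, StatOverlap, StatValues, Statistics, TableSummary,
    };

The names in the sketch are exactly those the remaining hunks import from data_types2.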
@@ -12,11 +12,10 @@
 )]

 use data_types::{
-    partition_metadata::{StatValues, Statistics},
     router::{ShardConfig, ShardId},
     sequence::Sequence,
 };
-use data_types2::{DeletePredicate, NonEmptyString};
+use data_types2::{DeletePredicate, NonEmptyString, StatValues, Statistics};
 use hashbrown::HashMap;
 use iox_time::Time;
 use mutable_batch::MutableBatch;
@@ -1,23 +1,18 @@
 //! A [`Column`] stores the rows for a given column name

-use std::fmt::Formatter;
-use std::mem;
-use std::sync::Arc;
-
-use arrow::error::ArrowError;
 use arrow::{
     array::{
         ArrayDataBuilder, ArrayRef, BooleanArray, Float64Array, Int64Array,
         TimestampNanosecondArray, UInt64Array,
     },
     datatypes::DataType,
+    error::ArrowError,
 };
-use snafu::{ResultExt, Snafu};
-
-use arrow_util::bitset::BitSet;
-use arrow_util::string::PackedStringArray;
-use data_types::partition_metadata::{StatValues, Statistics};
+use arrow_util::{bitset::BitSet, string::PackedStringArray};
+use data_types2::{StatValues, Statistics};
 use schema::{InfluxColumnType, InfluxFieldType, TIME_DATA_TYPE};
+use snafu::{ResultExt, Snafu};
+use std::{fmt::Formatter, mem, sync::Arc};

 /// A "dictionary ID" (DID) is a compact numeric representation of an interned
 /// string in the dictionary. The same string always maps the same DID.
@@ -3,7 +3,7 @@
 use crate::column::{Column, ColumnData, INVALID_DID};
 use crate::MutableBatch;
 use arrow_util::bitset::{iter_set_positions, iter_set_positions_with_offset, BitSet};
-use data_types::partition_metadata::{IsNan, StatValues, Statistics};
+use data_types2::{IsNan, StatValues, Statistics};
 use schema::{InfluxColumnType, InfluxFieldType};
 use snafu::Snafu;
 use std::num::NonZeroU64;
@@ -1,10 +1,8 @@
 use arrow_util::assert_batches_eq;
-use data_types::partition_metadata::{StatValues, Statistics};
-use mutable_batch::writer::Writer;
-use mutable_batch::MutableBatch;
+use data_types2::{StatValues, Statistics};
+use mutable_batch::{writer::Writer, MutableBatch};
 use schema::selection::Selection;
-use std::collections::BTreeMap;
-use std::num::NonZeroU64;
+use std::{collections::BTreeMap, num::NonZeroU64};

 #[test]
 fn test_extend() {
@@ -1,10 +1,8 @@
 use arrow_util::assert_batches_eq;
-use data_types::partition_metadata::{StatValues, Statistics};
-use mutable_batch::writer::Writer;
-use mutable_batch::MutableBatch;
+use data_types2::{StatValues, Statistics};
+use mutable_batch::{writer::Writer, MutableBatch};
 use schema::selection::Selection;
-use std::collections::BTreeMap;
-use std::num::NonZeroU64;
+use std::{collections::BTreeMap, num::NonZeroU64};

 #[test]
 fn test_extend_range() {
@@ -1,6 +1,6 @@
 use arrow_util::assert_batches_eq;
-use data_types::partition_metadata::{StatValues, Statistics};
 use data_types::write_summary::TimestampSummary;
+use data_types2::{StatValues, Statistics};
 use mutable_batch::writer::Writer;
 use mutable_batch::MutableBatch;
 use schema::selection::Selection;
@@ -1,3 +1,16 @@
+use arrow::{
+    array::{
+        ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, TimestampNanosecondArray,
+        UInt64Array,
+    },
+    record_batch::RecordBatch,
+};
+use arrow_util::bitset::BitSet;
+use data_types2::{IsNan, PartitionTemplate, StatValues, Statistics, TemplatePart};
+use hashbrown::HashSet;
+use mutable_batch::{writer::Writer, MutableBatch, PartitionWrite, WritePayload};
+use rand::prelude::*;
+use schema::selection::Selection;
 /// A fuzz test of the [`mutable_batch::Writer`] interface:
 ///
 /// - column writes - `write_i64`, `write_tag`, etc...
@@ -5,25 +18,7 @@
 /// - batch writes with ranges - `write_batch_ranges`
 ///
 /// Verifies that the rows and statistics are as expected after a number of interleaved writes
-use std::collections::BTreeMap;
-use std::num::NonZeroU64;
-use std::ops::Range;
-use std::sync::Arc;
-
-use arrow::array::{
-    ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, TimestampNanosecondArray,
-    UInt64Array,
-};
-use arrow::record_batch::RecordBatch;
-use hashbrown::HashSet;
-use rand::prelude::*;
-
-use arrow_util::bitset::BitSet;
-use data_types::partition_metadata::{IsNan, StatValues, Statistics};
-use data_types2::{PartitionTemplate, TemplatePart};
-use mutable_batch::writer::Writer;
-use mutable_batch::{MutableBatch, PartitionWrite, WritePayload};
-use schema::selection::Selection;
+use std::{collections::BTreeMap, num::NonZeroU64, ops::Range, sync::Arc};

 fn make_rng() -> StdRng {
     let seed = rand::rngs::OsRng::default().next_u64();
@@ -3,17 +3,14 @@ use crate::{
     storage::Storage,
     ParquetFilePath,
 };
-use data_types::{
-    partition_metadata::{Statistics, TableSummary},
-    timestamp::{TimestampMinMax, TimestampRange},
+use data_types2::{
+    ParquetFile, ParquetFileWithMetadata, Statistics, TableSummary, TimestampMinMax, TimestampRange,
 };
-use data_types2::{ParquetFile, ParquetFileWithMetadata};
 use datafusion::physical_plan::SendableRecordBatchStream;
 use object_store::DynObjectStore;
 use observability_deps::tracing::*;
 use predicate::Predicate;
-use schema::selection::Selection;
-use schema::{Schema, TIME_COLUMN_NAME};
+use schema::{selection::Selection, Schema, TIME_COLUMN_NAME};
 use snafu::{ResultExt, Snafu};
 use std::{collections::BTreeSet, mem, sync::Arc};

|
|||
//! [Apache Parquet]: https://parquet.apache.org/
|
||||
//! [Apache Thrift]: https://thrift.apache.org/
|
||||
//! [Thrift Compact Protocol]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md
|
||||
use data_types::partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics};
|
||||
use data_types2::{
|
||||
NamespaceId, ParquetFileParams, PartitionId, SequenceNumber, SequencerId, TableId, Timestamp,
|
||||
ColumnSummary, InfluxDbType, NamespaceId, ParquetFileParams, PartitionId, SequenceNumber,
|
||||
SequencerId, StatValues, Statistics, TableId, Timestamp,
|
||||
};
|
||||
use generated_types::influxdata::iox::ingester::v1 as proto;
|
||||
use iox_time::Time;
|
||||
|
@ -106,8 +106,10 @@ use parquet::{
|
|||
schema::types::SchemaDescriptor as ParquetSchemaDescriptor,
|
||||
};
|
||||
use prost::Message;
|
||||
use schema::sort::{SortKey, SortKeyBuilder};
|
||||
use schema::{InfluxColumnType, InfluxFieldType, Schema};
|
||||
use schema::{
|
||||
sort::{SortKey, SortKeyBuilder},
|
||||
InfluxColumnType, InfluxFieldType, Schema,
|
||||
};
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
use std::{convert::TryInto, sync::Arc};
|
||||
use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TOutputProtocol};
|
||||
|
|
|
@@ -1,7 +1,8 @@
 use std::sync::Arc;

-use data_types::timestamp::TimestampMinMax;
-use data_types2::{ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary};
+use data_types2::{
+    ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary, TimestampMinMax,
+};
 use observability_deps::tracing::debug;
 use predicate::PredicateMatch;
 use query::{QueryChunk, QueryChunkError, QueryChunkMeta};
@@ -1,21 +1,18 @@
-//! Contains the algorithm to determine which chunks may contain
-//! "duplicate" primary keys (that is where data with the same
-//! combination of "tag" columns and timestamp in the InfluxDB
-//! DataModel have been written in via multiple distinct line protocol
-//! writes (and thus are stored in separate rows)
+//! Contains the algorithm to determine which chunks may contain "duplicate" primary keys (that is
+//! where data with the same combination of "tag" columns and timestamp in the InfluxDB DataModel
+//! have been written in via multiple distinct line protocol writes (and thus are stored in
+//! separate rows)

-use data_types::{
-    partition_metadata::{ColumnSummary, StatOverlap, Statistics},
-    timestamp::TimestampMinMax,
+use crate::{QueryChunk, QueryChunkMeta};
+use data_types2::{
+    ColumnSummary, DeletePredicate, ParquetFileWithMetadata, PartitionId, StatOverlap, Statistics,
+    TableSummary, TimestampMinMax,
 };
-use data_types2::{DeletePredicate, ParquetFileWithMetadata, PartitionId, TableSummary};
 use observability_deps::tracing::debug;
 use schema::{sort::SortKey, Schema, TIME_COLUMN_NAME};
 use snafu::Snafu;
 use std::{cmp::Ordering, sync::Arc};

-use crate::{QueryChunk, QueryChunkMeta};
-
 #[derive(Debug, Snafu)]
 pub enum Error {
     #[snafu(display(
@@ -1,9 +1,9 @@
 //! Implementation of a DataFusion PhysicalPlan node across partition chunks

-use std::{fmt, sync::Arc};
-
+use super::adapter::SchemaAdapterStream;
+use crate::{exec::IOxSessionContext, QueryChunk};
 use arrow::datatypes::SchemaRef;
-use data_types::partition_metadata::TableSummary;
+use data_types2::TableSummary;
 use datafusion::{
     error::DataFusionError,
     execution::context::TaskContext,
@@ -14,13 +14,9 @@ use datafusion::{
     },
 };
 use observability_deps::tracing::debug;
-use schema::selection::Selection;
-use schema::Schema;
-
-use crate::{exec::IOxSessionContext, QueryChunk};
 use predicate::Predicate;
-
-use super::adapter::SchemaAdapterStream;
+use schema::{selection::Selection, Schema};
+use std::{fmt, sync::Arc};

 /// Implements the DataFusion physical plan interface
 #[derive(Debug)]
@@ -1,23 +1,22 @@
 //! Implementation of statistics based pruning

-use std::sync::Arc;
-
-use arrow::array::{
-    ArrayRef, BooleanArray, DictionaryArray, Float64Array, Int64Array, StringArray, UInt64Array,
+use crate::QueryChunk;
+use arrow::{
+    array::{
+        ArrayRef, BooleanArray, DictionaryArray, Float64Array, Int64Array, StringArray, UInt64Array,
+    },
+    datatypes::{DataType, Int32Type, TimeUnit},
 };
-use arrow::datatypes::{DataType, Int32Type, TimeUnit};
-
-use data_types::partition_metadata::{StatValues, Statistics};
+use data_types2::{StatValues, Statistics};
 use datafusion::{
     logical_plan::Column,
     physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
 };
 use observability_deps::tracing::{debug, trace};
 use predicate::Predicate;
-use schema::Schema;
-
-use crate::QueryChunk;
 use query_functions::group_by::Aggregate;
+use schema::Schema;
+use std::sync::Arc;

 /// Something that cares to be notified when pruning of chunks occurs
 pub trait PruningObserver {
@@ -1,8 +1,6 @@
 //! Code to translate IOx statistics to DataFusion statistics

-use data_types::partition_metadata::{
-    ColumnSummary, InfluxDbType, Statistics as IOxStatistics, TableSummary,
-};
+use data_types2::{ColumnSummary, InfluxDbType, Statistics as IOxStatistics, TableSummary};
 use datafusion::{
     physical_plan::{ColumnStatistics, Statistics as DFStatistics},
     scalar::ScalarValue,
@@ -107,11 +105,10 @@ fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics {

 #[cfg(test)]
 mod test {
-    use std::num::NonZeroU64;
-
     use super::*;
-    use data_types::partition_metadata::{InfluxDbType, StatValues};
+    use data_types2::{InfluxDbType, StatValues};
     use schema::{builder::SchemaBuilder, InfluxFieldType};
+    use std::num::NonZeroU64;

     macro_rules! assert_nice_eq {
         ($actual:ident, $expected:ident) => {
@@ -283,7 +283,7 @@ impl TestChunk {
         Self {
             table_name,
             schema: Arc::new(SchemaBuilder::new().build().unwrap()),
-            table_summary: TableSummary::new(),
+            table_summary: TableSummary::default(),
            id: ChunkId::new_test(0),
             may_contain_pk_duplicates: Default::default(),
             predicates: Default::default(),
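Note that the hunk above changes slightly more than an import path: TableSummary::new() becomes TableSummary::default(), suggesting the data_types2 version relies on a derived Default rather than an explicit constructor. A hedged sketch of what that implies (the field layout is assumed, not shown in this diff):

    // hypothetical sketch: a derived Default replaces the explicit new()
    #[derive(Debug, Default)]
    pub struct TableSummary {
        pub columns: Vec<ColumnSummary>,
    }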
@@ -15,6 +15,7 @@ arrow = { version = "13", features = ["prettyprint"] }
 arrow_util = { path = "../arrow_util" }
 croaring = "0.6"
 data_types = { path = "../data_types" }
+data_types2 = { path = "../data_types2" }
 datafusion = { path = "../datafusion" }
 either = "1.6.1"
 hashbrown = "0.12"
@@ -5,11 +5,10 @@ use crate::{
     table::{self, Table},
 };
 use arrow::{error::ArrowError, record_batch::RecordBatch};
-use data_types::{chunk_metadata::ChunkColumnSummary, partition_metadata::TableSummary};
-
+use data_types::chunk_metadata::ChunkColumnSummary;
+use data_types2::TableSummary;
 use observability_deps::tracing::debug;
-use schema::selection::Selection;
-use schema::{builder::Error as SchemaError, Schema};
+use schema::{builder::Error as SchemaError, selection::Selection, Schema};
 use snafu::{ResultExt, Snafu};
 use std::{
     collections::{BTreeMap, BTreeSet},
@@ -527,11 +526,10 @@ mod test {
             Int32Type,
         },
     };
-    use data_types::partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics};
+    use data_types2::{ColumnSummary, InfluxDbType, StatValues, Statistics};
     use metric::{Attributes, MetricKind, Observation, ObservationSet, RawReporter};
     use schema::builder::SchemaBuilder;
-    use std::iter::FromIterator;
-    use std::{num::NonZeroU64, sync::Arc};
+    use std::{iter::FromIterator, num::NonZeroU64, sync::Arc};

     // helper to make the `add_remove_tables` test simpler to read.
     fn gen_recordbatch() -> RecordBatch {
@@ -241,8 +241,8 @@ impl ColumnType {
         }
     }

-    pub fn as_influxdb_type(&self) -> Option<data_types::partition_metadata::InfluxDbType> {
-        use data_types::partition_metadata::InfluxDbType;
+    pub fn as_influxdb_type(&self) -> Option<data_types2::InfluxDbType> {
+        use data_types2::InfluxDbType;
         match self {
             Self::Tag(_) => Some(InfluxDbType::Tag),
             Self::Field(_) => Some(InfluxDbType::Field),
@@ -6,7 +6,8 @@ use crate::{
     BinaryExpr,
 };
 use arrow::record_batch::RecordBatch;
-use data_types::{chunk_metadata::ChunkColumnSummary, partition_metadata::TableSummary};
+use data_types::chunk_metadata::ChunkColumnSummary;
+use data_types2::TableSummary;
 use parking_lot::RwLock;
 use schema::selection::Selection;
 use snafu::{ensure, Snafu};
@@ -782,7 +783,7 @@ impl MetaData {
     }

     pub fn to_summary(&self) -> TableSummary {
-        use data_types::partition_metadata::{ColumnSummary, StatValues, Statistics};
+        use data_types2::{ColumnSummary, StatValues, Statistics};
         let columns = self
             .columns
             .iter()
@@ -850,8 +851,8 @@ impl MetaData {
 fn make_null_stats(
     total_count: u64,
     logical_data_type: &LogicalDataType,
-) -> data_types::partition_metadata::Statistics {
-    use data_types::partition_metadata::{StatValues, Statistics};
+) -> data_types2::Statistics {
+    use data_types2::{StatValues, Statistics};
     use LogicalDataType::*;

     match logical_data_type {
@@ -1103,9 +1104,6 @@ impl std::fmt::Display for DisplayReadAggregateResults<'_> {

 #[cfg(test)]
 mod test {
-    use arrow::array::BooleanArray;
-    use data_types::partition_metadata::{StatValues, Statistics};
-
     use super::*;
     use crate::{
         column::Column,
@@ -1113,6 +1111,8 @@ mod test {
         schema::{self, LogicalDataType},
         value::{AggregateVec, OwnedValue, Scalar},
     };
+    use arrow::array::BooleanArray;
+    use data_types2::{StatValues, Statistics};

     #[test]
     fn meta_data_update_with() {