Merge branch 'main' into alamb/underscore_in_bucket_names_2

pull/24376/head
Andrew Lamb 2021-01-21 15:53:27 -05:00 committed by GitHub
commit c50f9b1baf
12 changed files with 377 additions and 352 deletions


@@ -38,8 +38,11 @@ jq --null-input --sort-keys \
   --arg imgPrefix "${DOCKER_IMAGE}" \
   --arg appKey "$APP_NAME" \
   '$tagDigest | split(" ") as $td | {
+    Images: {
       ($appKey): {
         Tag: ($imgPrefix + ":" + $td[0]),
         Digest: ($imgPrefix + "@" + $td[1]),
-      }
+      },
+    },
+    PublishedAt: (now | todateiso8601)
   }'

Cargo.lock (generated, 558 changed lines) — diff suppressed because it is too large.


@@ -11,10 +11,10 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
 [dependencies]
 # We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev
-# The version can be found here: https://github.com/apache/arrow/commit/c46fd102678fd22b9781642437ad8821f907d9db
+# The version can be found here: https://github.com/apache/arrow/commit/84126d5d70e9f05f82616e9a8506b53fe1df4a22
 #
-arrow = { git = "https://github.com/apache/arrow.git", rev = "c46fd102678fd22b9781642437ad8821f907d9db" , features = ["simd"] }
+arrow = { git = "https://github.com/apache/arrow.git", rev = "84126d5d70e9f05f82616e9a8506b53fe1df4a22" , features = ["simd"] }
-datafusion = { git = "https://github.com/apache/arrow.git", rev = "c46fd102678fd22b9781642437ad8821f907d9db" }
+datafusion = { git = "https://github.com/apache/arrow.git", rev = "84126d5d70e9f05f82616e9a8506b53fe1df4a22" }
 # Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
 # and we're not currently using it anyway
-parquet = { git = "https://github.com/apache/arrow.git", rev = "c46fd102678fd22b9781642437ad8821f907d9db", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
+parquet = { git = "https://github.com/apache/arrow.git", rev = "84126d5d70e9f05f82616e9a8506b53fe1df4a22", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }


@@ -287,9 +287,7 @@ mod tests {
     use super::*;
     use influxdb_line_protocol::parse_lines;

-    #[allow(dead_code)]
     type TestError = Box<dyn std::error::Error + Send + Sync + 'static>;
-    #[allow(dead_code)]
     type Result<T = (), E = TestError> = std::result::Result<T, E>;

     #[test]


@@ -46,7 +46,6 @@ impl TryFrom<u8> for BlockType {
 /// `Block` holds information about location and time range of a block of data.
 #[derive(Debug, Copy, Clone, PartialEq)]
-#[allow(dead_code)]
 pub struct Block {
     pub min_time: i64,
     pub max_time: i64,
@@ -77,8 +76,8 @@ const MAX_BLOCK_VALUES: usize = 1000;
 /// organization and bucket identifiers.
 pub struct InfluxID(u64);

-#[allow(dead_code)]
 impl InfluxID {
+    #[allow(dead_code)]
     fn new_str(s: &str) -> Result<Self, TSMError> {
         let v = u64::from_str_radix(s, 16).map_err(|e| TSMError {
             description: e.to_string(),


@@ -1,12 +1,16 @@
 //! Represents a Chunk of data (a collection of tables and their data within
 //! some chunk) in the mutable store.
+use arrow_deps::datafusion::error::Result as ArrowResult;
 use arrow_deps::{
     arrow::record_batch::RecordBatch,
     datafusion::{
-        logical_plan::Expr, logical_plan::Operator, optimizer::utils::expr_to_column_names,
+        error::DataFusionError,
+        logical_plan::{Expr, ExpressionVisitor, Operator, Recursion},
+        optimizer::utils::expr_to_column_names,
         prelude::*,
     },
 };
 use chrono::{DateTime, Utc};
 use generated_types::wal as wb;
 use std::collections::{BTreeSet, HashMap, HashSet};
@@ -14,7 +18,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
 use data_types::{partition_metadata::Table as TableStats, TIME_COLUMN_NAME};
 use query::{
     predicate::{Predicate, TimestampRange},
-    util::{visit_expression, AndExprBuilder, ExpressionVisitor},
+    util::AndExprBuilder,
 };

 use crate::dictionary::{Dictionary, Error as DictionaryError};
@@ -36,6 +40,9 @@ pub enum Error {
         source: crate::table::Error,
     },

+    #[snafu(display("Unsupported predicate. Mutable buffer does not support: {}", source))]
+    UnsupportedPredicate { source: DataFusionError },
+
     #[snafu(display("Table ID {} not found in dictionary of chunk {}", table, chunk))]
     TableIdNotFoundInDictionary {
         table: u32,
@@ -251,7 +258,7 @@ impl Chunk {
         let mut visitor = SupportVisitor {};
         let mut predicate_columns: HashSet<String> = HashSet::new();
         for expr in &chunk_exprs {
-            visit_expression(expr, &mut visitor);
+            visitor = expr.accept(visitor).context(UnsupportedPredicate)?;
             expr_to_column_names(&expr, &mut predicate_columns).unwrap();
         }
@@ -439,10 +446,10 @@ impl query::PartitionChunk for Chunk {
 struct SupportVisitor {}

 impl ExpressionVisitor for SupportVisitor {
-    fn pre_visit(&mut self, expr: &Expr) {
+    fn pre_visit(self, expr: &Expr) -> ArrowResult<Recursion<Self>> {
         match expr {
-            Expr::Literal(..) => {}
-            Expr::Column(..) => {}
+            Expr::Literal(..) => Ok(Recursion::Continue(self)),
+            Expr::Column(..) => Ok(Recursion::Continue(self)),
             Expr::BinaryExpr { op, .. } => {
                 match op {
                     Operator::Eq
@@ -455,17 +462,20 @@ impl ExpressionVisitor for SupportVisitor {
                     | Operator::Multiply
                     | Operator::Divide
                     | Operator::And
-                    | Operator::Or => {}
+                    | Operator::Or => Ok(Recursion::Continue(self)),
                     // Unsupported (need to think about ramifications)
                     Operator::NotEq | Operator::Modulus | Operator::Like | Operator::NotLike => {
-                        panic!("Unsupported binary operator in expression: {:?}", expr)
+                        Err(DataFusionError::NotImplemented(format!(
+                            "Operator {:?} not yet supported in IOx MutableBuffer",
+                            op
+                        )))
                     }
                 }
             }
-            _ => panic!(
-                "Unsupported expression in mutable_buffer database: {:?}",
-                expr
-            ),
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "Unsupported expression in mutable_buffer database: {:?}",
+                expr
+            ))),
         }
     }
 }
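
For orientation: `SupportVisitor` above now implements DataFusion's own `ExpressionVisitor` (from the arrow rev pinned in this PR) instead of the local trait removed from `query::util` below. A minimal sketch of that by-value visitor pattern follows; `ColumnCounter` and `count_columns` are illustrative names, not part of this change, and only the trait shape shown in the diff (`pre_visit(self, ..) -> Result<Recursion<Self>>`, `Expr::accept`) is assumed.

use arrow_deps::datafusion::{
    error::Result as DataFusionResult,
    logical_plan::{Expr, ExpressionVisitor, Recursion},
};

// Illustrative visitor: counts column references in an expression tree.
struct ColumnCounter {
    columns: usize,
}

impl ExpressionVisitor for ColumnCounter {
    // The visitor is taken by value and handed back inside `Recursion::Continue`;
    // returning an Err aborts the walk (as SupportVisitor does for NotEq).
    fn pre_visit(mut self, expr: &Expr) -> DataFusionResult<Recursion<Self>> {
        if let Expr::Column(..) = expr {
            self.columns += 1;
        }
        Ok(Recursion::Continue(self))
    }
}

// `Expr::accept` walks the whole expression tree, threading the visitor through.
fn count_columns(expr: &Expr) -> DataFusionResult<usize> {
    let visitor = expr.accept(ColumnCounter { columns: 0 })?;
    Ok(visitor.columns)
}

This is why `pre_visit` now takes `self` and returns a `Recursion` rather than mutating through `&mut self`.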


@@ -999,7 +999,7 @@ mod tests {
     datafusion::{physical_plan::collect, prelude::*},
 };
 use influxdb_line_protocol::{parse_lines, ParsedLine};
-use test_helpers::str_pair_vec_to_vec;
+use test_helpers::{assert_contains, str_pair_vec_to_vec};
 use tokio::sync::mpsc;

 type TestError = Box<dyn std::error::Error + Send + Sync + 'static>;
@@ -1659,9 +1659,6 @@ mod tests {
     }

     #[tokio::test]
-    #[should_panic(
-        expected = "Unsupported binary operator in expression: #state NotEq Utf8(\"MA\")"
-    )]
     async fn test_query_series_pred_neq() {
         let db = MutableBufferDb::new("column_namedb");
@@ -1679,8 +1676,12 @@ mod tests {
             .add_expr(col("state").not_eq(lit("MA")))
             .build();

-        // Should panic as the neq path isn't implemented yet
-        db.query_series(predicate).await.unwrap();
+        // Should err as the neq path isn't implemented yet
+        let err = db.query_series(predicate).await.unwrap_err();
+        assert_contains!(
+            err.to_string(),
+            "Operator NotEq not yet supported in IOx MutableBuffer"
+        );
     }

     #[tokio::test]


@@ -33,7 +33,7 @@ use bytes::Bytes;
 use chrono::{DateTime, Utc};
 use futures::{Stream, StreamExt, TryStreamExt};
 use snafu::Snafu;
-use std::{io, path::PathBuf, unimplemented};
+use std::{io, path::PathBuf};

 /// Universal interface to multiple object store services.
 #[derive(Debug)]


@@ -64,7 +64,6 @@ pub enum Error {
     JoinError { source: tokio::task::JoinError },
 }

-#[allow(dead_code)]
 pub type Result<T, E = Error> = std::result::Result<T, E>;

 #[derive(Debug)]


@@ -1,91 +1,6 @@
 //! This module contains DataFusion utility functions and helpers

 use arrow_deps::datafusion::logical_plan::{binary_expr, Expr, Operator};

-/// Encode the traversal of an expression tree. When passed to
-/// `visit_expression`, `ExpressionVisitor::visit` is invoked
-/// recursively on all nodes of an expression tree
-///
-/// TODO contribute this back upstream to datafusion??
-pub trait ExpressionVisitor {
-    /// Invoked before children of expr are visisted
-    fn pre_visit(&mut self, expr: &Expr);
-
-    /// Invoked after children of expr are visited. Default
-    /// implementation does nothing.
-    fn post_visit(&mut self, _expr: &Expr) {}
-}
-
-pub fn visit_expression<V: ExpressionVisitor>(expr: &Expr, visitor: &mut V) {
-    visitor.pre_visit(expr);
-
-    // recurse
-    match expr {
-        // expression types without inputs
-        Expr::Alias(..)
-        | Expr::Column(..)
-        | Expr::ScalarVariable(..)
-        | Expr::Literal(..)
-        | Expr::Wildcard => {
-            // No inputs, so no more recursion needed
-        }
-        Expr::BinaryExpr { left, right, .. } => {
-            visit_expression(left, visitor);
-            visit_expression(right, visitor);
-        }
-        Expr::Cast { expr, .. } => visit_expression(expr, visitor),
-        Expr::Case {
-            expr,
-            when_then_expr,
-            else_expr,
-        } => {
-            if let Some(expr) = expr.as_ref() {
-                visit_expression(expr, visitor);
-            }
-            when_then_expr.iter().for_each(|(when, then)| {
-                visit_expression(when, visitor);
-                visit_expression(then, visitor);
-            });
-            if let Some(else_expr) = else_expr.as_ref() {
-                visit_expression(else_expr, visitor);
-            }
-        }
-        Expr::Not(expr) => visit_expression(expr, visitor),
-        Expr::Negative(expr) => visit_expression(expr, visitor),
-        Expr::Between {
-            expr, low, high, ..
-        } => {
-            visit_expression(expr, visitor);
-            visit_expression(low, visitor);
-            visit_expression(high, visitor);
-        }
-        Expr::IsNull(expr) => visit_expression(expr, visitor),
-        Expr::IsNotNull(expr) => visit_expression(expr, visitor),
-        Expr::ScalarFunction { args, .. } => {
-            for arg in args {
-                visit_expression(arg, visitor)
-            }
-        }
-        Expr::ScalarUDF { args, .. } => {
-            for arg in args {
-                visit_expression(arg, visitor)
-            }
-        }
-        Expr::AggregateFunction { args, .. } => {
-            for arg in args {
-                visit_expression(arg, visitor)
-            }
-        }
-        Expr::AggregateUDF { args, .. } => {
-            for arg in args {
-                visit_expression(arg, visitor)
-            }
-        }
-        Expr::Sort { expr, .. } => visit_expression(expr, visitor),
-    }
-
-    visitor.post_visit(expr);
-}
-
 /// Creates a single expression representing the conjunction (aka
 /// AND'ing) together of a set of expressions
 #[derive(Debug, Default)]


@@ -70,7 +70,6 @@ pub enum Error {
     InvalidFlatbuffersSegment,
 }

-#[allow(dead_code)]
 pub type Result<T, E = Error> = std::result::Result<T, E>;

 /// An in-memory buffer of a write ahead log. It is split up into segments,
@@ -87,7 +86,6 @@ pub struct Buffer {
 }

 impl Buffer {
-    #[allow(dead_code)]
     pub fn new(
         max_size: u64,
         segment_size: u64,
@@ -109,7 +107,6 @@ impl Buffer {
     /// has been closed out. If the max size of the buffer would be exceeded
     /// by accepting the write, the oldest (first) of the closed segments
     /// will be dropped, if it is persisted. Otherwise, an error is returned.
-    #[allow(dead_code)]
     pub fn append(&mut self, write: Arc<ReplicatedWrite>) -> Result<Option<Arc<Segment>>> {
         let write_size = u64::try_from(write.data.len())
             .expect("appended data must be less than a u64 in length");
@@ -167,7 +164,6 @@ impl Buffer {
     }

     /// Returns the current size of the buffer.
-    #[allow(dead_code)]
     pub fn size(&self) -> u64 {
         self.current_size
     }
@@ -177,7 +173,6 @@ impl Buffer {
     /// given writer ID and sequence are to identify from what point to
     /// replay writes. If no write matches the given writer ID and sequence
     /// number, all replicated writes within the buffer will be returned.
-    #[allow(dead_code)]
     pub fn all_writes_since(&self, since: WriterSequence) -> Vec<Arc<ReplicatedWrite>> {
         let mut writes = Vec::new();
@@ -209,7 +204,6 @@ impl Buffer {
     /// onward. This returns only writes from the passed in writer ID. If no
     /// write matches the given writer ID and sequence number, all
     /// replicated writes within the buffer for that writer will be returned.
-    #[allow(dead_code)]
     pub fn writes_since(&self, since: WriterSequence) -> Vec<Arc<ReplicatedWrite>> {
         let mut writes = Vec::new();
@@ -244,7 +238,6 @@ impl Buffer {
     }

     // Removes the oldest segment present in the buffer, returning its id
-    #[allow(dead_code)]
     fn remove_oldest_segment(&mut self) -> u64 {
         let removed_segment = self.closed_segments.remove(0);
         self.current_size -= removed_segment.size;
@@ -276,7 +269,6 @@ pub struct Segment {
 }

 impl Segment {
-    #[allow(dead_code)]
     fn new(id: u64) -> Self {
         Self {
             id,
@@ -299,7 +291,6 @@ impl Segment {
     // appends the write to the segment, keeping track of the summary information
     // about the writer
-    #[allow(dead_code)]
     fn append(&mut self, write: Arc<ReplicatedWrite>) -> Result<()> {
         let (writer_id, sequence_number) = write.writer_and_sequence();
         self.validate_and_update_sequence_summary(writer_id, sequence_number)?;
@@ -350,7 +341,6 @@ impl Segment {
     }

     /// sets the time this segment was persisted at
-    #[allow(dead_code)]
     pub fn set_persisted_at(&self, time: DateTime<Utc>) {
         let mut persisted = self.persisted.lock().expect("mutex poisoned");
         *persisted = Some(time);


@@ -87,7 +87,11 @@ pub fn enable_logging() {
 #[macro_export]
 /// A macro to assert that one string is contained within another with
-/// a nice error message if they are not. Is a macro so test error
+/// a nice error message if they are not.
+///
+/// Usage: `assert_contains!(actual, expected)`
+///
+/// Is a macro so test error
 /// messages are on the same line as the failure;
 ///
 /// Both arguments must be convertable into Strings (Into<String>)
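
For reference, a minimal usage sketch of the macro in the style of the `test_query_series_pred_neq` change above; the error string here is an illustrative stand-in, not output from a real query.

use test_helpers::assert_contains;

#[test]
fn error_mentions_unsupported_operator() {
    // Illustrative stand-in for err.to_string() from a real query_series call
    let err_msg = "Operator NotEq not yet supported in IOx MutableBuffer";

    // Passes when the second string is contained in the first; on failure the
    // macro reports both strings on the failing line.
    assert_contains!(err_msg, "NotEq not yet supported");
}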