Merge pull request #915 from influxdata/ntran/sync_with_arrow_is_null

chore: use newly added Arrow's Expr::is_not_null function
pull/24376/head
Nga Tran 2021-03-03 12:09:56 -05:00 committed by GitHub
commit 29c0218b38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 40 deletions

38
Cargo.lock generated
View File

@ -101,7 +101,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=5ae63f8ec0c14d5e871aa79d90e5d163b369e704#5ae63f8ec0c14d5e871aa79d90e5d163b369e704"
source = "git+https://github.com/apache/arrow.git?rev=a3a619382f90e9472b5b7b93159e77289e8c8031#a3a619382f90e9472b5b7b93159e77289e8c8031"
dependencies = [
"cfg_aliases",
"chrono",
@ -124,7 +124,7 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=5ae63f8ec0c14d5e871aa79d90e5d163b369e704#5ae63f8ec0c14d5e871aa79d90e5d163b369e704"
source = "git+https://github.com/apache/arrow.git?rev=a3a619382f90e9472b5b7b93159e77289e8c8031#a3a619382f90e9472b5b7b93159e77289e8c8031"
dependencies = [
"arrow",
"bytes",
@ -643,24 +643,10 @@ dependencies = [
"crossbeam-channel 0.4.4",
"crossbeam-deque 0.7.3",
"crossbeam-epoch 0.8.2",
"crossbeam-queue 0.2.3",
"crossbeam-queue",
"crossbeam-utils 0.7.2",
]
[[package]]
name = "crossbeam"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd01a6eb3daaafa260f6fc94c3a6c36390abc2080e38e3e34ced87393fb77d80"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-channel 0.5.0",
"crossbeam-deque 0.8.0",
"crossbeam-epoch 0.9.3",
"crossbeam-queue 0.3.1",
"crossbeam-utils 0.8.3",
]
[[package]]
name = "crossbeam-channel"
version = "0.4.4"
@ -742,16 +728,6 @@ dependencies = [
"maybe-uninit",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils 0.8.3",
]
[[package]]
name = "crossbeam-utils"
version = "0.7.2"
@ -836,14 +812,13 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=5ae63f8ec0c14d5e871aa79d90e5d163b369e704#5ae63f8ec0c14d5e871aa79d90e5d163b369e704"
source = "git+https://github.com/apache/arrow.git?rev=a3a619382f90e9472b5b7b93159e77289e8c8031#a3a619382f90e9472b5b7b93159e77289e8c8031"
dependencies = [
"ahash 0.7.1",
"arrow",
"async-trait",
"chrono",
"clap",
"crossbeam 0.8.0",
"futures",
"hashbrown",
"log",
@ -857,6 +832,7 @@ dependencies = [
"sha2",
"sqlparser 0.8.0",
"tokio",
"tokio-stream",
"unicode-segmentation",
]
@ -1853,7 +1829,7 @@ dependencies = [
"chrono",
"criterion",
"croaring",
"crossbeam 0.7.3",
"crossbeam",
"env_logger 0.7.1",
"human_format",
"packers",
@ -2335,7 +2311,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=5ae63f8ec0c14d5e871aa79d90e5d163b369e704#5ae63f8ec0c14d5e871aa79d90e5d163b369e704"
source = "git+https://github.com/apache/arrow.git?rev=a3a619382f90e9472b5b7b93159e77289e8c8031#a3a619382f90e9472b5b7b93159e77289e8c8031"
dependencies = [
"arrow",
"base64 0.12.3",

View File

@ -8,11 +8,11 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using a development version of arrow/parquet/datafusion, and all of these dependencies are pinned to the same rev
# The version can be found here: https://github.com/apache/arrow/commit/5ae63f8ec0c14d5e871aa79d90e5d163b369e704
# The version can be found here: https://github.com/apache/arrow/commit/a3a619382f90e9472b5b7b93159e77289e8c8031
#
arrow = { git = "https://github.com/apache/arrow.git", rev = "5ae63f8ec0c14d5e871aa79d90e5d163b369e704" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "5ae63f8ec0c14d5e871aa79d90e5d163b369e704" }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "5ae63f8ec0c14d5e871aa79d90e5d163b369e704" }
arrow = { git = "https://github.com/apache/arrow.git", rev = "a3a619382f90e9472b5b7b93159e77289e8c8031" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "a3a619382f90e9472b5b7b93159e77289e8c8031" }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "a3a619382f90e9472b5b7b93159e77289e8c8031" }
# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "5ae63f8ec0c14d5e871aa79d90e5d163b369e704", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
parquet = { git = "https://github.com/apache/arrow.git", rev = "a3a619382f90e9472b5b7b93159e77289e8c8031", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }

View File

@ -414,10 +414,7 @@ impl InfluxRPCPlanner {
schema: _,
}) = scan_and_filter
{
// TODO use Expr::is_null() here when this
// https://issues.apache.org/jira/browse/ARROW-11742
// is completed.
let tag_name_is_not_null = Expr::IsNotNull(Box::new(col(tag_name)));
let tag_name_is_not_null = Expr::Column(tag_name.to_string()).is_not_null();
// TODO: optimize this to use "DISTINCT" or do
// something more intelligent than simply fetching all