chore: Update arrow to latest version (#335)

* chore: Update arrow to latest version

* fix: Updates needed by new version of datafusion
pull/24376/head
Andrew Lamb 2020-10-02 14:46:07 -04:00 committed by GitHub
parent 1b69a5a79c
commit bc5378c7fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 13 deletions

12
Cargo.lock generated
View File

@ -78,7 +78,7 @@ checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
[[package]] [[package]]
name = "arrow" name = "arrow"
version = "2.0.0-SNAPSHOT" version = "2.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=a1157b770dc5d51de22f32ad7a12131a1670aaca#a1157b770dc5d51de22f32ad7a12131a1670aaca" source = "git+https://github.com/apache/arrow.git?rev=238a9497269f39ab4d5bf20c28c2431a1b4e6673#238a9497269f39ab4d5bf20c28c2431a1b4e6673"
dependencies = [ dependencies = [
"chrono", "chrono",
"csv", "csv",
@ -646,9 +646,9 @@ dependencies = [
[[package]] [[package]]
name = "datafusion" name = "datafusion"
version = "2.0.0-SNAPSHOT" version = "2.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=a1157b770dc5d51de22f32ad7a12131a1670aaca#a1157b770dc5d51de22f32ad7a12131a1670aaca" source = "git+https://github.com/apache/arrow.git?rev=238a9497269f39ab4d5bf20c28c2431a1b4e6673#238a9497269f39ab4d5bf20c28c2431a1b4e6673"
dependencies = [ dependencies = [
"arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=a1157b770dc5d51de22f32ad7a12131a1670aaca)", "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=238a9497269f39ab4d5bf20c28c2431a1b4e6673)",
"async-trait", "async-trait",
"chrono", "chrono",
"clap", "clap",
@ -718,7 +718,7 @@ dependencies = [
name = "delorean_arrow" name = "delorean_arrow"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=a1157b770dc5d51de22f32ad7a12131a1670aaca)", "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=238a9497269f39ab4d5bf20c28c2431a1b4e6673)",
"datafusion", "datafusion",
"parquet", "parquet",
] ]
@ -2108,9 +2108,9 @@ dependencies = [
[[package]] [[package]]
name = "parquet" name = "parquet"
version = "2.0.0-SNAPSHOT" version = "2.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=a1157b770dc5d51de22f32ad7a12131a1670aaca#a1157b770dc5d51de22f32ad7a12131a1670aaca" source = "git+https://github.com/apache/arrow.git?rev=238a9497269f39ab4d5bf20c28c2431a1b4e6673#238a9497269f39ab4d5bf20c28c2431a1b4e6673"
dependencies = [ dependencies = [
"arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=a1157b770dc5d51de22f32ad7a12131a1670aaca)", "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=238a9497269f39ab4d5bf20c28c2431a1b4e6673)",
"brotli", "brotli",
"byteorder", "byteorder",
"chrono", "chrono",

View File

@ -11,10 +11,10 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for delorean, to
[dependencies] [dependencies]
# We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev # We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev
# Check out the current rev @ https://github.com/apache/arrow/commit/a1157b770dc5d51de22f32ad7a12131a1670aaca # The version can be found here: https://github.com/apache/arrow/commit/238a9497269f39ab4d5bf20c28c2431a1b4e6673
# #
arrow = { git = "https://github.com/apache/arrow.git", rev = "a1157b770dc5d51de22f32ad7a12131a1670aaca"} arrow = { git = "https://github.com/apache/arrow.git", rev = "238a9497269f39ab4d5bf20c28c2431a1b4e6673"}
datafusion = { git = "https://github.com/apache/arrow.git", rev = "a1157b770dc5d51de22f32ad7a12131a1670aaca" } datafusion = { git = "https://github.com/apache/arrow.git", rev = "238a9497269f39ab4d5bf20c28c2431a1b4e6673" }
# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time # Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway # and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "a1157b770dc5d51de22f32ad7a12131a1670aaca", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] } parquet = { git = "https://github.com/apache/arrow.git", rev = "238a9497269f39ab4d5bf20c28c2431a1b4e6673", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }

View File

@ -110,7 +110,7 @@ fn build_store(
let mut total_rows_read = 0; let mut total_rows_read = 0;
let start = std::time::Instant::now(); let start = std::time::Instant::now();
loop { loop {
let rb = reader.next_batch(); let rb = reader.next().transpose();
match rb { match rb {
Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e),
Ok(Some(rb)) => { Ok(Some(rb)) => {

View File

@ -219,7 +219,8 @@ impl ExecutionPlan for SchemaPivotExec {
let input_batch = input_reader let input_batch = input_reader
.lock() .lock()
.expect("locked input mutex") .expect("locked input mutex")
.next_batch()?; .next()
.transpose()?;
keep_searching = match input_batch { keep_searching = match input_batch {
Some(input_batch) => { Some(input_batch) => {
@ -429,7 +430,7 @@ mod tests {
let mut batches = Vec::new(); let mut batches = Vec::new();
let mut reader = reader.lock().expect("locking record batch reader"); let mut reader = reader.lock().expect("locking record batch reader");
// process the record batches one by one // process the record batches one by one
while let Some(record_batch) = reader.next_batch().expect("reading next batch") { while let Some(record_batch) = reader.next().transpose().expect("reading next batch") {
batches.push(record_batch) batches.push(record_batch)
} }
batches batches