Merge branch 'main' into feature-label
commit 7247467225

@@ -111,7 +111,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"cfg_aliases",
"chrono",

@@ -134,7 +134,7 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"arrow",
"bytes",

@@ -429,9 +429,9 @@ dependencies = [
[[package]]
name = "cast"
version = "0.2.3"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0"
checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75"
dependencies = [
"rustc_version",
]

@@ -488,9 +488,9 @@ dependencies = [
[[package]]
name = "clang-sys"
version = "1.1.1"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1"
checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c"
dependencies = [
"glob",
"libc",

@@ -599,6 +599,7 @@ dependencies = [
"clap",
"criterion-plot",
"csv",
"futures",
"itertools 0.10.0",
"lazy_static",
"num-traits",

@@ -611,6 +612,7 @@ dependencies = [
"serde_derive",
"serde_json",
"tinytemplate",
"tokio",
"walkdir",
]

@@ -662,9 +664,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
version = "0.5.0"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils",

@@ -787,7 +789,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"ahash 0.7.2",
"arrow",

@@ -1044,9 +1046,9 @@ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
[[package]]
name = "futures"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
dependencies = [
"futures-channel",
"futures-core",

@@ -1059,9 +1061,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
dependencies = [
"futures-core",
"futures-sink",

@@ -1069,15 +1071,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"

[[package]]
name = "futures-executor"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
dependencies = [
"futures-core",
"futures-task",

@@ -1086,15 +1088,15 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"

[[package]]
name = "futures-macro"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
dependencies = [
"proc-macro-hack",
"proc-macro2",

@@ -1104,21 +1106,21 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"

[[package]]
name = "futures-task"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"

[[package]]
name = "futures-test"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1fe5e51002528907757d5f1648101086f7197f792112db43ba23b06b09e6bce"
checksum = "e77baeade98824bc928c21b8ad39918b9d8a06745ebdb6e2c93fb7673fb7968d"
dependencies = [
"futures-core",
"futures-executor",

@@ -1132,9 +1134,9 @@ dependencies = [
[[package]]
name = "futures-util"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
dependencies = [
"futures-channel",
"futures-core",

@@ -1885,6 +1887,7 @@ dependencies = [
"influxdb_line_protocol",
"internal_types",
"observability_deps",
"parking_lot",
"snafu",
"string-interner",
"test_helpers",

@@ -2298,7 +2301,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"arrow",
"base64 0.12.3",

@@ -2331,6 +2334,7 @@ dependencies = [
"bytes",
"data_types",
"futures",
"internal_types",
"object_store",
"parking_lot",
"snafu",

@@ -2646,6 +2650,7 @@ dependencies = [
"futures",
"influxdb_line_protocol",
"internal_types",
"libc",
"observability_deps",
"parking_lot",
"snafu",

@@ -2880,9 +2885,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.2"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf12057f289428dbf5c591c74bf10392e4a8003f993405a902f20117019022d4"
checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124"
dependencies = [
"base64 0.13.0",
"bytes",

@@ -3117,9 +3122,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "sct"
version = "0.6.0"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3042af939fca8c3453b7af0f1c66e533a15a86169e39de2657310ade8f98d3c"
checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce"
dependencies = [
"ring",
"untrusted",

@@ -3260,13 +3265,16 @@ dependencies = [
"bytes",
"chrono",
"crc32fast",
"criterion",
"data_types",
"flatbuffers",
"flate2",
"futures",
"generated_types",
"influxdb_line_protocol",
"internal_types",
"mutable_buffer",
"num_cpus",
"object_store",
"observability_deps",
"parking_lot",

@@ -3277,6 +3285,7 @@ dependencies = [
"serde_json",
"snafu",
"snap",
"tempfile",
"test_helpers",
"tokio",
"tokio-util",

@@ -3756,9 +3765,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.4.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722"
checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
dependencies = [
"autocfg",
"bytes",

@@ -3819,9 +3828,9 @@ dependencies = [
[[package]]
name = "tokio-util"
version = "0.6.5"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5143d049e85af7fbc36f5454d990e62c2df705b3589f123b71f441b6b59f443f"
checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e"
dependencies = [
"bytes",
"futures-core",

@@ -71,7 +71,7 @@ csv = "1.1"
dirs = "3.0.1"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3.1"
futures = "0.3"
http = "0.2.0"
hyper = "0.14"
once_cell = { version = "1.4.0", features = ["parking_lot"] }

@@ -8,14 +8,14 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using a development version of arrow/parquet/datafusion and the dependencies are at the same rev
# The version can be found here: https://github.com/apache/arrow/commit/e69478a890b1e4eee49b540b69b2711d170a0433
# The version can be found here: https://github.com/apache/arrow/commit/00a443629c00079ea03c0b9f415d74669d2759a7
#
arrow = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433" }
arrow = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7" }

# Turn off optional datafusion features (function packages)
datafusion = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433", default-features = false }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7", default-features = false }

# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
parquet = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }

@@ -29,6 +29,46 @@ macro_rules! assert_table_eq {
};
}

/// Compares formatted output of a record batch with an expected
/// vector of strings in a way that order does not matter.
/// This is a macro so errors appear on the correct line
///
/// Designed so that failure output can be directly copy/pasted
/// into the test code as expected results.
///
/// Expects to be called about like this:
///
/// `assert_batches_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
#[macro_export]
macro_rules! assert_batches_sorted_eq {
($EXPECTED_LINES: expr, $CHUNKS: expr) => {
let mut expected_lines: Vec<String> = $EXPECTED_LINES.iter().map(|&s| s.into()).collect();

// sort except for header + footer
let num_lines = expected_lines.len();
if num_lines > 3 {
expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
}

let formatted = arrow_deps::arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
// fix for windows: \r\n -->
let mut actual_lines: Vec<&str> = formatted.trim().lines().collect();

// sort except for header + footer
let num_lines = actual_lines.len();
if num_lines > 3 {
actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
}

assert_eq!(
expected_lines, actual_lines,
"\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
expected_lines, actual_lines
);
};
}

// sort a record batch by all columns (to provide a stable output order for test
// comparison)
pub fn sort_record_batch(batch: RecordBatch) -> RecordBatch {
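For reference, a minimal sketch of how the new `assert_batches_sorted_eq!` macro is intended to be used in a test. The `run_query` helper and the expected table below are illustrative only, not taken from this commit:

```rust
// Hypothetical test: `run_query` stands in for whatever produces the
// record batches under test.
#[test]
fn readings_match_regardless_of_row_order() {
    let batches: Vec<RecordBatch> = run_query("select * from cpu");

    let expected = vec![
        "+------+--------+-----+------+",
        "| host | region | val | time |",
        "+------+--------+-----+------+",
        "| a    | west   | 23  | 1    |",
        "| b    | east   | 22  | 2    |",
        "+------+--------+-----+------+",
    ];

    // Rows may come back in any order; only the header and the closing
    // border line keep their positions before the comparison.
    assert_batches_sorted_eq!(expected, &batches);
}
```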

@@ -21,8 +21,11 @@ pub enum ChunkStorage {
/// The chunk is in the Read Buffer (where it can not be mutated)
ReadBuffer,

/// The chunk is both in ReadBuffer and Object Store
ReadBufferAndObjectStore,

/// The chunk is stored in Object Storage (where it can not be mutated)
ObjectStore,
ObjectStoreOnly,
}

impl ChunkStorage {

@@ -32,7 +35,8 @@ impl ChunkStorage {
Self::OpenMutableBuffer => "OpenMutableBuffer",
Self::ClosedMutableBuffer => "ClosedMutableBuffer",
Self::ReadBuffer => "ReadBuffer",
Self::ObjectStore => "ObjectStore",
Self::ReadBufferAndObjectStore => "ReadBufferAndObjectStore",
Self::ObjectStoreOnly => "ObjectStoreOnly",
}
}
}

@@ -134,7 +138,8 @@ impl From<ChunkStorage> for management::ChunkStorage {
ChunkStorage::OpenMutableBuffer => Self::OpenMutableBuffer,
ChunkStorage::ClosedMutableBuffer => Self::ClosedMutableBuffer,
ChunkStorage::ReadBuffer => Self::ReadBuffer,
ChunkStorage::ObjectStore => Self::ObjectStore,
ChunkStorage::ReadBufferAndObjectStore => Self::ReadBufferAndObjectStore,
ChunkStorage::ObjectStoreOnly => Self::ObjectStoreOnly,
}
}
}

@@ -204,7 +209,10 @@ impl TryFrom<management::ChunkStorage> for ChunkStorage {
management::ChunkStorage::OpenMutableBuffer => Ok(Self::OpenMutableBuffer),
management::ChunkStorage::ClosedMutableBuffer => Ok(Self::ClosedMutableBuffer),
management::ChunkStorage::ReadBuffer => Ok(Self::ReadBuffer),
management::ChunkStorage::ObjectStore => Ok(Self::ObjectStore),
management::ChunkStorage::ReadBufferAndObjectStore => {
Ok(Self::ReadBufferAndObjectStore)
}
management::ChunkStorage::ObjectStoreOnly => Ok(Self::ObjectStoreOnly),
management::ChunkStorage::Unspecified => Err(FieldViolation::required("")),
}
}

@@ -220,7 +228,7 @@ mod test {
partition_key: "foo".to_string(),
id: 42,
estimated_bytes: 1234,
storage: management::ChunkStorage::ObjectStore.into(),
storage: management::ChunkStorage::ObjectStoreOnly.into(),
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -231,7 +239,7 @@ mod test {
partition_key: Arc::new("foo".to_string()),
id: 42,
estimated_bytes: 1234,
storage: ChunkStorage::ObjectStore,
storage: ChunkStorage::ObjectStoreOnly,
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -250,7 +258,7 @@ mod test {
partition_key: Arc::new("foo".to_string()),
id: 42,
estimated_bytes: 1234,
storage: ChunkStorage::ObjectStore,
storage: ChunkStorage::ObjectStoreOnly,
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -262,7 +270,7 @@ mod test {
partition_key: "foo".to_string(),
id: 42,
estimated_bytes: 1234,
storage: management::ChunkStorage::ObjectStore.into(),
storage: management::ChunkStorage::ObjectStoreOnly.into(),
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -181,7 +181,7 @@ pub struct LifecycleRules {
pub mutable_size_threshold: Option<NonZeroUsize>,

/// Once the total amount of buffered data in memory reaches this size start
/// dropping data from memory based on the drop_order
/// dropping data from memory based on the [`sort_order`](Self::sort_order)
pub buffer_size_soft: Option<NonZeroUsize>,

/// Once the amount of data in memory reaches this size start

@@ -199,6 +199,9 @@ pub struct LifecycleRules {
/// Allow dropping data that has not been persisted to object storage
pub drop_non_persisted: bool,

/// Persists chunks to object storage.
pub persist: bool,

/// Do not allow writing new data to this database
pub immutable: bool,
}

@@ -228,6 +231,7 @@ impl From<LifecycleRules> for management::LifecycleRules {
.unwrap_or_default(),
sort_order: Some(config.sort_order.into()),
drop_non_persisted: config.drop_non_persisted,
persist: config.persist,
immutable: config.immutable,
}
}

@@ -245,6 +249,7 @@ impl TryFrom<management::LifecycleRules> for LifecycleRules {
buffer_size_hard: (proto.buffer_size_hard as usize).try_into().ok(),
sort_order: proto.sort_order.optional("sort_order")?.unwrap_or_default(),
drop_non_persisted: proto.drop_non_persisted,
persist: proto.persist,
immutable: proto.immutable,
})
}

@@ -743,6 +748,7 @@ impl TryFrom<management::partition_template::Part> for TemplatePart {

/// ShardId maps to a nodegroup that holds the shard.
pub type ShardId = u16;
pub const NO_SHARD_CONFIG: Option<&ShardConfig> = None;

/// Assigns a given line to a specific shard id.
pub trait Sharder {

@@ -776,6 +782,12 @@ pub struct ShardConfig {
pub ignore_errors: bool,
}

impl Sharder for ShardConfig {
fn shard(&self, _line: &ParsedLine<'_>) -> Result<ShardId, Error> {
todo!("mkm to implement as part of #916");
}
}

/// Maps a matcher with specific target group. If the line/row matches
/// it should be sent to the group.
#[derive(Debug, Eq, PartialEq, Clone, Default)]

@@ -1281,6 +1293,7 @@ mod tests {
buffer_size_hard: 232,
sort_order: None,
drop_non_persisted: true,
persist: true,
immutable: true,
};
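Since `ShardConfig`'s `shard` method above is still a `todo!` (deferred to #916), here is a minimal sketch of what an implementation of the `Sharder` trait can look like. The round-robin strategy is purely illustrative and is not the design planned in #916:

```rust
use std::sync::atomic::{AtomicU16, Ordering};

/// Illustrative only: spreads lines across shards in round-robin order.
struct RoundRobinSharder {
    num_shards: u16,
    next: AtomicU16,
}

impl Sharder for RoundRobinSharder {
    fn shard(&self, _line: &ParsedLine<'_>) -> Result<ShardId, Error> {
        // Each call picks the next shard id, wrapping around at `num_shards`.
        let n = self.next.fetch_add(1, Ordering::Relaxed);
        Ok(n % self.num_shards)
    }
}
```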

@@ -27,6 +27,13 @@ pub enum Job {
partition_key: String,
chunk_id: u32,
},

/// Write a chunk from read buffer to object store
WriteChunk {
db_name: String,
partition_key: String,
chunk_id: u32,
},
}

impl From<Job> for management::operation_metadata::Job {

@@ -49,6 +56,15 @@ impl From<Job> for management::operation_metadata::Job {
partition_key,
chunk_id,
}),
Job::WriteChunk {
db_name,
partition_key,
chunk_id,
} => Self::WriteChunk(management::WriteChunk {
db_name,
partition_key,
chunk_id,
}),
}
}
}

@@ -74,6 +90,15 @@ impl From<management::operation_metadata::Job> for Job {
partition_key,
chunk_id,
},
Job::WriteChunk(management::WriteChunk {
db_name,
partition_key,
chunk_id,
}) => Self::WriteChunk {
db_name,
partition_key,
chunk_id,
},
}
}
}
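A short sketch of the round trip the new `WriteChunk` variant supports, using the two `From` conversions shown above; the database and partition names are placeholders:

```rust
// Sketch only: relies on the `From` impls in the hunks above being in scope.
let job = Job::WriteChunk {
    db_name: "placeholder_db".to_string(),
    partition_key: "placeholder_partition".to_string(),
    chunk_id: 7,
};

// Domain type -> protobuf job metadata for long-running operations...
let proto: management::operation_metadata::Job = job.into();

// ...and protobuf -> domain type again.
let job_again: Job = proto.into();
```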

@@ -12,6 +12,7 @@ pub struct TimestampRange {

impl TimestampRange {
pub fn new(start: i64, end: i64) -> Self {
debug_assert!(end > start);
Self { start, end }
}

@@ -26,6 +27,12 @@ impl TimestampRange {
pub fn contains_opt(&self, v: Option<i64>) -> bool {
Some(true) == v.map(|ts| self.contains(ts))
}

#[inline]
/// Returns if this range is disjoint w.r.t the provided range
pub fn disjoint(&self, other: &Self) -> bool {
self.end <= other.start || self.start >= other.end
}
}

#[cfg(test)]

@@ -55,4 +62,18 @@ mod tests {
assert!(!range.contains_opt(None));
}

#[test]
fn test_disjoint() {
let r1 = TimestampRange::new(100, 200);
let r2 = TimestampRange::new(200, 300);
let r3 = TimestampRange::new(150, 250);

assert!(r1.disjoint(&r2));
assert!(r2.disjoint(&r1));
assert!(!r1.disjoint(&r3));
assert!(!r3.disjoint(&r1));
assert!(!r2.disjoint(&r3));
assert!(!r3.disjoint(&r2));
}
}
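The new `disjoint` test relies on `TimestampRange` being half-open; a small illustration of why two ranges that meet at a boundary do not overlap:

```rust
// `end` is exclusive, so the shared boundary 200 belongs to r2 only.
let r1 = TimestampRange::new(100, 200);
let r2 = TimestampRange::new(200, 300);

assert!(!r1.contains(200)); // excluded from [100, 200)
assert!(r2.contains(200));  // included in [200, 300)
assert!(r1.disjoint(&r2));  // hence the two ranges do not overlap
```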

@@ -6,7 +6,7 @@ As discussed on https://github.com/influxdata/influxdb_iox/pull/221 and https://
1. Use only async I/O via `tokio` for socket communication. It is ok to use either blocking (e.g. `std::fs::File`) or async APIs (e.g. `tokio::fs::File`) for local File I/O.

2. All CPU bound tasks should be scheduled on the separate application level `thread_pool`, not with `tokio::task::spawn` nor `tokio::task::spawn_blocking` nor a new threadpool.
2. All CPU bound tasks should be scheduled on the separate application level `thread_pool` (which can be another tokio executor but should be separate from the executor that handles I/O).

We will work, over time, to migrate the rest of the codebase to use these patterns.

@@ -41,11 +41,11 @@ It is ok to use either blocking (e.g. `std::fs::File`) or async APIs for local
This can not always be done (e.g. with a library such as the parquet writer, which is not `async`). In such cases, `tokio::task::spawn_blocking` should be used to perform the file I/O.

### All CPU heavy work should be done on the single app level worker pool, separate from the tokio runtime
### All CPU heavy work should be done on the single app level worker pool, separate from the tokio runtime handling IO

**What**: All CPU heavy work should be done on the single app level worker pool. We provide a `thread_pool` interface that interacts nicely with async tasks (e.g. that allows an async task to `await` for a CPU heavy task to complete).
**What**: All CPU heavy work should be done on the app level worker pool. We provide a `thread_pool` interface that interacts nicely with async tasks (e.g. that allows an async task to `await` for a CPU heavy task to complete).

**Rationale**: A single app level worker pool gives us a single place to control work priority, eventually, so that tasks such as compaction of large data files can have lower precedence than incoming queries. By using a different pool than the tokio runtime, with a limited number of threads, we avoid over-saturating the CPU with OS threads and thereby starving the limited number of tokio I/O threads. A separate, single app level pool also limits the number of underlying OS CPU threads which are spawned, even under heavy load, keeping thread context switching overhead low.
**Rationale**: A single app level worker pool gives us a single place to control work priority, eventually, so that tasks such as compaction of large data files can have lower precedence than incoming queries. By using a different pool than the main tokio runtime, with a limited number of threads, we avoid over-saturating the CPU with OS threads and thereby starving the limited number of tokio I/O threads. A separate, single app level pool also limits the number of underlying OS CPU threads which are spawned, even under heavy load, keeping thread context switching overhead low.

There will, of course, always be a judgment call to be made of where "CPU bound work" starts and "work acceptable for I/O processing" ends. A reasonable rule of thumb is that if a job will *always* be completed in less than 100ms then it is probably fine for an I/O thread. This number may be revised as we tune the system.
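A minimal sketch of the hand-off this guideline describes: CPU-heavy work runs on a dedicated pool (modelled here as a second tokio runtime handle) while the calling async task simply awaits the result. This is illustrative only and is not IOx's actual `thread_pool` interface:

```rust
/// Illustrative only: run a CPU-heavy closure on a dedicated worker pool
/// and await its result from an async task on the I/O runtime.
async fn run_on_cpu_pool<F, T>(cpu_pool: &tokio::runtime::Handle, work: F) -> T
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    cpu_pool
        // The closure executes on the CPU pool's threads, so the I/O
        // runtime's worker threads are never blocked by it.
        .spawn(async move { work() })
        .await
        .expect("CPU-heavy task panicked")
}
```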

@@ -5,3 +5,6 @@ When updating the version of the [flatbuffers](https://crates.io/crates/flatbuff
To update the generated code, edit `generated_types/regenerate-flatbuffers.sh` and set the `FB_COMMIT` variable at the top of the file to the commit SHA of the same commit in the [flatbuffers repository](https://github.com/google/flatbuffers) where the `flatbuffers` Rust crate version was updated. This ensures we'll be [using the same version of `flatc` that the crate was tested with](https://github.com/google/flatbuffers/issues/6199#issuecomment-714562121).

Then run the `generated_types/regenerate-flatbuffers.sh` script and check in any changes. Check that the whole project builds.

`generated_types/regenerate-flatbuffers.sh` will build `flatc` from source if it cannot be found.
In order to do that, your system will require `bazel`; you can likely install this with your favourite package manager.

@@ -9,7 +9,7 @@ bytes = { version = "1.0", features = ["serde"] }
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
# version of the flatbuffers crate
flatbuffers = "0.8"
futures = "0.3.1"
futures = "0.3"
prost = "0.7"
prost-types = "0.7"
tonic = "0.4"

@@ -17,8 +17,11 @@ enum ChunkStorage {
// The chunk is in the Read Buffer (where it can not be mutated)
CHUNK_STORAGE_READ_BUFFER = 3;

// The chunk is in the Read Buffer and Object Store
CHUNK_STORAGE_READ_BUFFER_AND_OBJECT_STORE = 4;

// The chunk is stored in Object Storage (where it can not be mutated)
CHUNK_STORAGE_OBJECT_STORE = 4;
CHUNK_STORAGE_OBJECT_STORE_ONLY = 5;
}

// `Chunk` represents part of a partition of data in a database.

@@ -151,6 +151,9 @@ message LifecycleRules {
// Allow dropping data that has not been persisted to object storage
bool drop_non_persisted = 7;

// Persists chunks to object storage.
bool persist = 9;

// Do not allow writing new data to this database
bool immutable = 8;
}

@@ -19,6 +19,7 @@ message OperationMetadata {
Dummy dummy = 5;
PersistSegment persist_segment = 6;
CloseChunk close_chunk = 7;
WriteChunk write_chunk = 8;
}
}

@@ -45,3 +46,15 @@ message CloseChunk {
// chunk_id
uint32 chunk_id = 3;
}

// Write a chunk from read buffer to object store
message WriteChunk {
// name of the database
string db_name = 1;

// partition key
string partition_key = 2;

// chunk_id
uint32 chunk_id = 3;
}

@@ -4,7 +4,12 @@ package influxdata.iox.write.v1;
service WriteService {
// write data into a specific Database
rpc Write(WriteRequest) returns (WriteResponse);
rpc Write(WriteRequest) returns (WriteResponse) {
option deprecated = true;
};

// write an entry into a Database
rpc WriteEntry(WriteEntryRequest) returns (WriteEntryResponse);
}

message WriteRequest {

@@ -21,3 +26,17 @@ message WriteResponse {
// how many lines were parsed and written into the database
uint64 lines_written = 1;
}

message WriteEntryRequest {
// name of database into which to write
string db_name = 1;

// entry, in serialized flatbuffers [Entry] format
//
// [Entry](https://github.com/influxdata/influxdb_iox/blob/main/generated_types/protos/influxdata/iox/write/v1/entry.fbs)
bytes entry = 2;
}

message WriteEntryResponse {
}
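A hypothetical client call against the new `WriteEntry` RPC, assuming the usual tonic code generation layout; the module path, address, and database name are illustrative only:

```rust
// Assumed paths from tonic codegen for package influxdata.iox.write.v1;
// adjust to wherever the generated types are actually re-exported.
use generated_types::influxdata::iox::write::v1::{
    write_service_client::WriteServiceClient, WriteEntryRequest,
};

async fn send_entry(entry_bytes: Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
    let mut client = WriteServiceClient::connect("http://127.0.0.1:8082").await?;

    // `entry` carries the serialized flatbuffers Entry described above.
    client
        .write_entry(WriteEntryRequest {
            db_name: "example_db".to_string(),
            entry: entry_bytes,
        })
        .await?;

    Ok(())
}
```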

@@ -1,11 +1,29 @@
#!/bin/bash -e

# The commit where the Rust `flatbuffers` crate version was changed to the version in `Cargo.lock`
# Update this, rerun this script, and check in the changes in the generated code when the
# `flatbuffers` crate version is updated.
# Instructions
#
# If you have changed some `*.fbs` files:
#
# - Run this script to regenerate the corresponding Rust code.
# - Run `cargo test` to make sure everything works as you would expect.
# - Check in the changes to the generated code along with your changes to the `*.fbs` files.
# - You should not need to edit this script.
#
# If you are updating the version of the `flatbuffers` crate in `Cargo.lock`:
#
# - The `flatbuffers` crate gets developed in sync with the `flatc` compiler in the same repo,
#   so when updating the `flatbuffers` crate we also need to update the `flatc` compiler we're
#   using.
# - Go to https://github.com/google/flatbuffers/blame/master/rust/flatbuffers/Cargo.toml and find
#   the commit SHA where the `version` metadata was updated to the version of the `flatbuffers`
#   crate we now want to have in our `Cargo.lock`.
# - Put that commit SHA in this variable:
FB_COMMIT="86401e078d0746d2381735415f8c2dfe849f3f52"
# - Run this script to regenerate the corresponding Rust code.
# - Run `cargo test` to make sure everything works as you would expect.
# - Check in the changes to the generated code along with your changes to the `Cargo.lock` file and
#   this script.

# Change to the generated_types crate directory, where this script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
pushd $DIR

@@ -6,7 +6,7 @@ edition = "2018"
[dependencies] # In alphabetical order
bytes = { version = "1.0", default-features = false }
futures = { version = "0.3.5", default-features = false }
futures = { version = "0.3", default-features = false }
reqwest = { version = "0.11", features = ["stream", "json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.44"

@@ -1,8 +1,7 @@
use chrono::{DateTime, Utc};
use criterion::{criterion_group, criterion_main, Criterion};
use data_types::database_rules::{Error as DataError, Partitioner, Sharder};
use influxdb_line_protocol::ParsedLine;
use internal_types::entry::{lines_to_sharded_entries, SequencedEntry};
use data_types::database_rules::ShardConfig;
use internal_types::entry::test_helpers::partitioner;
use internal_types::entry::{lines_to_sharded_entries, ClockValue, SequencedEntry};

static LINES: &str = include_str!("../../tests/fixtures/lineproto/prometheus.lp");

@@ -12,7 +11,8 @@ fn sequenced_entry(c: &mut Criterion) {
let lines = influxdb_line_protocol::parse_lines(LINES)
.collect::<Result<Vec<_>, _>>()
.unwrap();
let sharded_entries = lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
let shard_config: Option<&ShardConfig> = None;
let sharded_entries = lines_to_sharded_entries(&lines, shard_config, &partitioner(1)).unwrap();
let entry = &sharded_entries.first().unwrap().entry;
let data = entry.data();
assert_eq!(

@@ -28,10 +28,13 @@ fn sequenced_entry(c: &mut Criterion) {
554
);

let clock_value = ClockValue::new(23);

group.bench_function("new_from_entry_bytes", |b| {
b.iter(|| {
let sequenced_entry = SequencedEntry::new_from_entry_bytes(23, 2, data).unwrap();
assert_eq!(sequenced_entry.clock_value(), 23);
let sequenced_entry =
SequencedEntry::new_from_entry_bytes(clock_value, 2, data).unwrap();
assert_eq!(sequenced_entry.clock_value(), clock_value);
assert_eq!(sequenced_entry.writer_id(), 2);
})
});

@@ -42,50 +45,3 @@ fn sequenced_entry(c: &mut Criterion) {
criterion_group!(benches, sequenced_entry);

criterion_main!(benches);

fn sharder(count: u16) -> TestSharder {
TestSharder {
count,
n: std::cell::RefCell::new(0),
}
}

// For each line passed to shard returns a shard id from [0, count) in order
struct TestSharder {
count: u16,
n: std::cell::RefCell<u16>,
}

impl Sharder for TestSharder {
fn shard(&self, _line: &ParsedLine<'_>) -> Result<u16, DataError> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(n % self.count)
}
}

fn partitioner(count: u8) -> TestPartitioner {
TestPartitioner {
count,
n: std::cell::RefCell::new(0),
}
}

// For each line passed to partition_key returns a key with a number from [0,
// count)
struct TestPartitioner {
count: u8,
n: std::cell::RefCell<u8>,
}

impl Partitioner for TestPartitioner {
fn partition_key(
&self,
_line: &ParsedLine<'_>,
_default_time: &DateTime<Utc>,
) -> data_types::database_rules::Result<String> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(format!("key_{}", n % self.count))
}
}

@@ -2,7 +2,7 @@
//! from line protocol and the `DatabaseRules` configuration.

use crate::schema::TIME_COLUMN_NAME;
use data_types::database_rules::{Error as DataError, Partitioner, ShardId, Sharder};
use data_types::database_rules::{Error as DataError, Partitioner, ShardId, Sharder, WriterId};
use generated_types::entry as entry_fb;
use influxdb_line_protocol::{FieldValue, ParsedLine};

@@ -56,14 +56,17 @@ type ColumnResult<T, E = ColumnError> = std::result::Result<T, E>;
/// underlying flatbuffers bytes generated.
pub fn lines_to_sharded_entries(
lines: &[ParsedLine<'_>],
sharder: &impl Sharder,
sharder: Option<&impl Sharder>,
partitioner: &impl Partitioner,
) -> Result<Vec<ShardedEntry>> {
let default_time = Utc::now();
let mut sharded_lines = BTreeMap::new();

for line in lines {
let shard_id = sharder.shard(line).context(GeneratingShardId)?;
let shard_id = match &sharder {
Some(s) => Some(s.shard(line).context(GeneratingShardId)?),
None => None,
};
let partition_key = partitioner
.partition_key(line, &default_time)
.context(GeneratingPartitionKey)?;

@@ -90,7 +93,7 @@ pub fn lines_to_sharded_entries(
}

fn build_sharded_entry(
shard_id: ShardId,
shard_id: Option<ShardId>,
partitions: BTreeMap<String, BTreeMap<&str, Vec<&ParsedLine<'_>>>>,
default_time: &DateTime<Utc>,
) -> Result<ShardedEntry> {
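With the sharder now optional, callers that do not shard can pass `NO_SHARD_CONFIG`. A minimal sketch mirroring the `no_shard_config` test added later in this commit (`partitioner(1)` is the test helper introduced below):

```rust
use data_types::database_rules::NO_SHARD_CONFIG;
use influxdb_line_protocol::parse_lines;

let lp = "cpu,host=a,region=west user=23.1 123";
let lines: Vec<_> = parse_lines(lp).map(|l| l.unwrap()).collect();

// Without a shard config, all lines collapse into one entry whose
// shard_id is None.
let sharded_entries =
    lines_to_sharded_entries(&lines, NO_SHARD_CONFIG, &partitioner(1)).unwrap();
assert_eq!(sharded_entries.len(), 1);
assert_eq!(sharded_entries[0].shard_id, None);
```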

@@ -277,10 +280,12 @@ fn build_table_write_batch<'a>(
))
}

/// Holds a shard id to the associated entry
/// Holds a shard id to the associated entry. If there is no ShardId, then
/// everything goes to the same place. This means a single entry will be
/// generated from a batch of line protocol.
#[derive(Debug)]
pub struct ShardedEntry {
pub shard_id: ShardId,
pub shard_id: Option<ShardId>,
pub entry: Entry,
}

@@ -337,8 +342,10 @@ pub struct PartitionWrite<'a> {
}

impl<'a> PartitionWrite<'a> {
pub fn key(&self) -> Option<&str> {
self.fb.key()
pub fn key(&self) -> &str {
self.fb
.key()
.expect("key must be present in the flatbuffer PartitionWrite")
}

pub fn table_batches(&self) -> Vec<TableBatch<'_>> {

@@ -360,8 +367,10 @@ pub struct TableBatch<'a> {
}

impl<'a> TableBatch<'a> {
pub fn name(&self) -> Option<&str> {
self.fb.name()
pub fn name(&self) -> &str {
self.fb
.name()
.expect("name must be present in flatbuffers TableWriteBatch")
}

pub fn columns(&self) -> Vec<Column<'_>> {

@@ -420,18 +429,32 @@ impl<'a> TableBatch<'a> {
#[derive(Debug)]
pub struct Column<'a> {
fb: entry_fb::Column<'a>,
row_count: usize,
pub row_count: usize,
}

impl<'a> Column<'a> {
pub fn name(&self) -> Option<&str> {
self.fb.name()
pub fn name(&self) -> &str {
self.fb
.name()
.expect("name must be present in flatbuffers Column")
}

pub fn logical_type(&self) -> entry_fb::LogicalColumnType {
self.fb.logical_column_type()
}

pub fn is_tag(&self) -> bool {
self.fb.logical_column_type() == entry_fb::LogicalColumnType::Tag
}

pub fn is_field(&self) -> bool {
self.fb.logical_column_type() == entry_fb::LogicalColumnType::Field
}

pub fn is_time(&self) -> bool {
self.fb.logical_column_type() == entry_fb::LogicalColumnType::Time
}

pub fn values(&self) -> TypedValuesIterator<'a> {
match self.fb.values_type() {
entry_fb::ColumnValues::BoolValues => TypedValuesIterator::Bool(BoolIterator {

@@ -554,12 +577,22 @@ impl<'a> TypedValuesIterator<'a> {
_ => None,
}
}

pub fn type_description(&self) -> &str {
match self {
Self::Bool(_) => "bool",
Self::I64(_) => "i64",
Self::F64(_) => "f64",
Self::U64(_) => "u64",
Self::String(_) => "String",
}
}
}

/// Iterator over the flatbuffers BoolValues
#[derive(Debug)]
pub struct BoolIterator<'a> {
row_count: usize,
pub row_count: usize,
position: usize,
null_mask: Option<&'a [u8]>,
values: &'a [bool],

@@ -589,7 +622,7 @@ impl<'a> Iterator for BoolIterator<'a> {
/// Iterator over the flatbuffers I64Values, F64Values, and U64Values.
#[derive(Debug)]
pub struct ValIterator<'a, T: Follow<'a> + Follow<'a, Inner = T>> {
row_count: usize,
pub row_count: usize,
position: usize,
null_mask: Option<&'a [u8]>,
values_iter: VectorIter<'a, T>,

@@ -615,7 +648,7 @@ impl<'a, T: Follow<'a> + Follow<'a, Inner = T>> Iterator for ValIterator<'a, T>
/// Iterator over the flatbuffers StringValues
#[derive(Debug)]
pub struct StringIterator<'a> {
row_count: usize,
pub row_count: usize,
position: usize,
null_mask: Option<&'a [u8]>,
values: VectorIter<'a, ForwardsUOffset<&'a str>>,

@@ -1087,6 +1120,19 @@ enum ColumnRaw<'a> {
Bool(Vec<bool>),
}

#[derive(Debug, PartialOrd, PartialEq, Copy, Clone)]
pub struct ClockValue(u64);

impl ClockValue {
pub fn get(&self) -> u64 {
self.0
}

pub fn new(v: u64) -> Self {
Self { 0: v }
}
}

#[self_referencing]
#[derive(Debug)]
pub struct SequencedEntry {

@@ -1101,7 +1147,7 @@ pub struct SequencedEntry {

impl SequencedEntry {
pub fn new_from_entry_bytes(
clock_value: u64,
clock_value: ClockValue,
writer_id: u32,
entry_bytes: &[u8],
) -> Result<Self> {

@@ -1118,7 +1164,7 @@ impl SequencedEntry {
let sequenced_entry = entry_fb::SequencedEntry::create(
&mut fbb,
&entry_fb::SequencedEntryArgs {
clock_value,
clock_value: clock_value.get(),
writer_id,
entry_bytes: Some(entry_bytes),
},

@@ -1151,11 +1197,11 @@ impl SequencedEntry {
}
}

pub fn clock_value(&self) -> u64 {
self.fb().clock_value()
pub fn clock_value(&self) -> ClockValue {
ClockValue::new(self.fb().clock_value())
}

pub fn writer_id(&self) -> u32 {
pub fn writer_id(&self) -> WriterId {
self.fb().writer_id()
}
}

@@ -1180,10 +1226,133 @@ impl TryFrom<Vec<u8>> for SequencedEntry {
}
}

pub mod test_helpers {
use super::*;
use chrono::TimeZone;
use influxdb_line_protocol::parse_lines;

// An appropriate maximum size for batches of LP to be written into IOx. Using
// test fixtures containing more than this many lines of LP will result in them
// being written as multiple writes.
const LP_BATCH_SIZE: usize = 10000;

/// Converts the line protocol to a single `Entry` with a single shard and
/// a single partition.
pub fn lp_to_entry(lp: &str) -> Entry {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

lines_to_sharded_entries(&lines, sharder(1).as_ref(), &hour_partitioner())
.unwrap()
.pop()
.unwrap()
.entry
}

/// Converts the line protocol to a collection of `Entry` with a single
/// shard and a single partition, which is useful for testing when `lp` is
/// large. Batches are sized according to LP_BATCH_SIZE.
pub fn lp_to_entries(lp: &str) -> Vec<Entry> {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

lines
.chunks(LP_BATCH_SIZE)
.map(|batch| {
lines_to_sharded_entries(batch, sharder(1).as_ref(), &hour_partitioner())
.unwrap()
.pop()
.unwrap()
.entry
})
.collect::<Vec<_>>()
}

/// Returns a test sharder that will assign shard ids from [0, count)
/// incrementing for each line.
pub fn sharder(count: u16) -> Option<TestSharder> {
Some(TestSharder {
count,
n: std::cell::RefCell::new(0),
})
}

// For each line passed to shard returns a shard id from [0, count) in order
#[derive(Debug)]
pub struct TestSharder {
count: u16,
n: std::cell::RefCell<u16>,
}

impl Sharder for TestSharder {
fn shard(&self, _line: &ParsedLine<'_>) -> Result<u16, DataError> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(n % self.count)
}
}

/// Returns a test partitioner that will partition data by the hour
pub fn hour_partitioner() -> HourPartitioner {
HourPartitioner {}
}

/// Returns a test partitioner that will assign partition keys in the form
/// key_# where # is replaced by a number `[0, count)` incrementing for
/// each line.
pub fn partitioner(count: u8) -> TestPartitioner {
TestPartitioner {
count,
n: std::cell::RefCell::new(0),
}
}

// For each line passed to partition_key returns a key with a number from
// `[0, count)`
#[derive(Debug)]
pub struct TestPartitioner {
count: u8,
n: std::cell::RefCell<u8>,
}

impl Partitioner for TestPartitioner {
fn partition_key(
&self,
_line: &ParsedLine<'_>,
_default_time: &DateTime<Utc>,
) -> data_types::database_rules::Result<String> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(format!("key_{}", n % self.count))
}
}

// Partitions by the hour
#[derive(Debug)]
pub struct HourPartitioner {}

impl Partitioner for HourPartitioner {
fn partition_key(
&self,
line: &ParsedLine<'_>,
default_time: &DateTime<Utc>,
) -> data_types::database_rules::Result<String> {
const HOUR_FORMAT: &str = "%Y-%m-%dT%H";

let key = match line.timestamp {
Some(t) => Utc.timestamp_nanos(t).format(HOUR_FORMAT),
None => default_time.format(HOUR_FORMAT),
}
.to_string();

Ok(key)
}
}
}

#[cfg(test)]
mod tests {
use super::test_helpers::*;
use super::*;

use data_types::database_rules::NO_SHARD_CONFIG;
use influxdb_line_protocol::parse_lines;

#[test]

@@ -1197,11 +1366,28 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(2), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(2).as_ref(), &partitioner(1)).unwrap();

assert_eq!(sharded_entries.len(), 2);
assert_eq!(sharded_entries[0].shard_id, 0);
assert_eq!(sharded_entries[1].shard_id, 1);
assert_eq!(sharded_entries[0].shard_id, Some(0));
assert_eq!(sharded_entries[1].shard_id, Some(1));
}

#[test]
fn no_shard_config() {
let lp = vec![
"cpu,host=a,region=west user=23.1,system=66.1 123",
"mem,host=a,region=west used=23432 123",
"foo bar=true 21",
]
.join("\n");
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, NO_SHARD_CONFIG, &partitioner(1)).unwrap();

assert_eq!(sharded_entries.len(), 1);
assert_eq!(sharded_entries[0].shard_id, None);
}

#[test]

@@ -1215,12 +1401,12 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(2)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(2)).unwrap();

let partition_writes = sharded_entries[0].entry.partition_writes().unwrap();
assert_eq!(partition_writes.len(), 2);
assert_eq!(partition_writes[0].key().unwrap(), "key_0");
assert_eq!(partition_writes[1].key().unwrap(), "key_1");
assert_eq!(partition_writes[0].key(), "key_0");
assert_eq!(partition_writes[1].key(), "key_1");
}

#[test]

@@ -1236,15 +1422,15 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries[0].entry.partition_writes().unwrap();
let table_batches = partition_writes[0].table_batches();

assert_eq!(table_batches.len(), 3);
assert_eq!(table_batches[0].name().unwrap(), "cpu");
assert_eq!(table_batches[1].name().unwrap(), "disk");
assert_eq!(table_batches[2].name().unwrap(), "mem");
assert_eq!(table_batches[0].name(), "cpu");
assert_eq!(table_batches[1].name(), "disk");
assert_eq!(table_batches[2].name(), "mem");
}

#[test]

@@ -1253,7 +1439,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries[0].entry.partition_writes().unwrap();
let table_batches = partition_writes[0].table_batches();

@@ -1263,22 +1449,22 @@ mod tests {
assert_eq!(columns.len(), 5);

assert_eq!(columns[0].name().unwrap(), "host");
assert_eq!(columns[0].name(), "host");
assert_eq!(columns[0].logical_type(), entry_fb::LogicalColumnType::Tag);

assert_eq!(columns[1].name().unwrap(), "region");
assert_eq!(columns[1].name(), "region");
assert_eq!(columns[1].logical_type(), entry_fb::LogicalColumnType::Tag);

assert_eq!(columns[2].name().unwrap(), "time");
assert_eq!(columns[2].name(), "time");
assert_eq!(columns[2].logical_type(), entry_fb::LogicalColumnType::Time);

assert_eq!(columns[3].name().unwrap(), "val");
assert_eq!(columns[3].name(), "val");
assert_eq!(
columns[3].logical_type(),
entry_fb::LogicalColumnType::Field
);

assert_eq!(columns[4].name().unwrap(), "val2");
assert_eq!(columns[4].name(), "val2");
assert_eq!(
columns[4].logical_type(),
entry_fb::LogicalColumnType::Field

@@ -1295,7 +1481,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries
.first()

@@ -1312,17 +1498,17 @@ mod tests {
assert_eq!(columns.len(), 7);

let col = columns.get(0).unwrap();
assert_eq!(col.name().unwrap(), "bval");
assert_eq!(col.name(), "bval");
let values = col.values().bool_values().unwrap();
assert_eq!(&values, &[Some(true), Some(false)]);

let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "fval");
assert_eq!(col.name(), "fval");
let values = col.values().f64_values().unwrap();
assert_eq!(&values, &[Some(1.2), Some(2.2)]);

let col = columns.get(2).unwrap();
assert_eq!(col.name().unwrap(), "host");
assert_eq!(col.name(), "host");
let values = match col.values() {
TypedValuesIterator::String(v) => v,
_ => panic!("wrong type"),

@@ -1331,12 +1517,12 @@ mod tests {
assert_eq!(&values, &[Some("a"), Some("b")]);

let col = columns.get(3).unwrap();
assert_eq!(col.name().unwrap(), "ival");
assert_eq!(col.name(), "ival");
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(23), Some(22)]);

let col = columns.get(4).unwrap();
assert_eq!(col.name().unwrap(), "sval");
assert_eq!(col.name(), "sval");
let values = match col.values() {
TypedValuesIterator::String(v) => v,
_ => panic!("wrong type"),

@@ -1345,12 +1531,12 @@ mod tests {
assert_eq!(&values, &[Some("hi"), Some("world")]);

let col = columns.get(5).unwrap();
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
assert_eq!(col.name(), TIME_COLUMN_NAME);
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(1), Some(2)]);

let col = columns.get(6).unwrap();
assert_eq!(col.name().unwrap(), "uval");
assert_eq!(col.name(), "uval");
let values = col.values().u64_values().unwrap();
assert_eq!(&values, &[Some(7), Some(1)]);
}

@@ -1366,7 +1552,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries
.first()

@@ -1383,13 +1569,13 @@ mod tests {
assert_eq!(columns.len(), 7);

let col = columns.get(0).unwrap();
assert_eq!(col.name().unwrap(), "bool");
assert_eq!(col.name(), "bool");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = col.values().bool_values().unwrap();
assert_eq!(&values, &[None, None, Some(true)]);

let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "host");
assert_eq!(col.name(), "host");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
let values = match col.values() {
TypedValuesIterator::String(v) => v,

@@ -1399,7 +1585,7 @@ mod tests {
assert_eq!(&values, &[Some("a"), Some("a"), None]);

let col = columns.get(2).unwrap();
assert_eq!(col.name().unwrap(), "region");
assert_eq!(col.name(), "region");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
let values = match col.values() {
TypedValuesIterator::String(v) => v,

@@ -1409,7 +1595,7 @@ mod tests {
assert_eq!(&values, &[None, Some("west"), None]);

let col = columns.get(3).unwrap();
assert_eq!(col.name().unwrap(), "string");
assert_eq!(col.name(), "string");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = match col.values() {
TypedValuesIterator::String(v) => v,

@@ -1419,19 +1605,19 @@ mod tests {
assert_eq!(&values, &[None, None, Some("hello")]);

let col = columns.get(4).unwrap();
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
assert_eq!(col.name(), TIME_COLUMN_NAME);
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Time);
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(983), Some(2343), Some(222)]);

let col = columns.get(5).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(23), None, Some(21)]);

let col = columns.get(6).unwrap();
assert_eq!(col.name().unwrap(), "val2");
assert_eq!(col.name(), "val2");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = col.values().f64_values().unwrap();
assert_eq!(&values, &[None, Some(23.2), None]);

@@ -1491,7 +1677,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
let partition_writes = sharded_entries
.first()
.unwrap()

@@ -1504,7 +1690,7 @@ mod tests {
assert_eq!(batch.row_count(), 1);
let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(1)]);

@@ -1522,7 +1708,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
let partition_writes = sharded_entries
.first()
.unwrap()

@@ -1535,7 +1721,7 @@ mod tests {
assert_eq!(batch.row_count(), 8);
let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
let values = col.values().i64_values().unwrap();
assert_eq!(
&values,

@@ -1566,7 +1752,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
let partition_writes = sharded_entries
.first()
.unwrap()

@@ -1579,7 +1765,7 @@ mod tests {
assert_eq!(batch.row_count(), 9);
let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
let values = col.values().i64_values().unwrap();
assert_eq!(
&values,

@@ -1605,7 +1791,7 @@ mod tests {
let t = Utc::now().timestamp_nanos();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries
.first()

@@ -1618,7 +1804,7 @@ mod tests {
let columns = batch.columns();

let col = columns.get(0).unwrap();
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
assert_eq!(col.name(), TIME_COLUMN_NAME);
let values = col.values().i64_values().unwrap();
assert!(values[0].unwrap() > t);
assert_eq!(values[1], Some(123));

@@ -1629,7 +1815,8 @@ mod tests {
let lp = vec!["a val=1i 1", "a val=2.1 123"].join("\n");
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries = lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1));
let sharded_entries =
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1));

assert!(sharded_entries.is_err());
}

@@ -1639,7 +1826,8 @@ mod tests {
let lp = vec!["a,host=a val=1i 1", "a host=\"b\" 123"].join("\n");
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries = lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1));
let sharded_entries =
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1));

assert!(sharded_entries.is_err());
}

@@ -1655,11 +1843,13 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();
|
||||
|
||||
let sharded_entries =
|
||||
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
|
||||
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
|
||||
|
||||
let entry_bytes = sharded_entries.first().unwrap().entry.data();
|
||||
let sequenced_entry = SequencedEntry::new_from_entry_bytes(23, 2, entry_bytes).unwrap();
|
||||
assert_eq!(sequenced_entry.clock_value(), 23);
|
||||
let clock_value = ClockValue::new(23);
|
||||
let sequenced_entry =
|
||||
SequencedEntry::new_from_entry_bytes(clock_value, 2, entry_bytes).unwrap();
|
||||
assert_eq!(sequenced_entry.clock_value(), clock_value);
|
||||
assert_eq!(sequenced_entry.writer_id(), 2);
|
||||
|
||||
let partition_writes = sequenced_entry.partition_writes().unwrap();
|
||||
|
@ -1672,13 +1862,13 @@ mod tests {
|
|||
assert_eq!(columns.len(), 7);
|
||||
|
||||
let col = columns.get(0).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "bool");
|
||||
assert_eq!(col.name(), "bool");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = col.values().bool_values().unwrap();
|
||||
assert_eq!(&values, &[None, None, Some(true)]);
|
||||
|
||||
let col = columns.get(1).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "host");
|
||||
assert_eq!(col.name(), "host");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
|
||||
let values = match col.values() {
|
||||
TypedValuesIterator::String(v) => v,
|
||||
|
@ -1688,7 +1878,7 @@ mod tests {
|
|||
assert_eq!(&values, &[Some("a"), Some("a"), None]);
|
||||
|
||||
let col = columns.get(2).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "region");
|
||||
assert_eq!(col.name(), "region");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
|
||||
let values = match col.values() {
|
||||
TypedValuesIterator::String(v) => v,
|
||||
|
@ -1698,7 +1888,7 @@ mod tests {
|
|||
assert_eq!(&values, &[None, Some("west"), None]);
|
||||
|
||||
let col = columns.get(3).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "string");
|
||||
assert_eq!(col.name(), "string");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = match col.values() {
|
||||
TypedValuesIterator::String(v) => v,
|
||||
|
@ -1708,68 +1898,21 @@ mod tests {
|
|||
assert_eq!(&values, &[None, None, Some("hello")]);
|
||||
|
||||
let col = columns.get(4).unwrap();
|
||||
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
|
||||
assert_eq!(col.name(), TIME_COLUMN_NAME);
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Time);
|
||||
let values = col.values().i64_values().unwrap();
|
||||
assert_eq!(&values, &[Some(983), Some(2343), Some(222)]);
|
||||
|
||||
let col = columns.get(5).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "val");
|
||||
assert_eq!(col.name(), "val");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = col.values().i64_values().unwrap();
|
||||
assert_eq!(&values, &[Some(23), None, Some(21)]);
|
||||
|
||||
let col = columns.get(6).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "val2");
|
||||
assert_eq!(col.name(), "val2");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = col.values().f64_values().unwrap();
|
||||
assert_eq!(&values, &[None, Some(23.2), None]);
|
||||
    }

    fn sharder(count: u16) -> TestSharder {
        TestSharder {
            count,
            n: std::cell::RefCell::new(0),
        }
    }

    // For each line passed to shard returns a shard id from [0, count) in order
    struct TestSharder {
        count: u16,
        n: std::cell::RefCell<u16>,
    }

    impl Sharder for TestSharder {
        fn shard(&self, _line: &ParsedLine<'_>) -> Result<u16, DataError> {
            let n = *self.n.borrow();
            self.n.replace(n + 1);
            Ok(n % self.count)
        }
    }

    fn partitioner(count: u8) -> TestPartitioner {
        TestPartitioner {
            count,
            n: std::cell::RefCell::new(0),
        }
    }

    // For each line passed to partition_key returns a key with a number from [0,
    // count)
    struct TestPartitioner {
        count: u8,
        n: std::cell::RefCell<u8>,
    }

    impl Partitioner for TestPartitioner {
        fn partition_key(
            &self,
            _line: &ParsedLine<'_>,
            _default_time: &DateTime<Utc>,
        ) -> data_types::database_rules::Result<String> {
            let n = *self.n.borrow();
            self.n.replace(n + 1);
            Ok(format!("key_{}", n % self.count))
        }
    }
}

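To make the round-robin contract of these helpers concrete, here is a hedged sketch that is not part of the diff; it reuses only items defined in the tests above (TestSharder, the Sharder trait, parse_lines) together with a made-up line protocol string.

// Hedged sketch: with count = 2, shard ids cycle 0, 1, 0, ... per line.
let lp = "cpu val=1i 1\ncpu val=2i 2\ncpu val=3i 3"; // hypothetical input
let lines: Vec<_> = parse_lines(lp).map(|l| l.unwrap()).collect();
let test_sharder = TestSharder {
    count: 2,
    n: std::cell::RefCell::new(0),
};
let ids: Vec<u16> = lines.iter().map(|l| test_sharder.shard(l).unwrap()).collect();
assert_eq!(ids, vec![0u16, 1, 0]); // ids stay within [0, count)
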
@@ -94,11 +94,8 @@ pub enum Error {
        source: arrow_deps::arrow::error::ArrowError,
    },

    #[snafu(display("Schema Selection error while selecting '{}': {}", column_name, source))]
    SelectingColumns {
        column_name: String,
        source: arrow_deps::arrow::error::ArrowError,
    },
    #[snafu(display("Column not found '{}'", column_name))]
    ColumnNotFound { column_name: String },
}

fn nullable_to_str(nullability: bool) -> &'static str {

@@ -470,6 +467,44 @@ impl Schema {
            }
        }
    }

    /// Returns the field indexes for a given selection
    ///
    /// Returns an error if a corresponding column isn't found
    pub fn select(&self, columns: &[&str]) -> Result<Vec<usize>> {
        columns
            .iter()
            .map(|column_name| {
                self.find_index_of(column_name)
                    .ok_or_else(|| Error::ColumnNotFound {
                        column_name: column_name.to_string(),
                    })
            })
            .collect()
    }

    /// Returns the schema for a given set of column projects
    pub fn project(&self, projection: &[usize]) -> Self {
        let mut metadata = HashMap::with_capacity(projection.len() + 1);
        let mut fields = Vec::with_capacity(projection.len());
        let current_metadata = self.inner.metadata();
        for idx in projection {
            let (_, field) = self.field(*idx);
            fields.push(field.clone());

            if let Some(value) = current_metadata.get(field.name()) {
                metadata.insert(field.name().clone(), value.clone());
            }
        }

        if let Some(measurement) = current_metadata.get(MEASUREMENT_METADATA_KEY).cloned() {
            metadata.insert(MEASUREMENT_METADATA_KEY.to_string(), measurement);
        }

        Self {
            inner: Arc::new(ArrowSchema::new_with_metadata(fields, metadata)),
        }
    }
}

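As a quick orientation for reviewers, a hedged usage sketch of the new select/project pair follows; it is not part of the change, and the builder calls simply mirror the test_select test further below (SchemaBuilder, the field type token, and TIME_COLUMN_NAME are assumed to be in scope as they are there).

// Hedged sketch: resolve two columns by name, then narrow the schema to them.
let schema = SchemaBuilder::new()
    .influx_field("the_field", String)
    .tag("the_tag")
    .timestamp()
    .measurement("the_measurement")
    .build()
    .unwrap();

// `select` maps names to field indexes and fails with ColumnNotFound for
// unknown names; `project` keeps only those fields plus the measurement
// metadata.
let projection = schema.select(&["the_tag", TIME_COLUMN_NAME]).unwrap();
let projected = schema.project(&projection);
assert_eq!(projected.len(), 2);
assert_eq!(schema.measurement(), projected.measurement());
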
/// Valid types for InfluxDB data model, as defined in [the documentation]
|
||||
|
@ -1180,4 +1215,58 @@ mod test {
|
|||
expected_schema, sorted_schema
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_select() {
|
||||
let schema1 = SchemaBuilder::new()
|
||||
.influx_field("the_field", String)
|
||||
.tag("the_tag")
|
||||
.timestamp()
|
||||
.measurement("the_measurement")
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let projection = schema1.select(&[TIME_COLUMN_NAME]).unwrap();
|
||||
|
||||
let schema2 = schema1.project(&projection);
|
||||
let schema3 = Schema::try_from_arrow(Arc::clone(&schema2.inner)).unwrap();
|
||||
|
||||
assert_eq!(schema1.measurement(), schema2.measurement());
|
||||
assert_eq!(schema1.measurement(), schema3.measurement());
|
||||
|
||||
assert_eq!(schema1.len(), 3);
|
||||
assert_eq!(schema2.len(), 1);
|
||||
assert_eq!(schema3.len(), 1);
|
||||
|
||||
assert_eq!(schema1.inner.fields().len(), 3);
|
||||
assert_eq!(schema2.inner.fields().len(), 1);
|
||||
assert_eq!(schema3.inner.fields().len(), 1);
|
||||
|
||||
let get_type = |x: &Schema, field: &str| -> InfluxColumnType {
|
||||
let idx = x.find_index_of(field).unwrap();
|
||||
x.field(idx).0.unwrap()
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
get_type(&schema1, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
assert_eq!(
|
||||
get_type(&schema2, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
assert_eq!(get_type(&schema1, "the_tag"), InfluxColumnType::Tag);
|
||||
assert_eq!(
|
||||
get_type(&schema1, "the_field"),
|
||||
InfluxColumnType::Field(InfluxFieldType::String)
|
||||
);
|
||||
assert_eq!(
|
||||
get_type(&schema2, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
assert_eq!(
|
||||
get_type(&schema3, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -24,6 +24,7 @@ generated_types = { path = "../generated_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
internal_types = { path = "../internal_types" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11.1"
snafu = "0.6.2"
string-interner = "0.12.2"
tokio = { version = "1.0", features = ["macros"] }

@ -1,21 +1,26 @@
|
|||
//! Represents a Chunk of data (a collection of tables and their data within
|
||||
//! some chunk) in the mutable store.
|
||||
use arrow_deps::{arrow::record_batch::RecordBatch, datafusion::logical_plan::Expr};
|
||||
|
||||
use generated_types::wal as wb;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::partition_metadata::TableSummary;
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
||||
use arrow_deps::arrow::record_batch::RecordBatch;
|
||||
use data_types::{database_rules::WriterId, partition_metadata::TableSummary};
|
||||
use internal_types::{
|
||||
entry::{ClockValue, TableBatch},
|
||||
selection::Selection,
|
||||
};
|
||||
use tracker::{MemRegistry, MemTracker};
|
||||
|
||||
use crate::chunk::snapshot::ChunkSnapshot;
|
||||
use crate::{
|
||||
column::Column,
|
||||
dictionary::{Dictionary, Error as DictionaryError},
|
||||
pred::{ChunkPredicate, ChunkPredicateBuilder},
|
||||
dictionary::{Dictionary, Error as DictionaryError, DID},
|
||||
table::Table,
|
||||
};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use tracker::{MemRegistry, MemTracker};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
pub mod snapshot;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
|
@ -31,57 +36,12 @@ pub enum Error {
|
|||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error checking predicate in table {}: {}", table_id, source))]
|
||||
PredicateCheck {
|
||||
table_id: u32,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error checking predicate in table '{}': {}", table_name, source))]
|
||||
NamedTablePredicateCheck {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Unsupported predicate when mutable buffer table names. Found a general expression: {:?}",
|
||||
exprs
|
||||
))]
|
||||
PredicateNotYetSupported { exprs: Vec<Expr> },
|
||||
|
||||
#[snafu(display("Table ID {} not found in dictionary of chunk {}", table_id, chunk))]
|
||||
TableIdNotFoundInDictionary {
|
||||
table_id: u32,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Internal error: table {} not found in dictionary of chunk {}",
|
||||
table_name,
|
||||
chunk_id
|
||||
))]
|
||||
InternalTableNotFoundInDictionary { table_name: String, chunk_id: u32 },
|
||||
|
||||
#[snafu(display("Table {} not found in chunk {}", table, chunk))]
|
||||
TableNotFoundInChunk { table: u32, chunk: u64 },
|
||||
|
||||
#[snafu(display("Table '{}' not found in chunk {}", table_name, chunk_id))]
|
||||
NamedTableNotFoundInChunk { table_name: String, chunk_id: u64 },
|
||||
|
||||
#[snafu(display("Attempt to write table batch without a name"))]
|
||||
TableWriteWithoutName,
|
||||
|
||||
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk_id))]
|
||||
InternalColumnValueIdNotFoundInDictionary {
|
||||
value_id: u32,
|
||||
chunk_id: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
TableNotFoundInChunk { table: DID, chunk: u64 },
|
||||
|
||||
#[snafu(display("Column ID {} not found in dictionary of chunk {}", column_id, chunk))]
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: u32,
|
||||
column_id: DID,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
@ -96,12 +56,6 @@ pub enum Error {
|
|||
chunk_id: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column '{}' is not a string tag column and thus can not list values",
|
||||
column_name
|
||||
))]
|
||||
UnsupportedColumnTypeForListingValues { column_name: String },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@@ -109,34 +63,25 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug)]
pub struct Chunk {
    /// The id for this chunk
    pub id: u32,
    id: u32,

    /// `dictionary` maps &str -> u32. The u32s are used in place of String or
    /// `dictionary` maps &str -> DID. The DIDs are used in place of String or
    /// str to avoid slow string operations. The same dictionary is used for
    /// table names, tag names, tag values, and column names.
    // TODO: intern string field values too?
    pub dictionary: Dictionary,
    dictionary: Dictionary,

    /// map of the dictionary ID for the table name to the table
    pub tables: HashMap<u32, Table>,
    tables: HashMap<DID, Table>,

    /// keep track of memory used by chunk
    tracker: MemTracker,
}

impl Clone for Chunk {
    fn clone(&self) -> Self {
        // TODO: The performance of this is not great - (#635)
        let mut ret = Self {
            id: self.id,
            dictionary: self.dictionary.clone(),
            tables: self.tables.clone(),
            tracker: self.tracker.clone_empty(),
        };

        ret.tracker.set_bytes(ret.size());
        ret
    }
    /// Cached chunk snapshot
    ///
    /// Note: This is a mutex to allow mutation within
    /// `Chunk::snapshot()` which only takes an immutable borrow
    snapshot: Mutex<Option<Arc<ChunkSnapshot>>>,
}

impl Chunk {

@@ -146,38 +91,41 @@ impl Chunk {
            dictionary: Dictionary::new(),
            tables: HashMap::new(),
            tracker: memory_registry.register(),
            snapshot: Mutex::new(None),
        };
        chunk.tracker.set_bytes(chunk.size());
        chunk
    }

    pub fn write_entry(&mut self, entry: &wb::WriteBufferEntry<'_>) -> Result<()> {
        if let Some(table_batches) = entry.table_batches() {
            for batch in table_batches {
                self.write_table_batch(&batch)?;
            }
        }
    pub fn write_table_batches(
        &mut self,
        clock_value: ClockValue,
        writer_id: WriterId,
        batches: &[TableBatch<'_>],
    ) -> Result<()> {
        for batch in batches {
            let table_name = batch.name();
            let table_id = self.dictionary.lookup_value_or_insert(table_name);

        self.tracker.set_bytes(self.size());
            let table = self
                .tables
                .entry(table_id)
                .or_insert_with(|| Table::new(table_id));

        Ok(())
    }

    fn write_table_batch(&mut self, batch: &wb::TableWriteBatch<'_>) -> Result<()> {
        let table_name = batch.name().context(TableWriteWithoutName)?;
        let table_id = self.dictionary.lookup_value_or_insert(table_name);

        let table = self
            .tables
            .entry(table_id)
            .or_insert_with(|| Table::new(table_id));

        if let Some(rows) = batch.rows() {
            let columns = batch.columns();
            table
                .append_rows(&mut self.dictionary, &rows)
                .write_columns(&mut self.dictionary, clock_value, writer_id, columns)
                .context(TableWrite { table_name })?;
        }

        // Invalidate chunk snapshot
        *self
            .snapshot
            .try_lock()
            .expect("concurrent readers/writers to MBChunk") = None;

        self.tracker.set_bytes(self.size());

        Ok(())
    }

@ -191,212 +139,17 @@ impl Chunk {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return all the names of the tables names in this chunk that match
|
||||
/// chunk predicate
|
||||
pub fn table_names(&self, chunk_predicate: &ChunkPredicate) -> Result<Vec<&str>> {
|
||||
// we don't support arbitrary expressions in chunk predicate yet
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return PredicateNotYetSupported {
|
||||
exprs: chunk_predicate.chunk_exprs.clone(),
|
||||
}
|
||||
.fail();
|
||||
/// Returns a queryable snapshot of this chunk
|
||||
pub fn snapshot(&self) -> Arc<ChunkSnapshot> {
|
||||
let mut guard = self.snapshot.lock();
|
||||
if let Some(snapshot) = &*guard {
|
||||
return Arc::clone(snapshot);
|
||||
}
|
||||
|
||||
self.tables
|
||||
.iter()
|
||||
.filter_map(|(&table_id, table)| {
|
||||
// could match is good enough for this metadata query
|
||||
match table.could_match_predicate(chunk_predicate) {
|
||||
Ok(true) => Some(self.dictionary.lookup_id(table_id).context(
|
||||
TableIdNotFoundInDictionary {
|
||||
table_id,
|
||||
chunk: self.id,
|
||||
},
|
||||
)),
|
||||
Ok(false) => None,
|
||||
Err(e) => Some(Err(e).context(PredicateCheck { table_id })),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// If the column names that match the predicate can be found
|
||||
/// from the predicate entirely using metadata, return those
|
||||
/// strings.
|
||||
///
|
||||
/// If the predicate cannot be evaluated entirely with
|
||||
/// metadata, return `Ok(None)`.
|
||||
pub fn column_names(
|
||||
&self,
|
||||
table_name: &str,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<Option<BTreeSet<String>>> {
|
||||
// No support for general purpose expressions
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let table_name_id = self.table_name_id(table_name)?;
|
||||
|
||||
let mut chunk_column_ids = BTreeSet::new();
|
||||
|
||||
// Is this table in the chunk?
|
||||
if let Some(table) = self.tables.get(&table_name_id) {
|
||||
for (&column_id, column) in &table.columns {
|
||||
let column_matches_predicate = table
|
||||
.column_matches_predicate(&column, chunk_predicate)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
if column_matches_predicate {
|
||||
chunk_column_ids.insert(column_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only return subset of these selection_cols if not all_cols
|
||||
let mut all_cols = true;
|
||||
let selection_cols = match selection {
|
||||
Selection::All => &[""],
|
||||
Selection::Some(cols) => {
|
||||
all_cols = false;
|
||||
cols
|
||||
}
|
||||
};
|
||||
|
||||
let mut column_names = BTreeSet::new();
|
||||
for &column_id in &chunk_column_ids {
|
||||
let column_name =
|
||||
self.dictionary
|
||||
.lookup_id(column_id)
|
||||
.context(ColumnIdNotFoundInDictionary {
|
||||
column_id,
|
||||
chunk: self.id,
|
||||
})?;
|
||||
|
||||
if !column_names.contains(column_name)
|
||||
&& (all_cols || selection_cols.contains(&column_name))
|
||||
{
|
||||
// only use columns in selection_cols
|
||||
column_names.insert(column_name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(column_names))
|
||||
}
|
||||
|
||||
/// Return the id of the table in the chunk's dictionary
|
||||
fn table_name_id(&self, table_name: &str) -> Result<u32> {
|
||||
self.dictionary
|
||||
.id(table_name)
|
||||
.context(InternalTableNotFoundInDictionary {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the strings of the specified Tag column that satisfy
|
||||
/// the predicate, if they can be determined entirely using metadata.
|
||||
///
|
||||
/// If the predicate cannot be evaluated entirely with metadata,
|
||||
/// return `Ok(None)`.
|
||||
pub fn tag_column_values(
|
||||
&self,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<Option<BTreeSet<String>>> {
|
||||
// No support for general purpose expressions
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let chunk_id = self.id();
|
||||
|
||||
let table_name_id = self.table_name_id(table_name)?;
|
||||
|
||||
// Is this table even in the chunk?
|
||||
let table = self
|
||||
.tables
|
||||
.get(&table_name_id)
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
// See if we can rule out the table entirely based on metadata
|
||||
let could_match = table
|
||||
.could_match_predicate(chunk_predicate)
|
||||
.context(NamedTablePredicateCheck { table_name })?;
|
||||
|
||||
if !could_match {
|
||||
// No columns could match, return empty set
|
||||
return Ok(Default::default());
|
||||
}
|
||||
|
||||
let column_id =
|
||||
self.dictionary
|
||||
.lookup_value(column_name)
|
||||
.context(ColumnNameNotFoundInDictionary {
|
||||
column_name,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let column = table
|
||||
.column(column_id)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
if let Column::Tag(column, _) = column {
|
||||
// if we have a timestamp predicate, find all values
|
||||
// where the timestamp is within range. Otherwise take
|
||||
// all values.
|
||||
|
||||
// Collect matching ids into BTreeSet to deduplicate on
|
||||
// ids *before* looking up Strings
|
||||
let column_value_ids: BTreeSet<u32> = match chunk_predicate.range {
|
||||
None => {
|
||||
// take all non-null values
|
||||
column.iter().filter_map(|&s| s).collect()
|
||||
}
|
||||
Some(range) => {
|
||||
// filter out all values that don't match the timestamp
|
||||
let time_column = table
|
||||
.column_i64(chunk_predicate.time_column_id)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
column
|
||||
.iter()
|
||||
.zip(time_column.iter())
|
||||
.filter_map(|(&column_value_id, ×tamp_value)| {
|
||||
if range.contains_opt(timestamp_value) {
|
||||
column_value_id
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
};
|
||||
|
||||
// convert all the (deduplicated) ids to Strings
|
||||
let column_values = column_value_ids
|
||||
.into_iter()
|
||||
.map(|value_id| {
|
||||
let value = self.dictionary.lookup_id(value_id).context(
|
||||
InternalColumnValueIdNotFoundInDictionary { value_id, chunk_id },
|
||||
)?;
|
||||
Ok(value.to_string())
|
||||
})
|
||||
.collect::<Result<BTreeSet<String>>>()?;
|
||||
|
||||
Ok(Some(column_values))
|
||||
} else {
|
||||
UnsupportedColumnTypeForListingValues { column_name }.fail()
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a builder suitable to create predicates for this Chunk
|
||||
pub fn predicate_builder(&self) -> Result<ChunkPredicateBuilder<'_>, crate::pred::Error> {
|
||||
ChunkPredicateBuilder::new(&self.dictionary)
|
||||
// TODO: Incremental snapshot generation
|
||||
let snapshot = Arc::new(ChunkSnapshot::new(self));
|
||||
*guard = Some(Arc::clone(&snapshot));
|
||||
snapshot
|
||||
}
|
||||
|
||||
/// returns true if there is no data in this chunk
|
||||
|
@ -420,7 +173,7 @@ impl Chunk {
|
|||
if let Some(table) = self.table(table_name)? {
|
||||
dst.push(
|
||||
table
|
||||
.to_arrow(&self, selection)
|
||||
.to_arrow(&self.dictionary, selection)
|
||||
.context(NamedTableError { table_name })?,
|
||||
);
|
||||
}
|
||||
|
@ -439,7 +192,7 @@ impl Chunk {
|
|||
|
||||
TableSummary {
|
||||
name: name.to_string(),
|
||||
columns: table.stats(&self),
|
||||
columns: table.stats(&self.dictionary),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
|
@ -459,21 +212,6 @@ impl Chunk {
|
|||
Ok(table)
|
||||
}
|
||||
|
||||
/// Return Schema for the specified table / columns
|
||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||
let table = self
|
||||
.table(table_name)?
|
||||
// Option --> Result
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})?;
|
||||
|
||||
table
|
||||
.schema(self, selection)
|
||||
.context(NamedTableError { table_name })
|
||||
}
|
||||
|
||||
/// Return the approximate memory size of the chunk, in bytes including the
|
||||
/// dictionary, tables, and their rows.
|
||||
pub fn size(&self) -> usize {
|
||||
|
@ -486,3 +224,155 @@ impl Chunk {
|
|||
matches!(self.table(table_name), Ok(Some(_)))
|
||||
}
|
||||
}
|
||||
|
||||
pub mod test_helpers {
    use super::*;
    use internal_types::entry::test_helpers::lp_to_entry;

    /// A helper that will write line protocol string to the passed in Chunk.
    /// All data will be under a single partition with a clock value and
    /// writer id of 0.
    pub fn write_lp_to_chunk(lp: &str, chunk: &mut Chunk) -> Result<()> {
        let entry = lp_to_entry(lp);

        for w in entry.partition_writes().unwrap() {
            chunk.write_table_batches(ClockValue::new(0), 0, &w.table_batches())?;
        }

        Ok(())
    }
}

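A hedged end-to-end sketch, not part of the diff, tying this helper to the new write path and the snapshot module added below; everything used here (Chunk::new, write_lp_to_chunk, snapshot, has_table, read_filter, Selection) appears elsewhere in this file or in chunk/snapshot.rs.

// Hedged sketch: write a line of line protocol, read it back via a snapshot.
let registry = MemRegistry::new();
let mut chunk = Chunk::new(42, &registry);
test_helpers::write_lp_to_chunk("cpu,host=a val=23 1", &mut chunk).unwrap();

let snapshot = chunk.snapshot(); // cached until the next write invalidates it
assert!(snapshot.has_table("cpu"));
let batch = snapshot.read_filter("cpu", Selection::All).unwrap();
assert_eq!(batch.num_rows(), 1);
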
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::test_helpers::write_lp_to_chunk;
|
||||
use super::*;
|
||||
use arrow_deps::arrow::util::pretty::pretty_format_batches;
|
||||
|
||||
#[test]
|
||||
fn writes_table_batches() {
|
||||
let mr = MemRegistry::new();
|
||||
let mut chunk = Chunk::new(1, &mr);
|
||||
|
||||
let lp = vec![
|
||||
"cpu,host=a val=23 1",
|
||||
"cpu,host=b val=2 1",
|
||||
"mem,host=a val=23432i 1",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"cpu",
|
||||
&[
|
||||
"+------+------+-----+",
|
||||
"| host | time | val |",
|
||||
"+------+------+-----+",
|
||||
"| a | 1 | 23 |",
|
||||
"| b | 1 | 2 |",
|
||||
"+------+------+-----+\n",
|
||||
],
|
||||
);
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"mem",
|
||||
&[
|
||||
"+------+------+-------+",
|
||||
"| host | time | val |",
|
||||
"+------+------+-------+",
|
||||
"| a | 1 | 23432 |",
|
||||
"+------+------+-------+\n",
|
||||
],
|
||||
);
|
||||
|
||||
let lp = vec![
|
||||
"cpu,host=c val=11 1",
|
||||
"mem sval=\"hi\" 2",
|
||||
"disk val=true 1",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"cpu",
|
||||
&[
|
||||
"+------+------+-----+",
|
||||
"| host | time | val |",
|
||||
"+------+------+-----+",
|
||||
"| a | 1 | 23 |",
|
||||
"| b | 1 | 2 |",
|
||||
"| c | 1 | 11 |",
|
||||
"+------+------+-----+\n",
|
||||
],
|
||||
);
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"disk",
|
||||
&[
|
||||
"+------+------+",
|
||||
"| time | val |",
|
||||
"+------+------+",
|
||||
"| 1 | true |",
|
||||
"+------+------+\n",
|
||||
],
|
||||
);
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"mem",
|
||||
&[
|
||||
"+------+------+------+-------+",
|
||||
"| host | sval | time | val |",
|
||||
"+------+------+------+-------+",
|
||||
"| a | | 1 | 23432 |",
|
||||
"| | hi | 2 | |",
|
||||
"+------+------+------+-------+\n",
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_snapshot() {
|
||||
let mr = MemRegistry::new();
|
||||
let mut chunk = Chunk::new(1, &mr);
|
||||
|
||||
let lp = vec![
|
||||
"cpu,host=a val=23 1",
|
||||
"cpu,host=b val=2 1",
|
||||
"mem,host=a val=23432i 1",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
let s1 = chunk.snapshot();
|
||||
let s2 = chunk.snapshot();
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
let s3 = chunk.snapshot();
|
||||
let s4 = chunk.snapshot();
|
||||
|
||||
assert_eq!(Arc::as_ptr(&s1), Arc::as_ptr(&s2));
|
||||
assert_ne!(Arc::as_ptr(&s1), Arc::as_ptr(&s3));
|
||||
assert_eq!(Arc::as_ptr(&s3), Arc::as_ptr(&s4));
|
||||
}
|
||||
|
||||
fn assert_table(chunk: &Chunk, table: &str, data: &[&str]) {
|
||||
let mut batches = vec![];
|
||||
chunk
|
||||
.table_to_arrow(&mut batches, table, Selection::All)
|
||||
.unwrap();
|
||||
let res = pretty_format_batches(&batches).unwrap();
|
||||
let data = data.join("\n");
|
||||
assert_eq!(
|
||||
res, data,
|
||||
"\n{} table results not as expected:\nEXPECTED:\n{}\nRECEIVED:\n{}",
|
||||
table, data, res
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,181 @@
|
|||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_deps::arrow::record_batch::RecordBatch;
|
||||
use data_types::timestamp::TimestampRange;
|
||||
use internal_types::schema::{Schema, TIME_COLUMN_NAME};
|
||||
use internal_types::selection::Selection;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
||||
use super::Chunk;
|
||||
|
||||
#[derive(Debug, Snafu)]
pub enum Error {
    #[snafu(display("Table not found: {}", table_name))]
    TableNotFound { table_name: String },

    #[snafu(display("Failed to select columns: {}", source))]
    SelectColumns {
        source: internal_types::schema::Error,
    },
}

pub type Result<T, E = Error> = std::result::Result<T, E>;

/// A queryable snapshot of a mutable buffer chunk
#[derive(Debug)]
pub struct ChunkSnapshot {
    /// The ID of the chunk this is a snapshot of
    chunk_id: u32,

    /// Maps table name to `TableSnapshot`
    records: HashMap<String, TableSnapshot>,
    // TODO: Memory tracking
}

#[derive(Debug)]
struct TableSnapshot {
    schema: Schema,
    batch: RecordBatch,
    timestamp_range: Option<TimestampRange>,
}

impl TableSnapshot {
    fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
        match (self.timestamp_range, timestamp_range) {
            (Some(a), Some(b)) => !a.disjoint(b),
            // If this chunk doesn't have a time column it can't match the predicate
            (None, Some(_)) => false,
            (_, None) => true,
        }
    }
}

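To make the overlap rule above concrete, here is a small hedged sketch that is not part of the new file, assuming `TimestampRange::disjoint` takes its argument by reference as it appears to in `matches_predicate`:

// Data observed at t in [100, 200] is stored as the half-open range [100, 201).
let table_range = TimestampRange::new(100, 201);
// A query over [150, 160) overlaps, so the table stays a candidate...
assert!(!table_range.disjoint(&TimestampRange::new(150, 160)));
// ...while a query over [300, 400) is disjoint and the table is pruned.
assert!(table_range.disjoint(&TimestampRange::new(300, 400)));
// A table without a time column reports `false` for any time-bounded query.
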
impl ChunkSnapshot {
|
||||
pub fn new(chunk: &Chunk) -> Self {
|
||||
let mut records: HashMap<String, TableSnapshot> = Default::default();
|
||||
for (id, table) in &chunk.tables {
|
||||
let schema = table.schema(&chunk.dictionary, Selection::All).unwrap();
|
||||
let batch = table.to_arrow(&chunk.dictionary, Selection::All).unwrap();
|
||||
let name = chunk.dictionary.lookup_id(*id).unwrap();
|
||||
|
||||
let timestamp_range = chunk
|
||||
.dictionary
|
||||
.lookup_value(TIME_COLUMN_NAME)
|
||||
.ok()
|
||||
.and_then(|column_id| {
|
||||
table.column(column_id).ok().and_then(|column| {
|
||||
// TimestampRange has an exclusive upper bound
|
||||
column
|
||||
.get_i64_stats()
|
||||
.map(|x| TimestampRange::new(x.min, x.max + 1))
|
||||
})
|
||||
});
|
||||
|
||||
records.insert(
|
||||
name.to_string(),
|
||||
TableSnapshot {
|
||||
batch,
|
||||
schema,
|
||||
timestamp_range,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Self {
|
||||
chunk_id: chunk.id,
|
||||
records,
|
||||
}
|
||||
}
|
||||
|
||||
/// return the ID of the chunk this is a snapshot of
|
||||
pub fn chunk_id(&self) -> u32 {
|
||||
self.chunk_id
|
||||
}
|
||||
|
||||
/// returns true if there is no data in this snapshot
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.records.is_empty()
|
||||
}
|
||||
|
||||
/// Return true if this snapshot has the specified table name
|
||||
pub fn has_table(&self, table_name: &str) -> bool {
|
||||
self.records.get(table_name).is_some()
|
||||
}
|
||||
|
||||
/// Return Schema for the specified table / columns
|
||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||
let table = self
|
||||
.records
|
||||
.get(table_name)
|
||||
.context(TableNotFound { table_name })?;
|
||||
|
||||
Ok(match selection {
|
||||
Selection::All => table.schema.clone(),
|
||||
Selection::Some(columns) => {
|
||||
let columns = table.schema.select(columns).context(SelectColumns)?;
|
||||
table.schema.project(&columns)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a list of tables with writes matching the given timestamp_range
|
||||
pub fn table_names(
|
||||
&self,
|
||||
timestamp_range: Option<TimestampRange>,
|
||||
) -> impl Iterator<Item = &String> + '_ {
|
||||
self.records
|
||||
.iter()
|
||||
.flat_map(move |(table_name, table_snapshot)| {
|
||||
match table_snapshot.matches_predicate(×tamp_range) {
|
||||
true => Some(table_name),
|
||||
false => None,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a RecordBatch with the given selection
|
||||
pub fn read_filter(&self, table_name: &str, selection: Selection<'_>) -> Result<RecordBatch> {
|
||||
let table = self
|
||||
.records
|
||||
.get(table_name)
|
||||
.context(TableNotFound { table_name })?;
|
||||
|
||||
Ok(match selection {
|
||||
Selection::All => table.batch.clone(),
|
||||
Selection::Some(columns) => {
|
||||
let projection = table.schema.select(columns).context(SelectColumns)?;
|
||||
let schema = table.schema.project(&projection).into();
|
||||
let columns = projection
|
||||
.into_iter()
|
||||
.map(|x| Arc::clone(table.batch.column(x)))
|
||||
.collect();
|
||||
|
||||
RecordBatch::try_new(schema, columns).expect("failed to project record batch")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a given selection of column names from a table
|
||||
pub fn column_names(
|
||||
&self,
|
||||
table_name: &str,
|
||||
selection: Selection<'_>,
|
||||
) -> Option<BTreeSet<String>> {
|
||||
let table = self.records.get(table_name)?;
|
||||
let fields = table.schema.inner().fields().iter();
|
||||
|
||||
Some(match selection {
|
||||
Selection::Some(cols) => fields
|
||||
.filter_map(|x| {
|
||||
if cols.contains(&x.name().as_str()) {
|
||||
Some(x.name().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
Selection::All => fields.map(|x| x.name().clone()).collect(),
|
||||
})
|
||||
}
|
||||
}
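A hedged sketch of how a caller might drive this API, not part of the diff; `snapshot` is assumed to be an `Arc<ChunkSnapshot>` obtained from `Chunk::snapshot()`, and the selected columns are assumed to exist in each matching table.

// Hedged sketch: prune tables by time range, then project two columns.
let range = Some(TimestampRange::new(0, 100));
for table_name in snapshot.table_names(range) {
    let batch = snapshot
        .read_filter(table_name, Selection::Some(&["host", "time"]))
        .unwrap();
    assert_eq!(batch.num_columns(), 2);
}
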
|
|
@ -1,10 +1,9 @@
|
|||
use generated_types::wal as wb;
|
||||
use snafu::Snafu;
|
||||
|
||||
use crate::dictionary::Dictionary;
|
||||
use arrow_deps::arrow::datatypes::DataType as ArrowDataType;
|
||||
use crate::dictionary::{Dictionary, DID};
|
||||
use data_types::partition_metadata::StatValues;
|
||||
use internal_types::data::type_description;
|
||||
use generated_types::entry::LogicalColumnType;
|
||||
use internal_types::entry::TypedValuesIterator;
|
||||
|
||||
use std::mem;
|
||||
|
||||
|
@ -37,80 +36,276 @@ pub enum Column {
|
|||
U64(Vec<Option<u64>>, StatValues<u64>),
|
||||
String(Vec<Option<String>>, StatValues<String>),
|
||||
Bool(Vec<Option<bool>>, StatValues<bool>),
|
||||
Tag(Vec<Option<u32>>, StatValues<String>),
|
||||
Tag(Vec<Option<DID>>, StatValues<String>),
|
||||
}
|
||||
|
||||
impl Column {
|
||||
pub fn with_value(
|
||||
/// Initializes a new column from typed values, the column on a table write
|
||||
/// batch on an Entry. Will initialize the stats with the first
|
||||
/// non-null value and update with any other non-null values included.
|
||||
pub fn new_from_typed_values(
|
||||
dictionary: &mut Dictionary,
|
||||
capacity: usize,
|
||||
value: wb::Value<'_>,
|
||||
) -> Result<Self> {
|
||||
Ok(match value.value_type() {
|
||||
wb::ColumnValue::F64Value => {
|
||||
let val = value
|
||||
.value_as_f64value()
|
||||
.expect("f64 value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::F64(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::I64Value => {
|
||||
let val = value
|
||||
.value_as_i64value()
|
||||
.expect("i64 value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::I64(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::U64Value => {
|
||||
let val = value
|
||||
.value_as_u64value()
|
||||
.expect("u64 value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::U64(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::StringValue => {
|
||||
let val = value
|
||||
.value_as_string_value()
|
||||
.expect("string value should be present")
|
||||
.value()
|
||||
.expect("string must be present");
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val.to_string()));
|
||||
Self::String(vals, StatValues::new(val.to_string()))
|
||||
}
|
||||
wb::ColumnValue::BoolValue => {
|
||||
let val = value
|
||||
.value_as_bool_value()
|
||||
.expect("bool value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::Bool(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::TagValue => {
|
||||
let val = value
|
||||
.value_as_tag_value()
|
||||
.expect("tag value should be present")
|
||||
.value()
|
||||
.expect("tag value must have string value");
|
||||
let mut vals = vec![None; capacity];
|
||||
let id = dictionary.lookup_value_or_insert(val);
|
||||
vals.push(Some(id));
|
||||
Self::Tag(vals, StatValues::new(val.to_string()))
|
||||
}
|
||||
_ => {
|
||||
return UnknownColumnType {
|
||||
inserted_value_type: type_description(value.value_type()),
|
||||
row_count: usize,
|
||||
logical_type: LogicalColumnType,
|
||||
values: TypedValuesIterator<'_>,
|
||||
) -> Self {
|
||||
match values {
|
||||
TypedValuesIterator::String(vals) => match logical_type {
|
||||
LogicalColumnType::Tag => {
|
||||
let mut tag_values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<String>> = None;
|
||||
|
||||
let mut added_tag_values: Vec<_> = vals
|
||||
.map(|tag| {
|
||||
tag.map(|tag| {
|
||||
match stats.as_mut() {
|
||||
Some(s) => StatValues::update_string(s, tag),
|
||||
None => {
|
||||
stats = Some(StatValues::new(tag.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
dictionary.lookup_value_or_insert(tag)
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
tag_values.append(&mut added_tag_values);
|
||||
|
||||
Self::Tag(
|
||||
tag_values,
|
||||
stats.expect("can't insert tag column with no values"),
|
||||
)
|
||||
}
|
||||
.fail()
|
||||
LogicalColumnType::Field => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<String>> = None;
|
||||
|
||||
for value in vals {
|
||||
match value {
|
||||
Some(v) => {
|
||||
match stats.as_mut() {
|
||||
Some(s) => StatValues::update_string(s, v),
|
||||
None => stats = Some(StatValues::new(v.to_string())),
|
||||
}
|
||||
|
||||
values.push(Some(v.to_string()));
|
||||
}
|
||||
None => values.push(None),
|
||||
}
|
||||
}
|
||||
|
||||
Self::String(
|
||||
values,
|
||||
stats.expect("can't insert string column with no values"),
|
||||
)
|
||||
}
|
||||
_ => panic!("unsupported!"),
|
||||
},
|
||||
TypedValuesIterator::I64(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<i64>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::I64(
|
||||
values,
|
||||
stats.expect("can't insert i64 column with no values"),
|
||||
)
|
||||
}
|
||||
})
|
||||
TypedValuesIterator::F64(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<f64>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::F64(
|
||||
values,
|
||||
stats.expect("can't insert f64 column with no values"),
|
||||
)
|
||||
}
|
||||
TypedValuesIterator::U64(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<u64>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::U64(
|
||||
values,
|
||||
stats.expect("can't insert u64 column with no values"),
|
||||
)
|
||||
}
|
||||
TypedValuesIterator::Bool(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<bool>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::Bool(
|
||||
values,
|
||||
stats.expect("can't insert bool column with no values"),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes typed values, the column from a table write batch on an Entry.
|
||||
/// Updates statistics for any non-null values.
|
||||
pub fn push_typed_values(
|
||||
&mut self,
|
||||
dictionary: &mut Dictionary,
|
||||
logical_type: LogicalColumnType,
|
||||
values: TypedValuesIterator<'_>,
|
||||
) -> Result<()> {
|
||||
match (self, values) {
|
||||
(Self::Bool(col, stats), TypedValuesIterator::Bool(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::I64(col, stats), TypedValuesIterator::I64(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::F64(col, stats), TypedValuesIterator::F64(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::U64(col, stats), TypedValuesIterator::U64(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::String(col, stats), TypedValuesIterator::String(values)) => {
|
||||
if logical_type != LogicalColumnType::Field {
|
||||
TypeMismatch {
|
||||
existing_column_type: "String",
|
||||
inserted_value_type: "tag",
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
|
||||
for val in values {
|
||||
match val {
|
||||
Some(v) => {
|
||||
StatValues::update_string(stats, v);
|
||||
col.push(Some(v.to_string()));
|
||||
}
|
||||
None => col.push(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
(Self::Tag(col, stats), TypedValuesIterator::String(values)) => {
|
||||
if logical_type != LogicalColumnType::Tag {
|
||||
TypeMismatch {
|
||||
existing_column_type: "tag",
|
||||
inserted_value_type: "String",
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
|
||||
for val in values {
|
||||
match val {
|
||||
Some(v) => {
|
||||
StatValues::update_string(stats, v);
|
||||
let id = dictionary.lookup_value_or_insert(v);
|
||||
col.push(Some(id));
|
||||
}
|
||||
None => col.push(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
(existing, values) => TypeMismatch {
|
||||
existing_column_type: existing.type_description(),
|
||||
inserted_value_type: values.type_description(),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pushes None values onto the column until its len is equal to that passed
|
||||
/// in
|
||||
pub fn push_nulls_to_len(&mut self, len: usize) {
|
||||
match self {
|
||||
Self::Tag(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::I64(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::F64(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::U64(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::Bool(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::String(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
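A hedged sketch of the padding behaviour described above (not part of the diff), constructing a column variant directly the way the tests at the bottom of this file do:

// Hedged sketch: pad a two-value column out to 5 rows with trailing NULLs.
let mut col = Column::I64(vec![Some(1), Some(2)], StatValues::new(1));
col.push_nulls_to_len(5);
assert_eq!(col.len(), 5);
// A shorter target length is a no-op; the column is never truncated.
col.push_nulls_to_len(3);
assert_eq!(col.len(), 5);
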
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
|
@@ -124,10 +319,6 @@ impl Column {
        }
    }

    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    pub fn type_description(&self) -> &'static str {
        match self {
            Self::F64(_, _) => "f64",

@ -139,167 +330,10 @@ impl Column {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return the arrow DataType for this column
|
||||
pub fn data_type(&self) -> ArrowDataType {
|
||||
pub fn get_i64_stats(&self) -> Option<StatValues<i64>> {
|
||||
match self {
|
||||
Self::F64(..) => ArrowDataType::Float64,
|
||||
Self::I64(..) => ArrowDataType::Int64,
|
||||
Self::U64(..) => ArrowDataType::UInt64,
|
||||
Self::String(..) => ArrowDataType::Utf8,
|
||||
Self::Bool(..) => ArrowDataType::Boolean,
|
||||
Self::Tag(..) => ArrowDataType::Utf8,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, dictionary: &mut Dictionary, value: &wb::Value<'_>) -> Result<()> {
|
||||
let inserted = match self {
|
||||
Self::Tag(vals, stats) => match value.value_as_tag_value() {
|
||||
Some(tag) => {
|
||||
let tag_value = tag.value().expect("tag must have string value");
|
||||
let id = dictionary.lookup_value_or_insert(tag_value);
|
||||
vals.push(Some(id));
|
||||
StatValues::update_string(stats, tag_value);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::String(vals, stats) => match value.value_as_string_value() {
|
||||
Some(str_val) => {
|
||||
let str_val = str_val.value().expect("string must have value");
|
||||
vals.push(Some(str_val.to_string()));
|
||||
StatValues::update_string(stats, str_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::Bool(vals, stats) => match value.value_as_bool_value() {
|
||||
Some(bool_val) => {
|
||||
let bool_val = bool_val.value();
|
||||
vals.push(Some(bool_val));
|
||||
stats.update(bool_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::I64(vals, stats) => match value.value_as_i64value() {
|
||||
Some(i64_val) => {
|
||||
let i64_val = i64_val.value();
|
||||
vals.push(Some(i64_val));
|
||||
stats.update(i64_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::U64(vals, stats) => match value.value_as_u64value() {
|
||||
Some(u64_val) => {
|
||||
let u64_val = u64_val.value();
|
||||
vals.push(Some(u64_val));
|
||||
stats.update(u64_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::F64(vals, stats) => match value.value_as_f64value() {
|
||||
Some(f64_val) => {
|
||||
let f64_val = f64_val.value();
|
||||
vals.push(Some(f64_val));
|
||||
stats.update(f64_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
};
|
||||
|
||||
if inserted {
|
||||
Ok(())
|
||||
} else {
|
||||
TypeMismatch {
|
||||
existing_column_type: self.type_description(),
|
||||
inserted_value_type: type_description(value.value_type()),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
// push_none_if_len_equal will add a None value to the end of the Vec of values
|
||||
// if the length is equal to the passed in value. This is used to ensure
|
||||
// columns are all the same length.
|
||||
pub fn push_none_if_len_equal(&mut self, len: usize) {
|
||||
match self {
|
||||
Self::F64(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::I64(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::U64(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::String(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::Bool(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::Tag(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if any rows are within the range [min_value,
|
||||
/// max_value). Inclusive of `start`, exclusive of `end`
|
||||
pub fn has_i64_range(&self, start: i64, end: i64) -> Result<bool> {
|
||||
match self {
|
||||
Self::I64(_, stats) => {
|
||||
if stats.max < start || stats.min >= end {
|
||||
Ok(false)
|
||||
} else {
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
_ => InternalTypeMismatchForTimePredicate {}.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true of this column's type is a Tag
|
||||
pub fn is_tag(&self) -> bool {
|
||||
matches!(self, Self::Tag(..))
|
||||
}
|
||||
|
||||
/// Returns true if there exists at least one row idx where this
|
||||
/// self[i] is within the range [min_value, max_value). Inclusive
|
||||
/// of `start`, exclusive of `end` and where col[i] is non null
|
||||
pub fn has_non_null_i64_range<T>(
|
||||
&self,
|
||||
column: &[Option<T>],
|
||||
start: i64,
|
||||
end: i64,
|
||||
) -> Result<bool> {
|
||||
match self {
|
||||
Self::I64(v, _) => {
|
||||
for (index, val) in v.iter().enumerate() {
|
||||
if let Some(val) = val {
|
||||
if start <= *val && *val < end && column[index].is_some() {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
_ => InternalTypeMismatchForTimePredicate {}.fail(),
|
||||
Self::I64(_, values) => Some(values.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -322,7 +356,7 @@ impl Column {
|
|||
mem::size_of::<Option<bool>>() * v.len() + mem::size_of_val(&stats)
|
||||
}
|
||||
Self::Tag(v, stats) => {
|
||||
mem::size_of::<Option<u32>>() * v.len() + mem::size_of_val(&stats)
|
||||
mem::size_of::<Option<DID>>() * v.len() + mem::size_of_val(&stats)
|
||||
}
|
||||
Self::String(v, stats) => {
|
||||
let string_bytes_size = v
|
||||
|
@ -334,89 +368,3 @@ impl Column {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_has_i64_range() {
|
||||
let mut stats = StatValues::new(1);
|
||||
stats.update(2);
|
||||
let col = Column::I64(vec![Some(1), None, Some(2)], stats.clone());
|
||||
assert!(!col.has_i64_range(-1, 0).unwrap());
|
||||
assert!(!col.has_i64_range(0, 1).unwrap());
|
||||
assert!(col.has_i64_range(1, 2).unwrap());
|
||||
assert!(col.has_i64_range(2, 3).unwrap());
|
||||
assert!(!col.has_i64_range(3, 4).unwrap());
|
||||
|
||||
let col = Column::I64(vec![Some(2), None, Some(1)], stats);
|
||||
assert!(!col.has_i64_range(-1, 0).unwrap());
|
||||
assert!(!col.has_i64_range(0, 1).unwrap());
|
||||
assert!(col.has_i64_range(1, 2).unwrap());
|
||||
assert!(col.has_i64_range(2, 3).unwrap());
|
||||
assert!(!col.has_i64_range(3, 4).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_i64_range_does_not_panic() {
|
||||
// providing the wrong column type should get an internal error, not a panic
|
||||
let col = Column::F64(vec![Some(1.2)], StatValues::new(1.2));
|
||||
let res = col.has_i64_range(-1, 0);
|
||||
assert!(res.is_err());
|
||||
let res_string = format!("{:?}", res);
|
||||
let expected = "InternalTypeMismatchForTimePredicate";
|
||||
assert!(
|
||||
res_string.contains(expected),
|
||||
"Did not find expected text '{}' in '{}'",
|
||||
expected,
|
||||
res_string
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_non_null_i64_range_() {
|
||||
let none_col: Vec<Option<u32>> = vec![None, None, None];
|
||||
let some_col: Vec<Option<u32>> = vec![Some(0), Some(0), Some(0)];
|
||||
|
||||
let mut stats = StatValues::new(1);
|
||||
stats.update(2);
|
||||
let col = Column::I64(vec![Some(1), None, Some(2)], stats);
|
||||
|
||||
assert!(!col.has_non_null_i64_range(&some_col, -1, 0).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&some_col, 0, 1).unwrap());
|
||||
assert!(col.has_non_null_i64_range(&some_col, 1, 2).unwrap());
|
||||
assert!(col.has_non_null_i64_range(&some_col, 2, 3).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&some_col, 3, 4).unwrap());
|
||||
|
||||
assert!(!col.has_non_null_i64_range(&none_col, -1, 0).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 0, 1).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 1, 2).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 2, 3).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 3, 4).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn column_size() {
|
||||
let i64col = Column::I64(vec![Some(1), Some(1)], StatValues::new(1));
|
||||
assert_eq!(40, i64col.size());
|
||||
|
||||
let f64col = Column::F64(vec![Some(1.1), Some(1.1), Some(1.1)], StatValues::new(1.1));
|
||||
assert_eq!(56, f64col.size());
|
||||
|
||||
let boolcol = Column::Bool(vec![Some(true)], StatValues::new(true));
|
||||
assert_eq!(9, boolcol.size());
|
||||
|
||||
let tagcol = Column::Tag(
|
||||
vec![Some(1), Some(1), Some(1), Some(1)],
|
||||
StatValues::new("foo".to_string()),
|
||||
);
|
||||
assert_eq!(40, tagcol.size());
|
||||
|
||||
let stringcol = Column::String(
|
||||
vec![Some("foo".to_string()), Some("hello world".to_string())],
|
||||
StatValues::new("foo".to_string()),
|
||||
);
|
||||
assert_eq!(70, stringcol.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ use string_interner::{
|
|||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Dictionary lookup error on id {}", id))]
|
||||
DictionaryIdLookupError { id: u32 },
|
||||
DictionaryIdLookupError { id: DID },
|
||||
|
||||
#[snafu(display("Dictionary lookup error for value {}", value))]
|
||||
DictionaryValueLookupError { value: String },
|
||||
|
@ -16,6 +16,30 @@ pub enum Error {
|
|||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// A "dictionary ID" (DID) is a compact numeric representation of an interned
|
||||
/// string in the dictionary. The same string always maps to the same DID. DIDs can
|
||||
/// be compared, hashed and cheaply copied around, just like small integers.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub struct DID(DefaultSymbol);
|
||||
|
||||
impl DID {
|
||||
fn new(s: DefaultSymbol) -> Self {
|
||||
Self(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DID> for DefaultSymbol {
|
||||
fn from(id: DID) -> Self {
|
||||
id.0
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for DID {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0.to_usize())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Dictionary {
|
||||
interner: StringInterner<DefaultSymbol, StringBackend<DefaultSymbol>, DefaultHashBuilder>,
|
||||
|
@ -39,43 +63,37 @@ impl Dictionary {
|
|||
|
||||
/// Returns the id corresponding to value, adding an entry for the
|
||||
/// value if it is not yet present in the dictionary.
|
||||
pub fn lookup_value_or_insert(&mut self, value: &str) -> u32 {
|
||||
pub fn lookup_value_or_insert(&mut self, value: &str) -> DID {
|
||||
self.id(value).unwrap_or_else(|| {
|
||||
self.size += value.len();
|
||||
self.size += std::mem::size_of::<u32>();
|
||||
symbol_to_u32(self.interner.get_or_intern(value))
|
||||
DID::new(self.interner.get_or_intern(value))
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the ID in self.dictionary that corresponds to `value`, if any.
|
||||
/// Returns an error if no such value is found. Does not add the value
|
||||
/// to the dictionary.
|
||||
pub fn lookup_value(&self, value: &str) -> Result<u32> {
|
||||
pub fn lookup_value(&self, value: &str) -> Result<DID> {
|
||||
self.id(value).context(DictionaryValueLookupError { value })
|
||||
}
|
||||
|
||||
/// Returns the ID in self.dictionary that corresponds to `value`,
|
||||
/// if any. No error is returned to avoid an allocation when no value is
|
||||
/// present
|
||||
pub fn id(&self, value: &str) -> Option<u32> {
|
||||
self.interner.get(value).map(symbol_to_u32)
|
||||
pub fn id(&self, value: &str) -> Option<DID> {
|
||||
self.interner.get(value).map(DID::new)
|
||||
}
|
||||
|
||||
/// Returns the str in self.dictionary that corresponds to `id`,
|
||||
/// if any. Returns an error if no such id is found
|
||||
pub fn lookup_id(&self, id: u32) -> Result<&str> {
|
||||
let symbol =
|
||||
Symbol::try_from_usize(id as usize).expect("to be able to convert u32 to symbol");
|
||||
pub fn lookup_id(&self, id: DID) -> Result<&str> {
|
||||
self.interner
|
||||
.resolve(symbol)
|
||||
.resolve(id.into())
|
||||
.context(DictionaryIdLookupError { id })
|
||||
}
|
||||
}
|
||||
|
||||
fn symbol_to_u32(sym: DefaultSymbol) -> u32 {
|
||||
sym.to_usize() as u32
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::dictionary::Dictionary;
|
||||
|
|
|
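Since the diff above replaces raw u32 ids with the DID newtype throughout the dictionary API, a short usage sketch may help. It assumes only what is shown above plus the `Dictionary::new()` constructor used by the tests further down, and would live inside the same crate:

// Sketch of the DID-based API: interning is idempotent and a DID round-trips
// back to the original string via lookup_id.
use crate::dictionary::Dictionary;

fn demo() {
    let mut dict = Dictionary::new();

    let a = dict.lookup_value_or_insert("cpu");
    let b = dict.lookup_value_or_insert("cpu");
    assert_eq!(a, b); // same string, same DID

    assert_eq!(dict.lookup_id(a).unwrap(), "cpu"); // DID -> &str
    assert!(dict.id("never_interned").is_none());  // no error, no allocation
}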
@ -60,5 +60,4 @@
|
|||
pub mod chunk;
|
||||
mod column;
|
||||
mod dictionary;
|
||||
pub mod pred;
|
||||
mod table;
|
||||
|
|
|
@ -1,298 +0,0 @@
|
|||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use crate::dictionary::{Dictionary, Error as DictionaryError};
|
||||
|
||||
use arrow_deps::{
|
||||
datafusion::{
|
||||
error::{DataFusionError, Result as DatafusionResult},
|
||||
logical_plan::{Expr, ExpressionVisitor, Operator, Recursion},
|
||||
optimizer::utils::expr_to_column_names,
|
||||
},
|
||||
util::{make_range_expr, AndExprBuilder},
|
||||
};
|
||||
use data_types::timestamp::TimestampRange;
|
||||
use internal_types::schema::TIME_COLUMN_NAME;
|
||||
|
||||
//use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use snafu::{ensure, ResultExt, Snafu};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error writing table '{}': {}", table_name, source))]
|
||||
TableWrite {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Time Column was not not found in dictionary: {}", source))]
|
||||
TimeColumnNotFound { source: DictionaryError },
|
||||
|
||||
#[snafu(display("Unsupported predicate. Mutable buffer does not support: {}", source))]
|
||||
UnsupportedPredicate { source: DataFusionError },
|
||||
|
||||
#[snafu(display(
|
||||
"Internal error visiting expressions in ChunkPredicateBuilder: {}",
|
||||
source
|
||||
))]
|
||||
InternalVisitingExpressions { source: DataFusionError },
|
||||
|
||||
#[snafu(display("table_names has already been specified in ChunkPredicateBuilder"))]
|
||||
TableNamesAlreadySet {},
|
||||
|
||||
#[snafu(display("field_names has already been specified in ChunkPredicateBuilder"))]
|
||||
FieldNamesAlreadySet {},
|
||||
|
||||
#[snafu(display("range has already been specified in ChunkPredicateBuilder"))]
|
||||
RangeAlreadySet {},
|
||||
|
||||
#[snafu(display("exprs has already been specified in ChunkPredicateBuilder"))]
|
||||
ExprsAlreadySet {},
|
||||
|
||||
#[snafu(display("required_columns has already been specified in ChunkPredicateBuilder"))]
|
||||
RequiredColumnsAlreadySet {},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Describes the result of translating a set of strings into
|
||||
/// chunk specific ids
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum ChunkIdSet {
|
||||
/// At least one of the strings was not present in the chunks'
|
||||
/// dictionary.
|
||||
///
|
||||
/// This is important when testing for the presence of all ids in
|
||||
/// a set, as we know they can not all be present
|
||||
AtLeastOneMissing,
|
||||
|
||||
/// All strings existed in this chunk's dictionary
|
||||
Present(BTreeSet<u32>),
|
||||
}
|
||||
|
||||
/// a 'Compiled' set of predicates / filters that can be evaluated on
|
||||
/// this chunk (where strings have been translated to chunk
|
||||
/// specific u32 ids)
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ChunkPredicate {
|
||||
/// If present, restrict the request to just those tables whose
|
||||
/// names are in table_names. If present but empty, means there
|
||||
/// was a predicate but no tables named that way exist in the
|
||||
/// chunk (so no table can pass)
|
||||
pub table_name_predicate: Option<BTreeSet<u32>>,
|
||||
|
||||
/// Optional column restriction. If present, further
|
||||
/// restrict any field columns returned to only those named, and
|
||||
/// skip tables entirely when querying metadata that do not have
|
||||
/// *any* of the fields
|
||||
pub field_name_predicate: Option<BTreeSet<u32>>,
|
||||
|
||||
/// General DataFusion expressions (arbitrary predicates) applied
|
||||
/// as a filter using logical conjunction (aka are 'AND'ed
|
||||
/// together). Only rows that evaluate to TRUE for all these
|
||||
/// expressions should be returned.
|
||||
///
|
||||
/// TODO these exprs should eventually be removed (when they are
|
||||
/// all handled one layer up in the query layer)
|
||||
pub chunk_exprs: Vec<Expr>,
|
||||
|
||||
/// If Some, then the table must contain all columns specified
|
||||
/// to pass the predicate
|
||||
pub required_columns: Option<ChunkIdSet>,
|
||||
|
||||
/// The id of the "time" column in this chunk
|
||||
pub time_column_id: u32,
|
||||
|
||||
/// Timestamp range: only rows within this range should be considered
|
||||
pub range: Option<TimestampRange>,
|
||||
}
|
||||
|
||||
impl ChunkPredicate {
|
||||
/// Creates and adds a DataFusion predicate representing the
|
||||
/// combination of predicate and timestamp.
|
||||
pub fn filter_expr(&self) -> Option<Expr> {
|
||||
// build up a list of expressions
|
||||
let mut builder =
|
||||
AndExprBuilder::default().append_opt(self.make_timestamp_predicate_expr());
|
||||
|
||||
for expr in &self.chunk_exprs {
|
||||
builder = builder.append_expr(expr.clone());
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// For plans which select a subset of fields, returns true if
|
||||
/// the field should be included in the results
|
||||
pub fn should_include_field(&self, field_id: u32) -> bool {
|
||||
match &self.field_name_predicate {
|
||||
None => true,
|
||||
Some(field_restriction) => field_restriction.contains(&field_id),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if this column is the time column
|
||||
pub fn is_time_column(&self, id: u32) -> bool {
|
||||
self.time_column_id == id
|
||||
}
|
||||
|
||||
/// Creates a DataFusion predicate for applying a timestamp range:
|
||||
///
|
||||
/// `range.start <= time and time < range.end`
|
||||
fn make_timestamp_predicate_expr(&self) -> Option<Expr> {
|
||||
self.range
|
||||
.map(|range| make_range_expr(range.start, range.end, TIME_COLUMN_NAME))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds ChunkPredicates
|
||||
#[derive(Debug)]
|
||||
pub struct ChunkPredicateBuilder<'a> {
|
||||
inner: ChunkPredicate,
|
||||
dictionary: &'a Dictionary,
|
||||
}
|
||||
|
||||
impl<'a> ChunkPredicateBuilder<'a> {
|
||||
pub fn new(dictionary: &'a Dictionary) -> Result<Self> {
|
||||
let time_column_id = dictionary
|
||||
.lookup_value(TIME_COLUMN_NAME)
|
||||
.context(TimeColumnNotFound)?;
|
||||
|
||||
let inner = ChunkPredicate {
|
||||
time_column_id,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
Ok(Self { inner, dictionary })
|
||||
}
|
||||
|
||||
/// Set table_name_predicate so only tables in `names` are returned
|
||||
pub fn table_names(mut self, names: Option<&BTreeSet<String>>) -> Result<Self> {
|
||||
ensure!(
|
||||
self.inner.table_name_predicate.is_none(),
|
||||
TableNamesAlreadySet
|
||||
);
|
||||
self.inner.table_name_predicate = self.compile_string_list(names);
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set field_name_predicate so only fields in `names` are returned
|
||||
pub fn field_names(mut self, names: Option<&BTreeSet<String>>) -> Result<Self> {
|
||||
ensure!(
|
||||
self.inner.field_name_predicate.is_none(),
|
||||
FieldNamesAlreadySet
|
||||
);
|
||||
self.inner.field_name_predicate = self.compile_string_list(names);
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
pub fn range(mut self, range: Option<TimestampRange>) -> Result<Self> {
|
||||
ensure!(self.inner.range.is_none(), RangeAlreadySet);
|
||||
self.inner.range = range;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set the general purpose predicates
|
||||
pub fn exprs(mut self, chunk_exprs: Vec<Expr>) -> Result<Self> {
|
||||
// In order to evaluate expressions in the table, all columns
|
||||
// referenced in the expression must appear (I think, not sure
|
||||
// about NOT, etc so panic if we see one of those);
|
||||
let mut visitor = SupportVisitor {};
|
||||
let mut predicate_columns: HashSet<String> = HashSet::new();
|
||||
for expr in &chunk_exprs {
|
||||
visitor = expr.accept(visitor).context(UnsupportedPredicate)?;
|
||||
expr_to_column_names(&expr, &mut predicate_columns)
|
||||
.context(InternalVisitingExpressions)?;
|
||||
}
|
||||
|
||||
ensure!(self.inner.chunk_exprs.is_empty(), ExprsAlreadySet);
|
||||
self.inner.chunk_exprs = chunk_exprs;
|
||||
|
||||
// if there are any column references in the expression, ensure they appear in
|
||||
// any table
|
||||
if !predicate_columns.is_empty() {
|
||||
ensure!(
|
||||
self.inner.required_columns.is_none(),
|
||||
RequiredColumnsAlreadySet
|
||||
);
|
||||
self.inner.required_columns = Some(self.make_chunk_ids(predicate_columns.iter()));
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Return the created chunk predicate, consuming self
|
||||
pub fn build(self) -> ChunkPredicate {
|
||||
self.inner
|
||||
}
|
||||
|
||||
/// Converts a Set of strings into a set of ids in terms of this
|
||||
/// Chunk's dictionary.
|
||||
///
|
||||
/// If there are no matching Strings in the chunk's dictionary,
|
||||
/// those strings are ignored and a (potentially empty) set is
|
||||
/// returned.
|
||||
fn compile_string_list(&self, names: Option<&BTreeSet<String>>) -> Option<BTreeSet<u32>> {
|
||||
names.map(|names| {
|
||||
names
|
||||
.iter()
|
||||
.filter_map(|name| self.dictionary.id(name))
|
||||
.collect::<BTreeSet<_>>()
|
||||
})
|
||||
}
|
||||
|
||||
/// Translate a bunch of strings into a set of ids from the dictionary of this
|
||||
/// chunk
|
||||
pub fn make_chunk_ids<'b, I>(&self, predicate_columns: I) -> ChunkIdSet
|
||||
where
|
||||
I: Iterator<Item = &'b String>,
|
||||
{
|
||||
let mut symbols = BTreeSet::new();
|
||||
for column_name in predicate_columns {
|
||||
if let Some(column_id) = self.dictionary.id(column_name) {
|
||||
symbols.insert(column_id);
|
||||
} else {
|
||||
return ChunkIdSet::AtLeastOneMissing;
|
||||
}
|
||||
}
|
||||
|
||||
ChunkIdSet::Present(symbols)
|
||||
}
|
||||
}
|
||||
|
||||
/// Used to figure out if we know how to deal with this kind of
|
||||
/// predicate in the write buffer
|
||||
struct SupportVisitor {}
|
||||
|
||||
impl ExpressionVisitor for SupportVisitor {
|
||||
fn pre_visit(self, expr: &Expr) -> DatafusionResult<Recursion<Self>> {
|
||||
match expr {
|
||||
Expr::Literal(..) => Ok(Recursion::Continue(self)),
|
||||
Expr::Column(..) => Ok(Recursion::Continue(self)),
|
||||
Expr::BinaryExpr { op, .. } => {
|
||||
match op {
|
||||
Operator::Eq
|
||||
| Operator::Lt
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq
|
||||
| Operator::Plus
|
||||
| Operator::Minus
|
||||
| Operator::Multiply
|
||||
| Operator::Divide
|
||||
| Operator::And
|
||||
| Operator::Or => Ok(Recursion::Continue(self)),
|
||||
// Unsupported (need to think about ramifications)
|
||||
Operator::NotEq | Operator::Modulus | Operator::Like | Operator::NotLike => {
|
||||
Err(DataFusionError::NotImplemented(format!(
|
||||
"Operator {:?} not yet supported in IOx MutableBuffer",
|
||||
op
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Err(DataFusionError::NotImplemented(format!(
|
||||
"Unsupported expression in mutable_buffer database: {:?}",
|
||||
expr
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
|
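For reference, the time filter that the removed `make_timestamp_predicate_expr` described is `range.start <= time and time < range.end`. Below is a hedged sketch of that same expression using DataFusion's logical expression builders; the helper names (`col`, `lit`) and the `Expr` combinators (`lt_eq`, `lt`, `and`) are assumptions about the pinned DataFusion revision, not the removed implementation:

// Sketch only: builds the predicate the removed module described.
use arrow_deps::datafusion::logical_plan::{col, lit, Expr};
use internal_types::schema::TIME_COLUMN_NAME;

fn time_range_expr(start: i64, end: i64) -> Expr {
    // range.start <= time AND time < range.end
    lit(start)
        .lt_eq(col(TIME_COLUMN_NAME))
        .and(col(TIME_COLUMN_NAME).lt(lit(end)))
}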
@ -1,19 +1,16 @@
|
|||
use generated_types::wal as wb;
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::{cmp, collections::BTreeMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
chunk::Chunk,
|
||||
column,
|
||||
column::Column,
|
||||
dictionary::{Dictionary, Error as DictionaryError},
|
||||
pred::{ChunkIdSet, ChunkPredicate},
|
||||
dictionary::{Dictionary, Error as DictionaryError, DID},
|
||||
};
|
||||
use data_types::{
|
||||
database_rules::WriterId,
|
||||
partition_metadata::{ColumnSummary, Statistics},
|
||||
};
|
||||
use data_types::partition_metadata::{ColumnSummary, Statistics};
|
||||
use internal_types::{
|
||||
entry::{self, ClockValue},
|
||||
schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME},
|
||||
selection::Selection,
|
||||
};
|
||||
|
@ -33,12 +30,8 @@ use arrow_deps::{
|
|||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Tag value ID {} not found in dictionary of chunk {}", value, chunk))]
|
||||
TagValueIdNotFoundInDictionary {
|
||||
value: u32,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
#[snafu(display("Tag value ID {} not found in dictionary of chunk", value))]
|
||||
TagValueIdNotFoundInDictionary { value: DID, source: DictionaryError },
|
||||
|
||||
#[snafu(display("Column error on column {}: {}", column, source))]
|
||||
ColumnError {
|
||||
|
@ -53,7 +46,7 @@ pub enum Error {
|
|||
actual_column_type
|
||||
))]
|
||||
InternalColumnTypeMismatch {
|
||||
column_id: u32,
|
||||
column_id: DID,
|
||||
expected_column_type: String,
|
||||
actual_column_type: String,
|
||||
},
|
||||
|
@ -61,21 +54,12 @@ pub enum Error {
|
|||
#[snafu(display("Internal error: unexpected aggregate request for None aggregate",))]
|
||||
InternalUnexpectedNoneAggregate {},
|
||||
|
||||
#[snafu(display(
|
||||
"Column name '{}' not found in dictionary of chunk {}",
|
||||
column_name,
|
||||
chunk
|
||||
))]
|
||||
ColumnNameNotFoundInDictionary { column_name: String, chunk: u64 },
|
||||
#[snafu(display("Column name '{}' not found in dictionary of chunk", column_name,))]
|
||||
ColumnNameNotFoundInDictionary { column_name: String },
|
||||
|
||||
#[snafu(display(
|
||||
"Internal: Column id '{}' not found in dictionary of chunk {}",
|
||||
column_id,
|
||||
chunk
|
||||
))]
|
||||
#[snafu(display("Internal: Column id '{}' not found in dictionary", column_id,))]
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: u32,
|
||||
chunk: u64,
|
||||
column_id: DID,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
|
@ -92,22 +76,22 @@ pub enum Error {
|
|||
column_name,
|
||||
column_id
|
||||
))]
|
||||
InternalNoColumnInIndex { column_name: String, column_id: u32 },
|
||||
InternalNoColumnInIndex { column_name: String, column_id: DID },
|
||||
|
||||
#[snafu(display("Error creating column from wal for column {}: {}", column, source))]
|
||||
CreatingFromWal {
|
||||
column: u32,
|
||||
column: DID,
|
||||
source: crate::column::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error evaluating column predicate for column {}: {}", column, source))]
|
||||
ColumnPredicateEvaluation {
|
||||
column: u32,
|
||||
column: DID,
|
||||
source: crate::column::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Row insert to table {} missing column name", table))]
|
||||
ColumnNameNotInRow { table: u32 },
|
||||
ColumnNameNotInRow { table: DID },
|
||||
|
||||
#[snafu(display(
|
||||
"Group column '{}' not found in tag columns: {}",
|
||||
|
@ -123,68 +107,27 @@ pub enum Error {
|
|||
DuplicateGroupColumn { column_name: String },
|
||||
|
||||
#[snafu(display("Column {} not found in table {}", id, table_id))]
|
||||
ColumnIdNotFound { id: u32, table_id: u32 },
|
||||
ColumnIdNotFound { id: DID, table_id: DID },
|
||||
}
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Table {
|
||||
/// Name of the table as a u32 in the chunk dictionary
|
||||
pub id: u32,
|
||||
/// Name of the table as a DID in the chunk dictionary
|
||||
pub id: DID,
|
||||
|
||||
/// Map of column id from the chunk dictionary to the column
|
||||
pub columns: BTreeMap<u32, Column>,
|
||||
pub columns: BTreeMap<DID, Column>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
pub fn new(id: u32) -> Self {
|
||||
pub fn new(id: DID) -> Self {
|
||||
Self {
|
||||
id,
|
||||
columns: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn append_row(
|
||||
&mut self,
|
||||
dictionary: &mut Dictionary,
|
||||
values: &flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<wb::Value<'_>>>,
|
||||
) -> Result<()> {
|
||||
let row_count = self.row_count();
|
||||
|
||||
// insert new columns and validate existing ones
|
||||
for value in values {
|
||||
let column_name = value
|
||||
.column()
|
||||
.context(ColumnNameNotInRow { table: self.id })?;
|
||||
let column_id = dictionary.lookup_value_or_insert(column_name);
|
||||
|
||||
let column = match self.columns.get_mut(&column_id) {
|
||||
Some(col) => col,
|
||||
None => {
|
||||
// Add the column and make all values for existing rows None
|
||||
self.columns.insert(
|
||||
column_id,
|
||||
Column::with_value(dictionary, row_count, value)
|
||||
.context(CreatingFromWal { column: column_id })?,
|
||||
);
|
||||
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
column.push(dictionary, &value).context(ColumnError {
|
||||
column: column_name,
|
||||
})?;
|
||||
}
|
||||
|
||||
// make sure all the columns are of the same length
|
||||
for col in self.columns.values_mut() {
|
||||
col.push_none_if_len_equal(row_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn row_count(&self) -> usize {
|
||||
self.columns
|
||||
.values()
|
||||
|
@ -201,55 +144,124 @@ impl Table {
|
|||
}
|
||||
|
||||
/// Returns a reference to the specified column
|
||||
pub(crate) fn column(&self, column_id: u32) -> Result<&Column> {
|
||||
pub(crate) fn column(&self, column_id: DID) -> Result<&Column> {
|
||||
self.columns.get(&column_id).context(ColumnIdNotFound {
|
||||
id: column_id,
|
||||
table_id: self.id,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a reference to the specified column as a slice of
|
||||
/// i64s. Errors if the type is not i64
|
||||
pub fn column_i64(&self, column_id: u32) -> Result<&[Option<i64>]> {
|
||||
let column = self.column(column_id)?;
|
||||
match column {
|
||||
Column::I64(vals, _) => Ok(vals),
|
||||
_ => InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: "i64",
|
||||
actual_column_type: column.type_description(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append_rows(
|
||||
/// Validates the schema of the passed in columns, then adds their values to
|
||||
/// the associated columns in the table and updates summary statistics.
|
||||
pub fn write_columns(
|
||||
&mut self,
|
||||
dictionary: &mut Dictionary,
|
||||
rows: &flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<wb::Row<'_>>>,
|
||||
_clock_value: ClockValue,
|
||||
_writer_id: WriterId,
|
||||
columns: Vec<entry::Column<'_>>,
|
||||
) -> Result<()> {
|
||||
for row in rows {
|
||||
if let Some(values) = row.values() {
|
||||
self.append_row(dictionary, &values)?;
|
||||
// get the column ids and validate schema for those that already exist
|
||||
let columns_with_inserts = columns
|
||||
.into_iter()
|
||||
.map(|insert_column| {
|
||||
let column_id = dictionary.lookup_value_or_insert(insert_column.name());
|
||||
let values = insert_column.values();
|
||||
|
||||
if let Some(c) = self.columns.get(&column_id) {
|
||||
match (&values, c) {
|
||||
(entry::TypedValuesIterator::Bool(_), Column::Bool(_, _)) => (),
|
||||
(entry::TypedValuesIterator::U64(_), Column::U64(_, _)) => (),
|
||||
(entry::TypedValuesIterator::F64(_), Column::F64(_, _)) => (),
|
||||
(entry::TypedValuesIterator::I64(_), Column::I64(_, _)) => (),
|
||||
(entry::TypedValuesIterator::String(_), Column::String(_, _)) => {
|
||||
if !insert_column.is_field() {
|
||||
InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: c.type_description(),
|
||||
actual_column_type: values.type_description(),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
}
|
||||
(entry::TypedValuesIterator::String(_), Column::Tag(_, _)) => {
|
||||
if !insert_column.is_tag() {
|
||||
InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: c.type_description(),
|
||||
actual_column_type: values.type_description(),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
}
|
||||
_ => InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: c.type_description(),
|
||||
actual_column_type: values.type_description(),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok((column_id, insert_column.logical_type(), values))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let row_count_before_insert = self.row_count();
|
||||
|
||||
for (column_id, logical_type, values) in columns_with_inserts.into_iter() {
|
||||
match self.columns.get_mut(&column_id) {
|
||||
Some(c) => c
|
||||
.push_typed_values(dictionary, logical_type, values)
|
||||
.with_context(|| {
|
||||
let column = dictionary
|
||||
.lookup_id(column_id)
|
||||
.expect("column name must be present in dictionary");
|
||||
ColumnError { column }
|
||||
})?,
|
||||
None => {
|
||||
self.columns.insert(
|
||||
column_id,
|
||||
Column::new_from_typed_values(
|
||||
dictionary,
|
||||
row_count_before_insert,
|
||||
logical_type,
|
||||
values,
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ensure all columns have the same number of rows as the one with the most.
|
||||
// This adds nulls to the columns that weren't included in this write
|
||||
let max_row_count = self
|
||||
.columns
|
||||
.values()
|
||||
.fold(row_count_before_insert, |max, col| cmp::max(max, col.len()));
|
||||
|
||||
for c in self.columns.values_mut() {
|
||||
c.push_nulls_to_len(max_row_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
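A brief usage sketch of `write_columns` as defined above, mirroring the `write_lines_to_table` helper in the tests at the bottom of this file. It assumes an existing `table: Table` and `dictionary: Dictionary`, and uses the test-only `lp_to_entry` helper from internal_types:

// Sketch: turn a line of line protocol into an entry and write its columns.
let entry = lp_to_entry("cpu,host=a usage=0.5 10");
for batch in entry
    .partition_writes()
    .unwrap()
    .first()
    .unwrap()
    .table_batches()
{
    table
        .write_columns(&mut dictionary, ClockValue::new(0), 0, batch.columns())
        .unwrap();
}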
/// Returns the column selection for all the columns in this table, ordered
|
||||
/// by table name
|
||||
fn all_columns_selection<'a>(&self, chunk: &'a Chunk) -> Result<TableColSelection<'a>> {
|
||||
fn all_columns_selection<'a>(
|
||||
&self,
|
||||
dictionary: &'a Dictionary,
|
||||
) -> Result<TableColSelection<'a>> {
|
||||
let cols = self
|
||||
.columns
|
||||
.iter()
|
||||
.map(|(column_id, _)| {
|
||||
let column_name = chunk.dictionary.lookup_id(*column_id).context(
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: *column_id,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
let column_name =
|
||||
dictionary
|
||||
.lookup_id(*column_id)
|
||||
.context(ColumnIdNotFoundInDictionary {
|
||||
column_id: *column_id,
|
||||
})?;
|
||||
Ok(ColSelection {
|
||||
column_name,
|
||||
column_id: *column_id,
|
||||
|
@ -266,45 +278,45 @@ impl Table {
|
|||
/// Returns a column selection for just the specified columns
|
||||
fn specific_columns_selection<'a>(
|
||||
&self,
|
||||
chunk: &'a Chunk,
|
||||
dictionary: &'a Dictionary,
|
||||
columns: &'a [&'a str],
|
||||
) -> Result<TableColSelection<'a>> {
|
||||
let cols =
|
||||
columns
|
||||
.iter()
|
||||
.map(|&column_name| {
|
||||
let column_id = chunk.dictionary.id(column_name).context(
|
||||
ColumnNameNotFoundInDictionary {
|
||||
column_name,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
let cols = columns
|
||||
.iter()
|
||||
.map(|&column_name| {
|
||||
let column_id = dictionary
|
||||
.id(column_name)
|
||||
.context(ColumnNameNotFoundInDictionary { column_name })?;
|
||||
|
||||
Ok(ColSelection {
|
||||
column_name,
|
||||
column_id,
|
||||
})
|
||||
Ok(ColSelection {
|
||||
column_name,
|
||||
column_id,
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(TableColSelection { cols })
|
||||
}
|
||||
|
||||
/// Converts this table to an arrow record batch.
|
||||
pub fn to_arrow(&self, chunk: &Chunk, selection: Selection<'_>) -> Result<RecordBatch> {
|
||||
pub fn to_arrow(
|
||||
&self,
|
||||
dictionary: &Dictionary,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<RecordBatch> {
|
||||
// translate chunk selection into name/indexes:
|
||||
let selection = match selection {
|
||||
Selection::All => self.all_columns_selection(chunk),
|
||||
Selection::Some(cols) => self.specific_columns_selection(chunk, cols),
|
||||
Selection::All => self.all_columns_selection(dictionary),
|
||||
Selection::Some(cols) => self.specific_columns_selection(dictionary, cols),
|
||||
}?;
|
||||
self.to_arrow_impl(chunk, &selection)
|
||||
self.to_arrow_impl(dictionary, &selection)
|
||||
}
|
||||
|
||||
pub fn schema(&self, chunk: &Chunk, selection: Selection<'_>) -> Result<Schema> {
|
||||
pub fn schema(&self, dictionary: &Dictionary, selection: Selection<'_>) -> Result<Schema> {
|
||||
// translate chunk selection into name/indexes:
|
||||
let selection = match selection {
|
||||
Selection::All => self.all_columns_selection(chunk),
|
||||
Selection::Some(cols) => self.specific_columns_selection(chunk, cols),
|
||||
Selection::All => self.all_columns_selection(dictionary),
|
||||
Selection::Some(cols) => self.specific_columns_selection(dictionary, cols),
|
||||
}?;
|
||||
self.schema_impl(&selection)
|
||||
}
|
||||
|
@ -341,7 +353,7 @@ impl Table {
|
|||
/// requested columns with index are tuples of column_name, column_index
|
||||
fn to_arrow_impl(
|
||||
&self,
|
||||
chunk: &Chunk,
|
||||
dictionary: &Dictionary,
|
||||
selection: &TableColSelection<'_>,
|
||||
) -> Result<RecordBatch> {
|
||||
let mut columns = Vec::with_capacity(selection.cols.len());
|
||||
|
@ -370,12 +382,9 @@ impl Table {
|
|||
match v {
|
||||
None => builder.append_null(),
|
||||
Some(value_id) => {
|
||||
let tag_value = chunk.dictionary.lookup_id(*value_id).context(
|
||||
TagValueIdNotFoundInDictionary {
|
||||
value: *value_id,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
let tag_value = dictionary
|
||||
.lookup_id(*value_id)
|
||||
.context(TagValueIdNotFoundInDictionary { value: *value_id })?;
|
||||
builder.append_value(tag_value)
|
||||
}
|
||||
}
|
||||
|
@ -430,124 +439,11 @@ impl Table {
|
|||
RecordBatch::try_new(schema, columns).context(ArrowError {})
|
||||
}
|
||||
|
||||
/// returns true if any row in this table could possibly match the
|
||||
/// predicate. true does not mean any rows will *actually* match,
|
||||
/// just that the entire table can not be ruled out.
|
||||
///
|
||||
/// false means that no rows in this table could possibly match
|
||||
pub fn could_match_predicate(&self, chunk_predicate: &ChunkPredicate) -> Result<bool> {
|
||||
Ok(
|
||||
self.matches_column_name_predicate(chunk_predicate.field_name_predicate.as_ref())
|
||||
&& self.matches_table_name_predicate(chunk_predicate.table_name_predicate.as_ref())
|
||||
&& self.matches_timestamp_predicate(chunk_predicate)?
|
||||
&& self.has_columns(chunk_predicate.required_columns.as_ref()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns true if the table contains any of the field columns
|
||||
/// requested or there are no specific fields requested.
|
||||
fn matches_column_name_predicate(&self, column_selection: Option<&BTreeSet<u32>>) -> bool {
|
||||
match column_selection {
|
||||
Some(column_selection) => {
|
||||
for column_id in column_selection {
|
||||
if let Some(column) = self.columns.get(column_id) {
|
||||
if !column.is_tag() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// selection only had tag columns
|
||||
false
|
||||
}
|
||||
None => true, // no specific selection
|
||||
}
|
||||
}
|
||||
|
||||
fn matches_table_name_predicate(&self, table_name_predicate: Option<&BTreeSet<u32>>) -> bool {
|
||||
match table_name_predicate {
|
||||
Some(table_name_predicate) => table_name_predicate.contains(&self.id),
|
||||
None => true, // no table predicate
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true if there are any timestamps in this table that
|
||||
/// fall within the timestamp range
|
||||
fn matches_timestamp_predicate(&self, chunk_predicate: &ChunkPredicate) -> Result<bool> {
|
||||
match &chunk_predicate.range {
|
||||
None => Ok(true),
|
||||
Some(range) => {
|
||||
let time_column_id = chunk_predicate.time_column_id;
|
||||
let time_column = self.column(time_column_id)?;
|
||||
time_column.has_i64_range(range.start, range.end).context(
|
||||
ColumnPredicateEvaluation {
|
||||
column: time_column_id,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true if no columns are specified, or the table has all
|
||||
/// columns specified
|
||||
fn has_columns(&self, columns: Option<&ChunkIdSet>) -> bool {
|
||||
if let Some(columns) = columns {
|
||||
match columns {
|
||||
ChunkIdSet::AtLeastOneMissing => return false,
|
||||
ChunkIdSet::Present(symbols) => {
|
||||
for symbol in symbols {
|
||||
if !self.columns.contains_key(symbol) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// returns true if there are any rows in column that are non-null
|
||||
/// and within the timestamp range specified by pred
|
||||
pub(crate) fn column_matches_predicate(
|
||||
&self,
|
||||
column: &Column,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<bool> {
|
||||
match column {
|
||||
Column::F64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::I64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::U64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::String(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::Bool(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::Tag(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
}
|
||||
}
|
||||
|
||||
fn column_value_matches_predicate<T>(
|
||||
&self,
|
||||
column_value: &[Option<T>],
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<bool> {
|
||||
match chunk_predicate.range {
|
||||
None => Ok(true),
|
||||
Some(range) => {
|
||||
let time_column_id = chunk_predicate.time_column_id;
|
||||
let time_column = self.column(time_column_id)?;
|
||||
time_column
|
||||
.has_non_null_i64_range(column_value, range.start, range.end)
|
||||
.context(ColumnPredicateEvaluation {
|
||||
column: time_column_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stats(&self, chunk: &Chunk) -> Vec<ColumnSummary> {
|
||||
pub fn stats(&self, dictionary: &Dictionary) -> Vec<ColumnSummary> {
|
||||
self.columns
|
||||
.iter()
|
||||
.map(|(column_id, c)| {
|
||||
let column_name = chunk
|
||||
.dictionary
|
||||
let column_name = dictionary
|
||||
.lookup_id(*column_id)
|
||||
.expect("column name in dictionary");
|
||||
|
||||
|
@ -572,7 +468,7 @@ impl Table {
|
|||
|
||||
struct ColSelection<'a> {
|
||||
column_name: &'a str,
|
||||
column_id: u32,
|
||||
column_id: DID,
|
||||
}
|
||||
|
||||
/// Represents a set of column_name, column_index pairs
|
||||
|
@ -591,61 +487,13 @@ impl<'a> TableColSelection<'a> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use influxdb_line_protocol::{parse_lines, ParsedLine};
|
||||
use internal_types::data::split_lines_into_write_entry_partitions;
|
||||
use internal_types::entry::test_helpers::lp_to_entry;
|
||||
|
||||
use super::*;
|
||||
use tracker::MemRegistry;
|
||||
|
||||
#[test]
|
||||
fn test_has_columns() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4 100",
|
||||
"h2o,state=MA,city=Boston temp=72.4 250",
|
||||
];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
|
||||
let state_symbol = dictionary.id("state").unwrap();
|
||||
let new_symbol = dictionary.lookup_value_or_insert("not_a_columns");
|
||||
|
||||
assert!(table.has_columns(None));
|
||||
|
||||
let pred = ChunkIdSet::AtLeastOneMissing;
|
||||
assert!(!table.has_columns(Some(&pred)));
|
||||
|
||||
let set = BTreeSet::<u32>::new();
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(table.has_columns(Some(&pred)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(state_symbol);
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(table.has_columns(Some(&pred)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(new_symbol);
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(!table.has_columns(Some(&pred)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(state_symbol);
|
||||
set.insert(new_symbol);
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(!table.has_columns(Some(&pred)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn table_size() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec![
|
||||
|
@ -653,111 +501,31 @@ mod tests {
|
|||
"h2o,state=MA,city=Boston temp=72.4 250",
|
||||
];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines.clone());
|
||||
assert_eq!(128, table.size());
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines.clone());
|
||||
assert_eq!(112, table.size());
|
||||
|
||||
// doesn't double because of the stats overhead
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines.clone());
|
||||
assert_eq!(224, table.size());
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines.clone());
|
||||
assert_eq!(192, table.size());
|
||||
|
||||
// now make sure it increased by the same amount minus stats overhead
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
assert_eq!(320, table.size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_matches_table_name_predicate() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("h2o"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4 100",
|
||||
"h2o,state=MA,city=Boston temp=72.4 250",
|
||||
];
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
|
||||
let h2o_symbol = dictionary.id("h2o").unwrap();
|
||||
|
||||
assert!(table.matches_table_name_predicate(None));
|
||||
|
||||
let set = BTreeSet::new();
|
||||
assert!(!table.matches_table_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(h2o_symbol);
|
||||
assert!(table.matches_table_name_predicate(Some(&set)));
|
||||
|
||||
// Some symbol that is not the same as h2o_symbol
|
||||
assert_ne!(37377, h2o_symbol);
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(37377);
|
||||
assert!(!table.matches_table_name_predicate(Some(&set)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_matches_column_name_predicate() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("h2o"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4,awesomeness=1000 100",
|
||||
"h2o,state=MA,city=Boston temp=72.4,awesomeness=2000 250",
|
||||
];
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
|
||||
let state_symbol = dictionary.id("state").unwrap();
|
||||
let temp_symbol = dictionary.id("temp").unwrap();
|
||||
let awesomeness_symbol = dictionary.id("awesomeness").unwrap();
|
||||
|
||||
assert!(table.matches_column_name_predicate(None));
|
||||
|
||||
let set = BTreeSet::new();
|
||||
assert!(!table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
// tag columns should not count
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(state_symbol);
|
||||
assert!(!table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(temp_symbol);
|
||||
assert!(table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(temp_symbol);
|
||||
set.insert(awesomeness_symbol);
|
||||
assert!(table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(temp_symbol);
|
||||
set.insert(awesomeness_symbol);
|
||||
set.insert(1337); // some other symbol, but that is ok
|
||||
assert!(table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(1337);
|
||||
assert!(!table.matches_column_name_predicate(Some(&set)));
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines);
|
||||
assert_eq!(272, table.size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_arrow_schema_all() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston float_field=70.4,int_field=8i,uint_field=42u,bool_field=t,string_field=\"foo\" 100",
|
||||
];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines);
|
||||
|
||||
let selection = Selection::All;
|
||||
let actual_schema = table.schema(&chunk, selection).unwrap();
|
||||
let actual_schema = table.schema(&dictionary, selection).unwrap();
|
||||
let expected_schema = SchemaBuilder::new()
|
||||
.field("bool_field", ArrowDataType::Boolean)
|
||||
.tag("city")
|
||||
|
@ -779,17 +547,15 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_to_arrow_schema_subset() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec!["h2o,state=MA,city=Boston float_field=70.4 100"];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines);
|
||||
|
||||
let selection = Selection::Some(&["float_field"]);
|
||||
let actual_schema = table.schema(&chunk, selection).unwrap();
|
||||
let actual_schema = table.schema(&dictionary, selection).unwrap();
|
||||
let expected_schema = SchemaBuilder::new()
|
||||
.field("float_field", ArrowDataType::Float64)
|
||||
.build()
|
||||
|
@ -802,29 +568,172 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_columns_validates_schema() {
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("foo"));
|
||||
|
||||
let lp = "foo,t1=asdf iv=1i,uv=1u,fv=1.0,bv=true,sv=\"hi\" 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let lp = "foo t1=\"string\" 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(
|
||||
&response,
|
||||
Error::InternalColumnTypeMismatch {
|
||||
expected_column_type,
|
||||
actual_column_type,
|
||||
..
|
||||
} if expected_column_type == "tag" && actual_column_type == "String"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo iv=1u 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "i64" && actual_column_type == "u64"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo fv=1i 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "f64" && actual_column_type == "i64"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo bv=1 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "bool" && actual_column_type == "f64"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo sv=true 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "String" && actual_column_type == "bool"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
}
|
||||
|
||||
/// Insert the line protocol lines in `lp_lines` into this table
|
||||
fn write_lines_to_table(table: &mut Table, dictionary: &mut Dictionary, lp_lines: Vec<&str>) {
|
||||
let lp_data = lp_lines.join("\n");
|
||||
let entry = lp_to_entry(&lp_data);
|
||||
|
||||
let lines: Vec<_> = parse_lines(&lp_data).map(|l| l.unwrap()).collect();
|
||||
|
||||
let data = split_lines_into_write_entry_partitions(chunk_key_func, &lines);
|
||||
|
||||
let batch = flatbuffers::root::<wb::WriteBufferBatch<'_>>(&data).unwrap();
|
||||
let entries = batch.entries().expect("at least one entry");
|
||||
|
||||
for entry in entries {
|
||||
let table_batches = entry.table_batches().expect("there were table batches");
|
||||
for batch in table_batches {
|
||||
let rows = batch.rows().expect("Had rows in the batch");
|
||||
table
|
||||
.append_rows(dictionary, &rows)
|
||||
.expect("Appended the row");
|
||||
}
|
||||
for batch in entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
{
|
||||
table
|
||||
.write_columns(dictionary, ClockValue::new(0), 0, batch.columns())
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn chunk_key_func(_: &ParsedLine<'_>) -> String {
|
||||
String::from("the_chunk_key")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ bytes = "1.0"
|
|||
chrono = "0.4"
|
||||
# Google Cloud Storage integration
|
||||
cloud-storage = "0.9.0"
|
||||
futures = "0.3.5"
|
||||
futures = "0.3"
|
||||
itertools = "0.9.0"
|
||||
percent-encoding = "2.1"
|
||||
# rusoto crates are for Amazon S3 integration
|
||||
|
|
|
@ -9,6 +9,7 @@ arrow_deps = { path = "../arrow_deps" }
|
|||
bytes = "1.0"
|
||||
data_types = { path = "../data_types" }
|
||||
futures = "0.3.7"
|
||||
internal_types = {path = "../internal_types"}
|
||||
object_store = {path = "../object_store"}
|
||||
parking_lot = "0.11.1"
|
||||
snafu = "0.6"
|
||||
|
|
|
@ -1,22 +1,44 @@
|
|||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use crate::table::Table;
|
||||
use data_types::partition_metadata::TableSummary;
|
||||
use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use object_store::path::Path;
|
||||
use tracker::{MemRegistry, MemTracker};
|
||||
|
||||
use std::mem;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error writing table '{}': {}", table_name, source))]
|
||||
TableWrite {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Table Error in '{}': {}", table_name, source))]
|
||||
NamedTableError {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Table '{}' not found in chunk {}", table_name, chunk_id))]
|
||||
NamedTableNotFoundInChunk { table_name: String, chunk_id: u64 },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Chunk {
|
||||
/// Partition this chunk belongs to
|
||||
pub partition_key: String,
|
||||
partition_key: String,
|
||||
|
||||
/// The id for this chunk
|
||||
pub id: u32,
|
||||
id: u32,
|
||||
|
||||
/// Tables of this chunk
|
||||
pub tables: Vec<Table>,
|
||||
tables: Vec<Table>,
|
||||
|
||||
/// Track memory used by this chunk
|
||||
memory_tracker: MemTracker,
|
||||
|
@ -34,9 +56,36 @@ impl Chunk {
|
|||
chunk
|
||||
}
|
||||
|
||||
/// Return the chunk id
|
||||
pub fn id(&self) -> u32 {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Return the chunk's partition key
|
||||
pub fn partition_key(&self) -> &str {
|
||||
self.partition_key.as_ref()
|
||||
}
|
||||
|
||||
/// Return the object store paths of all tables in this chunk
|
||||
pub fn all_paths(&self) -> Vec<Path> {
|
||||
self.tables.iter().map(|t| t.path()).collect()
|
||||
}
|
||||
|
||||
/// Returns a vec of the summary statistics of the tables in this chunk
|
||||
pub fn table_summaries(&self) -> Vec<TableSummary> {
|
||||
self.tables.iter().map(|t| t.table_summary()).collect()
|
||||
}
|
||||
|
||||
/// Add a table and its summary to this chunk
|
||||
pub fn add_table(&mut self, table_summary: TableSummary, file_location: Path) {
|
||||
self.tables.push(Table::new(table_summary, file_location));
|
||||
pub fn add_table(
|
||||
&mut self,
|
||||
table_summary: TableSummary,
|
||||
file_location: Path,
|
||||
schema: Schema,
|
||||
range: Option<TimestampRange>,
|
||||
) {
|
||||
self.tables
|
||||
.push(Table::new(table_summary, file_location, schema, range));
|
||||
}
|
||||
|
||||
/// Return true if this chunk includes the given table
|
||||
|
@ -62,4 +111,33 @@ impl Chunk {
|
|||
|
||||
size + self.partition_key.len() + mem::size_of::<u32>() + mem::size_of::<Self>()
|
||||
}
|
||||
|
||||
/// Return Schema for the specified table / columns
|
||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||
let table = self
|
||||
.tables
|
||||
.iter()
|
||||
.find(|t| t.has_table(table_name))
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})?;
|
||||
|
||||
table
|
||||
.schema(selection)
|
||||
.context(NamedTableError { table_name })
|
||||
}
|
||||
|
||||
pub fn table_names(
|
||||
&self,
|
||||
timestamp_range: Option<TimestampRange>,
|
||||
) -> impl Iterator<Item = String> + '_ {
|
||||
self.tables.iter().flat_map(move |t| {
|
||||
if t.matches_predicate(&timestamp_range) {
|
||||
Some(t.name())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
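The `table_names` method added above filters on each table's timestamp range. A small usage sketch, assuming an existing `Chunk` and a `TimestampRange::new(start, end)` constructor in data_types::timestamp:

// Sketch: list tables that could hold data in a time window,
// or every table when no range is given.
use data_types::timestamp::TimestampRange;

fn tables_in_window(chunk: &Chunk, start: i64, end: i64) -> Vec<String> {
    chunk
        .table_names(Some(TimestampRange::new(start, end)))
        .collect()
}

fn all_tables(chunk: &Chunk) -> Vec<String> {
    chunk.table_names(None).collect()
}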
@ -1,28 +1,57 @@
|
|||
use data_types::partition_metadata::TableSummary;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::mem;
|
||||
|
||||
use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use object_store::path::Path;
|
||||
|
||||
use std::mem;
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to select columns: {}", source))]
|
||||
SelectColumns {
|
||||
source: internal_types::schema::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Table that belongs to a chunk persisted in a parquet file in object store
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Table {
|
||||
/// Meta data of the table
|
||||
pub table_summary: TableSummary,
|
||||
table_summary: TableSummary,
|
||||
|
||||
/// Path in the object store. Format:
|
||||
/// <writer id>/<database>/data/<partition key>/<chunk
|
||||
/// id>/<tablename>.parquet
|
||||
pub object_store_path: Path,
|
||||
object_store_path: Path,
|
||||
|
||||
/// Schema that goes with this table's parquet file
|
||||
table_schema: Schema,
|
||||
|
||||
/// Timestamp range of this table's parquet file
|
||||
timestamp_range: Option<TimestampRange>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
pub fn new(meta: TableSummary, path: Path) -> Self {
|
||||
pub fn new(
|
||||
meta: TableSummary,
|
||||
path: Path,
|
||||
schema: Schema,
|
||||
range: Option<TimestampRange>,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_summary: meta,
|
||||
object_store_path: path,
|
||||
table_schema: schema,
|
||||
timestamp_range: range,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table_summary(&self) -> TableSummary {
|
||||
self.table_summary.clone()
|
||||
}
|
||||
|
||||
pub fn has_table(&self, table_name: &str) -> bool {
|
||||
self.table_summary.has_table(table_name)
|
||||
}
|
||||
|
@ -32,10 +61,36 @@ impl Table {
|
|||
mem::size_of::<Self>()
|
||||
+ self.table_summary.size()
|
||||
+ mem::size_of_val(&self.object_store_path)
|
||||
+ mem::size_of_val(&self.table_schema)
|
||||
}
|
||||
|
||||
/// Return name of this table
|
||||
pub fn name(&self) -> String {
|
||||
self.table_summary.name.clone()
|
||||
}
|
||||
|
||||
/// Return the object store path of this table
|
||||
pub fn path(&self) -> Path {
|
||||
self.object_store_path.clone()
|
||||
}
|
||||
|
||||
/// Return the schema of this table for the specified selection of columns
|
||||
pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
|
||||
Ok(match selection {
|
||||
Selection::All => self.table_schema.clone(),
|
||||
Selection::Some(columns) => {
|
||||
let columns = self.table_schema.select(columns).context(SelectColumns)?;
|
||||
self.table_schema.project(&columns)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
|
||||
match (self.timestamp_range, timestamp_range) {
|
||||
(Some(a), Some(b)) => !a.disjoint(b),
|
||||
(None, Some(_)) => false, // if this chunk doesn't have a time column it can't match
|
||||
// the predicate
|
||||
(_, None) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
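The three arms of `matches_predicate` above can be read as an interval-overlap rule. A self-contained sketch, modelling `TimestampRange` as a half-open `(start, end)` pair; the exact semantics of `disjoint` are an assumption:

// Sketch of the decision table: overlap when both ranges exist, never match
// a time predicate without a time column, match everything without a predicate.
fn matches(table_range: Option<(i64, i64)>, query: Option<(i64, i64)>) -> bool {
    match (table_range, query) {
        (Some((ts, te)), Some((qs, qe))) => qs < te && ts < qe,
        (None, Some(_)) => false,
        (_, None) => true,
    }
}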
@ -19,7 +19,7 @@ async-trait = "0.1"
|
|||
chrono = "0.4"
|
||||
croaring = "0.4.5"
|
||||
data_types = { path = "../data_types" }
|
||||
futures = "0.3.7"
|
||||
futures = "0.3"
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
parking_lot = "0.11.1"
|
||||
|
@ -29,5 +29,9 @@ tokio = { version = "1.0", features = ["macros"] }
|
|||
tokio-stream = "0.1.2"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
|
||||
# use libc on unix like platforms to set worker priority in DedicatedExecutor
|
||||
[target."cfg(unix)".dependencies.libc]
|
||||
version = "0.2"
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -8,13 +8,14 @@ pub mod fieldlist;
|
|||
mod schema_pivot;
|
||||
pub mod seriesset;
|
||||
pub mod stringset;
|
||||
mod task;
|
||||
pub use context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_deps::{
|
||||
arrow::record_batch::RecordBatch,
|
||||
datafusion::{self, logical_plan::LogicalPlan},
|
||||
datafusion::{self, logical_plan::LogicalPlan, physical_plan::ExecutionPlan},
|
||||
};
|
||||
use counters::ExecutionCounters;
|
||||
|
||||
|
@ -34,6 +35,8 @@ use crate::plan::{
|
|||
stringset::StringSetPlan,
|
||||
};
|
||||
|
||||
use self::task::{DedicatedExecutor, Error as ExecutorError};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Plan Execution Error: {}", source))]
|
||||
|
@ -84,21 +87,29 @@ pub enum Error {
|
|||
},
|
||||
|
||||
#[snafu(display("Joining execution task: {}", source))]
|
||||
JoinError { source: tokio::task::JoinError },
|
||||
JoinError { source: ExecutorError },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Handles executing plans, and marshalling the results into rust
|
||||
/// Handles executing DataFusion plans, and marshalling the results into rust
|
||||
/// native structures.
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug)]
|
||||
pub struct Executor {
|
||||
counters: Arc<ExecutionCounters>,
|
||||
exec: DedicatedExecutor,
|
||||
}
|
||||
|
||||
impl Executor {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
/// Creates a new executor with a single dedicated thread pool with
|
||||
/// num_threads
|
||||
pub fn new(num_threads: usize) -> Self {
|
||||
let exec = DedicatedExecutor::new("IOx Executor Thread", num_threads);
|
||||
|
||||
Self {
|
||||
exec,
|
||||
counters: Arc::new(ExecutionCounters::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes this plan and returns the resulting set of strings
|
||||
|
@ -148,7 +159,7 @@ impl Executor {
|
|||
let (plan_tx, plan_rx) = mpsc::channel(1);
|
||||
rx_channels.push(plan_rx);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
self.exec.spawn(async move {
|
||||
let SeriesSetPlan {
|
||||
table_name,
|
||||
plan,
|
||||
|
@ -161,7 +172,6 @@ impl Executor {
|
|||
|
||||
let physical_plan = ctx
|
||||
.prepare_plan(&plan)
|
||||
.await
|
||||
.context(DataFusionPhysicalPlanning)?;
|
||||
|
||||
let it = ctx
|
||||
|
@ -212,13 +222,10 @@ impl Executor {
|
|||
let handles = plans
|
||||
.into_iter()
|
||||
.map(|plan| {
|
||||
let counters = Arc::clone(&self.counters);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
let ctx = IOxExecutionContext::new(counters);
|
||||
let ctx = self.new_context();
|
||||
self.exec.spawn(async move {
|
||||
let physical_plan = ctx
|
||||
.prepare_plan(&plan)
|
||||
.await
|
||||
.context(DataFusionPhysicalPlanning)?;
|
||||
|
||||
// TODO: avoid this buffering
|
||||
|
@ -250,9 +257,18 @@ impl Executor {
|
|||
self.run_logical_plans(vec![plan]).await
|
||||
}
|
||||
|
||||
/// Executes the given physical plan using DataFusion on a separate
|
||||
/// thread pool and produces RecordBatches
|
||||
pub async fn collect(&self, physical_plan: Arc<dyn ExecutionPlan>) -> Result<Vec<RecordBatch>> {
|
||||
self.new_context()
|
||||
.collect(physical_plan)
|
||||
.await
|
||||
.context(DataFusionExecution)
|
||||
}
|
||||
|
||||
/// Create a new execution context, suitable for executing a new query
|
||||
pub fn new_context(&self) -> IOxExecutionContext {
|
||||
IOxExecutionContext::new(Arc::clone(&self.counters))
|
||||
IOxExecutionContext::new(self.exec.clone(), Arc::clone(&self.counters))
|
||||
}
|
||||
|
||||
/// plans and runs the plans in parallel and collects the results
|
||||
|
@ -262,11 +278,10 @@ impl Executor {
|
|||
.into_iter()
|
||||
.map(|plan| {
|
||||
let ctx = self.new_context();
|
||||
// TODO run these on some executor other than the main tokio pool
|
||||
tokio::task::spawn(async move {
|
||||
|
||||
self.exec.spawn(async move {
|
||||
let physical_plan = ctx
|
||||
.prepare_plan(&plan)
|
||||
.await
|
||||
.context(DataFusionPhysicalPlanning)?;
|
||||
|
||||
// TODO: avoid this buffering
|
||||
|
@ -327,7 +342,7 @@ mod tests {
|
|||
let expected_strings = to_set(&["Foo", "Bar"]);
|
||||
let plan = StringSetPlan::Known(Arc::clone(&expected_strings));
|
||||
|
||||
let executor = Executor::default();
|
||||
let executor = Executor::new(1);
|
||||
let result_strings = executor.to_string_set(plan).await.unwrap();
|
||||
assert_eq!(result_strings, expected_strings);
|
||||
}
|
||||
|
@ -339,7 +354,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, StringSetRef::new(StringSet::new()));
|
||||
|
@ -355,7 +370,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, to_set(&["foo", "bar", "baz"]));
|
||||
|
@ -374,7 +389,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch1, batch2]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, to_set(&["foo", "bar", "baz"]));
|
||||
|
@ -397,7 +412,7 @@ mod tests {
|
|||
|
||||
let plan: StringSetPlan = vec![scan1, scan2].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, to_set(&["foo", "bar", "baz"]));
|
||||
|
@ -417,7 +432,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await;
|
||||
|
||||
let actual_error = match results {
|
||||
|
@ -443,7 +458,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await;
|
||||
|
||||
let actual_error = match results {
|
||||
|
@ -481,7 +496,7 @@ mod tests {
|
|||
let pivot = make_schema_pivot(scan);
|
||||
let plan = vec![pivot].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.expect("Executed plan");
|
||||
|
||||
assert_eq!(results, to_set(&["f1", "f2"]));
|
||||
|
|
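// A minimal sketch of the new calling convention for `Executor`: the thread
// count is now explicit and `collect` runs the plan on the dedicated pool.
// Assumes this workspace's crates (`query`, `arrow_deps`) as shown in the
// diff; how the physical plan is produced is elided here.
use std::sync::Arc;

use arrow_deps::{arrow::record_batch::RecordBatch, datafusion::physical_plan::ExecutionPlan};
use query::exec::Executor;

async fn run_plan(physical_plan: Arc<dyn ExecutionPlan>) -> Vec<RecordBatch> {
    // Size the dedicated pool explicitly; `Executor::default()` is gone.
    let executor = Executor::new(1);

    // The DataFusion work is handed to the DedicatedExecutor, keeping the
    // caller's tokio runtime free for other requests.
    executor
        .collect(physical_plan)
        .await
        .expect("physical plan ran to completion")
}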
|
@ -25,7 +25,7 @@ use observability_deps::tracing::debug;
|
|||
// Reuse DataFusion error and Result types for this module
|
||||
pub use arrow_deps::datafusion::error::{DataFusionError as Error, Result};
|
||||
|
||||
use super::counters::ExecutionCounters;
|
||||
use super::{counters::ExecutionCounters, task::DedicatedExecutor};
|
||||
|
||||
// The default catalog name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_CATALOG: &str = "public";
|
||||
|
@ -77,15 +77,27 @@ impl ExtensionPlanner for IOxExtensionPlanner {
|
|||
}
|
||||
}
|
||||
|
||||
/// This is an execution context for planning in IOx.
|
||||
/// It wraps a DataFusion execution context and includes
|
||||
/// statistical counters.
|
||||
/// This is an execution context for planning in IOx. It wraps a
|
||||
/// DataFusion execution context and includes statistical counters and
|
||||
/// a dedicated thread pool.
|
||||
///
|
||||
/// Eventually we envision this as also managing resources
|
||||
/// and providing visibility into what plans are running
|
||||
/// Methods on this struct should be preferred to using the raw
|
||||
/// DataFusion functions (such as `collect`) directly.
|
||||
///
|
||||
/// Eventually we envision this also managing additional resource
|
||||
/// types such as Memory and providing visibility into what plans are
|
||||
/// running
|
||||
pub struct IOxExecutionContext {
|
||||
counters: Arc<ExecutionCounters>,
|
||||
inner: ExecutionContext,
|
||||
|
||||
/// Dedicated executor for query execution.
|
||||
///
|
||||
/// DataFusion plans are "CPU" bound and thus can consume tokio
|
||||
/// executors threads for extended periods of time. We use a
|
||||
/// dedicated tokio runtime to run them so that other requests
|
||||
/// can be handled.
|
||||
exec: DedicatedExecutor,
|
||||
}
|
||||
|
||||
impl fmt::Debug for IOxExecutionContext {
|
||||
|
@ -102,7 +114,7 @@ impl IOxExecutionContext {
|
|||
///
|
||||
/// The config is created with a default catalog and schema, but this
|
||||
/// can be overridden at a later date
|
||||
pub fn new(counters: Arc<ExecutionCounters>) -> Self {
|
||||
pub fn new(exec: DedicatedExecutor, counters: Arc<ExecutionCounters>) -> Self {
|
||||
const BATCH_SIZE: usize = 1000;
|
||||
|
||||
// TBD: Should we be reusing an execution context across all executions?
|
||||
|
@ -115,7 +127,11 @@ impl IOxExecutionContext {
|
|||
|
||||
let inner = ExecutionContext::with_config(config);
|
||||
|
||||
Self { counters, inner }
|
||||
Self {
|
||||
exec,
|
||||
counters,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
|
||||
/// returns a reference to the inner datafusion execution context
|
||||
|
@ -130,13 +146,13 @@ impl IOxExecutionContext {
|
|||
|
||||
/// Prepare a SQL statement for execution. This assumes that any
|
||||
/// tables referenced in the SQL have been registered with this context
|
||||
pub async fn prepare_sql(&mut self, sql: &str) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
pub fn prepare_sql(&mut self, sql: &str) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
let logical_plan = self.inner.sql(sql)?.to_logical_plan();
|
||||
self.prepare_plan(&logical_plan).await
|
||||
self.prepare_plan(&logical_plan)
|
||||
}
|
||||
|
||||
/// Prepare (optimize + plan) a pre-created logical plan for execution
|
||||
pub async fn prepare_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
pub fn prepare_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
debug!(
|
||||
"Creating plan: Initial plan\n----\n{}\n{}\n----",
|
||||
plan.display_indent_schema(),
|
||||
|
@ -154,13 +170,16 @@ impl IOxExecutionContext {
|
|||
self.inner.create_physical_plan(&plan)
|
||||
}
|
||||
|
||||
/// Executes the logical plan using DataFusion and produces RecordBatches
|
||||
/// Executes the given physical plan using DataFusion on a separate
|
||||
/// thread pool and produces RecordBatches
|
||||
pub async fn collect(&self, physical_plan: Arc<dyn ExecutionPlan>) -> Result<Vec<RecordBatch>> {
|
||||
self.counters.inc_plans_run();
|
||||
|
||||
debug!("Running plan, physical:\n{:?}", physical_plan);
|
||||
|
||||
collect(physical_plan).await
|
||||
self.exec.spawn(collect(physical_plan)).await.map_err(|e| {
|
||||
Error::Execution(format!("Error running IOxExecutionContext::collect: {}", e))
|
||||
})?
|
||||
}
|
||||
|
||||
/// Executes the physical plan and produces a RecordBatchStream to stream
|
||||
|
@ -169,14 +188,21 @@ impl IOxExecutionContext {
|
|||
&self,
|
||||
physical_plan: Arc<dyn ExecutionPlan>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
if physical_plan.output_partitioning().partition_count() <= 1 {
|
||||
physical_plan.execute(0).await
|
||||
} else {
|
||||
// merge into a single partition
|
||||
let plan = MergeExec::new(physical_plan);
|
||||
// MergeExec must produce a single partition
|
||||
assert_eq!(1, plan.output_partitioning().partition_count());
|
||||
plan.execute(0).await
|
||||
}
|
||||
self.exec
|
||||
.spawn(async move {
|
||||
if physical_plan.output_partitioning().partition_count() <= 1 {
|
||||
physical_plan.execute(0).await
|
||||
} else {
|
||||
// merge into a single partition
|
||||
let plan = MergeExec::new(physical_plan);
|
||||
// MergeExec must produce a single partition
|
||||
assert_eq!(1, plan.output_partitioning().partition_count());
|
||||
plan.execute(0).await
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
Error::Execution(format!("Error running IOxExecutionContext::execute: {}", e))
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
|
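// Sketch of how the reworked IOxExecutionContext is used: planning is now
// synchronous, only execution awaits (and runs on the dedicated pool).
// Assumes the `query` and `arrow_deps` APIs shown in this diff; registering
// the tables/catalog needed by the SQL is elided.
use arrow_deps::{arrow::record_batch::RecordBatch, datafusion::error::Result};
use query::exec::Executor;

async fn run_sql(executor: &Executor, sql: &str) -> Result<Vec<RecordBatch>> {
    let mut ctx = executor.new_context();

    // No `.await` any more: `prepare_sql`/`prepare_plan` are plain functions.
    let physical_plan = ctx.prepare_sql(sql)?;

    // Execution still awaits; internally it goes through `exec.spawn(...)`.
    ctx.collect(physical_plan).await
}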
|
@ -0,0 +1,344 @@
|
|||
//! This module contains a dedicated thread pool for running "cpu
|
||||
//! intensive" workloads such as DataFusion plans
|
||||
|
||||
use parking_lot::Mutex;
|
||||
use std::{pin::Pin, sync::Arc};
|
||||
use tokio::sync::oneshot::Receiver;
|
||||
|
||||
use futures::Future;
|
||||
|
||||
use observability_deps::tracing::warn;
|
||||
|
||||
/// The type of thing that the dedicated executor runs
|
||||
type Task = Pin<Box<dyn Future<Output = ()> + Send>>;
|
||||
|
||||
/// The type of error that is returned from tasks in this module
|
||||
pub type Error = tokio::sync::oneshot::error::RecvError;
|
||||
|
||||
/// Runs futures (and any `tasks` that are `tokio::task::spawned` by
|
||||
/// them) on a separate tokio Executor
|
||||
#[derive(Clone)]
|
||||
pub struct DedicatedExecutor {
|
||||
state: Arc<Mutex<State>>,
|
||||
}
|
||||
|
||||
/// State shared by all clones of a `DedicatedExecutor`: the channel of
|
||||
/// pending tasks and the handle of the dedicated worker thread
|
||||
struct State {
|
||||
/// Channel for requests -- the dedicated executor takes requests
|
||||
/// from here and runs them.
|
||||
requests: Option<std::sync::mpsc::Sender<Task>>,
|
||||
|
||||
/// The thread that is doing the work
|
||||
thread: Option<std::thread::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
/// The default worker priority (value passed to `libc::setpriority`)
|
||||
const WORKER_PRIORITY: i32 = 10;
|
||||
|
||||
impl std::fmt::Debug for DedicatedExecutor {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// Avoid taking the mutex in debug formatting
|
||||
write!(f, "DedicatedExecutor")
|
||||
}
|
||||
}
|
||||
|
||||
impl DedicatedExecutor {
|
||||
/// Creates a new `DedicatedExecutor` with a dedicated tokio
|
||||
/// executor that is separate from the threadpool created via
|
||||
/// `[tokio::main]` or similar.
|
||||
///
|
||||
/// The worker thread priority is set to low so that such tasks do
|
||||
/// not starve other more important tasks (such as answering health checks)
|
||||
///
|
||||
/// Follows the example from Stack Overflow and spawns a new
|
||||
/// thread to install a Tokio runtime "context"
|
||||
/// https://stackoverflow.com/questions/62536566
|
||||
///
|
||||
/// If you try to do this from an async context you see something like
|
||||
/// thread 'plan::stringset::tests::test_builder_plan' panicked at 'Cannot
|
||||
/// drop a runtime in a context where blocking is not allowed. This
|
||||
/// happens when a runtime is dropped from within an asynchronous
|
||||
/// context.', .../tokio-1.4.0/src/runtime/blocking/shutdown.rs:51:21
|
||||
pub fn new(thread_name: &str, num_threads: usize) -> Self {
|
||||
let thread_name = thread_name.to_string();
|
||||
|
||||
let (tx, rx) = std::sync::mpsc::channel();
|
||||
|
||||
let thread = std::thread::spawn(move || {
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.thread_name(&thread_name)
|
||||
.worker_threads(num_threads)
|
||||
.on_thread_start(move || set_current_thread_priority(WORKER_PRIORITY))
|
||||
.build()
|
||||
.expect("Creating tokio runtime");
|
||||
|
||||
// By entering the context, all calls to `tokio::spawn` go
|
||||
// to this executor
|
||||
let _guard = runtime.enter();
|
||||
|
||||
while let Ok(request) = rx.recv() {
|
||||
// TODO track the outstanding tasks
|
||||
tokio::task::spawn(request);
|
||||
}
|
||||
});
|
||||
|
||||
let state = State {
|
||||
requests: Some(tx),
|
||||
thread: Some(thread),
|
||||
};
|
||||
|
||||
Self {
|
||||
state: Arc::new(Mutex::new(state)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the specified Future (and any tasks it spawns) on the
|
||||
/// `DedicatedExecutor`.
|
||||
///
|
||||
/// Currently all tasks are added to the tokio executor
|
||||
/// immediately and compete for the threadpool's resources.
|
||||
pub fn spawn<T>(&self, task: T) -> Receiver<T::Output>
|
||||
where
|
||||
T: Future + Send + 'static,
|
||||
T::Output: Send + 'static,
|
||||
{
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
|
||||
let job = Box::pin(async move {
|
||||
let task_output = task.await;
|
||||
if tx.send(task_output).is_err() {
|
||||
warn!("Spawned task output ignored: receiver dropped")
|
||||
}
|
||||
});
|
||||
|
||||
let mut state = self.state.lock();
|
||||
|
||||
if let Some(requests) = &mut state.requests {
|
||||
// would fail if someone has started shutdown
|
||||
requests.send(job).ok();
|
||||
} else {
|
||||
warn!("tried to schedule task on an executor that was shutdown");
|
||||
}
|
||||
|
||||
rx
|
||||
}
|
||||
|
||||
/// signals shutdown of this executor and any Clones
|
||||
pub fn shutdown(&self) {
|
||||
// hang up the channel which will cause the dedicated thread
|
||||
// to quit
|
||||
let mut state = self.state.lock();
|
||||
state.requests = None;
|
||||
}
|
||||
|
||||
/// Stops all subsequent task executions, and waits for the worker
|
||||
/// thread to complete. Note this will shutdown all clones of this
|
||||
/// `DedicatedExecutor` as well.
|
||||
///
|
||||
/// Only the first call to `join` will actually wait for the
|
||||
/// executing thread to complete. All other calls to join will
|
||||
/// complete immediately.
|
||||
pub fn join(&self) {
|
||||
self.shutdown();
|
||||
|
||||
// take the thread out when mutex is held
|
||||
let thread = {
|
||||
let mut state = self.state.lock();
|
||||
state.thread.take()
|
||||
};
|
||||
|
||||
// wait for completion while not holding the mutex to avoid
|
||||
// deadlocks
|
||||
if let Some(thread) = thread {
|
||||
thread.join().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn set_current_thread_priority(prio: i32) {
|
||||
// on linux setpriority sets the current thread's priority
|
||||
// (as opposed to the current process).
|
||||
unsafe { libc::setpriority(0, 0, prio) };
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn set_current_thread_priority(prio: i32) {
|
||||
warn!("Setting worker thread priority not supported on this platform");
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::{Arc, Barrier};
|
||||
|
||||
#[cfg(unix)]
|
||||
fn get_current_thread_priority() -> i32 {
|
||||
// on linux getpriority returns the current thread's priority
|
||||
// (as opposed to the current process).
|
||||
unsafe { libc::getpriority(0, 0) }
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn get_current_thread_priority() -> i32 {
|
||||
WORKER_PRIORITY
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn basic() {
|
||||
let barrier = Arc::new(Barrier::new(2));
|
||||
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
let dedicated_task = exec.spawn(do_work(42, Arc::clone(&barrier)));
|
||||
|
||||
// Note the dedicated task will never complete if it runs on
|
||||
// the main tokio thread (as this test is not using the
|
||||
// 'multithreaded' version of the executor and the call to
|
||||
// barrier.wait actually blocks the tokio thread)
|
||||
barrier.wait();
|
||||
|
||||
// should be able to get the result
|
||||
assert_eq!(dedicated_task.await.unwrap(), 42);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn basic_clone() {
|
||||
let barrier = Arc::new(Barrier::new(2));
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
// Run task on clone should work fine
|
||||
let dedicated_task = exec.clone().spawn(do_work(42, Arc::clone(&barrier)));
|
||||
barrier.wait();
|
||||
assert_eq!(dedicated_task.await.unwrap(), 42);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn multi_task() {
|
||||
let barrier = Arc::new(Barrier::new(3));
|
||||
|
||||
// make an executor with two threads
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 2);
|
||||
let dedicated_task1 = exec.spawn(do_work(11, Arc::clone(&barrier)));
|
||||
let dedicated_task2 = exec.spawn(do_work(42, Arc::clone(&barrier)));
|
||||
|
||||
// block main thread until completion of other two tasks
|
||||
barrier.wait();
|
||||
|
||||
// should be able to get the result
|
||||
assert_eq!(dedicated_task1.await.unwrap(), 11);
|
||||
assert_eq!(dedicated_task2.await.unwrap(), 42);
|
||||
|
||||
exec.join();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn worker_priority() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 2);
|
||||
|
||||
let dedicated_task = exec.spawn(async move { get_current_thread_priority() });
|
||||
|
||||
assert_eq!(dedicated_task.await.unwrap(), WORKER_PRIORITY);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tokio_spawn() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 2);
|
||||
|
||||
// spawn a task that spawns to other tasks and ensure they run on the dedicated
|
||||
// executor
|
||||
let dedicated_task = exec.spawn(async move {
|
||||
// spawn separate tasks
|
||||
let t1 = tokio::task::spawn(async {
|
||||
assert_eq!(
|
||||
std::thread::current().name(),
|
||||
Some("Test DedicatedExecutor")
|
||||
);
|
||||
25usize
|
||||
});
|
||||
t1.await.unwrap()
|
||||
});
|
||||
|
||||
// Validate the inner task ran to completion (aka it did not panic)
|
||||
assert_eq!(dedicated_task.await.unwrap(), 25);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn panic_on_executor() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
let dedicated_task = exec.spawn(async move {
|
||||
if true {
|
||||
panic!("At the disco, on the dedicated task scheduler");
|
||||
} else {
|
||||
42
|
||||
}
|
||||
});
|
||||
|
||||
// should not be able to get the result
|
||||
dedicated_task.await.unwrap_err();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_shutdown_while_task_running() {
|
||||
let barrier = Arc::new(Barrier::new(2));
|
||||
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
let dedicated_task = exec.spawn(do_work(42, Arc::clone(&barrier)));
|
||||
|
||||
exec.shutdown();
|
||||
// block main thread until completion of the outstanding task
|
||||
barrier.wait();
|
||||
|
||||
// task should complete successfully
|
||||
assert_eq!(dedicated_task.await.unwrap(), 42);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_submit_task_after_shutdown() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
|
||||
// Simulate trying to submit tasks once executor has shutdown
|
||||
exec.shutdown();
|
||||
let dedicated_task = exec.spawn(async { 11 });
|
||||
|
||||
// task should complete, but return an error
|
||||
dedicated_task.await.unwrap_err();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_submit_task_after_clone_shutdown() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
|
||||
// shutdown the clone (but not the exec)
|
||||
exec.clone().join();
|
||||
|
||||
// Simulate trying to submit tasks once executor has shutdown
|
||||
let dedicated_task = exec.spawn(async { 11 });
|
||||
|
||||
// task should complete, but return an error
|
||||
dedicated_task.await.unwrap_err();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_join() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
// test it doesn't hang
|
||||
exec.join()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[allow(clippy::redundant_clone)]
|
||||
async fn executor_clone_join() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
// test it doesn't hang
|
||||
exec.clone().join();
|
||||
exec.clone().join();
|
||||
exec.join();
|
||||
}
|
||||
|
||||
/// Wait for the barrier and then return `result`
|
||||
async fn do_work(result: usize, barrier: Arc<Barrier>) -> usize {
|
||||
barrier.wait();
|
||||
result
|
||||
}
|
||||
}
|
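// Condensed sketch of the DedicatedExecutor lifecycle exercised by the tests
// above. The module is private (`mod task`), so this is written as code that
// would live inside `query::exec`; the names match the API added in this file.
async fn dedicated_executor_example() {
    use self::task::DedicatedExecutor;

    // Named, low-priority worker threads, separate from the main runtime.
    let exec = DedicatedExecutor::new("IOx Executor Thread", 2);

    // `spawn` returns a tokio oneshot Receiver. Awaiting it yields
    // Result<T, RecvError>: Err if the task panicked or the executor was
    // shut down before the task could run.
    let task = exec.spawn(async { 21 * 2 });
    assert_eq!(task.await.unwrap(), 42);

    // `join` signals shutdown and waits for the worker thread; it affects
    // every clone of this executor.
    exec.join();
}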
|
@ -195,13 +195,13 @@ impl InfluxRPCPlanner {
|
|||
/// Returns a plan that lists the names of tables in this
|
||||
/// database that have at least one row that matches the
|
||||
/// conditions listed on `predicate`
|
||||
pub async fn table_names<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
pub fn table_names<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
let mut builder = StringSetPlanBuilder::new();
|
||||
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
for chunk in self.filtered_chunks(database, &predicate)? {
|
||||
let new_table_names = chunk
|
||||
.table_names(&predicate, builder.known_strings())
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
|
@ -227,7 +227,7 @@ impl InfluxRPCPlanner {
|
|||
/// columns (as defined in the InfluxDB Data model) names in this
|
||||
/// database that have more than zero rows which pass the
|
||||
/// conditions specified by `predicate`.
|
||||
pub async fn tag_keys<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
pub fn tag_keys<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
|
@ -246,9 +246,9 @@ impl InfluxRPCPlanner {
|
|||
let mut need_full_plans = BTreeMap::new();
|
||||
|
||||
let mut known_columns = BTreeSet::new();
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
for chunk in self.filtered_chunks(database, &predicate)? {
|
||||
// try and get the table names that have rows that match the predicate
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate)?;
|
||||
|
||||
for table_name in table_names {
|
||||
debug!(
|
||||
|
@ -308,7 +308,7 @@ impl InfluxRPCPlanner {
|
|||
// were already known to have data (based on the contents of known_columns)
|
||||
|
||||
for (table_name, chunks) in need_full_plans.into_iter() {
|
||||
let plan = self.tag_keys_plan(&table_name, &predicate, chunks).await?;
|
||||
let plan = self.tag_keys_plan(&table_name, &predicate, chunks)?;
|
||||
|
||||
if let Some(plan) = plan {
|
||||
builder = builder.append(plan)
|
||||
|
@ -326,7 +326,7 @@ impl InfluxRPCPlanner {
|
|||
/// Returns a plan which finds the distinct, non-null tag values
|
||||
/// in the specified `tag_name` column of this database which pass
|
||||
/// the conditions specified by `predicate`.
|
||||
pub async fn tag_values<D>(
|
||||
pub fn tag_values<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
tag_name: &str,
|
||||
|
@ -351,8 +351,8 @@ impl InfluxRPCPlanner {
|
|||
let mut need_full_plans = BTreeMap::new();
|
||||
|
||||
let mut known_values = BTreeSet::new();
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
for chunk in self.filtered_chunks(database, &predicate)? {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate)?;
|
||||
|
||||
for table_name in table_names {
|
||||
debug!(
|
||||
|
@ -426,9 +426,7 @@ impl InfluxRPCPlanner {
|
|||
// time in `known_columns`, and some tables in chunks that we
|
||||
// need to run a plan to find what values pass the predicate.
|
||||
for (table_name, chunks) in need_full_plans.into_iter() {
|
||||
let scan_and_filter = self
|
||||
.scan_and_filter(&table_name, &predicate, chunks)
|
||||
.await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, &predicate, chunks)?;
|
||||
|
||||
// if we have any data to scan, make a plan!
|
||||
if let Some(TableScanAndFilter {
|
||||
|
@ -471,11 +469,7 @@ impl InfluxRPCPlanner {
|
|||
/// datatypes (as defined in the data written via `write_lines`),
|
||||
/// and which have more than zero rows which pass the conditions
|
||||
/// specified by `predicate`.
|
||||
pub async fn field_columns<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: Predicate,
|
||||
) -> Result<FieldListPlan>
|
||||
pub fn field_columns<D>(&self, database: &D, predicate: Predicate) -> Result<FieldListPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
|
@ -488,15 +482,12 @@ impl InfluxRPCPlanner {
|
|||
// values and stops the plan executing once it has them
|
||||
|
||||
// map table -> Vec<Arc<Chunk>>
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
|
||||
let mut field_list_plan = FieldListPlan::new();
|
||||
for (table_name, chunks) in table_chunks {
|
||||
if let Some(plan) = self
|
||||
.field_columns_plan(&table_name, &predicate, chunks)
|
||||
.await?
|
||||
{
|
||||
if let Some(plan) = self.field_columns_plan(&table_name, &predicate, chunks)? {
|
||||
field_list_plan = field_list_plan.append(plan);
|
||||
}
|
||||
}
|
||||
|
@ -523,7 +514,7 @@ impl InfluxRPCPlanner {
|
|||
/// rows for a particular series (groups where all tags are the
|
||||
/// same) occur together in the plan
|
||||
|
||||
pub async fn read_filter<D>(&self, database: &D, predicate: Predicate) -> Result<SeriesSetPlans>
|
||||
pub fn read_filter<D>(&self, database: &D, predicate: Predicate) -> Result<SeriesSetPlans>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
|
@ -531,17 +522,15 @@ impl InfluxRPCPlanner {
|
|||
|
||||
// group tables by chunk, pruning if possible
|
||||
// key is table name, values are chunks
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
|
||||
// now, build up plans for each table
|
||||
let mut ss_plans = Vec::with_capacity(table_chunks.len());
|
||||
for (table_name, chunks) in table_chunks {
|
||||
let prefix_columns: Option<&[&str]> = None;
|
||||
|
||||
let ss_plan = self
|
||||
.read_filter_plan(table_name, prefix_columns, &predicate, chunks)
|
||||
.await?;
|
||||
let ss_plan = self.read_filter_plan(table_name, prefix_columns, &predicate, chunks)?;
|
||||
// If we have to do real work, add it to the list of plans
|
||||
if let Some(ss_plan) = ss_plan {
|
||||
ss_plans.push(ss_plan);
|
||||
|
@ -555,7 +544,7 @@ impl InfluxRPCPlanner {
|
|||
/// with rows grouped by an aggregate function. Note that we still
|
||||
/// group by all tags (so group within series) and the
|
||||
/// group_columns define the order of the result
|
||||
pub async fn read_group<D>(
|
||||
pub fn read_group<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: Predicate,
|
||||
|
@ -568,8 +557,8 @@ impl InfluxRPCPlanner {
|
|||
debug!(predicate=?predicate, agg=?agg, "planning read_group");
|
||||
|
||||
// group tables by chunk, pruning if possible
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
let num_prefix_tag_group_columns = group_columns.len();
|
||||
|
||||
// now, build up plans for each table
|
||||
|
@ -577,13 +566,9 @@ impl InfluxRPCPlanner {
|
|||
for (table_name, chunks) in table_chunks {
|
||||
let ss_plan = match agg {
|
||||
Aggregate::None => {
|
||||
self.read_filter_plan(table_name, Some(group_columns), &predicate, chunks)
|
||||
.await?
|
||||
}
|
||||
_ => {
|
||||
self.read_group_plan(table_name, &predicate, agg, group_columns, chunks)
|
||||
.await?
|
||||
self.read_filter_plan(table_name, Some(group_columns), &predicate, chunks)?
|
||||
}
|
||||
_ => self.read_group_plan(table_name, &predicate, agg, group_columns, chunks)?,
|
||||
};
|
||||
|
||||
// If we have to do real work, add it to the list of plans
|
||||
|
@ -598,7 +583,7 @@ impl InfluxRPCPlanner {
|
|||
|
||||
/// Creates a GroupedSeriesSet plan that produces an output table with rows
|
||||
/// that are grouped by window definitions
|
||||
pub async fn read_window_aggregate<D>(
|
||||
pub fn read_window_aggregate<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: Predicate,
|
||||
|
@ -612,15 +597,14 @@ impl InfluxRPCPlanner {
|
|||
debug!(predicate=?predicate, "planning read_window_aggregate");
|
||||
|
||||
// group tables by chunk, pruning if possible
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
|
||||
// now, build up plans for each table
|
||||
let mut ss_plans = Vec::with_capacity(table_chunks.len());
|
||||
for (table_name, chunks) in table_chunks {
|
||||
let ss_plan = self
|
||||
.read_window_aggregate_plan(table_name, &predicate, agg, &every, &offset, chunks)
|
||||
.await?;
|
||||
.read_window_aggregate_plan(table_name, &predicate, agg, &every, &offset, chunks)?;
|
||||
// If we have to do real work, add it to the list of plans
|
||||
if let Some(ss_plan) = ss_plan {
|
||||
ss_plans.push(ss_plan);
|
||||
|
@ -631,7 +615,7 @@ impl InfluxRPCPlanner {
|
|||
}
|
||||
|
||||
/// Creates a map of table_name --> Chunks that have that table
|
||||
async fn group_chunks_by_table<C>(
|
||||
fn group_chunks_by_table<C>(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
chunks: Vec<Arc<C>>,
|
||||
|
@ -641,7 +625,7 @@ impl InfluxRPCPlanner {
|
|||
{
|
||||
let mut table_chunks = BTreeMap::new();
|
||||
for chunk in chunks {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate)?;
|
||||
for table_name in table_names {
|
||||
table_chunks
|
||||
.entry(table_name)
|
||||
|
@ -653,11 +637,7 @@ impl InfluxRPCPlanner {
|
|||
}
|
||||
|
||||
/// Find all the table names in the specified chunk that pass the predicate
|
||||
async fn chunk_table_names<C>(
|
||||
&self,
|
||||
chunk: &C,
|
||||
predicate: &Predicate,
|
||||
) -> Result<BTreeSet<String>>
|
||||
fn chunk_table_names<C>(&self, chunk: &C, predicate: &Predicate) -> Result<BTreeSet<String>>
|
||||
where
|
||||
C: PartitionChunk + 'static,
|
||||
{
|
||||
|
@ -705,7 +685,7 @@ impl InfluxRPCPlanner {
|
|||
/// Filter(predicate)
|
||||
/// TableScan (of chunks)
|
||||
/// ```
|
||||
async fn tag_keys_plan<C>(
|
||||
fn tag_keys_plan<C>(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
|
@ -714,7 +694,7 @@ impl InfluxRPCPlanner {
|
|||
where
|
||||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -767,7 +747,7 @@ impl InfluxRPCPlanner {
|
|||
/// Filter(predicate) [optional]
|
||||
/// Scan
|
||||
/// ```
|
||||
async fn field_columns_plan<C>(
|
||||
fn field_columns_plan<C>(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
|
@ -776,7 +756,7 @@ impl InfluxRPCPlanner {
|
|||
where
|
||||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks)?;
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
schema,
|
||||
|
@ -817,7 +797,7 @@ impl InfluxRPCPlanner {
|
|||
/// Order by (tag_columns, timestamp_column)
|
||||
/// Filter(predicate)
|
||||
/// Scan
|
||||
async fn read_filter_plan<C>(
|
||||
fn read_filter_plan<C>(
|
||||
&self,
|
||||
table_name: impl Into<String>,
|
||||
prefix_columns: Option<&[impl AsRef<str>]>,
|
||||
|
@ -828,7 +808,7 @@ impl InfluxRPCPlanner {
|
|||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let table_name = table_name.into();
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -937,7 +917,7 @@ impl InfluxRPCPlanner {
|
|||
/// GroupBy(gby cols, aggs, time cols)
|
||||
/// Filter(predicate)
|
||||
/// Scan
|
||||
pub async fn read_group_plan<C>(
|
||||
pub fn read_group_plan<C>(
|
||||
&self,
|
||||
table_name: impl Into<String>,
|
||||
predicate: &Predicate,
|
||||
|
@ -949,7 +929,7 @@ impl InfluxRPCPlanner {
|
|||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let table_name = table_name.into();
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -1027,7 +1007,7 @@ impl InfluxRPCPlanner {
|
|||
/// GroupBy(gby: tag columns, window_function; agg: aggregate(field)
|
||||
/// Filter(predicate)
|
||||
/// Scan
|
||||
pub async fn read_window_aggregate_plan<C>(
|
||||
pub fn read_window_aggregate_plan<C>(
|
||||
&self,
|
||||
table_name: impl Into<String>,
|
||||
predicate: &Predicate,
|
||||
|
@ -1040,7 +1020,7 @@ impl InfluxRPCPlanner {
|
|||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let table_name = table_name.into();
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -1114,7 +1094,7 @@ impl InfluxRPCPlanner {
|
|||
/// Filter(predicate) [optional]
|
||||
/// Scan
|
||||
/// ```
|
||||
async fn scan_and_filter<C>(
|
||||
fn scan_and_filter<C>(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
|
@ -1190,7 +1170,7 @@ impl InfluxRPCPlanner {
|
|||
|
||||
/// Returns a list of chunks across all partitions which may
|
||||
/// contain data that pass the predicate
|
||||
async fn filtered_chunks<D>(
|
||||
fn filtered_chunks<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: &Predicate,
|
||||
|
|
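// Sketch of the resulting call pattern: InfluxRPCPlanner methods now plan
// synchronously on the caller's thread, and only the Executor awaits.
// Mirrors the benchmark added later in this PR; the `query` crate paths are
// as shown in that benchmark.
use query::exec::Executor;
use query::frontend::influxrpc::InfluxRPCPlanner;
use query::predicate::PredicateBuilder;
use query::Database;

async fn list_table_names<D: Database + 'static>(db: &D, executor: &Executor) {
    let planner = InfluxRPCPlanner::new();
    let predicate = PredicateBuilder::default().build();

    // Planning: no `.await`, errors surface immediately.
    let plan = planner.table_names(db, predicate).expect("built plan");

    // Execution: async, on the dedicated thread pool.
    let names = executor.to_string_set(plan).await.expect("ran plan");
    assert!(!names.is_empty());
}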
|
@ -84,7 +84,7 @@ impl SQLQueryPlanner {
|
|||
/// Plan a SQL query against the data in `database`, and return a
|
||||
/// DataFusion physical execution plan. The plan can then be
|
||||
/// executed using `executor` in a streaming fashion.
|
||||
pub async fn query<D: CatalogProvider + 'static>(
|
||||
pub fn query<D: CatalogProvider + 'static>(
|
||||
&self,
|
||||
database: Arc<D>,
|
||||
query: &str,
|
||||
|
@ -92,6 +92,6 @@ impl SQLQueryPlanner {
|
|||
) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
let mut ctx = executor.new_context();
|
||||
ctx.inner_mut().register_catalog(DEFAULT_CATALOG, database);
|
||||
ctx.prepare_sql(query).await.context(Preparing)
|
||||
ctx.prepare_sql(query).context(Preparing)
|
||||
}
|
||||
}
|
||||
|
|
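// Sketch of the SQL path after this change: `SQLQueryPlanner::query` returns
// a physical plan without awaiting, and execution happens separately on the
// Executor. The planner's final parameter is truncated in the hunk above, so
// the `executor: &Executor` argument and the `query::frontend::sql` path are
// assumptions; constructing the planner itself is also elided.
use std::sync::Arc;

use arrow_deps::{
    arrow::record_batch::RecordBatch,
    datafusion::catalog::catalog::CatalogProvider,
};
use query::exec::Executor;
use query::frontend::sql::SQLQueryPlanner;

async fn run_sql_query<D: CatalogProvider + 'static>(
    planner: &SQLQueryPlanner,
    db: Arc<D>,
    sql: &str,
    executor: &Executor,
) -> Vec<RecordBatch> {
    // Planning: synchronous; registers the catalog and builds a physical plan.
    let physical_plan = planner.query(db, sql, executor).expect("planned query");

    // Execution: async, on the dedicated pool.
    executor.collect(physical_plan).await.expect("executed query")
}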
|
@ -10,7 +10,7 @@ use arrow_deps::datafusion::physical_plan::SendableRecordBatchStream;
|
|||
use async_trait::async_trait;
|
||||
use data_types::chunk::ChunkSummary;
|
||||
use exec::{stringset::StringSet, Executor};
|
||||
use internal_types::{data::ReplicatedWrite, schema::Schema, selection::Selection};
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
|
||||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
|
@ -39,9 +39,6 @@ pub trait Database: Debug + Send + Sync {
|
|||
type Error: std::error::Error + Send + Sync + 'static;
|
||||
type Chunk: PartitionChunk;
|
||||
|
||||
/// Stores the replicated write into the database.
|
||||
fn store_replicated_write(&self, write: &ReplicatedWrite) -> Result<(), Self::Error>;
|
||||
|
||||
/// Return the partition keys for data in this DB
|
||||
fn partition_keys(&self) -> Result<Vec<String>, Self::Error>;
|
||||
|
||||
|
|
|
@ -211,7 +211,7 @@ mod tests {
|
|||
let expected_ss = to_string_set(&["foo", "bar", "baz", "from_a_plan"]).into();
|
||||
|
||||
assert!(matches!(plan, StringSetPlan::Plan(_)));
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let ss = executor.to_string_set(plan).await.unwrap();
|
||||
assert_eq!(ss, expected_ss);
|
||||
}
|
||||
|
|
|
@ -18,10 +18,7 @@ use crate::{
|
|||
Database, DatabaseStore, PartitionChunk, Predicate,
|
||||
};
|
||||
|
||||
use data_types::database_rules::{PartitionTemplate, TemplatePart};
|
||||
use influxdb_line_protocol::{parse_lines, ParsedLine};
|
||||
use internal_types::{
|
||||
data::{lines_to_replicated_write, ReplicatedWrite},
|
||||
schema::{
|
||||
builder::{SchemaBuilder, SchemaMerger},
|
||||
Schema,
|
||||
|
@ -30,10 +27,8 @@ use internal_types::{
|
|||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use data_types::database_rules::Partitioner;
|
||||
use parking_lot::Mutex;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use snafu::{OptionExt, Snafu};
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
|
@ -43,12 +38,6 @@ pub struct TestDatabase {
|
|||
/// Value is map of chunk_id to chunk
|
||||
partitions: Mutex<BTreeMap<String, BTreeMap<u32, Arc<TestChunk>>>>,
|
||||
|
||||
/// Lines which have been written to this database, in order
|
||||
saved_lines: Mutex<Vec<String>>,
|
||||
|
||||
/// Replicated writes which have been written to this database, in order
|
||||
replicated_writes: Mutex<Vec<ReplicatedWrite>>,
|
||||
|
||||
/// `column_names` to return upon next request
|
||||
column_names: Arc<Mutex<Option<StringSetRef>>>,
|
||||
}
|
||||
|
@ -74,33 +63,6 @@ impl TestDatabase {
|
|||
Self::default()
|
||||
}
|
||||
|
||||
/// Get all lines written to this database
|
||||
pub fn get_lines(&self) -> Vec<String> {
|
||||
self.saved_lines.lock().clone()
|
||||
}
|
||||
|
||||
/// Get all replicated writes to this database
|
||||
pub fn get_writes(&self) -> Vec<ReplicatedWrite> {
|
||||
self.replicated_writes.lock().clone()
|
||||
}
|
||||
|
||||
/// Parse line protocol and add it as new lines to this
|
||||
/// database
|
||||
pub async fn add_lp_string(&self, lp_data: &str) {
|
||||
let parsed_lines = parse_lines(&lp_data)
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap_or_else(|_| panic!("parsing line protocol: {}", lp_data));
|
||||
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lines(self, &parsed_lines).unwrap();
|
||||
|
||||
// Writes parsed lines into this database
|
||||
let mut saved_lines = self.saved_lines.lock();
|
||||
for line in parsed_lines {
|
||||
saved_lines.push(line.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a test chunk to the database
|
||||
pub fn add_chunk(&self, partition_key: &str, chunk: Arc<TestChunk>) {
|
||||
let mut partitions = self.partitions.lock();
|
||||
|
@ -132,12 +94,6 @@ impl Database for TestDatabase {
|
|||
type Error = TestError;
|
||||
type Chunk = TestChunk;
|
||||
|
||||
/// Adds the replicated write to this database
|
||||
fn store_replicated_write(&self, write: &ReplicatedWrite) -> Result<(), Self::Error> {
|
||||
self.replicated_writes.lock().push(write.clone());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the partition keys for data in this DB
|
||||
fn partition_keys(&self) -> Result<Vec<String>, Self::Error> {
|
||||
let partitions = self.partitions.lock();
|
||||
|
@ -448,22 +404,13 @@ impl TestDatabaseStore {
|
|||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Parse line protocol and add it as new lines to the `db_name` database
|
||||
pub async fn add_lp_string(&self, db_name: &str, lp_data: &str) {
|
||||
self.db_or_create(db_name)
|
||||
.await
|
||||
.expect("db_or_create suceeeds")
|
||||
.add_lp_string(lp_data)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TestDatabaseStore {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
databases: Mutex::new(BTreeMap::new()),
|
||||
executor: Arc::new(Executor::new()),
|
||||
executor: Arc::new(Executor::new(1)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -505,91 +452,3 @@ impl DatabaseStore for TestDatabaseStore {
|
|||
Arc::clone(&self.executor)
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for writing line protocol data directly into test databases
|
||||
/// (handles creating sequence numbers and writer ids)
|
||||
#[derive(Debug, Default)]
|
||||
pub struct TestLPWriter {
|
||||
pub writer_id: u32,
|
||||
sequence_number: u64,
|
||||
}
|
||||
|
||||
impl TestLPWriter {
|
||||
// writes data in LineProtocol format into a database
|
||||
pub fn write_lines<D: Database>(
|
||||
&mut self,
|
||||
database: &D,
|
||||
lines: &[ParsedLine<'_>],
|
||||
) -> Result<()> {
|
||||
// partitions data in hourly segments
|
||||
let partition_template = PartitionTemplate {
|
||||
parts: vec![TemplatePart::TimeFormat("%Y-%m-%dT%H".to_string())],
|
||||
};
|
||||
|
||||
let write = lines_to_replicated_write(
|
||||
self.writer_id,
|
||||
self.sequence_number,
|
||||
&lines,
|
||||
&partition_template,
|
||||
);
|
||||
self.sequence_number += 1;
|
||||
database
|
||||
.store_replicated_write(&write)
|
||||
.map_err(|e| TestError::DatabaseWrite {
|
||||
source: Box::new(e),
|
||||
})
|
||||
}
|
||||
|
||||
/// Writes line protocol formatted data in lp_data to `database`
|
||||
pub fn write_lp_string<D: Database>(&mut self, database: &D, lp_data: &str) -> Result<()> {
|
||||
let lines = parse_lines(lp_data)
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(DatabaseWrite)?;
|
||||
|
||||
self.write_lines(database, &lines)
|
||||
}
|
||||
|
||||
/// Writes line protocol formatted data to database and partition
|
||||
pub fn write_lp_to_partition<D: Database>(
|
||||
&mut self,
|
||||
database: &D,
|
||||
lp_data: &str,
|
||||
paritition_key: impl Into<String>,
|
||||
) {
|
||||
let lines = parse_lines(lp_data).collect::<Result<Vec<_>, _>>().unwrap();
|
||||
self.write_lines_to_partition(database, paritition_key, &lines)
|
||||
}
|
||||
|
||||
/// Writes lines to the given partition
|
||||
pub fn write_lines_to_partition<D: Database>(
|
||||
&mut self,
|
||||
database: &D,
|
||||
partition_key: impl Into<String>,
|
||||
lines: &[ParsedLine<'_>],
|
||||
) {
|
||||
let partitioner = TestPartitioner {
|
||||
key: partition_key.into(),
|
||||
};
|
||||
let write =
|
||||
lines_to_replicated_write(self.writer_id, self.sequence_number, &lines, &partitioner);
|
||||
self.sequence_number += 1;
|
||||
database.store_replicated_write(&write).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// Outputs a set partition key for testing. Used for parsing line protocol into
|
||||
// ReplicatedWrite and setting an explicit partition key for all writes therein.
|
||||
struct TestPartitioner {
|
||||
key: String,
|
||||
}
|
||||
|
||||
impl Partitioner for TestPartitioner {
|
||||
fn partition_key(
|
||||
&self,
|
||||
_line: &ParsedLine<'_>,
|
||||
_default_time: &DateTime<Utc>,
|
||||
) -> data_types::database_rules::Result<String> {
|
||||
Ok(self.key.clone())
|
||||
}
|
||||
}
|
||||
|
|
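// Sketch of the test-setup pattern that replaces the removed `add_lp_string`
// helpers: line protocol is written through TestLPWriter with an explicit
// partition key. The `query::test` module path is an assumption based on the
// file shown above.
use query::test::{TestDatabase, TestLPWriter};

fn seeded_test_db() -> TestDatabase {
    let db = TestDatabase::new();
    let mut writer = TestLPWriter::default();

    // The partition key is chosen by the test rather than derived from the data.
    writer.write_lp_to_partition(&db, "cpu,region=west user=23.2 100", "1970-01-01T00");
    db
}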
|
@ -376,6 +376,29 @@ impl Chunk {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// A helper method for determining the time-range associated with the
|
||||
/// specified table.
|
||||
///
|
||||
/// A table's schema need not contain a column representing time;
|
||||
/// however, any table that represents data using the InfluxDB model does
|
||||
/// contain a column that represents the timestamp associated with each
|
||||
/// row.
|
||||
///
|
||||
/// `table_time_range` will return the min and max values for that column
|
||||
/// if the table is using the InfluxDB data-model, otherwise it will return
|
||||
/// `None`. An error will be returned if the table does not exist.
|
||||
pub fn table_time_range(&self, table_name: &str) -> Result<Option<(i64, i64)>> {
|
||||
// read lock on chunk.
|
||||
let chunk_data = self.chunk_data.read().unwrap();
|
||||
|
||||
let table = chunk_data
|
||||
.data
|
||||
.get(table_name)
|
||||
.context(TableNotFound { table_name })?;
|
||||
|
||||
Ok(table.time_range())
|
||||
}
|
||||
|
||||
/// Returns a schema object for a `read_filter` operation using the provided
|
||||
/// column selection. An error is returned if the specified columns do not
|
||||
/// exist.
|
||||
|
|
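// Sketch of how a caller might use the new `table_time_range` for pruning:
// compare the returned (min, max) timestamps against a query's time bounds.
// The overlap test is written inline for illustration; the database layer
// would normally go through `TimestampRange` instead.
use read_buffer::Chunk;

/// True if `table_name` in `chunk` could contain rows in the half-open
/// query range [query_min, query_max).
fn chunk_may_match(chunk: &Chunk, table_name: &str, query_min: i64, query_max: i64) -> bool {
    match chunk.table_time_range(table_name) {
        // Table has an InfluxDB timestamp column: prune on its (min, max).
        Ok(Some((min, max))) => min < query_max && max >= query_min,
        // No timestamp column: a time-bounded query cannot match this table.
        Ok(None) => false,
        // Unknown table: let the caller surface the error; stay conservative.
        Err(_) => true,
    }
}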
|
@ -13,12 +13,15 @@ use snafu::{ensure, Snafu};
|
|||
|
||||
use crate::row_group::{self, ColumnName, Predicate, RowGroup};
|
||||
use crate::schema::{AggregateType, ColumnType, LogicalDataType, ResultSchema};
|
||||
use crate::value::Value;
|
||||
use crate::value::{OwnedValue, Scalar, Value};
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("cannot drop last row group in table; drop table"))]
|
||||
EmptyTableError {},
|
||||
|
||||
#[snafu(display("table does not have InfluxDB timestamp column"))]
|
||||
NoTimestampColumnError {},
|
||||
|
||||
#[snafu(display("unsupported column operation on {}: {}", column_name, msg))]
|
||||
UnsupportedColumnOperation { msg: String, column_name: String },
|
||||
}
|
||||
|
@ -151,9 +154,38 @@ impl Table {
|
|||
self.table_data.read().unwrap().meta.to_summary(&self.name)
|
||||
}
|
||||
|
||||
/// The time range of all row groups within this table.
|
||||
/// Returns the column range associated with an InfluxDB Timestamp column
|
||||
/// or None if the table's schema does not have such a column.
|
||||
pub fn time_range(&self) -> Option<(i64, i64)> {
|
||||
self.table_data.read().unwrap().meta.time_range
|
||||
let table_data = self.table_data.read().unwrap();
|
||||
|
||||
let time_column = table_data
|
||||
.meta
|
||||
.columns
|
||||
.values()
|
||||
.filter(|cm| matches!(cm.typ, crate::schema::ColumnType::Timestamp(_)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if time_column.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
assert_eq!(time_column.len(), 1); // can only be one timestamp column.
|
||||
let range = &time_column[0].range;
|
||||
|
||||
let (min, max) = match (&range.0, &range.1) {
|
||||
(OwnedValue::Scalar(Scalar::I64(min)), OwnedValue::Scalar(Scalar::I64(max))) => {
|
||||
(min, max)
|
||||
}
|
||||
(min, max) => {
|
||||
panic!(
|
||||
"invalid range type for timestamp column: ({:?}, {:?})",
|
||||
min, max
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
Some((*min, *max))
|
||||
}
|
||||
|
||||
// Helper function used in tests.
|
||||
|
@ -612,7 +644,6 @@ impl MetaData {
|
|||
}
|
||||
|
||||
pub fn to_summary(&self, table_name: impl Into<String>) -> TableSummary {
|
||||
use crate::value::{OwnedValue, Scalar};
|
||||
use data_types::partition_metadata::{ColumnSummary, StatValues, Statistics};
|
||||
let columns = self
|
||||
.columns
|
||||
|
@ -1435,4 +1466,20 @@ west,host-b,100
|
|||
vec!["time".to_owned()],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn time_range() {
|
||||
// Build a row group.
|
||||
let mut columns = vec![];
|
||||
let tc = ColumnType::Time(Column::from(&[-29_i64, -100, 3, 2][..]));
|
||||
columns.push((row_group::TIME_COLUMN_NAME.to_string(), tc));
|
||||
|
||||
let rc = ColumnType::Tag(Column::from(&["west", "south", "north", "west"][..]));
|
||||
columns.push(("region".to_string(), rc));
|
||||
|
||||
let rg = RowGroup::new(4, columns);
|
||||
let table = Table::new("cpu".to_owned(), rg);
|
||||
|
||||
assert_eq!(table.time_range().unwrap(), (-100, 3));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,11 +14,12 @@ data_types = { path = "../data_types" }
|
|||
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
|
||||
# version of the flatbuffers crate
|
||||
flatbuffers = "0.8"
|
||||
futures = "0.3.7"
|
||||
futures = "0.3"
|
||||
generated_types = { path = "../generated_types" }
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
mutable_buffer = { path = "../mutable_buffer" }
|
||||
num_cpus = "1.13.0"
|
||||
object_store = { path = "../object_store" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parking_lot = "0.11.1"
|
||||
|
@ -35,4 +36,12 @@ tracker = { path = "../tracker" }
|
|||
uuid = { version = "0.8", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
criterion = { version = "0.3.4", features = ["async_tokio"] }
|
||||
flate2 = "1.0.20"
|
||||
tempfile = "3.1.0"
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
||||
[[bench]]
|
||||
name = "influxrpc"
|
||||
harness = false
|
||||
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
mod tag_values;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
|
||||
use tag_values::benchmark_tag_values;
|
||||
|
||||
criterion_group!(benches, benchmark_tag_values);
|
||||
criterion_main!(benches);
|
|
@ -0,0 +1,122 @@
|
|||
use std::io::Read;
|
||||
|
||||
use arrow_deps::datafusion::{logical_plan::Expr, scalar::ScalarValue};
|
||||
use criterion::{BenchmarkId, Criterion};
|
||||
// These benchmarks drive async execution with a Tokio `Runtime` via
|
||||
// Criterion's `async_tokio` support (`b.to_async`)
|
||||
use flate2::read::GzDecoder;
|
||||
use tokio::runtime::Runtime;
|
||||
|
||||
use query::frontend::influxrpc::InfluxRPCPlanner;
|
||||
use query::predicate::PredicateBuilder;
|
||||
use query::{exec::Executor, predicate::Predicate};
|
||||
use server::{benchmarks::scenarios::DBScenario, db::Db};
|
||||
|
||||
// Uses the `server::benchmarks::scenarios` module to generate some chunk scenarios, specifically
|
||||
// the scenarios where there are:
|
||||
//
|
||||
// - a single open mutable buffer chunk;
|
||||
// - a closed mutable buffer chunk and another open one;
|
||||
// - an open mutable buffer chunk and a closed read buffer chunk;
|
||||
// - two closed read buffer chunks.
|
||||
//
|
||||
// The chunks are all fed the *same* line protocol, so these benchmarks are
|
||||
// useful for assessing the differences in performance between querying the
|
||||
// chunks held in different execution engines.
|
||||
//
|
||||
// These benchmarks use a synthetically generated set of line protocol using
|
||||
// `inch`. Each point is a new series containing three tag keys. Those tag keys
|
||||
// are:
|
||||
//
|
||||
// - tag0, cardinality 10.
|
||||
// - tag1, cardinality 100.
|
||||
// - tag2, cardinality 1,000.
|
||||
//
|
||||
// The timespan of the points in the line protocol is around 1m of wall-clock
|
||||
// time.
|
||||
async fn setup_scenarios() -> Vec<DBScenario> {
|
||||
let raw = include_bytes!("../../tests/fixtures/lineproto/tag_values.lp.gz");
|
||||
let mut gz = GzDecoder::new(&raw[..]);
|
||||
let mut lp = String::new();
|
||||
gz.read_to_string(&mut lp).unwrap();
|
||||
|
||||
let db =
|
||||
server::benchmarks::scenarios::make_two_chunk_scenarios("2021-04-12T17", &lp, &lp).await;
|
||||
db
|
||||
}
|
||||
|
||||
// Run all benchmarks for `tag_values`.
|
||||
pub fn benchmark_tag_values(c: &mut Criterion) {
|
||||
let scenarios = Runtime::new().unwrap().block_on(setup_scenarios());
|
||||
|
||||
execute_benchmark_group(c, scenarios.as_slice());
|
||||
}
|
||||
|
||||
// Runs an async criterion benchmark against the provided scenarios and
|
||||
// predicate.
|
||||
fn execute_benchmark_group(c: &mut Criterion, scenarios: &[DBScenario]) {
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
|
||||
let predicates = vec![
|
||||
(PredicateBuilder::default().build(), "no_pred"),
|
||||
(
|
||||
PredicateBuilder::default()
|
||||
.add_expr(
|
||||
Expr::Column("tag2".to_owned()).eq(Expr::Literal(ScalarValue::Utf8(Some(
|
||||
"value321".to_owned(),
|
||||
)))),
|
||||
)
|
||||
.build(),
|
||||
"with_pred",
|
||||
),
|
||||
];
|
||||
|
||||
// these tags have different cardinalities: 10, 100, 1000.
|
||||
let tag_keys = &["tag0", "tag1", "tag2"];
|
||||
|
||||
for scenario in scenarios {
|
||||
let DBScenario { scenario_name, db } = scenario;
|
||||
let mut group = c.benchmark_group(scenario_name);
|
||||
|
||||
for (predicate, pred_name) in &predicates {
|
||||
for tag_key in tag_keys {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!("{}/{}", tag_key, pred_name)),
|
||||
tag_key,
|
||||
|b, &tag_key| {
|
||||
let executor = db.executor();
|
||||
b.to_async(Runtime::new().unwrap()).iter(|| {
|
||||
run_tag_values_query(
|
||||
&planner,
|
||||
executor.as_ref(),
|
||||
db,
|
||||
tag_key,
|
||||
predicate.clone(),
|
||||
)
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
|
||||
// Plans and runs a tag_values query.
|
||||
async fn run_tag_values_query(
|
||||
planner: &InfluxRPCPlanner,
|
||||
executor: &Executor,
|
||||
db: &Db,
|
||||
tag_key: &str,
|
||||
predicate: Predicate,
|
||||
) {
|
||||
let plan = planner
|
||||
.tag_values(db, &tag_key, predicate)
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
assert!(!names.is_empty());
|
||||
}
|
|
@ -9,6 +9,7 @@ use data_types::{
    DatabaseName,
};
use object_store::{path::ObjectStorePath, ObjectStore};
use query::exec::Executor;

/// This module contains code for managing the configuration of the server.
use crate::{db::Db, Error, JobRegistry, Result};

@ -114,7 +115,13 @@ impl Config {
        state.remotes.remove(&id)
    }

    fn commit(&self, rules: DatabaseRules, server_id: NonZeroU32, object_store: Arc<ObjectStore>) {
    fn commit(
        &self,
        rules: DatabaseRules,
        server_id: NonZeroU32,
        object_store: Arc<ObjectStore>,
        exec: Arc<Executor>,
    ) {
        let mut state = self.state.write().expect("mutex poisoned");
        let name = state
            .reservations

@ -131,6 +138,7 @@ impl Config {
            rules,
            server_id,
            object_store,
            exec,
            wal_buffer,
            Arc::clone(&self.jobs),
        ));

@ -253,9 +261,14 @@ pub(crate) struct CreateDatabaseHandle<'a> {
}

impl<'a> CreateDatabaseHandle<'a> {
    pub(crate) fn commit(mut self, server_id: NonZeroU32, object_store: Arc<ObjectStore>) {
    pub(crate) fn commit(
        mut self,
        server_id: NonZeroU32,
        object_store: Arc<ObjectStore>,
        exec: Arc<Executor>,
    ) {
        self.config
            .commit(self.rules.take().unwrap(), server_id, object_store)
            .commit(self.rules.take().unwrap(), server_id, object_store, exec)
    }

    pub(crate) fn rules(&self) -> &DatabaseRules {

@ -292,7 +305,8 @@ mod test {
        let db_reservation = config.create_db(rules).unwrap();
        let server_id = NonZeroU32::new(1).unwrap();
        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
        db_reservation.commit(server_id, store);
        let exec = Arc::new(Executor::new(1));
        db_reservation.commit(server_id, store, exec);
        assert!(config.db(&name).is_some());
        assert_eq!(config.db_names_sorted(), vec![name.clone()]);

@ -318,7 +332,8 @@ mod test {
        let db_reservation = config.create_db(rules).unwrap();
        let server_id = NonZeroU32::new(1).unwrap();
        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
        db_reservation.commit(server_id, store);
        let exec = Arc::new(Executor::new(1));
        db_reservation.commit(server_id, store, exec);

        let token = config
            .state
647 server/src/db.rs
@ -3,6 +3,7 @@

use std::any::Any;
use std::{
    convert::TryInto,
    num::NonZeroU32,
    sync::{
        atomic::{AtomicU64, AtomicUsize, Ordering},

@ -15,20 +16,24 @@ use observability_deps::tracing::{debug, info};
use parking_lot::{Mutex, RwLock};
use snafu::{ensure, OptionExt, ResultExt, Snafu};

use arrow_deps::datafusion::{
    catalog::{catalog::CatalogProvider, schema::SchemaProvider},
    physical_plan::SendableRecordBatchStream,
use arrow_deps::{
    arrow::datatypes::SchemaRef as ArrowSchemaRef,
    datafusion::{
        catalog::{catalog::CatalogProvider, schema::SchemaProvider},
        physical_plan::SendableRecordBatchStream,
    },
};

use catalog::{chunk::ChunkState, Catalog};
pub(crate) use chunk::DBChunk;
use data_types::{
    chunk::ChunkSummary, database_rules::DatabaseRules, partition_metadata::PartitionSummary,
    timestamp::TimestampRange,
};
use internal_types::{data::ReplicatedWrite, selection::Selection};
use internal_types::selection::Selection;
use object_store::ObjectStore;
use parquet_file::{chunk::Chunk, storage::Storage};
use query::{Database, DEFAULT_SCHEMA};
use query::{exec::Executor, Database, DEFAULT_SCHEMA};
use read_buffer::Chunk as ReadBufferChunk;
use tracker::{MemRegistry, TaskTracker, TrackedFutureExt};

@ -36,6 +41,7 @@ use super::{buffer::Buffer, JobRegistry};
use data_types::job::Job;

use data_types::partition_metadata::TableSummary;
use internal_types::entry::{self, ClockValue, Entry, SequencedEntry};
use lifecycle::LifecycleManager;
use system_tables::{SystemSchemaProvider, SYSTEM_SCHEMA};

@ -114,6 +120,18 @@ pub enum Error {
        chunk_id: u32,
    },

    #[snafu(display("Read Buffer Schema Error in chunk {}: {}", chunk_id, source))]
    ReadBufferChunkSchemaError {
        source: read_buffer::Error,
        chunk_id: u32,
    },

    #[snafu(display("Read Buffer Timestamp Error in chunk {}: {}", chunk_id, source))]
    ReadBufferChunkTimestampError {
        chunk_id: u32,
        source: read_buffer::Error,
    },

    #[snafu(display("Error writing to object store: {}", source))]
    WritingToObjectStore {
        source: parquet_file::storage::Error,

@ -131,6 +149,14 @@ pub enum Error {
        chunk_id: u32,
        source: mutable_buffer::chunk::Error,
    },

    #[snafu(display("Error building sequenced entry: {}", source))]
    SequencedEntryError { source: entry::Error },

    #[snafu(display("Error building sequenced entry: {}", source))]
    SchemaConversion {
        source: internal_types::schema::Error,
    },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
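As an aside, the new error variants above rely on snafu's derive and context-selector pattern. A minimal sketch of how such a variant is declared and attached to a fallible call, assuming snafu 0.6-style selectors as used in this file (the `ReadChunk` variant, file name, and `load` function are illustrative only):

use snafu::{ResultExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Error reading chunk {}: {}", chunk_id, source))]
    ReadChunk {
        chunk_id: u32,
        source: std::io::Error,
    },
}

type Result<T, E = Error> = std::result::Result<T, E>;

fn load(chunk_id: u32) -> Result<Vec<u8>> {
    // `.context(...)` wraps the io::Error into Error::ReadChunk, filling in chunk_id.
    std::fs::read("chunk.bin").context(ReadChunk { chunk_id })
}

fn main() {
    match load(42) {
        Ok(bytes) => println!("read {} bytes", bytes.len()),
        Err(e) => eprintln!("{}", e),
    }
}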
@ -197,8 +223,12 @@ pub struct Db {

    pub server_id: NonZeroU32, // this is also the Query Server ID

    /// Interface to use for persistence
    pub store: Arc<ObjectStore>,

    /// Executor for running queries
    exec: Arc<Executor>,

    /// The catalog holds chunks of data under partitions for the database.
    /// The underlying chunks may be backed by different execution engines
    /// depending on their stage in the data lifecycle. Currently there are

@ -245,6 +275,7 @@ impl Db {
        rules: DatabaseRules,
        server_id: NonZeroU32,
        object_store: Arc<ObjectStore>,
        exec: Arc<Executor>,
        wal_buffer: Option<Buffer>,
        jobs: Arc<JobRegistry>,
    ) -> Self {

@ -258,6 +289,7 @@ impl Db {
            rules,
            server_id,
            store,
            exec,
            catalog,
            wal_buffer,
            jobs,

@ -268,6 +300,11 @@ impl Db {
        }
    }

    /// Return a handle to the executor used to run queries
    pub fn executor(&self) -> Arc<Executor> {
        Arc::clone(&self.exec)
    }

    /// Rolls over the active chunk in the database's specified
    /// partition. Returns the previously open (now closed) Chunk
    pub async fn rollover_partition(&self, partition_key: &str) -> Result<Arc<DBChunk>> {
@ -421,7 +458,7 @@ impl Db {
|
|||
Ok(DBChunk::snapshot(&chunk))
|
||||
}
|
||||
|
||||
pub async fn load_chunk_to_object_store(
|
||||
pub async fn write_chunk_to_object_store(
|
||||
&self,
|
||||
partition_key: &str,
|
||||
chunk_id: u32,
|
||||
|
@ -480,17 +517,19 @@ impl Db {
|
|||
let predicate = read_buffer::Predicate::default();
|
||||
|
||||
// Get RecordBatchStream of data from the read buffer chunk
|
||||
// TODO: When we have the rb_chunk, the following code will be replaced with one
|
||||
// line let stream = rb_chunk.read_filter()
|
||||
let read_results = rb_chunk
|
||||
.read_filter(stats.name.as_str(), predicate, Selection::All)
|
||||
.context(ReadBufferChunkError { chunk_id })?;
|
||||
let schema = rb_chunk
|
||||
let arrow_schema: ArrowSchemaRef = rb_chunk
|
||||
.read_filter_table_schema(stats.name.as_str(), Selection::All)
|
||||
.context(ReadBufferChunkError { chunk_id })?
|
||||
.context(ReadBufferChunkSchemaError { chunk_id })?
|
||||
.into();
|
||||
let stream: SendableRecordBatchStream =
|
||||
Box::pin(streams::ReadFilterResultsStream::new(read_results, schema));
|
||||
let time_range = rb_chunk
|
||||
.table_time_range(stats.name.as_str())
|
||||
.context(ReadBufferChunkTimestampError { chunk_id })?;
|
||||
let stream: SendableRecordBatchStream = Box::pin(
|
||||
streams::ReadFilterResultsStream::new(read_results, Arc::clone(&arrow_schema)),
|
||||
);
|
||||
|
||||
// Write this table data into the object store
|
||||
let path = storage
|
||||
|
@ -504,7 +543,20 @@ impl Db {
|
|||
.context(WritingToObjectStore)?;
|
||||
|
||||
// Now add the saved info into the parquet_chunk
|
||||
parquet_chunk.add_table(stats, path);
|
||||
let schema = Arc::clone(&arrow_schema)
|
||||
.try_into()
|
||||
.context(SchemaConversion)?;
|
||||
let table_time_range = match time_range {
|
||||
None => None,
|
||||
Some((start, end)) => {
|
||||
if start < end {
|
||||
Some(TimestampRange::new(start, end))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
parquet_chunk.add_table(stats, path, schema, table_time_range);
|
||||
}
|
||||
|
||||
// Relock the chunk again (nothing else should have been able
|
||||
|
@ -524,7 +576,8 @@ impl Db {
|
|||
Ok(DBChunk::snapshot(&chunk))
|
||||
}
|
||||
|
||||
/// Spawns a task to perform load_chunk_to_read_buffer
|
||||
/// Spawns a task to perform
|
||||
/// [`load_chunk_to_read_buffer`](Self::load_chunk_to_read_buffer)
|
||||
pub fn load_chunk_to_read_buffer_in_background(
|
||||
self: &Arc<Self>,
|
||||
partition_key: String,
|
||||
|
@ -558,6 +611,41 @@ impl Db {
|
|||
tracker
|
||||
}
|
||||
|
||||
/// Spawns a task to perform
|
||||
/// [`write_chunk_to_object_store`](Self::write_chunk_to_object_store)
|
||||
pub fn write_chunk_to_object_store_in_background(
|
||||
self: &Arc<Self>,
|
||||
partition_key: String,
|
||||
chunk_id: u32,
|
||||
) -> TaskTracker<Job> {
|
||||
let name = self.rules.read().name.clone();
|
||||
let (tracker, registration) = self.jobs.register(Job::WriteChunk {
|
||||
db_name: name.to_string(),
|
||||
partition_key: partition_key.clone(),
|
||||
chunk_id,
|
||||
});
|
||||
|
||||
let captured = Arc::clone(&self);
|
||||
let task = async move {
|
||||
debug!(%name, %partition_key, %chunk_id, "background task loading chunk to object store");
|
||||
let result = captured
|
||||
.write_chunk_to_object_store(&partition_key, chunk_id)
|
||||
.await;
|
||||
if let Err(e) = result {
|
||||
info!(?e, %name, %partition_key, %chunk_id, "background task error loading object store chunk");
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
debug!(%name, %partition_key, %chunk_id, "background task completed writing chunk to object store");
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
tokio::spawn(task.track(registration));
|
||||
|
||||
tracker
|
||||
}
|
||||
|
||||
/// Returns the next write sequence number
|
||||
pub fn next_sequence(&self) -> u64 {
|
||||
self.sequence.fetch_add(1, Ordering::SeqCst)
|
||||
|
@ -624,6 +712,79 @@ impl Db {
|
|||
|
||||
info!("finished background worker");
|
||||
}
|
||||
|
||||
/// Stores an entry based on the configuration. The Entry will first be
|
||||
/// converted into a Sequenced Entry with the logical clock assigned
|
||||
/// from the database. If the write buffer is configured, the sequenced
|
||||
/// entry is written into the buffer and replicated based on the
|
||||
/// configured rules. If the mutable buffer is configured, the sequenced
|
||||
/// entry is then written into the mutable buffer.
|
||||
pub fn store_entry(&self, entry: Entry) -> Result<()> {
|
||||
// TODO: build this based on either this or on the write buffer, if configured
|
||||
let sequenced_entry = SequencedEntry::new_from_entry_bytes(
|
||||
ClockValue::new(self.next_sequence()),
|
||||
self.server_id.get(),
|
||||
entry.data(),
|
||||
)
|
||||
.context(SequencedEntryError)?;
|
||||
|
||||
if self.rules.read().wal_buffer_config.is_some() {
|
||||
todo!("route to the Write Buffer. TODO: carols10cents #1157")
|
||||
}
|
||||
|
||||
self.store_sequenced_entry(sequenced_entry)
|
||||
}
|
||||
|
||||
pub fn store_sequenced_entry(&self, sequenced_entry: SequencedEntry) -> Result<()> {
|
||||
let rules = self.rules.read();
|
||||
let mutable_size_threshold = rules.lifecycle_rules.mutable_size_threshold;
|
||||
if rules.lifecycle_rules.immutable {
|
||||
return DatabaseNotWriteable {}.fail();
|
||||
}
|
||||
std::mem::drop(rules);
|
||||
|
||||
// TODO: Direct writes to closing chunks
|
||||
|
||||
if let Some(partitioned_writes) = sequenced_entry.partition_writes() {
|
||||
for write in partitioned_writes {
|
||||
let partition_key = write.key();
|
||||
let partition = self.catalog.get_or_create_partition(partition_key);
|
||||
let mut partition = partition.write();
|
||||
partition.update_last_write_at();
|
||||
|
||||
let chunk = partition.open_chunk().unwrap_or_else(|| {
|
||||
partition.create_open_chunk(self.memory_registries.mutable_buffer.as_ref())
|
||||
});
|
||||
|
||||
let mut chunk = chunk.write();
|
||||
chunk.record_write();
|
||||
let chunk_id = chunk.id();
|
||||
|
||||
let mb_chunk = chunk.mutable_buffer().expect("cannot mutate open chunk");
|
||||
|
||||
mb_chunk
|
||||
.write_table_batches(
|
||||
sequenced_entry.clock_value(),
|
||||
sequenced_entry.writer_id(),
|
||||
&write.table_batches(),
|
||||
)
|
||||
.context(WriteEntry {
|
||||
partition_key,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let size = mb_chunk.size();
|
||||
|
||||
if let Some(threshold) = mutable_size_threshold {
|
||||
if size > threshold.get() {
|
||||
chunk.set_closing().expect("cannot close open chunk")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
@ -652,54 +813,6 @@ impl Database for Db {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn store_replicated_write(&self, write: &ReplicatedWrite) -> Result<(), Self::Error> {
|
||||
let rules = self.rules.read();
|
||||
let mutable_size_threshold = rules.lifecycle_rules.mutable_size_threshold;
|
||||
if rules.lifecycle_rules.immutable {
|
||||
return DatabaseNotWriteable {}.fail();
|
||||
}
|
||||
std::mem::drop(rules);
|
||||
|
||||
let entries = match write.write_buffer_batch().and_then(|batch| batch.entries()) {
|
||||
Some(entries) => entries,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
// TODO: Direct writes to closing chunks
|
||||
|
||||
for entry in entries.into_iter() {
|
||||
if let Some(partition_key) = entry.partition_key() {
|
||||
let partition = self.catalog.get_or_create_partition(partition_key);
|
||||
let mut partition = partition.write();
|
||||
partition.update_last_write_at();
|
||||
|
||||
let chunk = partition.open_chunk().unwrap_or_else(|| {
|
||||
partition.create_open_chunk(self.memory_registries.mutable_buffer.as_ref())
|
||||
});
|
||||
|
||||
let mut chunk = chunk.write();
|
||||
chunk.record_write();
|
||||
let chunk_id = chunk.id();
|
||||
|
||||
let mb_chunk = chunk.mutable_buffer().expect("cannot mutate open chunk");
|
||||
|
||||
mb_chunk.write_entry(&entry).context(WriteEntry {
|
||||
partition_key,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let size = mb_chunk.size();
|
||||
|
||||
if let Some(threshold) = mutable_size_threshold {
|
||||
if size > threshold.get() {
|
||||
chunk.set_closing().expect("cannot close open chunk")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn partition_keys(&self) -> Result<Vec<String>, Self::Error> {
|
||||
Ok(self.catalog.partition_keys())
|
||||
}
|
||||
|
@ -731,10 +844,25 @@ impl CatalogProvider for Db {
|
|||
}
|
||||
}
|
||||
|
||||
pub mod test_helpers {
|
||||
use super::*;
|
||||
use internal_types::entry::test_helpers::lp_to_entries;
|
||||
|
||||
pub fn write_lp(db: &Db, lp: &str) {
|
||||
let entries = lp_to_entries(lp);
|
||||
for entry in entries {
|
||||
db.store_entry(entry).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::query_tests::utils::{make_database, make_db};
|
||||
use ::test_helpers::assert_contains;
|
||||
use arrow_deps::{
|
||||
arrow::record_batch::RecordBatch, assert_table_eq, datafusion::physical_plan::collect,
|
||||
arrow::record_batch::RecordBatch, assert_batches_sorted_eq, assert_table_eq,
|
||||
datafusion::execution::context,
|
||||
};
|
||||
use chrono::Utc;
|
||||
use data_types::{
|
||||
|
@ -742,23 +870,32 @@ mod tests {
|
|||
database_rules::{Order, Sort, SortOrder},
|
||||
partition_metadata::{ColumnSummary, StatValues, Statistics, TableSummary},
|
||||
};
|
||||
use query::{
|
||||
exec::Executor, frontend::sql::SQLQueryPlanner, test::TestLPWriter, PartitionChunk,
|
||||
use object_store::{
|
||||
disk::File, path::ObjectStorePath, path::Path, ObjectStore, ObjectStoreApi,
|
||||
};
|
||||
use test_helpers::assert_contains;
|
||||
|
||||
use crate::query_tests::utils::make_db;
|
||||
use query::{frontend::sql::SQLQueryPlanner, PartitionChunk};
|
||||
|
||||
use super::*;
|
||||
use futures::stream;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use std::iter::Iterator;
|
||||
|
||||
use super::test_helpers::write_lp;
|
||||
use internal_types::entry::test_helpers::lp_to_entry;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::str;
|
||||
use tempfile::TempDir;
|
||||
|
||||
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_no_mutable_buffer() {
|
||||
// Validate that writes are rejected if there is no mutable buffer
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
db.rules.write().lifecycle_rules.immutable = true;
|
||||
let res = writer.write_lp_string(&db, "cpu bar=1 10");
|
||||
let entry = lp_to_entry("cpu bar=1 10");
|
||||
let res = db.store_entry(entry);
|
||||
assert_contains!(
|
||||
res.unwrap_err().to_string(),
|
||||
"Cannot write to this database: no mutable buffer configured"
|
||||
|
@ -768,8 +905,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn read_write() {
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
|
||||
let batches = run_query(db, "select * from cpu").await;
|
||||
|
||||
|
@ -786,9 +922,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn write_with_rollover() {
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
//writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
assert_eq!(vec!["1970-01-01T00"], db.partition_keys().unwrap());
|
||||
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
@ -802,10 +936,10 @@ mod tests {
|
|||
"+-----+------+",
|
||||
];
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_table_eq!(expected, &batches);
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
|
||||
// add new data
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=2 20").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
|
@ -815,20 +949,19 @@ mod tests {
|
|||
"+-----+------+",
|
||||
];
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_table_eq!(&expected, &batches);
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
|
||||
// And expect that we still get the same thing when data is rolled over again
|
||||
let chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
assert_eq!(chunk.id(), 1);
|
||||
|
||||
let batches = run_query(db, "select * from cpu").await;
|
||||
assert_table_eq!(&expected, &batches);
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_with_missing_tags_are_null() {
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
// Note the `region` tag is introduced in the second line, so
|
||||
// the values in prior rows for the region column are
|
||||
// null. Likewise the `core` tag is introduced in the third
|
||||
|
@ -839,9 +972,7 @@ mod tests {
|
|||
"cpu,core=one user=10.0 11",
|
||||
];
|
||||
|
||||
writer
|
||||
.write_lp_string(db.as_ref(), &lines.join("\n"))
|
||||
.unwrap();
|
||||
write_lp(db.as_ref(), &lines.join("\n"));
|
||||
assert_eq!(vec!["1970-01-01T00"], db.partition_keys().unwrap());
|
||||
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
@ -864,12 +995,11 @@ mod tests {
|
|||
async fn read_from_read_buffer() {
|
||||
// Test that data can be loaded into the ReadBuffer
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=2 20").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
let mb_chunk = db.rollover_partition(partition_key).await.unwrap();
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
|
@ -909,14 +1039,221 @@ mod tests {
|
|||
// cpu").await; assert_table_eq!(expected, &batches);
|
||||
}
|
||||
|
||||
async fn flatten_list_stream(
|
||||
storage: Arc<ObjectStore>,
|
||||
prefix: Option<&Path>,
|
||||
) -> Result<Vec<Path>> {
|
||||
storage
|
||||
.list(prefix)
|
||||
.await?
|
||||
.map_ok(|v| stream::iter(v).map(Ok))
|
||||
.try_flatten()
|
||||
.try_collect()
|
||||
.await
|
||||
}
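The `flatten_list_stream` helper above leans on futures' `TryStreamExt` combinators to turn a paged listing into one flat collection. A standalone sketch of the same flattening pattern, using plain vectors in place of object-store paths (only the `futures` crate is assumed; names are illustrative):

use futures::executor::block_on;
use futures::stream::{self, StreamExt, TryStreamExt};

fn main() -> Result<(), std::io::Error> {
    // A paged listing, analogous to ObjectStore::list: each item is one page of results.
    let pages = stream::iter(vec![Ok::<_, std::io::Error>(vec![1u32, 2]), Ok(vec![3])]);

    // Flatten the pages into a single Vec, propagating the first error if any.
    let flat: Vec<u32> = block_on(
        pages
            .map_ok(|page| stream::iter(page).map(Ok))
            .try_flatten()
            .try_collect(),
    )?;

    assert_eq!(flat, vec![1, 2, 3]);
    Ok(())
}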
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_one_chunk_one_table_to_parquet_file() {
|
||||
// Test that data can be written into parquet files
|
||||
|
||||
// Create an object store with a specified location in a local disk
|
||||
let root = TempDir::new().unwrap();
|
||||
let object_store = Arc::new(ObjectStore::new_file(File::new(root.path())));
|
||||
|
||||
// Create a DB given a server id, an object store and a db name
|
||||
let server_id: NonZeroU32 = NonZeroU32::new(10).unwrap();
|
||||
let db_name = "parquet_test_db";
|
||||
let db = Arc::new(make_database(server_id, Arc::clone(&object_store), db_name));
|
||||
|
||||
// Write some line protocols in Mutable buffer of the DB
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
// Now mark the MB chunk closed
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
// Move that MB chunk to RB chunk and drop it from MB
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
// Write the RB chunk to Object Store but keep it in RB
|
||||
let pq_chunk = db
|
||||
.write_chunk_to_object_store(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// it should be the same chunk!
|
||||
assert_eq!(mb_chunk.id(), rb_chunk.id());
|
||||
assert_eq!(mb_chunk.id(), pq_chunk.id());
|
||||
|
||||
// we should have chunks in the mutable buffer, read buffer, and object store
|
||||
// (Note the currently open chunk is not listed)
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![1]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(read_parquet_file_chunk_ids(&db, partition_key), vec![0]);
|
||||
|
||||
// Verify data written to the parquet file in object store
|
||||
// First, there must be one path of object store in the catalog
|
||||
let paths = pq_chunk.object_store_paths();
|
||||
assert_eq!(paths.len(), 1);
|
||||
|
||||
// Check that the path must exist in the object store
|
||||
let path_list = flatten_list_stream(Arc::clone(&object_store), Some(&paths[0]))
|
||||
.await
|
||||
.unwrap();
|
||||
println!("path_list: {:#?}", path_list);
|
||||
assert_eq!(path_list.len(), 1);
|
||||
assert_eq!(path_list, paths.clone());
|
||||
|
||||
// Get full string path
|
||||
let root_path = format!("{:?}", root.path());
|
||||
let root_path = root_path.trim_matches('"');
|
||||
let path = format!("{}/{}", root_path, paths[0].display());
|
||||
println!("path: {}", path);
|
||||
|
||||
// Create External table of this parquet file to get its content in a human
|
||||
// readable form
|
||||
// Note: We do not care about escaping quotes here because it is just a test
|
||||
let sql = format!(
|
||||
"CREATE EXTERNAL TABLE parquet_table STORED AS PARQUET LOCATION '{}'",
|
||||
path
|
||||
);
|
||||
|
||||
let mut ctx = context::ExecutionContext::new();
|
||||
let df = ctx.sql(&sql).unwrap();
|
||||
df.collect().await.unwrap();
|
||||
|
||||
// Select data from that table
|
||||
let sql = "SELECT * FROM parquet_table";
|
||||
let content = ctx.sql(&sql).unwrap().collect().await.unwrap();
|
||||
println!("Content: {:?}", content);
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 10 |",
|
||||
"| 2 | 20 |",
|
||||
"+-----+------+",
|
||||
];
|
||||
assert_table_eq!(expected, &content);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_one_chunk_many_tables_to_parquet_files() {
|
||||
// Test that data can be written into parquet files
|
||||
|
||||
// Create an object store with a specified location in a local disk
|
||||
let root = TempDir::new().unwrap();
|
||||
let object_store = Arc::new(ObjectStore::new_file(File::new(root.path())));
|
||||
|
||||
// Create a DB given a server id, an object store and a db name
|
||||
let server_id: NonZeroU32 = NonZeroU32::new(10).unwrap();
|
||||
let db_name = "parquet_test_db";
|
||||
let db = Arc::new(make_database(server_id, Arc::clone(&object_store), db_name));
|
||||
|
||||
// Write some line protocols in Mutable buffer of the DB
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "disk ops=1 20");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
// Now mark the MB chunk closed
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
// Move that MB chunk to RB chunk and drop it from MB
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
// Write the RB chunk to Object Store but keep it in RB
|
||||
let pq_chunk = db
|
||||
.write_chunk_to_object_store(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// it should be the same chunk!
|
||||
assert_eq!(mb_chunk.id(), rb_chunk.id());
|
||||
assert_eq!(mb_chunk.id(), pq_chunk.id());
|
||||
|
||||
// we should have chunks in the mutable buffer, read buffer, and object store
|
||||
// (Note the currently open chunk is not listed)
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![1]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(read_parquet_file_chunk_ids(&db, partition_key), vec![0]);
|
||||
|
||||
// Verify data written to the parquet files in object store
|
||||
// First, there must be 2 paths of object store in the catalog
|
||||
// that represents 2 files
|
||||
let paths = pq_chunk.object_store_paths();
|
||||
assert_eq!(paths.len(), 2);
|
||||
|
||||
// Check that the path must exist in the object store
|
||||
let prefix = object_store.new_path();
|
||||
let path_list = flatten_list_stream(Arc::clone(&object_store), Some(&prefix))
|
||||
.await
|
||||
.unwrap();
|
||||
println!("path_list: {:#?}", path_list);
|
||||
assert_eq!(path_list.len(), 2);
|
||||
|
||||
// Check the content of each path
|
||||
//
|
||||
// Root path
|
||||
let root_path = format!("{:?}", root.path());
|
||||
let root_path = root_path.trim_matches('"');
|
||||
|
||||
for path in path_list {
|
||||
// Get full string path
|
||||
let path_string = format!("{}/{}", root_path, path.display());
|
||||
println!("path: {}", path_string);
|
||||
|
||||
// Create External table of this parquet file to get its content in a human
|
||||
// readable form
|
||||
// Note: We do not care about escaping quotes here because it is just a test
|
||||
let sql = format!(
|
||||
"CREATE EXTERNAL TABLE parquet_table STORED AS PARQUET LOCATION '{}'",
|
||||
path_string
|
||||
);
|
||||
|
||||
let mut ctx = context::ExecutionContext::new();
|
||||
let df = ctx.sql(&sql).unwrap();
|
||||
df.collect().await.unwrap();
|
||||
|
||||
// Select data from that table
|
||||
let sql = "SELECT * FROM parquet_table";
|
||||
let content = ctx.sql(&sql).unwrap().collect().await.unwrap();
|
||||
println!("Content: {:?}", content);
|
||||
let expected = if path_string.contains("cpu") {
|
||||
// file name: cpu.parquet
|
||||
vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 10 |",
|
||||
"| 2 | 20 |",
|
||||
"+-----+------+",
|
||||
]
|
||||
} else {
|
||||
// file name: disk.parquet
|
||||
vec![
|
||||
"+-----+------+",
|
||||
"| ops | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 20 |",
|
||||
"+-----+------+",
|
||||
]
|
||||
};
|
||||
|
||||
assert_table_eq!(expected, &content);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_updates_last_write_at() {
|
||||
let db = make_db();
|
||||
let before_create = Utc::now();
|
||||
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
let after_write = Utc::now();
|
||||
|
||||
let last_write_prev = {
|
||||
|
@ -929,7 +1266,7 @@ mod tests {
|
|||
partition.last_write_at()
|
||||
};
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 20").unwrap();
|
||||
write_lp(&db, "cpu bar=1 20");
|
||||
{
|
||||
let partition = db.catalog.valid_partition(partition_key).unwrap();
|
||||
let partition = partition.read();
|
||||
|
@ -943,8 +1280,7 @@ mod tests {
|
|||
let db = make_db();
|
||||
|
||||
// Given data loaded into two chunks
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
let after_data_load = Utc::now();
|
||||
|
||||
// When the chunk is rolled over
|
||||
|
@ -977,9 +1313,8 @@ mod tests {
|
|||
db.rules.write().lifecycle_rules.mutable_size_threshold =
|
||||
Some(NonZeroUsize::new(2).unwrap());
|
||||
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(&db, "cpu bar=1 20").unwrap();
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
write_lp(&db, "cpu bar=1 20");
|
||||
|
||||
let partitions = db.catalog.partition_keys();
|
||||
assert_eq!(partitions.len(), 1);
|
||||
|
@ -996,15 +1331,10 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn chunks_sorted_by_times() {
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu val=1 1").unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "mem val=2 400000000000001")
|
||||
.unwrap();
|
||||
writer.write_lp_string(&db, "cpu val=1 2").unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "mem val=2 400000000000002")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu val=1 1");
|
||||
write_lp(&db, "mem val=2 400000000000001");
|
||||
write_lp(&db, "cpu val=1 2");
|
||||
write_lp(&db, "mem val=2 400000000000002");
|
||||
|
||||
let sort_rules = SortOrder {
|
||||
order: Order::Desc,
|
||||
|
@ -1035,9 +1365,9 @@ mod tests {
|
|||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(&db, "cpu bar=1 20").unwrap();
|
||||
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
write_lp(&db, "cpu bar=1 20");
|
||||
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(
|
||||
|
@ -1051,13 +1381,13 @@ mod tests {
|
|||
|
||||
// add a new chunk in mutable buffer, and move chunk1 (but
|
||||
// not chunk 0) to read buffer
|
||||
writer.write_lp_string(&db, "cpu bar=1 30").unwrap();
|
||||
write_lp(&db, "cpu bar=1 30");
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
db.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 40").unwrap();
|
||||
write_lp(&db, "cpu bar=1 40");
|
||||
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![0, 2]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![1]);
|
||||
|
@ -1086,15 +1416,12 @@ mod tests {
|
|||
async fn partition_chunk_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
||||
// write into a separate partition
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1,baz2,frob=3 400000000000000")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1,baz2,frob=3 400000000000000");
|
||||
|
||||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
|
@ -1131,11 +1458,10 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn partition_chunk_summaries_timestamp() {
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let start = Utc::now();
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
let after_first_write = Utc::now();
|
||||
writer.write_lp_string(&db, "cpu bar=2 2").unwrap();
|
||||
write_lp(&db, "cpu bar=2 2");
|
||||
db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
let after_close = Utc::now();
|
||||
|
||||
|
@ -1183,17 +1509,13 @@ mod tests {
|
|||
async fn chunk_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
// get three chunks: one open, one closed in mb and one closed in rb
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1,baz=2 2").unwrap();
|
||||
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1,baz=2,frob=3 400000000000000")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1,baz=2 2");
|
||||
write_lp(&db, "cpu bar=1,baz=2,frob=3 400000000000000");
|
||||
|
||||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
|
@ -1204,9 +1526,7 @@ mod tests {
|
|||
print!("Partitions2: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
db.rollover_partition("1970-01-05T15").await.unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1,baz=3,blargh=3 400000000000000")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000");
|
||||
|
||||
fn to_arc(s: &str) -> Arc<String> {
|
||||
Arc::new(s.to_string())
|
||||
|
@ -1256,12 +1576,11 @@ mod tests {
|
|||
async fn partition_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
let chunk_id = db.rollover_partition("1970-01-01T00").await.unwrap().id();
|
||||
writer.write_lp_string(&db, "cpu bar=2,baz=3.0 2").unwrap();
|
||||
writer.write_lp_string(&db, "mem foo=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=2,baz=3.0 2");
|
||||
write_lp(&db, "mem foo=1 1");
|
||||
|
||||
// load a chunk to the read buffer
|
||||
db.load_chunk_to_read_buffer("1970-01-01T00", chunk_id)
|
||||
|
@ -1269,12 +1588,8 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
// write into a separate partition
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1 400000000000000")
|
||||
.unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "mem frob=3 400000000000001")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1 400000000000000");
|
||||
write_lp(&db, "mem frob=3 400000000000001");
|
||||
|
||||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
|
@ -1398,11 +1713,11 @@ mod tests {
|
|||
// run a sql query against the database, returning the results as record batches
|
||||
async fn run_query(db: Arc<Db>, query: &str) -> Vec<RecordBatch> {
|
||||
let planner = SQLQueryPlanner::default();
|
||||
let executor = Executor::new();
|
||||
let executor = db.executor();
|
||||
|
||||
let physical_plan = planner.query(db, query, &executor).await.unwrap();
|
||||
let physical_plan = planner.query(db, query, &executor).unwrap();
|
||||
|
||||
collect(physical_plan).await.unwrap()
|
||||
executor.collect(physical_plan).await.unwrap()
|
||||
}
|
||||
|
||||
fn mutable_chunk_ids(db: &Db, partition_key: &str) -> Vec<u32> {
|
||||
|
@ -1426,10 +1741,62 @@ mod tests {
|
|||
.into_iter()
|
||||
.filter_map(|chunk| match chunk.storage {
|
||||
ChunkStorage::ReadBuffer => Some(chunk.id),
|
||||
ChunkStorage::ReadBufferAndObjectStore => Some(chunk.id),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
chunk_ids.sort_unstable();
|
||||
chunk_ids
|
||||
}
|
||||
|
||||
fn read_parquet_file_chunk_ids(db: &Db, partition_key: &str) -> Vec<u32> {
|
||||
let mut chunk_ids: Vec<u32> = db
|
||||
.partition_chunk_summaries(partition_key)
|
||||
.into_iter()
|
||||
.filter_map(|chunk| match chunk.storage {
|
||||
ChunkStorage::ReadBufferAndObjectStore => Some(chunk.id),
|
||||
ChunkStorage::ObjectStoreOnly => Some(chunk.id),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
chunk_ids.sort_unstable();
|
||||
chunk_ids
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_chunk_to_object_store_in_background() {
|
||||
// Test that data can be written to object store using a background task
|
||||
let db = Arc::new(make_db());
|
||||
|
||||
// create MB partition
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
// MB => RB
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition(partition_key).await.unwrap();
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(mb_chunk.id(), rb_chunk.id());
|
||||
|
||||
// RB => OS
|
||||
let task =
|
||||
db.write_chunk_to_object_store_in_background(partition_key.to_string(), rb_chunk.id());
|
||||
let t_start = std::time::Instant::now();
|
||||
while !task.is_complete() {
|
||||
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
|
||||
assert!(
|
||||
std::time::Instant::now() - t_start < std::time::Duration::from_secs(10),
|
||||
"task deadline exceeded"
|
||||
);
|
||||
}
|
||||
|
||||
// we should have chunks in the mutable buffer, read buffer, and object store
|
||||
// (Note the currently open chunk is not listed)
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![1]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(read_parquet_file_chunk_ids(&db, partition_key), vec![0]);
|
||||
}
|
||||
}
|
||||
|
|
@ -171,8 +171,12 @@ impl Chunk {
            ChunkState::Closing(chunk) => (chunk.size(), ChunkStorage::ClosedMutableBuffer),
            ChunkState::Moving(chunk) => (chunk.size(), ChunkStorage::ClosedMutableBuffer),
            ChunkState::Moved(chunk) => (chunk.size(), ChunkStorage::ReadBuffer),
            ChunkState::WritingToObjectStore(chunk) => (chunk.size(), ChunkStorage::ObjectStore),
            ChunkState::WrittenToObjectStore(chunk, _) => (chunk.size(), ChunkStorage::ObjectStore),
            ChunkState::WritingToObjectStore(chunk) => {
                (chunk.size(), ChunkStorage::ReadBufferAndObjectStore)
            }
            ChunkState::WrittenToObjectStore(chunk, _) => {
                (chunk.size(), ChunkStorage::ReadBufferAndObjectStore)
            }
        };

        ChunkSummary {
|
|
@ -1,24 +1,28 @@
|
|||
use arrow_deps::datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use mutable_buffer::chunk::Chunk as MBChunk;
|
||||
use mutable_buffer::chunk::snapshot::ChunkSnapshot;
|
||||
use object_store::path::Path;
|
||||
use observability_deps::tracing::debug;
|
||||
use parquet_file::chunk::Chunk as ParquetChunk;
|
||||
use query::{exec::stringset::StringSet, predicate::Predicate, PartitionChunk};
|
||||
use read_buffer::Chunk as ReadBufferChunk;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use std::{collections::BTreeSet, sync::Arc};
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use super::{
|
||||
pred::{to_mutable_buffer_predicate, to_read_buffer_predicate},
|
||||
streams::{MutableBufferChunkStream, ReadFilterResultsStream},
|
||||
pred::to_read_buffer_predicate,
|
||||
streams::{MemoryStream, ReadFilterResultsStream},
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Mutable Buffer Chunk Error: {}", source))]
|
||||
MutableBufferChunk {
|
||||
source: mutable_buffer::chunk::Error,
|
||||
source: mutable_buffer::chunk::snapshot::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Read Buffer Error in chunk {}: {}", chunk_id, source))]
|
||||
|
@ -27,6 +31,15 @@ pub enum Error {
|
|||
chunk_id: u32,
|
||||
},
|
||||
|
||||
#[snafu(display("Read Buffer Error in chunk {}: {}", chunk_id, msg))]
|
||||
ReadBufferError { chunk_id: u32, msg: String },
|
||||
|
||||
#[snafu(display("Parquet File Error in chunk {}: {}", chunk_id, source))]
|
||||
ParquetFileChunkError {
|
||||
source: parquet_file::chunk::Error,
|
||||
chunk_id: u32,
|
||||
},
|
||||
|
||||
#[snafu(display("Internal error restricting schema: {}", source))]
|
||||
InternalSelectingSchema {
|
||||
source: internal_types::schema::Error,
|
||||
|
@ -58,10 +71,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
|||
#[derive(Debug)]
|
||||
pub enum DBChunk {
|
||||
MutableBuffer {
|
||||
chunk: Arc<MBChunk>,
|
||||
partition_key: Arc<String>,
|
||||
/// is this chunk open for writing?
|
||||
open: bool,
|
||||
chunk: Arc<ChunkSnapshot>,
|
||||
},
|
||||
ReadBuffer {
|
||||
chunk: Arc<ReadBufferChunk>,
|
||||
|
@ -83,36 +93,12 @@ impl DBChunk {
|
|||
ChunkState::Invalid => {
|
||||
panic!("Invalid internal state");
|
||||
}
|
||||
ChunkState::Open(chunk) => {
|
||||
// TODO the performance if cloning the chunk is terrible
|
||||
// Proper performance is tracked in
|
||||
// https://github.com/influxdata/influxdb_iox/issues/635
|
||||
let chunk = Arc::new(chunk.clone());
|
||||
Self::MutableBuffer {
|
||||
chunk,
|
||||
partition_key,
|
||||
open: true,
|
||||
}
|
||||
}
|
||||
ChunkState::Closing(chunk) => {
|
||||
// TODO the performance if cloning the chunk is terrible
|
||||
// Proper performance is tracked in
|
||||
// https://github.com/influxdata/influxdb_iox/issues/635
|
||||
let chunk = Arc::new(chunk.clone());
|
||||
Self::MutableBuffer {
|
||||
chunk,
|
||||
partition_key,
|
||||
open: false,
|
||||
}
|
||||
}
|
||||
ChunkState::Moving(chunk) => {
|
||||
let chunk = Arc::clone(chunk);
|
||||
Self::MutableBuffer {
|
||||
chunk,
|
||||
partition_key,
|
||||
open: false,
|
||||
}
|
||||
}
|
||||
ChunkState::Open(chunk) | ChunkState::Closing(chunk) => Self::MutableBuffer {
|
||||
chunk: chunk.snapshot(),
|
||||
},
|
||||
ChunkState::Moving(chunk) => Self::MutableBuffer {
|
||||
chunk: chunk.snapshot(),
|
||||
},
|
||||
ChunkState::Moved(chunk) => Self::ReadBuffer {
|
||||
chunk: Arc::clone(chunk),
|
||||
partition_key,
|
||||
|
@ -128,6 +114,14 @@ impl DBChunk {
|
|||
};
|
||||
Arc::new(db_chunk)
|
||||
}
|
||||
|
||||
/// Return object store paths
|
||||
pub fn object_store_paths(&self) -> Vec<Path> {
|
||||
match self {
|
||||
Self::ParquetFile { chunk } => chunk.all_paths(),
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartitionChunk for DBChunk {
|
||||
|
@ -135,15 +129,17 @@ impl PartitionChunk for DBChunk {
|
|||
|
||||
fn id(&self) -> u32 {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => chunk.id(),
|
||||
Self::MutableBuffer { chunk, .. } => chunk.chunk_id(),
|
||||
Self::ReadBuffer { chunk, .. } => chunk.id(),
|
||||
Self::ParquetFile { .. } => unimplemented!("parquet file not implemented"),
|
||||
Self::ParquetFile { chunk, .. } => chunk.id(),
|
||||
}
|
||||
}
|
||||
|
||||
fn all_table_names(&self, known_tables: &mut StringSet) {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => chunk.all_table_names(known_tables),
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
known_tables.extend(chunk.table_names(None).cloned())
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
// TODO - align APIs so they behave in the same way...
|
||||
let rb_names = chunk.all_table_names(known_tables);
|
||||
|
@ -151,42 +147,22 @@ impl PartitionChunk for DBChunk {
|
|||
known_tables.insert(name);
|
||||
}
|
||||
}
|
||||
Self::ParquetFile { .. } => unimplemented!("parquet file not implemented"),
|
||||
Self::ParquetFile { chunk, .. } => chunk.all_table_names(known_tables),
|
||||
}
|
||||
}
|
||||
|
||||
fn table_names(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
_known_tables: &StringSet,
|
||||
_known_tables: &StringSet, // TODO: Should this be being used?
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
let names = match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
if chunk.is_empty() {
|
||||
Some(StringSet::new())
|
||||
} else {
|
||||
let chunk_predicate = match to_mutable_buffer_predicate(chunk, predicate) {
|
||||
Ok(chunk_predicate) => chunk_predicate,
|
||||
Err(e) => {
|
||||
debug!(?predicate, %e, "mutable buffer predicate not supported for table_names, falling back");
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
// we don't support arbitrary expressions in chunk predicate yet
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let names = chunk
|
||||
.table_names(&chunk_predicate)
|
||||
.context(MutableBufferChunk)?
|
||||
.into_iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<StringSet>();
|
||||
|
||||
Some(names)
|
||||
}
|
||||
if predicate.has_exprs() {
|
||||
// TODO: Support more predicates
|
||||
return Ok(None);
|
||||
}
|
||||
chunk.table_names(predicate.range).cloned().collect()
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
// If not supported, ReadBuffer can't answer with
|
||||
|
@ -199,26 +175,19 @@ impl PartitionChunk for DBChunk {
|
|||
}
|
||||
};
|
||||
|
||||
Some(chunk.table_names(&rb_predicate, &BTreeSet::new()))
|
||||
}
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented")
|
||||
chunk.table_names(&rb_predicate, &BTreeSet::new())
|
||||
}
|
||||
Self::ParquetFile { chunk, .. } => chunk.table_names(predicate.range).collect(),
|
||||
};
|
||||
|
||||
// Prune out tables that should not be
|
||||
// present (based on additional table restrictions of the Predicate)
|
||||
//
|
||||
// This is needed because at time of writing, the ReadBuffer's
|
||||
// table_names implementation doesn't include any way to
|
||||
// further restrict the tables to a known set of tables
|
||||
let names = names.map(|names| {
|
||||
Ok(Some(
|
||||
names
|
||||
.into_iter()
|
||||
.filter(|table_name| predicate.should_include_table(table_name))
|
||||
.collect()
|
||||
});
|
||||
Ok(names)
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
fn table_schema(
|
||||
|
@ -253,8 +222,12 @@ impl PartitionChunk for DBChunk {
|
|||
|
||||
Ok(schema)
|
||||
}
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented for table schema")
|
||||
Self::ParquetFile { chunk, .. } => {
|
||||
chunk
|
||||
.table_schema(table_name, selection)
|
||||
.context(ParquetFileChunkError {
|
||||
chunk_id: chunk.id(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -263,9 +236,7 @@ impl PartitionChunk for DBChunk {
|
|||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => chunk.has_table(table_name),
|
||||
Self::ReadBuffer { chunk, .. } => chunk.has_table(table_name),
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented for has_table")
|
||||
}
|
||||
Self::ParquetFile { chunk, .. } => chunk.has_table(table_name),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -277,22 +248,17 @@ impl PartitionChunk for DBChunk {
|
|||
) -> Result<SendableRecordBatchStream, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
// Note MutableBuffer doesn't support predicate
|
||||
// pushdown (other than pruning out the entire chunk
|
||||
// via `might_pass_predicate)
|
||||
if !predicate.is_empty() {
|
||||
return InternalPredicateNotSupported {
|
||||
predicate: predicate.clone(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
let schema: Schema = self.table_schema(table_name, selection)?;
|
||||
let batch = chunk
|
||||
.read_filter(table_name, selection)
|
||||
.context(MutableBufferChunk)?;
|
||||
|
||||
Ok(Box::pin(MutableBufferChunkStream::new(
|
||||
Arc::clone(&chunk),
|
||||
schema.as_arrow(),
|
||||
table_name,
|
||||
)))
|
||||
Ok(Box::pin(MemoryStream::new(batch)))
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
// Error converting to a rb_predicate needs to fail
|
||||
|
@ -354,17 +320,11 @@ impl PartitionChunk for DBChunk {
|
|||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
let chunk_predicate = match to_mutable_buffer_predicate(chunk, predicate) {
|
||||
Ok(chunk_predicate) => chunk_predicate,
|
||||
Err(e) => {
|
||||
debug!(?predicate, %e, "mutable buffer predicate not supported for column_names, falling back");
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
chunk
|
||||
.column_names(table_name, &chunk_predicate, columns)
|
||||
.context(MutableBufferChunk)
|
||||
if !predicate.is_empty() {
|
||||
// TODO: Support predicates
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(chunk.column_names(table_name, columns))
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
let rb_predicate = match to_read_buffer_predicate(&predicate) {
|
||||
|
@ -396,32 +356,47 @@ impl PartitionChunk for DBChunk {
|
|||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
use mutable_buffer::chunk::Error::UnsupportedColumnTypeForListingValues;
|
||||
|
||||
let chunk_predicate = match to_mutable_buffer_predicate(chunk, predicate) {
|
||||
Ok(chunk_predicate) => chunk_predicate,
|
||||
Self::MutableBuffer { .. } => {
|
||||
// There is no advantage to manually implementing this
|
||||
// vs just letting DataFusion do its thing
|
||||
Ok(None)
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
let rb_predicate = match to_read_buffer_predicate(predicate) {
|
||||
Ok(rb_predicate) => rb_predicate,
|
||||
Err(e) => {
|
||||
debug!(?predicate, %e, "mutable buffer predicate not supported for column_values, falling back");
|
||||
debug!(?predicate, %e, "read buffer predicate not supported for column_names, falling back");
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
let values = chunk.tag_column_values(table_name, column_name, &chunk_predicate);
|
||||
let mut values = chunk
|
||||
.column_values(
|
||||
table_name,
|
||||
rb_predicate,
|
||||
Selection::Some(&[column_name]),
|
||||
BTreeMap::new(),
|
||||
)
|
||||
.context(ReadBufferChunkError {
|
||||
chunk_id: chunk.id(),
|
||||
})?;
|
||||
|
||||
// if the mutable buffer doesn't support getting
|
||||
// values for this kind of column, report back None
|
||||
if let Err(UnsupportedColumnTypeForListingValues { .. }) = values {
|
||||
Ok(None)
|
||||
} else {
|
||||
values.context(MutableBufferChunk)
|
||||
}
|
||||
}
|
||||
Self::ReadBuffer { .. } => {
|
||||
// TODO hook up read buffer API here when ready. Until
|
||||
// now, fallback to using a full plan
|
||||
// https://github.com/influxdata/influxdb_iox/issues/857
|
||||
Ok(None)
|
||||
// The InfluxRPC frontend only supports getting column values
|
||||
// for one column at a time (this is a restriction on the Influx
|
||||
// Read gRPC API too). However, the Read Buffer support multiple
|
||||
// columns and will return a map - we just need to pull the
|
||||
// column out to get the set of values.
|
||||
let values = values
|
||||
.remove(column_name)
|
||||
.ok_or_else(|| Error::ReadBufferError {
|
||||
chunk_id: chunk.id(),
|
||||
msg: format!(
|
||||
"failed to find column_name {:?} in results of tag_values",
|
||||
column_name
|
||||
),
|
||||
})?;
|
||||
|
||||
Ok(Some(values))
|
||||
}
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented for column_values")
|
||||
|
|
|
@ -20,6 +20,7 @@ pub struct LifecycleManager {
|
|||
db: Arc<Db>,
|
||||
db_name: String,
|
||||
move_task: Option<TaskTracker<Job>>,
|
||||
write_task: Option<TaskTracker<Job>>,
|
||||
}
|
||||
|
||||
impl LifecycleManager {
|
||||
|
@ -30,6 +31,7 @@ impl LifecycleManager {
|
|||
db,
|
||||
db_name,
|
||||
move_task: None,
|
||||
write_task: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,9 +67,15 @@ trait ChunkMover {
    /// Returns a boolean indicating if a move is in progress
    fn is_move_active(&self) -> bool;

    /// Returns a boolean indicating if a write is in progress
    fn is_write_active(&self) -> bool;

    /// Starts an operation to move a chunk to the read buffer
    fn move_to_read_buffer(&mut self, partition_key: String, chunk_id: u32);

    /// Starts an operation to write a chunk to the object store
    fn write_to_object_store(&mut self, partition_key: String, chunk_id: u32);

    /// Drops a chunk from the database
    fn drop_chunk(&mut self, partition_key: String, chunk_id: u32);
@ -78,10 +86,11 @@ trait ChunkMover {
|
|||
|
||||
let mut buffer_size = 0;
|
||||
|
||||
// Only want to start a new move task if there isn't one already in-flight
|
||||
// Only want to start a new move/write task if there isn't one already in-flight
|
||||
//
|
||||
// Note: This does not take into account manually triggered tasks
|
||||
let mut move_active = self.is_move_active();
|
||||
let mut write_active = self.is_write_active();
|
||||
|
||||
// Iterate through the chunks to determine
|
||||
// - total memory consumption
|
||||
|
@ -90,33 +99,44 @@ trait ChunkMover {
|
|||
// TODO: Track size globally to avoid iterating through all chunks (#1100)
|
||||
for chunk in &chunks {
|
||||
let chunk_guard = chunk.upgradable_read();
|
||||
|
||||
buffer_size += Self::chunk_size(&*chunk_guard);
|
||||
|
||||
if !move_active && can_move(&rules, &*chunk_guard, now) {
|
||||
match chunk_guard.state() {
|
||||
ChunkState::Open(_) => {
|
||||
let mut chunk_guard = RwLockUpgradableReadGuard::upgrade(chunk_guard);
|
||||
chunk_guard.set_closing().expect("cannot close open chunk");
|
||||
let would_move = !move_active && can_move(&rules, &*chunk_guard, now);
|
||||
let would_write = !write_active && rules.persist;
|
||||
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
match chunk_guard.state() {
|
||||
ChunkState::Open(_) if would_move => {
|
||||
let mut chunk_guard = RwLockUpgradableReadGuard::upgrade(chunk_guard);
|
||||
chunk_guard.set_closing().expect("cannot close open chunk");
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
ChunkState::Closing(_) => {
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
_ => {}
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
ChunkState::Closing(_) if would_move => {
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
ChunkState::Moved(_) if would_write => {
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
write_active = true;
|
||||
self.write_to_object_store(partition_key, chunk_id);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// TODO: Find and recover cancelled move jobs (#1099)
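The loop above relies on parking_lot's upgradable read locks: each chunk is inspected under a shared (upgradable) guard and only upgraded to a write guard when a state transition is actually needed. A minimal sketch of that pattern in isolation, assuming only the parking_lot crate (the size counter and threshold are illustrative):

use parking_lot::{RwLock, RwLockUpgradableReadGuard};

fn close_if_over_limit(chunk_size: &RwLock<usize>, limit: usize) -> bool {
    // Take an upgradable read: other readers may proceed, but only one
    // upgradable or write guard can exist at a time.
    let guard = chunk_size.upgradable_read();
    if *guard > limit {
        // Upgrade to exclusive access only when a mutation is required.
        let mut write_guard = RwLockUpgradableReadGuard::upgrade(guard);
        *write_guard = 0; // e.g. "close" the chunk and reset its tracked size
        true
    } else {
        false
    }
}

fn main() {
    let size = RwLock::new(10usize);
    assert!(close_if_over_limit(&size, 5));
    assert!(!close_if_over_limit(&size, 5));
}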
|
||||
|
@ -129,8 +149,9 @@ trait ChunkMover {
|
|||
match chunks.next() {
|
||||
Some(chunk) => {
|
||||
let chunk_guard = chunk.read();
|
||||
if rules.drop_non_persisted
|
||||
|| matches!(chunk_guard.state(), ChunkState::Moved(_))
|
||||
if (rules.drop_non_persisted
|
||||
&& matches!(chunk_guard.state(), ChunkState::Moved(_)))
|
||||
|| matches!(chunk_guard.state(), ChunkState::WrittenToObjectStore(_, _))
|
||||
{
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
@ -169,6 +190,13 @@ impl ChunkMover for LifecycleManager {
.unwrap_or(false)
}
fn is_write_active(&self) -> bool {
self.write_task
.as_ref()
.map(|x| !x.is_complete())
.unwrap_or(false)
}
fn move_to_read_buffer(&mut self, partition_key: String, chunk_id: u32) {
info!(%partition_key, %chunk_id, "moving chunk to read buffer");
self.move_task = Some(
@ -177,6 +205,14 @@ impl ChunkMover for LifecycleManager {
)
}
fn write_to_object_store(&mut self, partition_key: String, chunk_id: u32) {
info!(%partition_key, %chunk_id, "write chunk to object store");
self.write_task = Some(
self.db
.write_chunk_to_object_store_in_background(partition_key, chunk_id),
)
}
fn drop_chunk(&mut self, partition_key: String, chunk_id: u32) {
info!(%partition_key, %chunk_id, "dropping chunk");
let _ = self
@ -251,9 +287,57 @@ mod tests {
chunk
}
/// Transitions a new ("open") chunk into the "moving" state.
fn transition_to_moving(mut chunk: Chunk) -> Chunk {
chunk.set_closing().unwrap();
chunk.set_moving().unwrap();
chunk
}
/// Transitions a new ("open") chunk into the "moved" state.
fn transition_to_moved(mut chunk: Chunk, rb: &Arc<read_buffer::Chunk>) -> Chunk {
chunk = transition_to_moving(chunk);
chunk.set_moved(Arc::clone(&rb)).unwrap();
chunk
}
/// Transitions a new ("open") chunk into the "writing to object store"
/// state.
fn transition_to_writing_to_object_store(
mut chunk: Chunk,
rb: &Arc<read_buffer::Chunk>,
) -> Chunk {
chunk = transition_to_moved(chunk, rb);
chunk.set_writing_to_object_store().unwrap();
chunk
}
/// Transitions a new ("open") chunk into the "written to object store"
/// state.
fn transition_to_written_to_object_store(
mut chunk: Chunk,
rb: &Arc<read_buffer::Chunk>,
) -> Chunk {
chunk = transition_to_writing_to_object_store(chunk, rb);
let parquet_chunk = new_parquet_chunk(&chunk);
chunk
.set_written_to_object_store(Arc::new(parquet_chunk))
.unwrap();
chunk
}
fn new_parquet_chunk(chunk: &Chunk) -> parquet_file::chunk::Chunk {
parquet_file::chunk::Chunk::new(
chunk.key().to_string(),
chunk.id(),
&tracker::MemRegistry::new(),
)
}
#[derive(Debug, Eq, PartialEq)]
enum MoverEvents {
Move(u32),
Write(u32),
Drop(u32),
}
@ -262,6 +346,7 @@ mod tests {
struct DummyMover {
rules: LifecycleRules,
move_active: bool,
write_active: bool,
chunks: Vec<Arc<RwLock<Chunk>>>,
events: Vec<MoverEvents>,
}
@ -275,6 +360,7 @@ mod tests {
.map(|x| Arc::new(RwLock::new(x)))
.collect(),
move_active: false,
write_active: false,
events: vec![],
}
}
@ -298,6 +384,10 @@ mod tests {
self.move_active
}
fn is_write_active(&self) -> bool {
self.write_active
}
fn move_to_read_buffer(&mut self, _: String, chunk_id: u32) {
let chunk = self
.chunks
@ -308,7 +398,22 @@ mod tests {
self.events.push(MoverEvents::Move(chunk_id))
}
fn write_to_object_store(&mut self, _partition_key: String, chunk_id: u32) {
let chunk = self
.chunks
.iter()
.find(|x| x.read().id() == chunk_id)
.unwrap();
chunk.write().set_writing_to_object_store().unwrap();
self.events.push(MoverEvents::Write(chunk_id))
}
fn drop_chunk(&mut self, _: String, chunk_id: u32) {
self.chunks = self
.chunks
.drain(..)
.filter(|x| x.read().id() != chunk_id)
.collect();
self.events.push(MoverEvents::Drop(chunk_id))
}
@ -467,7 +572,56 @@ mod tests {
}
#[test]
fn test_buffer_size_soft() {
fn test_buffer_size_soft_drop_non_persisted() {
// test that chunk mover only drops moved and written chunks
// IMPORTANT: the lifecycle rules use the default `persist` flag (false), so no
// "write" events will be triggered
let rules = LifecycleRules {
buffer_size_soft: Some(NonZeroUsize::new(5).unwrap()),
drop_non_persisted: true,
..Default::default()
};
let rb = Arc::new(read_buffer::Chunk::new_with_memory_tracker(
22,
&tracker::MemRegistry::new(),
));
let chunks = vec![new_chunk(0, Some(0), Some(0))];
let mut mover = DummyMover::new(rules.clone(), chunks);
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![]);
let chunks = vec![
// two "open" chunks => they must not be dropped (yet)
new_chunk(0, Some(0), Some(0)),
new_chunk(1, Some(0), Some(0)),
// "moved" chunk => can be dropped because `drop_non_persisted=true`
transition_to_moved(new_chunk(2, Some(0), Some(0)), &rb),
// "writing" chunk => cannot be dropped while the write is in progress
transition_to_writing_to_object_store(new_chunk(3, Some(0), Some(0)), &rb),
// "written" chunk => can be dropped
transition_to_written_to_object_store(new_chunk(4, Some(0), Some(0)), &rb),
];
let mut mover = DummyMover::new(rules, chunks);
mover.check_for_work(from_secs(10));
assert_eq!(
mover.events,
vec![MoverEvents::Drop(2), MoverEvents::Drop(4)]
);
}
#[test]
fn test_buffer_size_soft_dont_drop_non_persisted() {
// test that chunk mover only drops written chunks
// IMPORTANT: the lifecycle rules use the default `persist` flag (false), so no
// "write" events will be triggered
let rules = LifecycleRules {
buffer_size_soft: Some(NonZeroUsize::new(5).unwrap()),
..Default::default()
@ -485,21 +639,27 @@ mod tests {
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![]);
let mut chunks = vec![
let chunks = vec![
// two "open" chunks => they must not be dropped (yet)
new_chunk(0, Some(0), Some(0)),
new_chunk(1, Some(0), Some(0)),
new_chunk(2, Some(0), Some(0)),
// "moved" chunk => cannot be dropped because `drop_non_persisted=false`
transition_to_moved(new_chunk(2, Some(0), Some(0)), &rb),
// "writing" chunk => cannot be dropped while the write is in progress
transition_to_writing_to_object_store(new_chunk(3, Some(0), Some(0)), &rb),
// "written" chunk => can be dropped
transition_to_written_to_object_store(new_chunk(4, Some(0), Some(0)), &rb),
];
chunks[2].set_closing().unwrap();
chunks[2].set_moving().unwrap();
chunks[2].set_moved(Arc::clone(&rb)).unwrap();
let mut mover = DummyMover::new(rules, chunks);
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![MoverEvents::Drop(2)]);
assert_eq!(mover.events, vec![MoverEvents::Drop(4)]);
}
#[test]
fn test_buffer_size_soft_no_op() {
// check that we don't drop anything if there is nothing to drop
let rules = LifecycleRules {
buffer_size_soft: Some(NonZeroUsize::new(40).unwrap()),
..Default::default()
@ -512,4 +672,33 @@ mod tests {
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![]);
}
#[test]
fn test_persist() {
let rules = LifecycleRules {
mutable_linger_seconds: Some(NonZeroU32::new(10).unwrap()),
persist: true,
..Default::default()
};
let rb = Arc::new(read_buffer::Chunk::new_with_memory_tracker(
22,
&tracker::MemRegistry::new(),
));
let chunks = vec![
// still moving => cannot write
transition_to_moving(new_chunk(0, Some(0), Some(0))),
// moved => write to object store
transition_to_moved(new_chunk(1, Some(0), Some(0)), &rb),
// moved, but there will already be a write in progress (previous chunk) => don't write
transition_to_moved(new_chunk(2, Some(0), Some(0)), &rb),
];
let mut mover = DummyMover::new(rules, chunks);
mover.check_for_work(from_secs(0));
assert_eq!(mover.events, vec![MoverEvents::Write(1)]);
}
}
@ -3,7 +3,6 @@
use std::convert::TryFrom;
use mutable_buffer::{chunk::Chunk, pred::ChunkPredicate};
use query::predicate::Predicate;
use snafu::Snafu;
@ -11,15 +10,6 @@ use snafu::Snafu;
pub enum Error {
#[snafu(display("Error translating predicate: {}", msg))]
ReadBufferPredicate { msg: String, pred: Predicate },
#[snafu(display("Error building predicate for mutable buffer: {}", source))]
MutableBufferPredicate { source: mutable_buffer::pred::Error },
}
impl From<mutable_buffer::pred::Error> for Error {
fn from(source: mutable_buffer::pred::Error) -> Self {
Self::MutableBufferPredicate { source }
}
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -52,25 +42,6 @@ pub fn to_read_buffer_predicate(predicate: &Predicate) -> Result<read_buffer::Pr
}
}
/// Converts a [`query::Predicate`] into [`ChunkPredicate`],
/// suitable for evaluating on the MutableBuffer.
pub fn to_mutable_buffer_predicate(
chunk: impl AsRef<Chunk>,
predicate: &Predicate,
) -> Result<ChunkPredicate> {
let predicate = chunk
.as_ref()
.predicate_builder()?
.table_names(predicate.table_names.as_ref())?
.field_names(predicate.field_columns.as_ref())?
.range(predicate.range)?
// it would be nice to avoid cloning all the exprs here.
.exprs(predicate.exprs.clone())?
.build();
Ok(predicate)
}
#[cfg(test)]
pub mod test {
use super::*;
@ -196,7 +167,6 @@ pub mod test {
Error::ReadBufferPredicate { msg, pred: _ } => {
assert_eq!(msg, exp.to_owned());
}
_ => panic!("Unexpected error type"),
}
}
}
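The removed to_mutable_buffer_predicate above relies on a fallible builder: every clause (table_names, field_names, range, exprs) returns a Result, so `?` bails out on the first clause the mutable buffer cannot evaluate. A generic sketch of that pattern is below; the types and error strings are invented for illustration and are not the crate's predicate_builder API.

// Illustrative fallible-builder pattern: each clause may reject input it cannot
// represent, and `?` short-circuits on the first error.
#[derive(Default)]
struct ChunkPredicate {
    table_names: Option<Vec<String>>,
    range: Option<(i64, i64)>,
}

#[derive(Default)]
struct PredicateBuilder {
    inner: ChunkPredicate,
}

impl PredicateBuilder {
    fn table_names(mut self, names: Option<&Vec<String>>) -> Result<Self, String> {
        self.inner.table_names = names.cloned();
        Ok(self)
    }

    fn range(mut self, range: Option<(i64, i64)>) -> Result<Self, String> {
        if matches!(range, Some((start, end)) if start > end) {
            return Err("invalid time range".to_string());
        }
        self.inner.range = range;
        Ok(self)
    }

    fn build(self) -> ChunkPredicate {
        self.inner
    }
}

fn main() -> Result<(), String> {
    let predicate = PredicateBuilder::default()
        .table_names(Some(&vec!["cpu".to_string()]))?
        .range(Some((0, 100)))?
        .build();
    println!("tables: {:?}, range: {:?}", predicate.table_names, predicate.range);
    Ok(())
}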
@ -1,15 +1,9 @@
//! Adapter streams for different Chunk types that implement the interface
//! needed by DataFusion
use arrow_deps::{
arrow::{
datatypes::SchemaRef,
error::{ArrowError, Result as ArrowResult},
record_batch::RecordBatch,
},
arrow::{datatypes::SchemaRef, error::Result as ArrowResult, record_batch::RecordBatch},
datafusion::physical_plan::RecordBatchStream,
};
use internal_types::selection::Selection;
use mutable_buffer::chunk::Chunk as MBChunk;
use read_buffer::ReadFilterResults;
use std::{
@ -17,99 +11,6 @@ use std::{
task::{Context, Poll},
};
use snafu::{ResultExt, Snafu};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display(
"Error getting data for table '{}' chunk {}: {}",
table_name,
chunk_id,
source
))]
GettingTableData {
table_name: String,
chunk_id: u32,
source: mutable_buffer::chunk::Error,
},
}
/// Adapter which will produce record batches from a mutable buffer
/// chunk on demand
pub(crate) struct MutableBufferChunkStream {
/// Requested output schema (includes selection)
schema: SchemaRef,
chunk: Arc<MBChunk>,
table_name: Arc<String>,
/// Vector of record batches to send in reverse order (send data[len-1]
/// next) Is None until the first call to poll_next
data: Option<Vec<RecordBatch>>,
}
impl MutableBufferChunkStream {
pub fn new(chunk: Arc<MBChunk>, schema: SchemaRef, table_name: impl Into<String>) -> Self {
Self {
chunk,
schema,
table_name: Arc::new(table_name.into()),
data: None,
}
}
// gets the next batch, as needed
fn next_batch(&mut self) -> ArrowResult<Option<RecordBatch>> {
if self.data.is_none() {
// Want all the columns in the schema. Note we don't
// use `Selection::All` here because the mutable buffer chunk would interpret it
// as "all columns in the table in that chunk" rather than
// all columns this query needs
let selected_cols = self
.schema
.fields()
.iter()
.map(|f| f.name() as &str)
.collect::<Vec<_>>();
let selection = Selection::Some(&selected_cols);
let mut data = Vec::new();
self.chunk
.table_to_arrow(&mut data, self.table_name.as_ref(), selection)
.context(GettingTableData {
table_name: self.table_name.as_ref(),
chunk_id: self.chunk.id(),
})
.map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
// reverse the array so we can pop off the back
data.reverse();
self.data = Some(data);
}
// self.data was set to Some above
Ok(self.data.as_mut().unwrap().pop())
}
}
impl RecordBatchStream for MutableBufferChunkStream {
fn schema(&self) -> SchemaRef {
Arc::clone(&self.schema)
}
}
impl futures::Stream for MutableBufferChunkStream {
type Item = ArrowResult<RecordBatch>;
fn poll_next(
mut self: std::pin::Pin<&mut Self>,
_: &mut Context<'_>,
) -> Poll<Option<Self::Item>> {
Poll::Ready(self.next_batch().transpose())
}
// TODO is there a useful size_hint to pass?
}
/// Adapter which will take a ReadFilterResults and make it an async stream
pub struct ReadFilterResultsStream {
read_results: ReadFilterResults,
@ -143,3 +44,42 @@ impl futures::Stream for ReadFilterResultsStream {
// TODO is there a useful size_hint to pass?
}
/// A RecordBatchStream created from a single RecordBatch
///
/// Unfortunately datafusion's MemoryStream is crate-local
#[derive(Debug)]
pub(crate) struct MemoryStream {
schema: SchemaRef,
batch: Option<RecordBatch>,
}
impl MemoryStream {
pub fn new(batch: RecordBatch) -> Self {
Self {
schema: batch.schema(),
batch: Some(batch),
}
}
}
impl RecordBatchStream for MemoryStream {
fn schema(&self) -> SchemaRef {
Arc::clone(&self.schema)
}
}
impl futures::Stream for MemoryStream {
type Item = ArrowResult<RecordBatch>;
fn poll_next(
mut self: std::pin::Pin<&mut Self>,
_: &mut Context<'_>,
) -> Poll<Option<Self::Item>> {
Poll::Ready(self.batch.take().map(Ok))
}
fn size_hint(&self) -> (usize, Option<usize>) {
(1, Some(1))
}
}
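MemoryStream above turns one RecordBatch into a stream by storing it in an Option and take()-ing it on the first poll, so the second poll yields None and ends the stream. The same one-shot idiom works for any item type; a self-contained sketch using only the futures crate, with invented names, is shown here.

use futures::stream::StreamExt;
use std::pin::Pin;
use std::task::{Context, Poll};

// One-shot stream: yields the stored item once, then terminates.
struct OneShot<T> {
    item: Option<T>,
}

impl<T: Unpin> futures::Stream for OneShot<T> {
    type Item = T;

    fn poll_next(mut self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // take() leaves None behind, so the next poll reports end of stream.
        Poll::Ready(self.item.take())
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let n = self.item.is_some() as usize;
        (n, Some(n))
    }
}

fn main() {
    futures::executor::block_on(async {
        let mut stream = OneShot { item: Some(42) };
        assert_eq!(stream.next().await, Some(42));
        assert_eq!(stream.next().await, None);
    });
}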
|
@ -67,6 +67,7 @@
|
|||
clippy::clone_on_ref_ptr
|
||||
)]
|
||||
|
||||
use std::convert::TryInto;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
@ -83,11 +84,11 @@ use data_types::{
|
|||
};
|
||||
use influxdb_line_protocol::ParsedLine;
|
||||
use internal_types::{
|
||||
data::{lines_to_replicated_write, ReplicatedWrite},
|
||||
entry::{self, lines_to_sharded_entries, Entry},
|
||||
once::OnceNonZeroU32,
|
||||
};
|
||||
use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
|
||||
use query::{exec::Executor, Database, DatabaseStore};
|
||||
use query::{exec::Executor, DatabaseStore};
|
||||
use tracker::{TaskId, TaskRegistration, TaskRegistryWithHistory, TaskTracker, TrackedFutureExt};
|
||||
|
||||
use futures::{pin_mut, FutureExt};
|
||||
|
@ -98,15 +99,20 @@ use crate::{
|
|||
},
|
||||
db::Db,
|
||||
};
|
||||
use internal_types::entry::SequencedEntry;
|
||||
use std::num::NonZeroU32;
|
||||
|
||||
pub mod buffer;
|
||||
mod config;
|
||||
pub mod db;
|
||||
mod query_tests;
|
||||
pub mod snapshot;
|
||||
|
||||
#[cfg(test)]
|
||||
mod query_tests;
|
||||
// This module exposes `query_tests` outside of the crate so that it may be used
|
||||
// in benchmarks. Do not import this module for non-benchmark purposes!
|
||||
pub mod benchmarks {
|
||||
pub use crate::query_tests::*;
|
||||
}
|
||||
|
||||
type DatabaseError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
|
@ -147,6 +153,12 @@ pub enum Error {
|
|||
DatabaseAlreadyExists { db_name: String },
|
||||
#[snafu(display("error appending to wal buffer: {}", source))]
|
||||
WalError { source: buffer::Error },
|
||||
#[snafu(display("error converting line protocol to flatbuffers: {}", source))]
|
||||
LineConversion { source: entry::Error },
|
||||
#[snafu(display("error decoding entry flatbuffers: {}", source))]
|
||||
DecodingEntry {
|
||||
source: flatbuffers::InvalidFlatbuffer,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -179,6 +191,38 @@ impl JobRegistry {
const STORE_ERROR_PAUSE_SECONDS: u64 = 100;
/// Used to configure a server instance
#[derive(Debug)]
pub struct ServerConfig {
// number of executor worker threads. If not specified, defaults
// to number of cores on the system.
num_worker_threads: Option<usize>,
/// The `ObjectStore` instance to use for persistence
object_store: Arc<ObjectStore>,
}
impl ServerConfig {
/// Create a new config using the specified store
pub fn new(object_store: Arc<ObjectStore>) -> Self {
Self {
num_worker_threads: None,
object_store,
}
}
/// Use `num` worker threads for running queries
pub fn with_num_worker_threads(mut self, num: usize) -> Self {
self.num_worker_threads = Some(num);
self
}
/// return a reference to the object store in this configuration
pub fn store(&self) -> Arc<ObjectStore> {
Arc::clone(&self.object_store)
}
}
/// `Server` is the container struct for how servers store data internally, as
/// well as how they communicate with other servers. Each server will have one
/// of these structs, which keeps track of all replication and query rules.
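ServerConfig is a small builder: start from a store, optionally override the worker-thread count, and hand the result to Server::new, which destructures the config and resolves the default thread count. The tests later in this diff build it exactly this way. The sketch below mirrors that flow with placeholder types; it is not the crate's ObjectStore or Server, and the fixed fallback stands in for the num_cpus::get() default used by the real code.

use std::sync::Arc;

#[derive(Debug)]
struct ObjectStore; // placeholder store

struct ServerConfig {
    num_worker_threads: Option<usize>,
    object_store: Arc<ObjectStore>,
}

impl ServerConfig {
    fn new(object_store: Arc<ObjectStore>) -> Self {
        Self { num_worker_threads: None, object_store }
    }

    fn with_num_worker_threads(mut self, num: usize) -> Self {
        self.num_worker_threads = Some(num);
        self
    }
}

struct Server {
    store: Arc<ObjectStore>,
    worker_threads: usize,
}

impl Server {
    fn new(config: ServerConfig) -> Self {
        let ServerConfig { num_worker_threads, object_store } = config;
        Self {
            store: object_store,
            // The real code defaults to the core count; a fixed fallback
            // keeps this sketch dependency free.
            worker_threads: num_worker_threads.unwrap_or(4),
        }
    }
}

fn main() {
    let config = ServerConfig::new(Arc::new(ObjectStore)).with_num_worker_threads(1);
    let server = Server::new(config);
    println!("workers: {}, store: {:?}", server.worker_threads, server.store);
}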
@ -188,7 +232,7 @@ pub struct Server<M: ConnectionManager> {
|
|||
config: Arc<Config>,
|
||||
connection_manager: Arc<M>,
|
||||
pub store: Arc<ObjectStore>,
|
||||
executor: Arc<Executor>,
|
||||
exec: Arc<Executor>,
|
||||
jobs: Arc<JobRegistry>,
|
||||
}
|
||||
|
||||
|
@ -205,15 +249,21 @@ impl<E> From<Error> for UpdateError<E> {
|
|||
}
|
||||
|
||||
impl<M: ConnectionManager> Server<M> {
|
||||
pub fn new(connection_manager: M, store: Arc<ObjectStore>) -> Self {
|
||||
pub fn new(connection_manager: M, config: ServerConfig) -> Self {
|
||||
let jobs = Arc::new(JobRegistry::new());
|
||||
|
||||
let ServerConfig {
|
||||
num_worker_threads,
|
||||
object_store,
|
||||
} = config;
|
||||
let num_worker_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
|
||||
|
||||
Self {
|
||||
id: Default::default(),
|
||||
config: Arc::new(Config::new(Arc::clone(&jobs))),
|
||||
store,
|
||||
store: object_store,
|
||||
connection_manager: Arc::new(connection_manager),
|
||||
executor: Arc::new(Executor::new()),
|
||||
exec: Arc::new(Executor::new(num_worker_threads)),
|
||||
jobs,
|
||||
}
|
||||
}
|
||||
|
@ -232,12 +282,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
}
|
||||
|
||||
/// Tells the server the set of rules for a database.
|
||||
pub async fn create_database(
|
||||
&self,
|
||||
rules: DatabaseRules,
|
||||
server_id: NonZeroU32,
|
||||
object_store: Arc<ObjectStore>,
|
||||
) -> Result<()> {
|
||||
pub async fn create_database(&self, rules: DatabaseRules, server_id: NonZeroU32) -> Result<()> {
|
||||
// Return an error if this server hasn't yet been setup with an id
|
||||
self.require_id()?;
|
||||
let db_reservation = self.config.create_db(rules)?;
|
||||
|
@ -245,7 +290,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
self.persist_database_rules(db_reservation.rules().clone())
|
||||
.await?;
|
||||
|
||||
db_reservation.commit(server_id, object_store);
|
||||
db_reservation.commit(server_id, Arc::clone(&self.store), Arc::clone(&self.exec));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -300,6 +345,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
.map(|mut path| {
|
||||
let store = Arc::clone(&self.store);
|
||||
let config = Arc::clone(&self.config);
|
||||
let exec = Arc::clone(&self.exec);
|
||||
|
||||
path.set_file_name(DB_RULES_FILE_NAME);
|
||||
|
||||
|
@ -325,7 +371,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
}
|
||||
Ok(rules) => match config.create_db(rules) {
|
||||
Err(e) => error!("error adding database to config: {}", e),
|
||||
Ok(handle) => handle.commit(server_id, store),
|
||||
Ok(handle) => handle.commit(server_id, store, exec),
|
||||
},
|
||||
}
|
||||
})
|
||||
|
@ -337,12 +383,12 @@ impl<M: ConnectionManager> Server<M> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// `write_lines` takes in raw line protocol and converts it to a
|
||||
/// `ReplicatedWrite`, which is then replicated to other servers based
|
||||
/// on the configuration of the `db`. This is step #1 from the crate
|
||||
/// level documentation.
|
||||
/// `write_lines` takes in raw line protocol and converts it to a collection
|
||||
/// of ShardedEntry which are then sent to other IOx servers based on
|
||||
/// the ShardConfig or sent to the local database for buffering in the
|
||||
/// WriteBuffer and/or the MutableBuffer if configured.
|
||||
pub async fn write_lines(&self, db_name: &str, lines: &[ParsedLine<'_>]) -> Result<()> {
|
||||
let id = self.require_id()?.get();
|
||||
self.require_id()?;
|
||||
|
||||
let db_name = DatabaseName::new(db_name).context(InvalidDatabaseName)?;
|
||||
let db = self
|
||||
|
@ -350,62 +396,52 @@ impl<M: ConnectionManager> Server<M> {
|
|||
.db(&db_name)
|
||||
.context(DatabaseNotFound { db_name: &*db_name })?;
|
||||
|
||||
let sequence = db.next_sequence();
|
||||
let write = lines_to_replicated_write(id, sequence, lines, &*db.rules.read());
|
||||
let sharded_entries = lines_to_sharded_entries(
|
||||
lines,
|
||||
db.rules.read().shard_config.as_ref(),
|
||||
&*db.rules.read(),
|
||||
)
|
||||
.context(LineConversion)?;
|
||||
|
||||
self.handle_replicated_write(&db_name, &db, write).await?;
|
||||
for e in sharded_entries {
|
||||
// TODO: handle sending to shards based on ShardConfig
|
||||
self.handle_write_entry(&db, e.entry).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle_replicated_write(
|
||||
pub async fn write_entry(&self, db_name: &str, entry_bytes: Vec<u8>) -> Result<()> {
|
||||
self.require_id()?;
|
||||
|
||||
let db_name = DatabaseName::new(db_name).context(InvalidDatabaseName)?;
|
||||
let db = self
|
||||
.config
|
||||
.db(&db_name)
|
||||
.context(DatabaseNotFound { db_name: &*db_name })?;
|
||||
|
||||
let entry = entry_bytes.try_into().context(DecodingEntry)?;
|
||||
self.handle_write_entry(&db, entry).await
|
||||
}
|
||||
|
||||
pub async fn handle_write_entry(&self, db: &Db, entry: Entry) -> Result<()> {
|
||||
db.store_entry(entry)
|
||||
.map_err(|e| Error::UnknownDatabaseError {
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle_sequenced_entry(
|
||||
&self,
|
||||
db_name: &DatabaseName<'_>,
|
||||
db: &Db,
|
||||
write: ReplicatedWrite,
|
||||
sequenced_entry: SequencedEntry,
|
||||
) -> Result<()> {
|
||||
match db.store_replicated_write(&write) {
|
||||
Err(db::Error::DatabaseNotWriteable {}) | Ok(_) => {}
|
||||
Err(e) => {
|
||||
return Err(Error::UnknownDatabaseError {
|
||||
source: Box::new(e),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
let write = Arc::new(write);
|
||||
|
||||
if let Some(wal_buffer) = &db.wal_buffer {
|
||||
let persist;
|
||||
let segment = {
|
||||
let mut wal_buffer = wal_buffer.lock();
|
||||
persist = wal_buffer.persist;
|
||||
|
||||
// TODO: address this issue?
|
||||
// the mutable buffer and the wal buffer have different locking mechanisms,
|
||||
// which means that it's possible for a mutable buffer write to
|
||||
// succeed while a WAL buffer write fails, which would then
|
||||
// return an error. A single lock is probably undesirable, but
|
||||
// we need to figure out what semantics we want.
|
||||
wal_buffer.append(Arc::clone(&write)).context(WalError)?
|
||||
};
|
||||
|
||||
if let Some(segment) = segment {
|
||||
if persist {
|
||||
let writer_id = self.require_id()?.get();
|
||||
let store = Arc::clone(&self.store);
|
||||
|
||||
let (_, tracker) = self.jobs.register(Job::PersistSegment {
|
||||
writer_id,
|
||||
segment_id: segment.id,
|
||||
});
|
||||
|
||||
segment
|
||||
.persist_bytes_in_background(tracker, writer_id, db_name, store)
|
||||
.context(WalError)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
db.store_sequenced_entry(sequenced_entry)
|
||||
.map_err(|e| Error::UnknownDatabaseError {
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -574,12 +610,8 @@ where
|
|||
let db = match self.db(&db_name) {
|
||||
Some(db) => db,
|
||||
None => {
|
||||
self.create_database(
|
||||
DatabaseRules::new(db_name.clone()),
|
||||
self.require_id()?,
|
||||
Arc::clone(&self.store),
|
||||
)
|
||||
.await?;
|
||||
self.create_database(DatabaseRules::new(db_name.clone()), self.require_id()?)
|
||||
.await?;
|
||||
self.db(&db_name).expect("db not inserted")
|
||||
}
|
||||
};
|
||||
|
@ -587,8 +619,9 @@ where
|
|||
Ok(db)
|
||||
}
|
||||
|
||||
/// Return a handle to the query executor
|
||||
fn executor(&self) -> Arc<Executor> {
|
||||
Arc::clone(&self.executor)
|
||||
Arc::clone(&self.exec)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -610,12 +643,17 @@ pub trait ConnectionManager {
|
|||
pub trait RemoteServer {
|
||||
type Error: std::error::Error + Send + Sync + 'static;
|
||||
|
||||
/// Sends a replicated write to a remote server. This is step #2 from the
|
||||
/// diagram.
|
||||
async fn replicate(
|
||||
/// Sends an Entry to the remote server. An IOx server acting as a
|
||||
/// router/sharder will call this method to send entries to remotes.
|
||||
async fn write_entry(&self, db: &str, entry: Entry) -> Result<(), Self::Error>;
|
||||
|
||||
/// Sends a SequencedEntry to the remote server. An IOx server acting as a
|
||||
/// write buffer will call this method to replicate to other write
|
||||
/// buffer servers or to send data to downstream subscribers.
|
||||
async fn write_sequenced_entry(
|
||||
&self,
|
||||
db: &str,
|
||||
replicated_write: &ReplicatedWrite,
|
||||
sequenced_entry: SequencedEntry,
|
||||
) -> Result<(), Self::Error>;
|
||||
}
|
||||
|
||||
|
@ -643,10 +681,19 @@ pub struct RemoteServerImpl {}
|
|||
impl RemoteServer for RemoteServerImpl {
|
||||
type Error = Error;
|
||||
|
||||
async fn replicate(
|
||||
/// Sends an Entry to the remote server. An IOx server acting as a
|
||||
/// router/sharder will call this method to send entries to remotes.
|
||||
async fn write_entry(&self, _db: &str, _entry: Entry) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Sends a SequencedEntry to the remote server. An IOx server acting as a
|
||||
/// write buffer will call this method to replicate to other write
|
||||
/// buffer servers or to send data to downstream subscribers.
|
||||
async fn write_sequenced_entry(
|
||||
&self,
|
||||
_db: &str,
|
||||
_replicated_write: &ReplicatedWrite,
|
||||
_sequenced_entry: SequencedEntry,
|
||||
) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
@ -675,28 +722,27 @@ mod tests {
|
|||
|
||||
use async_trait::async_trait;
|
||||
use futures::TryStreamExt;
|
||||
use parking_lot::Mutex;
|
||||
use snafu::Snafu;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use arrow_deps::{assert_table_eq, datafusion::physical_plan::collect};
|
||||
use data_types::database_rules::{
|
||||
PartitionTemplate, TemplatePart, WalBufferConfig, WalBufferRollover,
|
||||
};
|
||||
use arrow_deps::assert_table_eq;
|
||||
use data_types::database_rules::{PartitionTemplate, TemplatePart, NO_SHARD_CONFIG};
|
||||
use influxdb_line_protocol::parse_lines;
|
||||
use object_store::{memory::InMemory, path::ObjectStorePath};
|
||||
use query::{frontend::sql::SQLQueryPlanner, Database};
|
||||
|
||||
use crate::buffer::Segment;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn config() -> ServerConfig {
|
||||
ServerConfig::new(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
|
||||
.with_num_worker_threads(1)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn server_api_calls_return_error_with_no_id_set() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
|
||||
let resp = server.require_id().unwrap_err();
|
||||
assert!(matches!(resp, Error::IdNotSet));
|
||||
|
@ -709,8 +755,9 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn create_database_persists_rules() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, Arc::clone(&store));
|
||||
let config = config();
|
||||
let store = config.store();
|
||||
let server = Server::new(manager, config);
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("bananas").unwrap();
|
||||
|
@ -727,11 +774,7 @@ mod tests {
|
|||
|
||||
// Create a database
|
||||
server
|
||||
.create_database(
|
||||
rules.clone(),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.create_database(rules.clone(), server.require_id().unwrap())
|
||||
.await
|
||||
.expect("failed to create database");
|
||||
|
||||
|
@ -759,7 +802,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(db2.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.expect("failed to create 2nd db");
|
||||
|
@ -767,7 +809,8 @@ mod tests {
|
|||
store.list_with_delimiter(&store.new_path()).await.unwrap();
|
||||
|
||||
let manager = TestConnectionManager::new();
|
||||
let server2 = Server::new(manager, store);
|
||||
let config2 = ServerConfig::new(store).with_num_worker_threads(1);
|
||||
let server2 = Server::new(manager, config2);
|
||||
server2.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
server2.load_database_configs().await.unwrap();
|
||||
|
||||
|
@ -780,8 +823,7 @@ mod tests {
|
|||
// Covers #643
|
||||
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("bananas").unwrap();
|
||||
|
@ -791,7 +833,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(name.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.expect("failed to create database");
|
||||
|
@ -801,7 +842,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(name.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
@ -814,8 +854,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn db_names_sorted() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let names = vec!["bar", "baz"];
|
||||
|
@ -823,11 +862,7 @@ mod tests {
|
|||
for name in &names {
|
||||
let name = DatabaseName::new(name.to_string()).unwrap();
|
||||
server
|
||||
.create_database(
|
||||
DatabaseRules::new(name),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.create_database(DatabaseRules::new(name), server.require_id().unwrap())
|
||||
.await
|
||||
.expect("failed to create database");
|
||||
}
|
||||
|
@ -839,17 +874,12 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn writes_local() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("foo".to_string()).unwrap();
|
||||
server
|
||||
.create_database(
|
||||
DatabaseRules::new(name),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.create_database(DatabaseRules::new(name), server.require_id().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -864,10 +894,52 @@ mod tests {
|
|||
let executor = server.executor();
|
||||
let physical_plan = planner
|
||||
.query(db, "select * from cpu", executor.as_ref())
|
||||
.unwrap();
|
||||
|
||||
let batches = executor.collect(physical_plan).await.unwrap();
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 10 |",
|
||||
"+-----+------+",
|
||||
];
|
||||
assert_table_eq!(expected, &batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_entry_local() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("foo".to_string()).unwrap();
|
||||
server
|
||||
.create_database(DatabaseRules::new(name), server.require_id().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batches = collect(physical_plan).await.unwrap();
|
||||
let db_name = DatabaseName::new("foo").unwrap();
|
||||
let db = server.db(&db_name).unwrap();
|
||||
|
||||
let line = "cpu bar=1 10";
|
||||
let lines: Vec<_> = parse_lines(line).map(|l| l.unwrap()).collect();
|
||||
let sharded_entries = lines_to_sharded_entries(&lines, NO_SHARD_CONFIG, &*db.rules.read())
|
||||
.expect("sharded entries");
|
||||
|
||||
let entry = &sharded_entries[0].entry;
|
||||
server
|
||||
.write_entry("foo", entry.data().into())
|
||||
.await
|
||||
.expect("write entry");
|
||||
|
||||
let planner = SQLQueryPlanner::default();
|
||||
let executor = server.executor();
|
||||
let physical_plan = planner
|
||||
.query(db, "select * from cpu", executor.as_ref())
|
||||
.unwrap();
|
||||
|
||||
let batches = executor.collect(physical_plan).await.unwrap();
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
|
@ -882,8 +954,7 @@ mod tests {
|
|||
async fn close_chunk() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Arc::new(Server::new(manager, store));
|
||||
let server = Arc::new(Server::new(manager, config()));
|
||||
|
||||
let cancel_token = CancellationToken::new();
|
||||
let background_handle = spawn_worker(Arc::clone(&server), cancel_token.clone());
|
||||
|
@ -895,7 +966,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(db_name.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
@ -945,71 +1015,10 @@ mod tests {
|
|||
let _ = background_handle.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn segment_persisted_on_rollover() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
|
||||
let server = Server::new(manager, Arc::clone(&store));
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
let db_name = DatabaseName::new("my_db").unwrap();
|
||||
let rules = DatabaseRules {
|
||||
name: db_name.clone(),
|
||||
partition_template: Default::default(),
|
||||
wal_buffer_config: Some(WalBufferConfig {
|
||||
buffer_size: 500,
|
||||
segment_size: 10,
|
||||
buffer_rollover: WalBufferRollover::ReturnError,
|
||||
store_segments: true,
|
||||
close_segment_after: None,
|
||||
}),
|
||||
lifecycle_rules: Default::default(),
|
||||
shard_config: None,
|
||||
};
|
||||
server
|
||||
.create_database(
|
||||
rules,
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let lines = parsed_lines("disk,host=a used=10.1 12");
|
||||
server.write_lines(db_name.as_str(), &lines).await.unwrap();
|
||||
|
||||
// write lines should have caused a segment rollover and persist, wait
|
||||
tokio::task::yield_now().await;
|
||||
|
||||
let mut path = store.new_path();
|
||||
path.push_all_dirs(&["1", "my_db", "wal", "000", "000"]);
|
||||
path.set_file_name("001.segment");
|
||||
|
||||
let data = store
|
||||
.get(&path)
|
||||
.await
|
||||
.unwrap()
|
||||
.map_ok(|b| bytes::BytesMut::from(&b[..]))
|
||||
.try_concat()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let segment = Segment::from_file_bytes(&data).unwrap();
|
||||
assert_eq!(segment.writes.len(), 1);
|
||||
let write = r#"
|
||||
writer:1, sequence:1, checksum:2741956553
|
||||
partition_key:
|
||||
table:disk
|
||||
host:a used:10.1 time:12
|
||||
"#;
|
||||
assert_eq!(segment.writes[0].to_string(), write);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn background_task_cleans_jobs() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Arc::new(Server::new(manager, store));
|
||||
let server = Arc::new(Server::new(manager, config()));
|
||||
|
||||
let cancel_token = CancellationToken::new();
|
||||
let background_handle = spawn_worker(Arc::clone(&server), cancel_token.clone());
|
||||
|
@ -1057,24 +1066,22 @@ partition_key:
|
|||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct TestRemoteServer {
|
||||
writes: Mutex<BTreeMap<String, Vec<ReplicatedWrite>>>,
|
||||
}
|
||||
struct TestRemoteServer {}
|
||||
|
||||
#[async_trait]
|
||||
impl RemoteServer for TestRemoteServer {
|
||||
type Error = TestClusterError;
|
||||
|
||||
async fn replicate(
|
||||
&self,
|
||||
db: &str,
|
||||
replicated_write: &ReplicatedWrite,
|
||||
) -> Result<(), Self::Error> {
|
||||
let mut writes = self.writes.lock();
|
||||
let entries = writes.entry(db.to_string()).or_insert_with(Vec::new);
|
||||
entries.push(replicated_write.clone());
|
||||
async fn write_entry(&self, _db: &str, _entry: Entry) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
Ok(())
|
||||
async fn write_sequenced_entry(
|
||||
&self,
|
||||
_db: &str,
|
||||
_sequenced_entry: SequencedEntry,
|
||||
) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#![allow(unused_imports, dead_code, unused_macros)]
|
||||
pub mod field_columns;
|
||||
pub mod read_filter;
|
||||
pub mod read_group;
|
||||
|
|
|
@ -4,10 +4,7 @@ use arrow_deps::{
|
|||
datafusion::logical_plan::{col, lit},
|
||||
};
|
||||
use query::{
|
||||
exec::{
|
||||
fieldlist::{Field, FieldList},
|
||||
Executor,
|
||||
},
|
||||
exec::fieldlist::{Field, FieldList},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
@ -31,11 +28,10 @@ macro_rules! run_field_columns_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
let executor = db.executor();
|
||||
|
||||
let plan = planner
|
||||
.field_columns(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let fields = executor
|
||||
.to_field_list(plan)
|
||||
|
@ -133,11 +129,9 @@ async fn test_field_name_plan() {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.field_columns(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let mut plans = plan.plans;
|
||||
|
@ -146,7 +140,8 @@ async fn test_field_name_plan() {
|
|||
|
||||
// run the created plan directly, ensuring the output is as
|
||||
// expected (specifically that the column ordering is correct)
|
||||
let results = executor
|
||||
let results = db
|
||||
.executor()
|
||||
.run_logical_plan(plan)
|
||||
.await
|
||||
.expect("ok running plan");
|
||||
|
|
|
@ -4,11 +4,11 @@ use crate::query_tests::scenarios::*;
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use async_trait::async_trait;
|
||||
use query::{
|
||||
exec::Executor,
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::{Predicate, PredicateBuilder, EMPTY_PREDICATE},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsMultiSeries {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsMultiSeries {
|
||||
|
@ -46,14 +46,12 @@ macro_rules! run_read_filter_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.read_filter(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let string_results = run_series_set_plan(executor, plan).await;
|
||||
let string_results = run_series_set_plan(db.executor(), plan).await;
|
||||
|
||||
assert_eq!(
|
||||
expected_results, string_results,
|
||||
|
@ -310,6 +308,7 @@ async fn test_read_filter_data_pred_unsupported_in_scan() {
|
|||
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MeasurementsSortableTags {}
|
||||
#[async_trait]
|
||||
impl DBSetup for MeasurementsSortableTags {
|
||||
|
|
|
@ -4,7 +4,6 @@ use crate::query_tests::scenarios::*;
|
|||
use arrow_deps::{arrow::util::pretty::pretty_format_batches, datafusion::prelude::*};
|
||||
use async_trait::async_trait;
|
||||
use query::{
|
||||
exec::Executor,
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
group_by::Aggregate,
|
||||
predicate::{Predicate, PredicateBuilder},
|
||||
|
@ -26,11 +25,9 @@ macro_rules! run_read_group_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plans = planner
|
||||
.read_group(&db, predicate.clone(), agg, &group_columns)
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let plans = plans.into_inner();
|
||||
|
@ -46,7 +43,8 @@ macro_rules! run_read_group_test_case {
|
|||
|
||||
let mut string_results = vec![];
|
||||
for plan in plans.into_iter() {
|
||||
let batches = executor
|
||||
let batches = db
|
||||
.executor()
|
||||
.run_logical_plan(plan.plan)
|
||||
.await
|
||||
.expect("ok running plan");
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
//! Tests for the Influx gRPC queries
|
||||
use crate::query_tests::{scenarios::*, utils::make_db};
|
||||
use crate::{
|
||||
db::test_helpers::write_lp,
|
||||
query_tests::{scenarios::*, utils::make_db},
|
||||
};
|
||||
|
||||
use arrow_deps::{arrow::util::pretty::pretty_format_batches, datafusion::prelude::*};
|
||||
use async_trait::async_trait;
|
||||
use query::{
|
||||
exec::Executor,
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
group_by::{Aggregate, WindowDuration},
|
||||
predicate::{Predicate, PredicateBuilder},
|
||||
test::TestLPWriter,
|
||||
};
|
||||
|
||||
/// runs read_window_aggregate(predicate) and compares it to the expected
|
||||
|
@ -28,18 +29,17 @@ macro_rules! run_read_window_aggregate_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plans = planner
|
||||
.read_window_aggregate(&db, predicate.clone(), agg, every.clone(), offset.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let plans = plans.into_inner();
|
||||
|
||||
let mut string_results = vec![];
|
||||
for plan in plans.into_iter() {
|
||||
let batches = executor
|
||||
let batches = db
|
||||
.executor()
|
||||
.run_logical_plan(plan.plan)
|
||||
.await
|
||||
.expect("ok running plan");
|
||||
|
@ -162,18 +162,16 @@ impl DBSetup for MeasurementForWindowAggregateMonths {
|
|||
// "2020-04-02T00"]
|
||||
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let data = lp_lines.join("\n");
|
||||
writer.write_lp_string(&db, &data).unwrap();
|
||||
write_lp(&db, &data);
|
||||
let scenario1 = DBScenario {
|
||||
scenario_name: "Data in 4 partitions, open chunks of mutable buffer".into(),
|
||||
db,
|
||||
};
|
||||
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let data = lp_lines.join("\n");
|
||||
writer.write_lp_string(&db, &data).unwrap();
|
||||
write_lp(&db, &data);
|
||||
db.rollover_partition("2020-03-01T00").await.unwrap();
|
||||
db.rollover_partition("2020-03-02T00").await.unwrap();
|
||||
let scenario2 = DBScenario {
|
||||
|
@ -184,9 +182,8 @@ impl DBSetup for MeasurementForWindowAggregateMonths {
|
|||
};
|
||||
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let data = lp_lines.join("\n");
|
||||
writer.write_lp_string(&db, &data).unwrap();
|
||||
write_lp(&db, &data);
|
||||
rollover_and_load(&db, "2020-03-01T00").await;
|
||||
rollover_and_load(&db, "2020-03-02T00").await;
|
||||
rollover_and_load(&db, "2020-04-01T00").await;
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
//! Tests for the Influx gRPC queries
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::{Predicate, PredicateBuilder, EMPTY_PREDICATE},
|
||||
};
|
||||
|
@ -23,13 +20,12 @@ macro_rules! run_table_names_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.table_names(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
let names = db
|
||||
.executor()
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
@ -27,13 +24,12 @@ macro_rules! run_tag_keys_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.tag_keys(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
let names = db
|
||||
.executor()
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
@ -25,13 +22,12 @@ macro_rules! run_tag_values_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.tag_values(&db, &tag_name, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
let names = db
|
||||
.executor()
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
@ -239,7 +235,7 @@ async fn list_tag_values_field_col() {
|
|||
|
||||
// Test: temp is a field, not a tag
|
||||
let tag_name = "temp";
|
||||
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone()).await;
|
||||
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone());
|
||||
|
||||
assert_eq!(
|
||||
plan_result.unwrap_err().to_string(),
|
||||
|
|
|
@ -51,7 +51,7 @@ pub fn dump_series_set(s: SeriesSet) -> Vec<String> {
|
|||
}
|
||||
|
||||
/// Run a series set plan to completion and produce a Vec<String> representation
|
||||
pub async fn run_series_set_plan(executor: Executor, plans: SeriesSetPlans) -> Vec<String> {
|
||||
pub async fn run_series_set_plan(executor: Arc<Executor>, plans: SeriesSetPlans) -> Vec<String> {
|
||||
// Use a channel sufficiently large to buffer the series
|
||||
let (tx, mut rx) = mpsc::channel(100);
|
||||
executor
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
//! This module contains testing scenarios for Db
|
||||
|
||||
use query::{test::TestLPWriter, PartitionChunk};
|
||||
#[allow(unused_imports, dead_code, unused_macros)]
|
||||
use query::PartitionChunk;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::db::Db;
|
||||
use crate::db::{test_helpers::write_lp, Db};
|
||||
|
||||
use super::utils::{count_mutable_buffer_chunks, count_read_buffer_chunks, make_db};
|
||||
|
||||
/// Holds a database and a description of how its data was configured
|
||||
#[derive(Debug)]
|
||||
pub struct DBScenario {
|
||||
pub scenario_name: String,
|
||||
pub db: Db,
|
||||
|
@ -22,6 +24,7 @@ pub trait DBSetup {
|
|||
}
|
||||
|
||||
/// No data
|
||||
#[derive(Debug)]
|
||||
pub struct NoData {}
|
||||
#[async_trait]
|
||||
impl DBSetup for NoData {
|
||||
|
@ -47,8 +50,7 @@ impl DBSetup for NoData {
|
|||
|
||||
let db = make_db();
|
||||
let data = "cpu,region=west user=23.2 100";
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, data).unwrap();
|
||||
write_lp(&db, data);
|
||||
// move data out of open chunk
|
||||
assert_eq!(db.rollover_partition(partition_key).await.unwrap().id(), 0);
|
||||
|
||||
|
@ -77,6 +79,7 @@ impl DBSetup for NoData {
|
|||
}
|
||||
|
||||
/// Two measurements data in a single mutable buffer chunk
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurements {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurements {
|
||||
|
@ -92,6 +95,7 @@ impl DBSetup for TwoMeasurements {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsUnsignedType {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsUnsignedType {
|
||||
|
@ -110,6 +114,7 @@ impl DBSetup for TwoMeasurementsUnsignedType {
|
|||
|
||||
/// Single measurement that has several different chunks with
|
||||
/// different (but compatible) schema
|
||||
#[derive(Debug)]
|
||||
pub struct MultiChunkSchemaMerge {}
|
||||
#[async_trait]
|
||||
impl DBSetup for MultiChunkSchemaMerge {
|
||||
|
@ -129,6 +134,7 @@ impl DBSetup for MultiChunkSchemaMerge {
|
|||
}
|
||||
|
||||
/// Two measurements data with many null values
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsManyNulls {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsManyNulls {
|
||||
|
@ -150,6 +156,7 @@ impl DBSetup for TwoMeasurementsManyNulls {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsManyFields {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsManyFields {
|
||||
|
@ -169,12 +176,12 @@ impl DBSetup for TwoMeasurementsManyFields {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsManyFieldsOneChunk {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsManyFieldsOneChunk {
|
||||
async fn make(&self) -> Vec<DBScenario> {
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4 50",
|
||||
|
@ -184,7 +191,7 @@ impl DBSetup for TwoMeasurementsManyFieldsOneChunk {
|
|||
"o2,state=CA temp=79.0 300",
|
||||
];
|
||||
|
||||
writer.write_lp_string(&db, &lp_lines.join("\n")).unwrap();
|
||||
write_lp(&db, &lp_lines.join("\n"));
vec![DBScenario {
scenario_name: "Data in open chunk of mutable buffer".into(),
db,
@@ -192,6 +199,7 @@ impl DBSetup for TwoMeasurementsManyFieldsOneChunk {
}
}

#[derive(Debug)]
pub struct OneMeasurementManyFields {}
#[async_trait]
impl DBSetup for OneMeasurementManyFields {
@@ -212,6 +220,7 @@ impl DBSetup for OneMeasurementManyFields {
}

/// This data (from end to end test)
#[derive(Debug)]
pub struct EndToEndTest {}
#[async_trait]
impl DBSetup for EndToEndTest {
@@ -231,9 +240,7 @@ impl DBSetup for EndToEndTest {
let lp_data = lp_lines.join("\n");

let db = make_db();
let mut writer = TestLPWriter::default();
let res = writer.write_lp_string(&db, &lp_data);
assert!(res.is_ok(), "Error: {}", res.unwrap_err());
write_lp(&db, &lp_data);

let scenario1 = DBScenario {
scenario_name: "Data in open chunk of mutable buffer".into(),
@@ -251,16 +258,14 @@ impl DBSetup for EndToEndTest {
/// Data in one only read buffer chunk
pub(crate) async fn make_one_chunk_scenarios(partition_key: &str, data: &str) -> Vec<DBScenario> {
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data).unwrap();
write_lp(&db, data);
let scenario1 = DBScenario {
scenario_name: "Data in open chunk of mutable buffer".into(),
db,
};

let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data).unwrap();
write_lp(&db, data);
db.rollover_partition(partition_key).await.unwrap();
let scenario2 = DBScenario {
scenario_name: "Data in closed chunk of mutable buffer".into(),
@@ -268,8 +273,7 @@ pub(crate) async fn make_one_chunk_scenarios(partition_key: &str, data: &str) ->
};

let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data).unwrap();
write_lp(&db, data);
db.rollover_partition(partition_key).await.unwrap();
db.load_chunk_to_read_buffer(partition_key, 0)
.await
@@ -294,9 +298,8 @@ pub async fn make_two_chunk_scenarios(
data2: &str,
) -> Vec<DBScenario> {
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data1);
write_lp(&db, data2);
let scenario1 = DBScenario {
scenario_name: "Data in single open chunk of mutable buffer".into(),
db,
@@ -304,10 +307,9 @@ pub async fn make_two_chunk_scenarios(

// spread across 2 mutable buffer chunks
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
write_lp(&db, data1);
db.rollover_partition(partition_key).await.unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data2);
let scenario2 = DBScenario {
scenario_name: "Data in one open chunk and one closed chunk of mutable buffer".into(),
db,
@@ -315,13 +317,12 @@ pub async fn make_two_chunk_scenarios(

// spread across 1 mutable buffer, 1 read buffer chunks
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
write_lp(&db, data1);
db.rollover_partition(partition_key).await.unwrap();
db.load_chunk_to_read_buffer(partition_key, 0)
.await
.unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data2);
let scenario3 = DBScenario {
scenario_name: "Data in open chunk of mutable buffer, and one chunk of read buffer".into(),
db,
@@ -329,10 +330,9 @@ pub async fn make_two_chunk_scenarios(

// in 2 read buffer chunks
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
write_lp(&db, data1);
db.rollover_partition(partition_key).await.unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data2);
db.rollover_partition(partition_key).await.unwrap();

db.load_chunk_to_read_buffer(partition_key, 0)

@@ -3,11 +3,11 @@
//! wired all the pieces together (as well as ensure any particularly
//! important SQL does not regress)

#![allow(unused_imports, dead_code, unused_macros)]

use super::scenarios::*;
use arrow_deps::{
arrow::record_batch::RecordBatch, assert_table_eq, datafusion::physical_plan::collect,
};
use query::{exec::Executor, frontend::sql::SQLQueryPlanner};
use arrow_deps::{arrow::record_batch::RecordBatch, assert_batches_sorted_eq};
use query::frontend::sql::SQLQueryPlanner;
use std::sync::Arc;

/// runs table_names(predicate) and compares it to the expected
@@ -25,16 +25,16 @@ macro_rules! run_sql_test_case {
println!("Running scenario '{}'", scenario_name);
println!("SQL: '{:#?}'", sql);
let planner = SQLQueryPlanner::default();
let executor = Executor::new();
let executor = db.executor();

let physical_plan = planner
.query(db, &sql, &executor)
.await
.query(db, &sql, executor.as_ref())
.expect("built plan successfully");

let results: Vec<RecordBatch> = collect(physical_plan).await.expect("Running plan");
let results: Vec<RecordBatch> =
executor.collect(physical_plan).await.expect("Running plan");

assert_table_eq!($EXPECTED_LINES, &results);
assert_batches_sorted_eq!($EXPECTED_LINES, &results);
}
};
}
@@ -278,7 +278,7 @@ async fn sql_select_from_system_tables() {
"+----+---------------+-------------------+-----------------+",
"| id | partition_key | storage | estimated_bytes |",
"+----+---------------+-------------------+-----------------+",
"| 0 | 1970-01-01T00 | OpenMutableBuffer | 493 |",
"| 0 | 1970-01-01T00 | OpenMutableBuffer | 453 |",
"+----+---------------+-------------------+-----------------+",
];
run_sql_test_case!(
@@ -291,13 +291,13 @@ async fn sql_select_from_system_tables() {
"+---------------+------------+-------------+-------+",
"| partition_key | table_name | column_name | count |",
"+---------------+------------+-------------+-------+",
"| 1970-01-01T00 | h2o | state | 3 |",
"| 1970-01-01T00 | h2o | city | 3 |",
"| 1970-01-01T00 | h2o | other_temp | 2 |",
"| 1970-01-01T00 | h2o | state | 3 |",
"| 1970-01-01T00 | h2o | temp | 1 |",
"| 1970-01-01T00 | h2o | time | 3 |",
"| 1970-01-01T00 | h2o | other_temp | 2 |",
"| 1970-01-01T00 | o2 | state | 2 |",
"| 1970-01-01T00 | o2 | city | 1 |",
"| 1970-01-01T00 | o2 | state | 2 |",
"| 1970-01-01T00 | o2 | temp | 2 |",
"| 1970-01-01T00 | o2 | time | 2 |",
"| 1970-01-01T00 | o2 | reading | 1 |",

@@ -1,5 +1,7 @@
//! Tests for the table_names implementation

#![allow(unused_imports, dead_code, unused_macros)]

use arrow_deps::arrow::datatypes::DataType;
use internal_types::{schema::builder::SchemaBuilder, selection::Selection};
use query::{Database, PartitionChunk};

@@ -4,7 +4,7 @@ use data_types::{
DatabaseName,
};
use object_store::{memory::InMemory, ObjectStore};
use query::Database;
use query::{exec::Executor, Database};

use crate::{db::Db, JobRegistry};
use std::{num::NonZeroU32, sync::Arc};
@@ -13,11 +13,25 @@ use std::{num::NonZeroU32, sync::Arc};
pub fn make_db() -> Db {
let server_id: NonZeroU32 = NonZeroU32::new(1).unwrap();
let object_store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let exec = Arc::new(Executor::new(1));

Db::new(
DatabaseRules::new(DatabaseName::new("placeholder").unwrap()),
server_id,
object_store,
exec,
None, // wal buffer
Arc::new(JobRegistry::new()),
)
}

pub fn make_database(server_id: NonZeroU32, object_store: Arc<ObjectStore>, db_name: &str) -> Db {
let exec = Arc::new(Executor::new(1));
Db::new(
DatabaseRules::new(DatabaseName::new(db_name.to_string()).unwrap()),
server_id,
object_store,
exec,
None, // wal buffer
Arc::new(JobRegistry::new()),
)

@@ -273,12 +273,13 @@ mod tests {
};

use super::*;
use crate::db::test_helpers::write_lp;
use data_types::database_rules::DatabaseRules;
use data_types::DatabaseName;
use futures::TryStreamExt;
use mutable_buffer::chunk::Chunk as ChunkWB;
use object_store::memory::InMemory;
use query::{test::TestLPWriter, Database};
use query::{exec::Executor, Database};
use tracker::MemRegistry;

#[tokio::test]
@@ -291,8 +292,7 @@ mem,host=A,region=west used=45 1
"#;

let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, &lp).unwrap();
write_lp(&db, &lp);

let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let (tx, rx) = tokio::sync::oneshot::channel();
@@ -354,9 +354,7 @@ mem,host=A,region=west used=45 1
let registry = MemRegistry::new();
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let chunk = Arc::new(DBChunk::MutableBuffer {
chunk: Arc::new(ChunkWB::new(11, &registry)),
partition_key: Arc::new("key".to_string()),
open: false,
chunk: ChunkWB::new(11, &registry).snapshot(),
});
let mut metadata_path = store.new_path();
metadata_path.push_dir("meta");
@@ -393,11 +391,13 @@ mem,host=A,region=west used=45 1
pub fn make_db() -> Db {
let object_store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let server_id = std::num::NonZeroU32::new(1).unwrap();
let exec = Arc::new(Executor::new(1));

Db::new(
DatabaseRules::new(DatabaseName::new("placeholder").unwrap()),
server_id,
object_store,
exec,
None, // wal buffer
Arc::new(JobRegistry::new()),
)

@@ -105,6 +105,10 @@ struct Create {
#[structopt(long)]
drop_non_persisted: bool,

/// Persists chunks to object storage.
#[structopt(long)]
persist: bool,

/// Do not allow writing new data to this database
#[structopt(long)]
immutable: bool,
@@ -173,6 +177,7 @@ pub async fn command(url: String, config: Config) -> Result<()> {
buffer_size_hard: command.buffer_size_hard as _,
sort_order: None, // Server-side default
drop_non_persisted: command.drop_non_persisted,
persist: command.persist,
immutable: command.immutable,
}),

@@ -106,6 +106,16 @@ pub struct Config {
#[structopt(long = "--data-dir", env = "INFLUXDB_IOX_DB_DIR")]
pub database_directory: Option<PathBuf>,

/// The number of threads to use for the query worker pool.
///
/// IOx uses `--num-threads` threads for handling API requests and
/// will use a dedicated thread pool with `--num-worker-threads`
/// for running queries.
///
/// If not specified, defaults to the number of cores on the system
#[structopt(long = "--num-worker-threads", env = "INFLUXDB_IOX_NUM_WORKER_THREADS")]
pub num_worker_threads: Option<usize>,

#[structopt(
long = "--object-store",
env = "INFLUXDB_IOX_OBJECT_STORE",

@@ -10,7 +10,10 @@ use object_store::{
};
use observability_deps::tracing::{self, error, info, warn, Instrument};
use panic_logging::SendPanicsToTracing;
use server::{ConnectionManagerImpl as ConnectionManager, Server as AppServer};
use server::{
ConnectionManagerImpl as ConnectionManager, Server as AppServer,
ServerConfig as AppServerConfig,
};
use snafu::{ResultExt, Snafu};
use std::{convert::TryFrom, fs, net::SocketAddr, path::PathBuf, sync::Arc};

@@ -124,9 +127,20 @@ pub async fn main(logging_level: LoggingLevel, config: Config) -> Result<()> {

let object_store = ObjectStore::try_from(&config)?;
let object_storage = Arc::new(object_store);
let server_config = AppServerConfig::new(object_storage);

let server_config = if let Some(n) = config.num_worker_threads {
info!(
num_worker_threads = n,
"Using specified number of worker threads"
);
server_config.with_num_worker_threads(n)
} else {
server_config
};

let connection_manager = ConnectionManager {};
let app_server = Arc::new(AppServer::new(connection_manager, object_storage));
let app_server = Arc::new(AppServer::new(connection_manager, server_config));

// if this ID isn't set the server won't be usable until this is set via an API
// call

@@ -12,7 +12,6 @@

// Influx crates
use super::super::commands::metrics;
use arrow_deps::datafusion::physical_plan::collect;
use data_types::{
http::WalMetadataQuery,
names::{org_and_bucket_to_database, OrgBucketMappingError},
@@ -32,7 +31,7 @@ use http::header::{CONTENT_ENCODING, CONTENT_TYPE};
use hyper::{Body, Method, Request, Response, StatusCode};
use observability_deps::{
opentelemetry::KeyValue,
tracing::{self, debug, error, info},
tracing::{self, debug, error},
};
use routerify::{prelude::*, Middleware, RequestInfo, Router, RouterError, RouterService};
use serde::Deserialize;
@@ -312,11 +311,11 @@ where
Router::builder()
.data(server)
.middleware(Middleware::pre(|req| async move {
info!(request = ?req, "Processing request");
debug!(request = ?req, "Processing request");
Ok(req)
}))
.middleware(Middleware::post(|res| async move {
info!(response = ?res, "Successfully processed request");
debug!(response = ?res, "Successfully processed request");
Ok(res)
})) // this endpoint is for API backward compatibility with InfluxDB 2.x
.post("/api/v2/write", write::<M>)
@@ -523,12 +522,12 @@ async fn query<M: ConnectionManager + Send + Sync + Debug + 'static>(

let physical_plan = planner
.query(db, &q, executor.as_ref())
.await
.context(PlanningSQLQuery { query: &q })?;

// TODO: stream read results out rather than rendering the
// whole thing in mem
let batches = collect(physical_plan)
let batches = executor
.collect(physical_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(Query { db_name })?;
@@ -733,27 +732,24 @@ mod tests {
use std::net::{IpAddr, Ipv4Addr, SocketAddr};

use arrow_deps::{arrow::record_batch::RecordBatch, assert_table_eq};
use query::exec::Executor;
use reqwest::{Client, Response};

use data_types::{
database_rules::{DatabaseRules, WalBufferConfig, WalBufferRollover},
wal::WriterSummary,
DatabaseName,
};
use data_types::{database_rules::DatabaseRules, DatabaseName};
use object_store::{memory::InMemory, ObjectStore};
use serde::de::DeserializeOwned;
use server::{db::Db, ConnectionManagerImpl};
use server::{db::Db, ConnectionManagerImpl, ServerConfig as AppServerConfig};
use std::num::NonZeroU32;
use test_helpers::assert_contains;

fn config() -> AppServerConfig {
AppServerConfig::new(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
.with_num_worker_threads(1)
}

#[tokio::test]
async fn test_health() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
let server_url = test_server(Arc::clone(&test_storage));
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();
let response = client.get(&format!("{}/health", server_url)).send().await;
@@ -764,20 +760,16 @@ mod tests {

#[tokio::test]
async fn test_write() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -798,7 +790,7 @@ mod tests {
check_response("write", response, StatusCode::NO_CONTENT, "").await;

// Check that the data got into the right bucket
let test_db = test_storage
let test_db = app_server
.db(&DatabaseName::new("MyOrg_MyBucket").unwrap())
.expect("Database exists");

@@ -816,20 +808,16 @@ mod tests {
#[tokio::test]
async fn test_write_metrics() {
metrics::init_metrics_for_test();
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MetricsOrg_MetricsBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -878,20 +866,16 @@ mod tests {
/// returns a client for communicating with the server, and the server
/// endpoint
async fn setup_test_data() -> (Client, String) {
let test_storage: Arc<AppServer<ConnectionManagerImpl>> = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -1015,20 +999,16 @@ mod tests {

#[tokio::test]
async fn test_gzip_write() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();
let lp_data = "h2o_temperature,location=santa_monica,state=CA surface_degrees=65.2,bottom_degrees=50.4 1568756160";
@@ -1049,7 +1029,7 @@ mod tests {
check_response("gzip_write", response, StatusCode::NO_CONTENT, "").await;

// Check that the data got into the right bucket
let test_db = test_storage
let test_db = app_server
.db(&DatabaseName::new("MyOrg_MyBucket").unwrap())
.expect("Database exists");

@@ -1067,20 +1047,16 @@ mod tests {

#[tokio::test]
async fn write_to_invalid_database() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -1103,115 +1079,6 @@ mod tests {
.await;
}

#[tokio::test]
async fn get_wal_meta() {
let server = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
let server_url = test_server(Arc::clone(&server));

let database_name = "foo_bar";
let rules = DatabaseRules {
name: DatabaseName::new(database_name).unwrap(),
partition_template: Default::default(),
wal_buffer_config: Some(WalBufferConfig {
buffer_size: 500,
segment_size: 10,
buffer_rollover: WalBufferRollover::ReturnError,
store_segments: true,
close_segment_after: None,
}),
lifecycle_rules: Default::default(),
shard_config: None,
};

server
.create_database(
rules,
server.require_id().unwrap(),
Arc::clone(&server.store),
)
.await
.unwrap();

let base_url = format!(
"{}/iox/api/v1/databases/{}/wal/meta",
server_url, database_name
);

let client = Client::new();

let r1: WalMetadataResponse = check_json_response(&client, &base_url, StatusCode::OK).await;

let lines: std::result::Result<Vec<_>, _> = influxdb_line_protocol::parse_lines(
"cpu,host=A,region=west usage_system=64i 1590488773254420000",
)
.collect();

server
.write_lines(database_name, &lines.unwrap())
.await
.unwrap();

let r2: WalMetadataResponse = check_json_response(&client, &base_url, StatusCode::OK).await;

let limit_1 = serde_urlencoded::to_string(&WalMetadataQuery {
limit: Some(1),
newer_than: None,
offset: None,
})
.unwrap();
let limit_url = format!("{}?{}", base_url, limit_1);

let r3: WalMetadataResponse =
check_json_response(&client, &limit_url, StatusCode::OK).await;

let limit_future = serde_urlencoded::to_string(&WalMetadataQuery {
limit: None,
offset: None,
newer_than: Some(chrono::Utc::now() + chrono::Duration::seconds(5)),
})
.unwrap();
let future_url = format!("{}?{}", base_url, limit_future);

let r4: WalMetadataResponse =
check_json_response(&client, &future_url, StatusCode::OK).await;

// No data written yet - expect no results
assert_eq!(r1.segments.len(), 1);
assert_eq!(r1.segments[0].size, 0);
assert_eq!(r1.segments[0].writers.len(), 0);

// The WAL segment size is less than the line size
// We therefore expect an open and a closed segment in that order
// With the closed segment containing the written data
// And the open segment containing no data
assert_eq!(r2.segments.len(), 2);
assert_eq!(r2.segments[0].size, 0);
assert!(r2.segments[0].created_at >= r2.segments[1].created_at);

assert!(r2.segments[1].persisted.is_none());
assert_eq!(r2.segments[1].size, 368);
assert_eq!(r2.segments[1].writers.len(), 1);
assert_eq!(
r2.segments[1].writers.values().next().unwrap(),
&WriterSummary {
start_sequence: 1,
end_sequence: 1,
missing_sequence: false
}
);

// Query limited to a single segment - expect only the most recent segment
assert_eq!(r3.segments.len(), 1);
assert_eq!(r3.segments[0], r2.segments[0]);

// Requesting segments from future - expect no results
assert_eq!(r4.segments.len(), 0);
}

fn get_content_type(response: &Result<Response, reqwest::Error>) -> String {
if let Ok(response) = response {
response
@@ -1250,6 +1117,7 @@ mod tests {
}
}

#[allow(dead_code)]
async fn check_json_response<T: DeserializeOwned + Eq + Debug>(
client: &Client,
url: &str,
@@ -1291,9 +1159,9 @@ mod tests {
/// Run the specified SQL query and return formatted results as a string
async fn run_query(db: Arc<Db>, query: &str) -> Vec<RecordBatch> {
let planner = SQLQueryPlanner::default();
let executor = Executor::new();
let physical_plan = planner.query(db, query, &executor).await.unwrap();
let executor = db.executor();
let physical_plan = planner.query(db, query, executor.as_ref()).unwrap();

collect(physical_plan).await.unwrap()
executor.collect(physical_plan).await.unwrap()
}
}

@@ -23,6 +23,11 @@ pub fn default_server_error_handler(error: server::Error) -> tonic::Status {
description: source.to_string(),
}
.into(),
Error::DecodingEntry { source } => FieldViolation {
field: "entry".into(),
description: source.to_string(),
}
.into(),
error => {
error!(?error, "Unexpected error");
InternalError {}.into()

@@ -1,3 +1,4 @@
//! Implements the native gRPC IOx query API using Arrow Flight
use std::{pin::Pin, sync::Arc};

use futures::Stream;
@@ -19,7 +20,6 @@ use arrow_deps::{
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
},
datafusion::physical_plan::collect,
};
use data_types::{DatabaseName, DatabaseNameError};
use query::{frontend::sql::SQLQueryPlanner, DatabaseStore};
@@ -157,15 +157,17 @@ where
let planner = SQLQueryPlanner::default();
let executor = self.server.executor();

let physical_plan = planner
.query(db, &read_info.sql_query, &executor)
.await
.context(PlanningSQLQuery {
query: &read_info.sql_query,
})?;
let physical_plan =
planner
.query(db, &read_info.sql_query, &executor)
.context(PlanningSQLQuery {
query: &read_info.sql_query,
})?;

// execute the query
let results = collect(Arc::clone(&physical_plan))
let results = executor
.new_context()
.collect(Arc::clone(&physical_plan))
.await
.map_err(|e| Box::new(e) as _)
.context(Query {

@@ -126,13 +126,8 @@ where
Some(id) => id,
None => return Err(NotFound::default().into()),
};
let object_store = Arc::clone(&self.server.store);

match self
.server
.create_database(rules, server_id, object_store)
.await
{
match self.server.create_database(rules, server_id).await {
Ok(_) => Ok(Response::new(CreateDatabaseResponse {})),
Err(Error::DatabaseAlreadyExists { db_name }) => {
return Err(AlreadyExists {

@@ -714,7 +714,6 @@ where

let plan = planner
.table_names(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingTables { db_name })?;
let executor = db_store.executor();
@@ -765,7 +764,6 @@ where

let tag_key_plan = planner
.tag_keys(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingColumns {
db_name: db_name.as_str(),
@@ -825,7 +823,6 @@ where

let tag_value_plan = planner
.tag_values(db.as_ref(), tag_name, predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingTagValues { db_name, tag_name })?;

@@ -882,7 +879,6 @@ where

let series_plan = planner
.read_filter(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(PlanningFilteringSeries { db_name })?;

@@ -968,14 +964,10 @@ where

let grouped_series_set_plan = match gby_agg {
GroupByAndAggregate::Columns { agg, group_columns } => {
planner
.read_group(db.as_ref(), predicate, agg, &group_columns)
.await
planner.read_group(db.as_ref(), predicate, agg, &group_columns)
}
GroupByAndAggregate::Window { agg, every, offset } => {
planner
.read_window_aggregate(db.as_ref(), predicate, agg, every, offset)
.await
planner.read_window_aggregate(db.as_ref(), predicate, agg, every, offset)
}
};
let grouped_series_set_plan = grouped_series_set_plan
@@ -1039,7 +1031,6 @@ where

let field_list_plan = planner
.field_columns(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingFields { db_name })?;

@@ -47,6 +47,23 @@ where
let lines_written = lp_line_count as u64;
Ok(Response::new(WriteResponse { lines_written }))
}

async fn write_entry(
&self,
request: tonic::Request<WriteEntryRequest>,
) -> Result<tonic::Response<WriteEntryResponse>, tonic::Status> {
let request = request.into_inner();
if request.entry.is_empty() {
return Err(FieldViolation::required("entry").into());
}

self.server
.write_entry(&request.db_name, request.entry)
.await
.map_err(default_server_error_handler)?;

Ok(Response::new(WriteEntryResponse {}))
}
}

/// Instantiate the write service

|
|||
partition_key: "cpu".into(),
|
||||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer as i32,
|
||||
estimated_bytes: 145,
|
||||
estimated_bytes: 137,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closing: None,
|
||||
|
@ -286,7 +286,7 @@ async fn test_chunk_get() {
|
|||
partition_key: "disk".into(),
|
||||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer as i32,
|
||||
estimated_bytes: 107,
|
||||
estimated_bytes: 103,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closing: None,
|
||||
|
@ -452,7 +452,7 @@ async fn test_list_partition_chunks() {
|
|||
partition_key: "cpu".into(),
|
||||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer as i32,
|
||||
estimated_bytes: 145,
|
||||
estimated_bytes: 137,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closing: None,
|
||||
|
|
|
@@ -191,7 +191,7 @@ async fn test_get_chunks() {
.and(predicate::str::contains(
r#""storage": "OpenMutableBuffer","#,
))
.and(predicate::str::contains(r#""estimated_bytes": 145"#))
.and(predicate::str::contains(r#""estimated_bytes": 137"#))
// Check for a non empty timestamp such as
// "time_of_first_write": "2021-03-30T17:11:10.723866Z",
.and(predicate::str::contains(r#""time_of_first_write": "20"#));

Binary file not shown.

@@ -7,7 +7,7 @@ description = "Utilities for tracking resource utilisation within IOx"

[dependencies]

futures = "0.3.7"
futures = "0.3"
hashbrown = "0.9.1"
observability_deps = { path = "../observability_deps" }
pin-project = "1.0"

@@ -7,7 +7,7 @@ edition = "2018"
[dependencies] # In alphabetical order
byteorder = "1.3.4"
crc32fast = "1.2.0"
futures = "0.3.4"
futures = "0.3"
itertools = "0.9.0"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
regex = "1.3.7"