Merge branch 'main' into er/feat/read_buffer/meta_delete

pull/24376/head
Edd Robinson 2021-11-01 10:26:10 +00:00 committed by GitHub
commit b1096d2a36
328 changed files with 18360 additions and 9944 deletions

View File

@ -1,9 +1,14 @@
[advisories]
ignore = [
# title: term is looking for a new maintainer
# why needed: used by `prettytable-rs` which is directly used by IOx but also by arrow
# upstream issue: https://github.com/phsym/prettytable-rs/issues/119
"RUSTSEC-2018-0015",
# title: Potential segfault in the time crate
# why needed: used by `chrono`
# upstream issue: https://github.com/chronotope/chrono/issues/553
"RUSTSEC-2020-0071",
# title: Potential segfault in `localtime_r` invocations
# why needed: bug in `chrono`
# upstream issue: https://github.com/chronotope/chrono/issues/499
"RUSTSEC-2020-0159",
# title: memmap is unmaintained
# why needed: used by `symbolic` which is used by `pprof`

View File

@ -127,9 +127,11 @@ jobs:
# excluding datafusion because it's effectively a dependency masquerading as a workspace crate.
command: cargo doc --document-private-items --no-deps --workspace --exclude datafusion
- cache_save
- run:
name: Compress Docs
command: tar -cvzf rustdoc.tar.gz target/doc/
- store_artifacts:
path: target/doc/
destination: rustdoc
path: rustdoc.tar.gz
test:
docker:
@ -282,11 +284,27 @@ jobs:
protobuf-lint:
docker:
- image: bufbuild/buf:0.40.0
environment:
# Value to look for to skip breaking changes check
SKIP_LABEL: "https://api.github.com/repos/influxdata/influxdb_iox/labels/incompatible%20protobuf"
steps:
- checkout
- run:
name: buf lint
command: buf lint
- run:
name: buf breaking changes
command: |
echo "If you want to make changes forbidden by this lint, please"
echo "coordinate with the conductor team, add the 'incompatible protobuf' label"
echo "to the PR, and rerun this test"
# Check if label is present using github API:
# Inspired by https://discuss.circleci.com/t/tag-label-filter/11158
if wget -O - https://api.github.com/repos/influxdata/influxdb_iox/issues/$(echo $CIRCLE_PULL_REQUEST | grep -oE "[^/pull]+$") | grep "$SKIP_LABEL" ; then echo "SKIPPING (FOUND LABEL)" && exit ; else echo "CHECKING (NO LABEL FOUND)"; fi
git fetch origin main
# compare against only changes in this branch (not against
# other stuff that may have been added to main since the last merge)
MERGE_BASE=$(git merge-base origin/main $CIRCLE_BRANCH) sh -c 'buf breaking --against ".git#ref=$MERGE_BASE"'
# Check that any generated files are up-to-date with the changes in this PR.
# named "check-flatbuffers" because that name is hardcoded into github checks
@ -336,15 +354,15 @@ jobs:
- checkout
- rust_components
- cache_restore
- run:
name: Print rustc target CPU options
command: cargo run --release --no-default-features --features="aws,gcp,azure,jemalloc_replacing_malloc" --bin print_cpu
- run:
name: Cargo release build with target arch set for CRoaring
command: cargo build --release --no-default-features --features="aws,gcp,azure,jemalloc_replacing_malloc"
- run: |
echo sha256sum after build is
sha256sum target/release/influxdb_iox
- run:
name: Print rustc target CPU options
command: target/release/influxdb_iox debug print-cpu
- setup_remote_docker:
# There seems to be a cache invalidation bug in docker
# or in the way that circleci implements layer caching.

View File

@ -5,8 +5,6 @@ updates:
schedule:
interval: "weekly"
ignore:
# https://github.com/tkaitchuck/aHash/issues/95
- dependency-name: "indexmap"
# Thrift version needs to match the version of the thrift-compiler used to generate code,
# and therefore this dependency requires a more manual upgrade
#

Cargo.lock (generated): 555 changed lines

File diff suppressed because it is too large

View File

@ -1,10 +1,57 @@
[package]
name = "influxdb_iox"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
default-run = "influxdb_iox"
readme = "README.md"
[workspace]
# In alphabetical order
members = [
"arrow_util",
"client_util",
"data_types",
"datafusion",
"datafusion_util",
"entry",
"generated_types",
"grpc-router",
"grpc-router-test-gen",
"influxdb2_client",
"influxdb_iox",
"influxdb_iox_client",
"influxdb_line_protocol",
"influxdb_storage_client",
"influxdb_tsm",
"internal_types",
"iox_data_generator",
"iox_object_store",
"lifecycle",
"logfmt",
"metric",
"metric_exporters",
"mutable_batch",
"mutable_batch_entry",
"mutable_batch_lp",
"mutable_batch_pb",
"mutable_buffer",
"object_store",
"observability_deps",
"packers",
"panic_logging",
"parquet_catalog",
"parquet_file",
"persistence_windows",
"predicate",
"query",
"query_tests",
"read_buffer",
"schema",
"server",
"server_benchmarks",
"test_helpers",
"time",
"trace",
"trace_exporters",
"trace_http",
"tracker",
"trogging",
"write_buffer",
]
default-members = ["influxdb_iox"]
exclude = [
"*.md",
@ -22,179 +69,12 @@ exclude = [
"massif.out.*",
"perf/",
"scripts/",
"test_fixtures/",
"tools/",
]
[[bin]]
name = "print_cpu"
path = "src/print_cpu.rs"
[workspace] # In alphabetical order
members = [
"arrow_util",
"data_types",
"client_util",
"datafusion",
"datafusion_util",
"entry",
"generated_types",
"influxdb2_client",
"influxdb_iox_client",
"influxdb_line_protocol",
"influxdb_storage_client",
"influxdb_tsm",
"internal_types",
"iox_data_generator",
"iox_object_store",
"lifecycle",
"logfmt",
"metric",
"metric_exporters",
"mutable_buffer",
"object_store",
"observability_deps",
"packers",
"panic_logging",
"persistence_windows",
"predicate",
"query",
"query_tests",
"read_buffer",
"server",
"server_benchmarks",
"test_helpers",
"time",
"trace",
"trace_exporters",
"trace_http",
"tracker",
"trogging",
"schema",
"grpc-router",
"grpc-router/grpc-router-test-gen",
"write_buffer",
]
[profile.release]
debug = true
[profile.bench]
debug = true
[dependencies]
# Workspace dependencies, in alphabetical order
datafusion = { path = "datafusion" }
data_types = { path = "data_types" }
entry = { path = "entry" }
generated_types = { path = "generated_types" }
influxdb_iox_client = { path = "influxdb_iox_client", features = ["format"] }
influxdb_line_protocol = { path = "influxdb_line_protocol" }
internal_types = { path = "internal_types" }
iox_object_store = { path = "iox_object_store" }
logfmt = { path = "logfmt" }
metric = { path = "metric" }
metric_exporters = { path = "metric_exporters" }
mutable_buffer = { path = "mutable_buffer" }
num_cpus = "1.13.0"
object_store = { path = "object_store" }
observability_deps = { path = "observability_deps" }
panic_logging = { path = "panic_logging" }
parquet_file = { path = "parquet_file" }
predicate = { path = "predicate" }
query = { path = "query" }
read_buffer = { path = "read_buffer" }
server = { path = "server" }
trace = { path = "trace" }
trace_exporters = { path = "trace_exporters" }
trace_http = { path = "trace_http" }
tracker = { path = "tracker" }
trogging = { path = "trogging", default-features = false, features = ["structopt"] }
time = { path = "time" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "5.5", features = ["prettyprint"] }
arrow-flight = "5.5"
backtrace = "0.3"
byteorder = "1.3.4"
bytes = "1.0"
chrono = "0.4"
clap = "2.33.1"
csv = "1.1"
dirs = "4.0.0"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3"
hashbrown = "0.11"
http = "0.2.0"
humantime = "2.1.0"
hyper = "0.14"
libc = { version = "0.2" }
log = "0.4"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
parking_lot = "0.11.2"
itertools = "0.10.1"
parquet = "5.5"
# used by arrow/datafusion anyway
prettytable-rs = "0.8"
pprof = { version = "^0.5", default-features = false, features = ["flamegraph", "protobuf"], optional = true }
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.67"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.23"
thiserror = "1.0.30"
tikv-jemalloc-ctl = { version = "0.4.0" }
tokio = { version = "1.11", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tokio-util = { version = "0.6.3" }
tonic = "0.5.0"
tonic-health = "0.4.0"
tonic-reflection = "0.2.0"
tower = "0.4"
uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
[dev-dependencies]
# Workspace dependencies, in alphabetical order
arrow_util = { path = "arrow_util" }
entry = { path = "entry" }
influxdb2_client = { path = "influxdb2_client" }
influxdb_storage_client = { path = "influxdb_storage_client" }
influxdb_iox_client = { path = "influxdb_iox_client", features = ["flight"] }
test_helpers = { path = "test_helpers" }
synchronized-writer = "1"
parking_lot = "0.11.2"
write_buffer = { path = "write_buffer" }
# Crates.io dependencies, in alphabetical order
assert_cmd = "2.0.2"
flate2 = "1.0"
hex = "0.4.2"
predicates = "2.0.3"
rand = "0.8.3"
rdkafka = "0.26.0"
reqwest = "0.11"
tempfile = "3.1.0"
[features]
default = ["jemalloc_replacing_malloc"]
azure = ["object_store/azure"] # Optional Azure Object store support
gcp = ["object_store/gcp"] # Optional GCP object store support
aws = ["object_store/aws"] # Optional AWS / S3 object store support
# pprof is an optional feature for pprof support
# heappy is an optional feature; not on by default as it adds
# runtime overhead to all allocations (calls to malloc).
# Cargo cannot currently implement mutually exclusive features so let's force every build
# to pick either heappy or jemalloc_replacing_malloc feature at least until we figure out something better.
jemalloc_replacing_malloc = ["tikv-jemalloc-sys"]

View File

@ -33,7 +33,7 @@ We're also hosting monthly tech talks and community office hours on the project
1. [Install dependencies](#install-dependencies)
1. [Clone the repository](#clone-the-repository)
1. [Configure the server](#configure-the-server)
1. [Compile and start the server](#compile-and-start-the-server)
1. [Compile and start the server](#compile-and-start-the-server)
(You can also [build a Docker image](#build-a-docker-image-optional) to run InfluxDB IOx.)
1. [Write and read data](#write-and-read-data)
1. [Use the CLI](#use-the-cli)
@ -47,6 +47,7 @@ To compile and run InfluxDB IOx from source, you'll need the following:
- [Rust](#rust)
- [Clang](#clang)
- [lld (on Linux)](#lld)
#### Rust
@ -72,6 +73,19 @@ If `clang` is not already present, it can typically be installed with the system
[`croaring`]: https://github.com/saulius/croaring-rs
#### lld
If you are building InfluxDB IOx on Linux then you will need to ensure you have installed the `lld` LLVM linker.
Check if you have already installed it by running `lld -version`.
```shell
lld -version
lld is a generic driver.
Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
```
If `lld` is not already present, it can typically be installed with the system package manager.
### Clone the repository
Clone this repository using `git`.
@ -120,7 +134,7 @@ This will create a binary at `target/debug/influxdb_iox`.
To start the InfluxDB IOx server, run:
```shell
./target/debug/influxdb_iox run
./target/debug/influxdb_iox run database
```
By default the server will start an HTTP server on port `8080` and a gRPC server on port `8082`.
@ -128,20 +142,20 @@ By default the server will start an HTTP server on port `8080` and a gRPC server
You can also compile and run with one command:
```shell
cargo run -- run
cargo run -- run database
```
To compile for performance testing, build in release mode:
```shell
cargo build --release
./target/release/influxdb_iox run
./target/release/influxdb_iox run database
```
You can also run in release mode with one step:
```shell
cargo run --release -- run
cargo run --release -- run database
```
To run all available tests in debug mode, you may want to set min stack size to avoid the current known stack overflow issue:

View File

@ -2,19 +2,19 @@
name = "arrow_util"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
edition = "2021"
description = "Apache Arrow utilities"
[dependencies]
arrow = { version = "5.5", features = ["prettyprint"] }
arrow = { version = "6.0", features = ["prettyprint"] }
ahash = "0.7.5"
num-traits = "0.2"
snafu = "0.6"
hashbrown = "0.11"
# used by arrow anyway (needed for printing workaround)
chrono = "0.4"
prettytable-rs = "0.8"
comfy-table = { version = "4.0", default-features = false }
[dev-dependencies]
rand = "0.8.3"

View File

@ -1,4 +1,5 @@
use arrow::buffer::Buffer;
use std::ops::Range;
/// An arrow-compatible mutable bitset implementation
///
@ -28,6 +29,12 @@ impl BitSet {
bitset
}
/// Reserve space for `count` further bits
pub fn reserve(&mut self, count: usize) {
let new_buf_len = (self.len + count + 7) >> 3;
self.buffer.reserve(new_buf_len);
}
/// Appends `count` unset bits
pub fn append_unset(&mut self, count: usize) {
self.len += count;
@ -35,8 +42,75 @@ impl BitSet {
self.buffer.resize(new_buf_len, 0);
}
/// Appends `count` set bits
pub fn append_set(&mut self, count: usize) {
let new_len = self.len + count;
let new_buf_len = (new_len + 7) >> 3;
let skew = self.len & 7;
if skew != 0 {
*self.buffer.last_mut().unwrap() |= 0xFF << skew;
}
self.buffer.resize(new_buf_len, 0xFF);
let rem = new_len & 7;
if rem != 0 {
*self.buffer.last_mut().unwrap() &= (1 << rem) - 1;
}
self.len = new_len;
}
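The skew arithmetic above is terse, so here is a hedged, standalone sketch of the same byte math on a plain `Vec<u8>` (LSB-first packing, hypothetical helper, not the IOx `BitSet` itself), with a small worked example:

```rust
// Standalone sketch of the append_set byte math (illustrative only).
// Bits are packed LSB-first: `len` bits occupy (len + 7) >> 3 bytes and
// `len & 7` ("skew") is how many bits of the final byte are already used.
fn append_set(buffer: &mut Vec<u8>, len: &mut usize, count: usize) {
    let new_len = *len + count;
    let new_buf_len = (new_len + 7) >> 3;

    // Fill the unused tail of the current partial byte with ones.
    let skew = *len & 7;
    if skew != 0 {
        *buffer.last_mut().unwrap() |= 0xFF << skew;
    }

    // Wholly new bytes start out all-ones...
    buffer.resize(new_buf_len, 0xFF);

    // ...and any bits past the new logical length are cleared again.
    let rem = new_len & 7;
    if rem != 0 {
        *buffer.last_mut().unwrap() &= (1 << rem) - 1;
    }
    *len = new_len;
}

fn main() {
    let (mut buf, mut len) = (vec![0b0000_0101u8], 3); // bits: 1, 0, 1
    append_set(&mut buf, &mut len, 6); // append six set bits
    assert_eq!(len, 9);
    assert_eq!(buf, vec![0b1111_1101, 0b0000_0001]);
}
```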
/// Truncates the bitset to the provided length
pub fn truncate(&mut self, len: usize) {
let new_buf_len = (len + 7) >> 3;
self.buffer.truncate(new_buf_len);
let overrun = len & 7;
if overrun > 0 {
*self.buffer.last_mut().unwrap() &= (1 << overrun) - 1;
}
self.len = len;
}
/// Extends this [`BitSet`] by the contents of `other`
pub fn extend_from(&mut self, other: &BitSet) {
self.append_bits(other.len, &other.buffer)
}
/// Extends this [`BitSet`] by `range` elements in `other`
pub fn extend_from_range(&mut self, other: &BitSet, range: Range<usize>) {
let count = range.end - range.start;
if count == 0 {
return;
}
let start_byte = range.start >> 3;
let end_byte = (range.end + 7) >> 3;
let skew = range.start & 7;
// `append_bits` requires the provided `to_set` to be byte aligned, therefore
// if the range being copied is not byte aligned we must first append
// the leading bits to reach a byte boundary
if skew == 0 {
// No skew can simply append bytes directly
self.append_bits(count, &other.buffer[start_byte..end_byte])
} else if start_byte + 1 == end_byte {
// Append bits from single byte
self.append_bits(count, &[other.buffer[start_byte] >> skew])
} else {
// Append trailing bits from first byte to reach byte boundary, then append
// bits from the remaining byte-aligned mask
let offset = 8 - skew;
self.append_bits(offset, &[other.buffer[start_byte] >> skew]);
self.append_bits(count - offset, &other.buffer[(start_byte + 1)..end_byte]);
}
}
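As a cross-check on the three branches above, the following standalone, unoptimised sketch (hypothetical names, not the IOx API) copies a bit range out of a packed slice bit by bit and shows the result the byte-at-a-time code has to reproduce:

```rust
// Bit-for-bit reference for copying a range out of an LSB-first packed
// byte slice (illustrative only; the optimised code above shifts whole
// bytes once the leading "skew" bits have been appended).
use std::ops::Range;

fn copy_bit_range(src: &[u8], range: Range<usize>) -> Vec<u8> {
    let count = range.end - range.start;
    let mut out = vec![0u8; (count + 7) >> 3];
    for (i, bit) in range.enumerate() {
        if (src[bit >> 3] >> (bit & 7)) & 1 == 1 {
            out[i >> 3] |= 1 << (i & 7);
        }
    }
    out
}

fn main() {
    // Source bits (LSB first per byte): 1,1,0,0,1,0,1,0  0,1,0,1,0,0,1,1
    let src = [0b0101_0011u8, 0b1100_1010];
    // Bits 3..11 are not byte aligned, so the skewed path would apply.
    assert_eq!(copy_bit_range(&src, 3..11), vec![0b0100_1010]);
}
```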
/// Appends `count` boolean values from the slice of packed bits
pub fn append_bits(&mut self, count: usize, to_set: &[u8]) {
assert_eq!((count + 7) >> 3, to_set.len());
let new_len = self.len + count;
let new_buf_len = (new_len + 7) >> 3;
self.buffer.reserve(new_buf_len - self.buffer.len());
@ -113,12 +187,30 @@ impl BitSet {
pub fn byte_len(&self) -> usize {
self.buffer.len()
}
/// Return the raw packed bytes used by this bitset
pub fn bytes(&self) -> &[u8] {
&self.buffer
}
}
/// Returns an iterator over set bit positions in increasing order
pub fn iter_set_positions(bytes: &[u8]) -> impl Iterator<Item = usize> + '_ {
let mut byte_idx = 0;
let mut in_progress = bytes.get(0).cloned().unwrap_or(0);
iter_set_positions_with_offset(bytes, 0)
}
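For the iteration semantics themselves (both this function and the offset-aware variant defined next), a compact, unoptimised reference looks like the sketch below; it is standalone and illustrative, not the crate's implementation:

```rust
// Unoptimised reference: positions of set bits at or after `offset`,
// assuming LSB-first packing within each byte.
fn set_positions_with_offset(bytes: &[u8], offset: usize) -> Vec<usize> {
    (offset..bytes.len() * 8)
        .filter(|&i| (bytes[i >> 3] >> (i & 7)) & 1 == 1)
        .collect()
}

fn main() {
    let bytes = [0b0000_0101u8, 0b0000_0010];
    assert_eq!(set_positions_with_offset(&bytes, 0), vec![0, 2, 9]);
    assert_eq!(set_positions_with_offset(&bytes, 1), vec![2, 9]);
}
```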
/// Returns an iterator over set bit positions in increasing order starting
/// at the provided bit offset
pub fn iter_set_positions_with_offset(
bytes: &[u8],
offset: usize,
) -> impl Iterator<Item = usize> + '_ {
let mut byte_idx = offset >> 3;
let mut in_progress = bytes.get(byte_idx).cloned().unwrap_or(0);
let skew = offset & 7;
in_progress &= 0xFF << skew;
std::iter::from_fn(move || loop {
if in_progress != 0 {
let bit_pos = in_progress.trailing_zeros();
@ -134,7 +226,8 @@ pub fn iter_set_positions(bytes: &[u8]) -> impl Iterator<Item = usize> + '_ {
mod tests {
use super::*;
use arrow::array::BooleanBufferBuilder;
use rand::RngCore;
use rand::prelude::*;
use rand::rngs::OsRng;
/// Computes a compacted representation of a given bool array
fn compact_bools(bools: &[bool]) -> Vec<u8> {
@ -201,11 +294,17 @@ mod tests {
assert!(mask.get(19));
}
fn make_rng() -> StdRng {
let seed = OsRng::default().next_u64();
println!("Seed: {}", seed);
StdRng::seed_from_u64(seed)
}
#[test]
fn test_bit_mask_all_set() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = rand::thread_rng();
let mut rng = make_rng();
for _ in 0..100 {
let mask_length = (rng.next_u32() % 50) as usize;
@ -228,7 +327,7 @@ mod tests {
fn test_bit_mask_fuzz() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = rand::thread_rng();
let mut rng = make_rng();
for _ in 0..100 {
let mask_length = (rng.next_u32() % 50) as usize;
@ -247,11 +346,105 @@ mod tests {
let expected_indexes: Vec<_> = iter_set_bools(&all_bools).collect();
let actual_indexes: Vec<_> = iter_set_positions(&mask.buffer).collect();
assert_eq!(expected_indexes, actual_indexes);
if !all_bools.is_empty() {
for _ in 0..10 {
let offset = rng.next_u32() as usize % all_bools.len();
let expected_indexes: Vec<_> = iter_set_bools(&all_bools[offset..])
.map(|x| x + offset)
.collect();
let actual_indexes: Vec<_> =
iter_set_positions_with_offset(&mask.buffer, offset).collect();
assert_eq!(expected_indexes, actual_indexes);
}
}
for index in actual_indexes {
assert!(mask.get(index));
}
}
#[test]
fn test_append_fuzz() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = make_rng();
for _ in 0..100 {
let len = (rng.next_u32() % 32) as usize;
let set = rng.next_u32() & 1 == 0;
match set {
true => mask.append_set(len),
false => mask.append_unset(len),
}
all_bools.extend(std::iter::repeat(set).take(len));
let collected = compact_bools(&all_bools);
assert_eq!(mask.buffer, collected);
}
}
#[test]
fn test_truncate_fuzz() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = make_rng();
for _ in 0..100 {
let mask_length = (rng.next_u32() % 32) as usize;
let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
.take(mask_length)
.collect();
let collected = compact_bools(&bools);
mask.append_bits(mask_length, &collected);
all_bools.extend_from_slice(&bools);
if !all_bools.is_empty() {
let truncate = rng.next_u32() as usize % all_bools.len();
mask.truncate(truncate);
all_bools.truncate(truncate);
}
let collected = compact_bools(&all_bools);
assert_eq!(mask.buffer, collected);
}
}
#[test]
fn test_extend_range_fuzz() {
let mut rng = make_rng();
let src_len = 32;
let src_bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
.take(src_len)
.collect();
let mut src_mask = BitSet::new();
src_mask.append_bits(src_len, &compact_bools(&src_bools));
let mut dst_bools = Vec::new();
let mut dst_mask = BitSet::new();
for _ in 0..100 {
let a = rng.next_u32() as usize % src_len;
let b = rng.next_u32() as usize % src_len;
let start = a.min(b);
let end = a.max(b);
dst_bools.extend_from_slice(&src_bools[start..end]);
dst_mask.extend_from_range(&src_mask, start..end);
let collected = compact_bools(&dst_bools);
assert_eq!(dst_mask.buffer, collected);
}
}
#[test]
fn test_arrow_compat() {
let bools = &[

View File

@ -112,6 +112,19 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> StringDictionary<K> {
pub fn into_inner(self) -> PackedStringArray<K> {
self.storage
}
/// Truncates this dictionary removing all keys larger than `id`
pub fn truncate(&mut self, id: K) {
let id = id.as_();
self.dedup.retain(|k, _| k.as_() <= id);
self.storage.truncate(id + 1)
}
/// Clears this dictionary removing all elements
pub fn clear(&mut self) {
self.storage.clear();
self.dedup.clear()
}
}
fn hash_str(hasher: &ahash::RandomState, value: &str) -> u64 {
@ -142,7 +155,10 @@ impl StringDictionary<i32> {
array_builder = array_builder.null_bit_buffer(nulls);
}
DictionaryArray::<Int32Type>::from(array_builder.build())
// TODO consider skipping the validation checks by using
// `build_unchecked()`
let array_data = array_builder.build().expect("Valid array data");
DictionaryArray::<Int32Type>::from(array_data)
}
}
@ -255,4 +271,30 @@ mod test {
let err = TryInto::<StringDictionary<_>>::try_into(data).expect_err("expected failure");
assert!(matches!(err, Error::DuplicateKeyFound { key } if &key == "cupcakes"))
}
#[test]
fn test_truncate() {
let mut dictionary = StringDictionary::<i32>::new();
dictionary.lookup_value_or_insert("cupcake");
dictionary.lookup_value_or_insert("cupcake");
dictionary.lookup_value_or_insert("bingo");
let bingo = dictionary.lookup_value_or_insert("bingo");
let bongo = dictionary.lookup_value_or_insert("bongo");
dictionary.lookup_value_or_insert("bingo");
dictionary.lookup_value_or_insert("cupcake");
dictionary.truncate(bingo);
assert_eq!(dictionary.values().len(), 2);
assert_eq!(dictionary.dedup.len(), 2);
assert_eq!(dictionary.lookup_value("cupcake"), Some(0));
assert_eq!(dictionary.lookup_value("bingo"), Some(1));
assert!(dictionary.lookup_value("bongo").is_none());
assert!(dictionary.lookup_id(bongo).is_none());
dictionary.lookup_value_or_insert("bongo");
assert_eq!(dictionary.lookup_value("bongo"), Some(2));
}
}

View File

@ -3,8 +3,7 @@ use arrow::datatypes::{DataType, TimeUnit};
use arrow::error::Result;
use arrow::record_batch::RecordBatch;
use prettytable::format;
use prettytable::{Cell, Row, Table};
use comfy_table::{Cell, Table};
use chrono::prelude::*;
@ -54,7 +53,7 @@ fn array_value_to_string(column: &ArrayRef, row: usize) -> Result<String> {
/// NB: COPIED FROM ARROW
fn create_table(results: &[RecordBatch]) -> Result<Table> {
let mut table = Table::new();
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
table.load_preset("||--+-++| ++++++");
if results.is_empty() {
return Ok(table);
@ -66,7 +65,7 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
for field in schema.fields() {
header.push(Cell::new(field.name()));
}
table.set_titles(Row::new(header));
table.set_header(header);
for batch in results {
for row in 0..batch.num_rows() {
@ -75,7 +74,7 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
let column = batch.column(col);
cells.push(Cell::new(&array_value_to_string(column, row)?));
}
table.add_row(Row::new(cells));
table.add_row(cells);
}
}
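This hunk swaps `prettytable-rs` for `comfy-table`; a minimal standalone sketch of the comfy-table 4.x calls relied on here (`load_preset`, `set_header`, `add_row`), using the preset string shown above and made-up sample data:

```rust
// Minimal comfy-table usage mirroring create_table above (sample data only).
use comfy_table::{Cell, Table};

fn main() {
    let mut table = Table::new();
    // Same character preset string as used in create_table.
    table.load_preset("||--+-++| ++++++");
    table.set_header(vec![Cell::new("time"), Cell::new("value")]);
    table.add_row(vec![Cell::new("2021-11-01T00:00:00Z"), Cell::new("1.5")]);
    println!("{}", table);
}
```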

View File

@ -297,7 +297,8 @@ mod tests {
.add_buffer(keys.data().buffers()[0].clone())
.null_bit_buffer(keys.data().null_buffer().unwrap().clone())
.add_child_data(values.data().clone())
.build();
.build()
.unwrap();
DictionaryArray::from(data)
}

View File

@ -3,6 +3,7 @@ use arrow::array::StringArray;
use arrow::buffer::Buffer;
use num_traits::{AsPrimitive, FromPrimitive, Zero};
use std::fmt::Debug;
use std::ops::Range;
/// A packed string array that stores start and end indexes into
/// a contiguous string slice.
@ -62,6 +63,41 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> PackedStringArray<K> {
id
}
/// Extends this [`PackedStringArray`] by the contents of `other`
pub fn extend_from(&mut self, other: &PackedStringArray<K>) {
let offset = self.storage.len();
self.storage.push_str(other.storage.as_str());
// Copy offsets, skipping the first element, as that string's start offset is already
// provided by the final offset of the current offsets array
self.offsets.extend(
other
.offsets
.iter()
.skip(1)
.map(|x| K::from_usize(x.as_() + offset).expect("failed to fit into offset type")),
)
}
/// Extends this [`PackedStringArray`] by `range` elements from `other`
pub fn extend_from_range(&mut self, other: &PackedStringArray<K>, range: Range<usize>) {
let first_offset: usize = other.offsets[range.start].as_();
let end_offset: usize = other.offsets[range.end].as_();
let insert_offset = self.storage.len();
self.storage
.push_str(&other.storage[first_offset..end_offset]);
self.offsets.extend(
(&other.offsets[(range.start + 1)..(range.end + 1)])
.iter()
.map(|x| {
K::from_usize(x.as_() - first_offset + insert_offset)
.expect("failed to fit into offset type")
}),
)
}
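The offset rebasing above is easiest to follow with concrete numbers; the sketch below reproduces it on plain `String`/`Vec<usize>` values (hypothetical names, not the IOx type):

```rust
// Standalone sketch of the PackedStringArray layout and offset rebasing
// (illustrative only): strings live back-to-back in `storage`, and
// offsets[i]..offsets[i + 1] delimits element i.
fn main() {
    let storage = String::from("helloworldcupcake");
    let offsets = vec![0usize, 5, 10, 17]; // "hello", "world", "cupcake"

    // Copy elements 1..3 ("world", "cupcake") into an empty destination.
    let (start, end) = (1, 3);
    let first_offset = offsets[start];
    let end_offset = offsets[end];

    let mut dst_storage = String::new();
    let mut dst_offsets = vec![0usize];

    let insert_offset = dst_storage.len();
    dst_storage.push_str(&storage[first_offset..end_offset]);
    // Rebase the copied offsets so they point into the new storage.
    dst_offsets.extend(
        offsets[(start + 1)..=end]
            .iter()
            .map(|x| x - first_offset + insert_offset),
    );

    assert_eq!(dst_storage, "worldcupcake");
    assert_eq!(dst_offsets, vec![0, 5, 12]);
}
```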
/// Get the value at a given index
pub fn get(&self, index: usize) -> Option<&str> {
let start_offset = self.offsets.get(index)?.as_();
@ -76,6 +112,19 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> PackedStringArray<K> {
self.offsets.resize(self.offsets.len() + len, offset);
}
/// Truncates the array to the given length
pub fn truncate(&mut self, len: usize) {
self.offsets.truncate(len + 1);
let last_idx = self.offsets.last().expect("offsets empty");
self.storage.truncate(last_idx.as_());
}
/// Removes all elements from this array
pub fn clear(&mut self) {
self.offsets.truncate(1);
self.storage.clear();
}
pub fn iter(&self) -> PackedStringIterator<'_, K> {
PackedStringIterator {
array: self,
@ -112,8 +161,10 @@ impl PackedStringArray<i32> {
.len(len)
.add_buffer(offsets)
.add_buffer(values)
.build();
.build()
// TODO consider skipping the validation checks by using
// `new_unchecked`
.expect("Valid array data");
StringArray::from(data)
}
}
@ -170,4 +221,91 @@ mod tests {
assert_eq!(array.get(9).unwrap(), "");
assert_eq!(array.get(3).unwrap(), "");
}
#[test]
fn test_truncate() {
let mut array = PackedStringArray::<i32>::new();
array.append("hello");
array.append("world");
array.append("cupcake");
array.truncate(1);
assert_eq!(array.len(), 1);
assert_eq!(array.get(0).unwrap(), "hello");
array.append("world");
assert_eq!(array.len(), 2);
assert_eq!(array.get(0).unwrap(), "hello");
assert_eq!(array.get(1).unwrap(), "world");
}
#[test]
fn test_extend_from() {
let mut a = PackedStringArray::<i32>::new();
a.append("hello");
a.append("world");
a.append("cupcake");
a.append("");
let mut b = PackedStringArray::<i32>::new();
b.append("foo");
b.append("bar");
a.extend_from(&b);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(
a_content,
vec!["hello", "world", "cupcake", "", "foo", "bar"]
);
}
#[test]
fn test_extend_from_range() {
let mut a = PackedStringArray::<i32>::new();
a.append("hello");
a.append("world");
a.append("cupcake");
a.append("");
let mut b = PackedStringArray::<i32>::new();
b.append("foo");
b.append("bar");
b.append("");
b.append("fiz");
a.extend_from_range(&b, 1..3);
assert_eq!(a.len(), 6);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(a_content, vec!["hello", "world", "cupcake", "", "bar", ""]);
// Should be a no-op
a.extend_from_range(&b, 0..0);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(a_content, vec!["hello", "world", "cupcake", "", "bar", ""]);
a.extend_from_range(&b, 0..1);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(
a_content,
vec!["hello", "world", "cupcake", "", "bar", "", "foo"]
);
a.extend_from_range(&b, 1..4);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(
a_content,
vec!["hello", "world", "cupcake", "", "bar", "", "foo", "bar", "", "fiz"]
);
}
}

View File

@ -3,7 +3,7 @@ name = "client_util"
version = "0.1.0"
authors = ["Raphael Taylor-Davies <r.taylordavies@googlemail.com>"]
description = "Shared code for IOx clients"
edition = "2018"
edition = "2021"
[dependencies]
http = "0.2.3"
@ -13,4 +13,4 @@ tonic = { version = "0.5.0" }
tower = "0.4"
[dev-dependencies]
tokio = { version = "1.11", features = ["macros"] }
tokio = { version = "1.11", features = ["macros"] }

View File

@ -3,7 +3,7 @@ name = "data_types"
version = "0.1.0"
authors = ["pauldix <paul@pauldix.net>"]
description = "InfluxDB IOx data_types, shared between IOx instances and IOx clients"
edition = "2018"
edition = "2021"
readme = "README.md"
[dependencies] # In alphabetical order

View File

@ -1,34 +1,11 @@
use crate::DatabaseName;
use chrono::{DateTime, Utc};
use std::{fmt, str::FromStr};
use uuid::Uuid;
/// Detailed metadata about a database.
/// Detailed metadata about an active database.
#[derive(Debug, Clone, PartialEq)]
pub struct DetailedDatabase {
pub struct ActiveDatabase {
/// The name of the database
pub name: DatabaseName<'static>,
/// The generation ID of the database in object storage
pub generation_id: GenerationId,
/// The UTC datetime at which this database was deleted, if applicable
pub deleted_at: Option<DateTime<Utc>>,
}
/// Identifier for a generation of a particular database
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
pub struct GenerationId {
pub inner: usize,
}
impl FromStr for GenerationId {
type Err = std::num::ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self { inner: s.parse()? })
}
}
impl fmt::Display for GenerationId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.inner)
}
/// The UUID of the database
pub uuid: Uuid,
}

View File

@ -421,6 +421,19 @@ impl<T> Default for StatValues<T> {
}
}
impl<T> StatValues<T> {
/// Create new statistics with no values
pub fn new_empty() -> Self {
Self {
min: None,
max: None,
total_count: 0,
null_count: 0,
distinct_count: None,
}
}
}
impl<T> StatValues<T>
where
T: Clone + PartialEq + PartialOrd + IsNan,
@ -440,14 +453,14 @@ where
Self::new_with_distinct(min, max, total_count, null_count, distinct_count)
}
/// Create new statitics with the specified count and null count
/// Create new statistics with the specified count and null count
pub fn new(min: Option<T>, max: Option<T>, total_count: u64, null_count: u64) -> Self {
let distinct_count = None;
Self::new_with_distinct(min, max, total_count, null_count, distinct_count)
}
/// Create new statitics with the specified count and null count and distinct values
fn new_with_distinct(
/// Create new statistics with the specified count and null count and distinct values
pub fn new_with_distinct(
min: Option<T>,
max: Option<T>,
total_count: u64,
@ -481,12 +494,21 @@ where
}
/// Create statistics for a column that only has nulls up to now
pub fn new_all_null(total_count: u64) -> Self {
pub fn new_all_null(total_count: u64, distinct_count: Option<u64>) -> Self {
let min = None;
let max = None;
let null_count = total_count;
let distinct_count = NonZeroU64::new(1);
Self::new_with_distinct(min, max, total_count, null_count, distinct_count)
if let Some(count) = distinct_count {
assert!(count > 0);
}
Self::new_with_distinct(
min,
max,
total_count,
null_count,
distinct_count.map(|c| NonZeroU64::new(c).unwrap()),
)
}
pub fn update_from(&mut self, other: &Self) {
@ -647,6 +669,8 @@ impl IsNan for f64 {
#[cfg(test)]
mod tests {
use std::convert::TryFrom;
use super::*;
#[test]
@ -664,13 +688,25 @@ mod tests {
#[test]
fn statistics_new_all_null() {
let actual = StatValues::<i64>::new_all_null(3);
// i64 values do not have a distinct count
let actual = StatValues::<i64>::new_all_null(3, None);
let expected = StatValues {
min: None,
max: None,
total_count: 3,
null_count: 3,
distinct_count: NonZeroU64::new(1),
distinct_count: None,
};
assert_eq!(actual, expected);
// string columns can have a distinct count
let actual = StatValues::<i64>::new_all_null(3, Some(1_u64));
let expected = StatValues {
min: None,
max: None,
total_count: 3,
null_count: 3,
distinct_count: Some(NonZeroU64::try_from(1_u64).unwrap()),
};
assert_eq!(actual, expected);
}

View File

@ -66,4 +66,9 @@ impl TimestampSummary {
self.counts[timestamp.minute() as usize] += 1;
self.stats.update(&timestamp.timestamp_nanos())
}
/// Records a timestamp value from nanos
pub fn record_nanos(&mut self, timestamp_nanos: i64) {
self.record(Time::from_timestamp_nanos(timestamp_nanos))
}
}

View File

@ -2,11 +2,11 @@
name = "datafusion"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
edition = "2021"
description = "Re-exports datafusion at a specific version"
[dependencies]
# Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="2454e468641d4d98af211c2800c0afec2732385b", default-features = false, package = "datafusion" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="57d7777fc0ce94e783a7f447631624c354b0b906", default-features = false, package = "datafusion" }

View File

@ -2,7 +2,7 @@
name = "datafusion_util"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
edition = "2021"
description = "Datafusion utilities"
[dependencies]

View File

@ -21,4 +21,4 @@ ENV TEST_INTEGRATION=1
ENV KAFKA_CONNECT=kafka:9092
# Run the integration tests that connect to Kafka that will be running in another container
CMD ["sh", "-c", "./docker/integration_test.sh"]
CMD ["sh", "-c", "cargo test -p write_buffer kafka -- --nocapture"]

View File

@ -1,7 +0,0 @@
#!/bin/bash
set -euxo pipefail
cargo test -p write_buffer kafka -- --nocapture
cargo test -p influxdb_iox --test end_to_end skip_replay -- --nocapture
cargo test -p influxdb_iox --test end_to_end write_buffer -- --nocapture

View File

@ -6,7 +6,7 @@ not intended to be general user facing documentation
## IOx Tech Talks
We hold monthly Tech Talks that explain the project's technical underpinnings. You can register for the [InfluxDB IOx Tech Talks here](https://www.influxdata.com/community-showcase/influxdb-tech-talks/), or you can find links to previous sessions below:
We hold monthly Tech Talks that explain the project's technical underpinnings. You can register for the [InfluxDB IOx Tech Talks here](https://www.influxdata.com/community-showcase/influxdb-tech-talks/), or you can find links to previous sessions below or in the [YouTube playlist](https://www.youtube.com/playlist?list=PLYt2jfZorkDp-PKBS05kf2Yx2NrRyPAAz):
* December 2020: Rusty Introduction to Apache Arrow [recording](https://www.youtube.com/watch?v=dQFjKa9vKhM)
* Jan 2021: Data Lifecycle in InfluxDB IOx & How it Uses Object Storage for Persistence [recording](https://www.youtube.com/watch?v=KwdPifHC1Gc)
@ -16,7 +16,8 @@ We hold monthly Tech Talks that explain the project's technical underpinnings. Y
* May 2021: Catalogs - Turning a Set of Parquet Files into a Data Set [recording](https://www.youtube.com/watch?v=Zaei3l3qk0c), [slides](https://www.slideshare.net/influxdata/catalogs-turning-a-set-of-parquet-files-into-a-data-set)
* June 2021: Performance Profiling in Rust [recording](https://www.youtube.com/watch?v=_ZNcg-nAVTM), [slides](https://www.slideshare.net/influxdata/performance-profiling-in-rust)
* July 2021: Impacts of Sharding, Partitioning, Encoding & Sorting on Distributed Query Performance [recording](https://www.youtube.com/watch?v=VHYMpItvBZQ), [slides](https://www.slideshare.net/influxdata/impacts-of-sharding-partitioning-encoding-and-sorting-on-distributed-query-performance)
* September 2021: Observability of InfluxDB IOx Tracing, Metrics and System Tables [recording](https://www.youtube.com/watch?v=tB-umdJCJQc)
* October 2021: Query Processing in InfluxDB IOx [recording](https://www.youtube.com/watch?v=9DYkWuM8xco)
## Table of Contents:

View File

@ -7,7 +7,7 @@
# The full list of available configuration values can be found in
# the command line help (e.g. `env: INFLUXDB_IOX_DB_DIR=`):
#
# ./influxdb_iox run --help
# ./influxdb_iox run database --help
#
#
# The identifier for the server. Used for writing to object storage and as
@ -58,4 +58,4 @@
# To enable Jaeger tracing:
# OTEL_SERVICE_NAME="iox" # defaults to iox
# OTEL_EXPORTER_JAEGER_AGENT_HOST="jaeger.influxdata.net"
# OTEL_EXPORTER_JAEGER_AGENT_PORT="6831"
# OTEL_EXPORTER_JAEGER_AGENT_PORT="6831"

View File

@ -14,21 +14,21 @@ Some examples
```bash
# Default verbosity
$ ./influxdb_iox run
$ ./influxdb_iox run database
# More verbose
$ ./influxdb_iox run -v
$ ./influxdb_iox run database -v
# Even more verbose
$ ./influxdb_iox run -vv
$ ./influxdb_iox run database -vv
# Everything!!
$ ./influxdb_iox run --log-filter trace
$ ./influxdb_iox run database --log-filter trace
# Default info, but debug within http module
$ ./influxdb_iox run --log-filter info,influxdb_iox::influxdb_ioxd::http=debug
$ ./influxdb_iox run database --log-filter info,influxdb_iox::influxdb_ioxd::http=debug
```
Additionally, the output format can be controlled with `--log-format`
```bash
$ ./influxdb_iox run --log-filter debug --log-format logfmt
$ ./influxdb_iox run database --log-filter debug --log-format logfmt
```
## Developer Guide
@ -69,7 +69,7 @@ will strip out all trace level callsites from the release binary.
### Format
IOx supports logging in many formats. For a list run `influxdb_iox run --help` and view the help output
IOx supports logging in many formats. For a list run `influxdb_iox run database --help` and view the help output
for `--log-format`.
<sup>1.</sup> This span propagation uses thread-local storage and therefore does not automatically carry across

View File

@ -3,10 +3,10 @@
An IOx node can be started from the command line:
```shell
influxdb_iox run
influxdb_iox run database
```
See help (via `influxdb_iox run --help`) for arguments.
See help (via `influxdb_iox run database --help`) for arguments.
## Server ID

View File

@ -87,14 +87,14 @@ set.
### Configuration differences when running the tests
When running `influxdb_iox run`, you can pick one object store to use. When running the tests,
When running `influxdb_iox run database`, you can pick one object store to use. When running the tests,
you can run them against all the possible object stores. There's still only one
`INFLUXDB_IOX_BUCKET` variable, though, so that will set the bucket name for all configured object
stores. Use the same bucket name when setting up the different services.
Other than possibly configuring multiple object stores, configuring the tests to use the object
store services is the same as configuring the server to use an object store service. See the output
of `influxdb_iox run --help` for instructions.
of `influxdb_iox run database --help` for instructions.
## InfluxDB 2 Client
@ -136,7 +136,7 @@ You can then run the tests with `KAFKA_CONNECT=localhost:9093`. To run just the
tests, the full command would then be:
```
TEST_INTEGRATION=1 KAFKA_CONNECT=localhost:9093 cargo test -p influxdb_iox --test end_to_end write_buffer
TEST_INTEGRATION=1 KAFKA_CONNECT=localhost:9093 cargo test -p write_buffer kafka --nocapture
```
### Running `cargo test` in a Docker container

View File

@ -2,7 +2,7 @@
name = "entry"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
edition = "2021"
description = "The entry format used by the write buffer"
[dependencies]

View File

@ -2,7 +2,7 @@
name = "generated_types"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
edition = "2021"
[dependencies] # In alphabetical order
bytes = "1.0"
@ -13,7 +13,6 @@ pbjson-types = "0.1"
prost = "0.8"
regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0.30"
tonic = "0.5"
time = { path = "../time" }
@ -29,4 +28,4 @@ pbjson-build = "0.1"
[features]
default = []
data_types_conversions = ["data_types"]
data_types_conversions = ["data_types"]

View File

@ -21,37 +21,44 @@ fn main() -> Result<()> {
/// - `com.github.influxdata.idpe.storage.read.rs`
/// - `influxdata.iox.catalog.v1.rs`
/// - `influxdata.iox.management.v1.rs`
/// - `influxdata.iox.router.v1.rs`
/// - `influxdata.iox.write.v1.rs`
/// - `influxdata.platform.storage.rs`
fn generate_grpc_types(root: &Path) -> Result<()> {
let storage_path = root.join("influxdata/platform/storage");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let catalog_path = root.join("influxdata/iox/catalog/v1");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let router_path = root.join("influxdata/iox/router/v1");
let storage_path = root.join("influxdata/platform/storage");
let write_path = root.join("influxdata/iox/write/v1");
let proto_files = vec![
storage_path.join("test.proto"),
storage_path.join("predicate.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("service.proto"),
storage_path.join("storage_common_idpe.proto"),
idpe_path.join("source.proto"),
catalog_path.join("catalog.proto"),
catalog_path.join("parquet_metadata.proto"),
catalog_path.join("predicate.proto"),
management_path.join("database_rules.proto"),
idpe_path.join("source.proto"),
management_path.join("chunk.proto"),
management_path.join("database_rules.proto"),
management_path.join("jobs.proto"),
management_path.join("partition.proto"),
management_path.join("partition_template.proto"),
management_path.join("server_config.proto"),
management_path.join("service.proto"),
management_path.join("shard.proto"),
management_path.join("jobs.proto"),
write_path.join("service.proto"),
root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"),
root.join("grpc/health/v1/service.proto"),
management_path.join("write_buffer.proto"),
root.join("google/longrunning/operations.proto"),
root.join("google/rpc/error_details.proto"),
root.join("google/rpc/status.proto"),
root.join("grpc/health/v1/service.proto"),
root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"),
router_path.join("router.proto"),
router_path.join("service.proto"),
storage_path.join("predicate.proto"),
storage_path.join("service.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("storage_common_idpe.proto"),
storage_path.join("test.proto"),
write_path.join("service.proto"),
];
// Tell cargo to recompile if any of these proto files are changed

View File

@ -3,34 +3,9 @@ package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/partition_template.proto";
import "influxdata/iox/management/v1/shard.proto";
// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
message Part {
message ColumnFormat {
string column = 1;
string format = 2;
}
oneof part {
google.protobuf.Empty table = 1;
string column = 2;
string time = 3;
ColumnFormat regex = 4;
ColumnFormat strf_time = 5;
}
}
repeated Part parts = 1;
}
import "influxdata/iox/management/v1/write_buffer.proto";
message LifecycleRules {
// Once the total amount of buffered data in memory reaches this size start
@ -111,6 +86,9 @@ message LifecycleRules {
uint64 parquet_cache_limit = 17;
}
// Database rules.
//
// TODO(marco): add `WriteSources` to this message.
message DatabaseRules {
// The unencoded name of the database
//
@ -128,6 +106,8 @@ message DatabaseRules {
LifecycleRules lifecycle_rules = 3;
// If not specified, does not configure any routing
//
// TODO(marco): remove this
oneof routing_rules {
// Shard config
ShardConfig shard_config = 8;
@ -146,6 +126,8 @@ message DatabaseRules {
// Optionally, the connection for the write buffer for writing or reading/restoring data.
//
// If not specified, does not configure a write buffer
//
// TODO(marco): remove this
WriteBufferConnection write_buffer_connection = 13;
}
@ -158,61 +140,6 @@ message PersistedDatabaseRules {
DatabaseRules rules = 2;
}
// Configures the use of a write buffer.
message WriteBufferConnection {
enum Direction {
// Unspecified direction, will be treated as an error.
DIRECTION_UNSPECIFIED = 0;
// Writes into the buffer aka "producer".
DIRECTION_WRITE = 1;
// Reads from the buffer aka "consumer".
DIRECTION_READ = 2;
}
// If the buffer is used for reading or writing.
Direction direction = 1;
// Which type should be used (e.g. "kafka", "mock")
string type = 2;
// Connection string, depends on `type`.
string connection = 3;
// Old non-nested auto-creation config.
reserved 4, 5, 7;
// Special configs to be applied when establishing the connection.
//
// This depends on `type` and can configure aspects like timeouts.
map<string, string> connection_config = 6;
// Specifies if the sequencers (e.g. for Kafka in form of a topic w/ `n_sequencers` partitions) should be
// automatically created if they do not exist prior to reading or writing.
WriteBufferCreationConfig creation_config = 8;
}
// Configs sequencer auto-creation for write buffers.
//
// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/ `n_sequencers`
// partitions.
message WriteBufferCreationConfig {
// Number of sequencers.
//
// How they are implemented depends on `type`, e.g. for Kafka this is mapped to the number of partitions.
//
// If 0, a server-side default is used
uint32 n_sequencers = 1;
// Special configs to be applied when sequencers are created.
//
// This depends on `type` and can setup parameters like retention policy.
//
// Contains 0 or more key value pairs
map<string, string> options = 2;
}
message RoutingConfig {
Sink sink = 2;
}

View File

@ -0,0 +1,31 @@
syntax = "proto3";
package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
import "google/protobuf/empty.proto";
// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
message Part {
message ColumnFormat {
string column = 1;
string format = 2;
}
oneof part {
google.protobuf.Empty table = 1;
string column = 2;
string time = 3;
ColumnFormat regex = 4;
ColumnFormat strf_time = 5;
}
}
repeated Part parts = 1;
}

View File

@ -0,0 +1,23 @@
syntax = "proto3";
package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
// Stores a server's map of the databases it owns. The keys are the database names and the values
// are the database's location in object storage.
//
// Example (current): "foo" => "/1/foo" ("/[server id]/[database name]")
// Example (after completing the switch to floating databases):
// "foo" => "/dbs/3f25185a-0773-4ae8-abda-f9c3786f242b" ("/dbs/[database uuid]")
message ServerConfig {
map<string, string> databases = 1;
}
// Stores information about a server that owns a database. To be stored in a database's object
// store directory as verification of ownership.
message OwnerInfo {
// The ID of the server that owns this database
uint32 id = 1;
// The path to this server's config file in object storage
string location = 2;
}

View File

@ -37,10 +37,7 @@ service ManagementService {
rpc RestoreDatabase(RestoreDatabaseRequest) returns (RestoreDatabaseResponse);
// List deleted databases and their metadata.
rpc ListDeletedDatabases(ListDeletedDatabasesRequest) returns (ListDeletedDatabasesResponse);
// List all databases and their metadata.
// List databases with their metadata.
rpc ListDetailedDatabases(ListDetailedDatabasesRequest) returns (ListDetailedDatabasesResponse);
// List chunks available on this database
@ -160,7 +157,9 @@ message CreateDatabaseRequest {
DatabaseRules rules = 1;
}
message CreateDatabaseResponse {}
message CreateDatabaseResponse {
bytes uuid = 1;
}
// Update a database.
message UpdateDatabaseRequest {
@ -177,24 +176,24 @@ message DeleteDatabaseRequest {
string db_name = 1;
}
message DeleteDatabaseResponse {}
message DeleteDatabaseResponse {
bytes uuid = 1;
}
message RestoreDatabaseRequest {
// The generation ID of the deleted database.
uint64 generation_id = 1;
// Was the generation ID of the deleted database.
reserved 1;
reserved "generation_id";
// the name of the database
string db_name = 2;
// The UUID of the deleted database.
string uuid = 3;
}
message RestoreDatabaseResponse {}
message ListDeletedDatabasesRequest {}
message ListDeletedDatabasesResponse {
repeated DetailedDatabase deleted_databases = 1;
}
message ListDetailedDatabasesRequest {}
message ListDetailedDatabasesResponse {
@ -203,14 +202,19 @@ message ListDetailedDatabasesResponse {
// This resource represents detailed information about a database.
message DetailedDatabase {
// The generation ID of the database.
uint64 generation_id = 1;
// Was the generation ID of the database.
reserved 1;
reserved "generation_id";
// The UTC datetime at which this database was deleted, if applicable.
google.protobuf.Timestamp deleted_at = 2;
// Was the datetime at which this database was deleted, if applicable.
reserved 2;
reserved "deleted_at";
// The name of the database.
string db_name = 3;
// The UUID of the database.
bytes uuid = 4;
}
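Because the `uuid` field is transported as raw bytes, a client has to convert it back into a UUID; a hedged sketch using the `uuid` crate (0.8, as pinned in the workspace), with a hard-coded `Vec<u8>` standing in for the field a prost-generated `DetailedDatabase` would expose:

```rust
// Hypothetical client-side handling of the `bytes uuid` field (not actual
// IOx client code). The raw bytes spell the example UUID used in
// server_config.proto: 3f25185a-0773-4ae8-abda-f9c3786f242b.
use uuid::Uuid;

fn main() {
    let raw: Vec<u8> = vec![
        0x3f, 0x25, 0x18, 0x5a, 0x07, 0x73, 0x4a, 0xe8,
        0xab, 0xda, 0xf9, 0xc3, 0x78, 0x6f, 0x24, 0x2b,
    ];
    let uuid = Uuid::from_slice(&raw).expect("uuid field must be 16 bytes");
    assert_eq!(uuid.to_string(), "3f25185a-0773-4ae8-abda-f9c3786f242b");
}
```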
message ListChunksRequest {
@ -403,6 +407,9 @@ message DatabaseStatus {
// No active database
DATABASE_STATE_NO_ACTIVE_DATABASE = 10;
// Database owner info has been loaded
DATABASE_STATE_OWNER_INFO_LOADED = 11;
// Rules are loaded
DATABASE_STATE_RULES_LOADED = 2;
@ -415,13 +422,16 @@ message DatabaseStatus {
// Error loading rules
DATABASE_STATE_RULES_LOAD_ERROR = 5;
// Error loading owner info
DATABASE_STATE_OWNER_INFO_LOAD_ERROR = 12;
// Error during catalog load
DATABASE_STATE_CATALOG_LOAD_ERROR = 6;
// Error during replay
DATABASE_STATE_REPLAY_ERROR = 7;
// Error encountered identifying active generation
// Error encountered finding the database's directory in object storage
DATABASE_STATE_DATABASE_OBJECT_STORE_LOOKUP_ERROR = 9;
}

View File

@ -30,10 +30,14 @@ message ShardConfig {
/// If set to true the router will ignore any errors sent by the remote
/// targets in this route. That is, the write request will succeed
/// regardless of this route's success.
///
/// TODO(marco): remove this
bool ignore_errors = 3;
/// Mapping between shard IDs and node groups. Other sharding rules use
/// ShardId as targets.
///
/// TODO(marco): remove this
map<uint32, Sink> shards = 4;
}

View File

@ -0,0 +1,58 @@
syntax = "proto3";
package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
// Configures the use of a write buffer.
message WriteBufferConnection {
enum Direction {
// Unspecified direction, will be treated as an error.
DIRECTION_UNSPECIFIED = 0;
// Writes into the buffer aka "producer".
DIRECTION_WRITE = 1;
// Reads from the buffer aka "consumer".
DIRECTION_READ = 2;
}
// If the buffer is used for reading or writing.
Direction direction = 1;
// Which type should be used (e.g. "kafka", "mock")
string type = 2;
// Connection string, depends on `type`.
string connection = 3;
// Old non-nested auto-creation config.
reserved 4, 5, 7;
// Special configs to be applied when establishing the connection.
//
// This depends on `type` and can configure aspects like timeouts.
map<string, string> connection_config = 6;
// Specifies if the sequencers (e.g. for Kafka in form of a topic w/ `n_sequencers` partitions) should be
// automatically created if they do not exist prior to reading or writing.
WriteBufferCreationConfig creation_config = 8;
}
// Configs sequencer auto-creation for write buffers.
//
// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/ `n_sequencers`
// partitions.
message WriteBufferCreationConfig {
// Number of sequencers.
//
// How they are implemented depends on `type`, e.g. for Kafka this is mapped to the number of partitions.
//
// If 0, a server-side default is used
uint32 n_sequencers = 1;
// Special configs to be applied when sequencers are created.
//
// This depends on `type` and can setup parameters like retention policy.
//
// Contains 0 or more key value pairs
map<string, string> options = 2;
}

View File

@ -0,0 +1,148 @@
syntax = "proto3";
package influxdata.iox.router.v1;
option go_package = "github.com/influxdata/iox/router/v1";
import "influxdata/iox/management/v1/partition_template.proto";
import "influxdata/iox/management/v1/shard.proto";
import "influxdata/iox/management/v1/write_buffer.proto";
// Router for writes and queries.
//
// A router acts similarly to a "real" database except that it does NOT store or manage any data by itself but forwards
// this responsibility to other nodes (which then in turn provide an actual database or another routing layer).
//
// # Write Routing
//
// ## Overall Picture
// Data is accepted from all sources, is sharded, and is (according to the sharding) written into the sink sets. There
// may be a prioritization for sources that is "HTTP and gRPC first, and write buffers in declared order".
//
// ```text
// ( HTTP )--+ +------->( sink set 1 )
// | |
// ( gRPC )--+-->( sharder )--> ...
// | |
// ( Write Buffer 1 )--+ +------->( sink set n )
// ... |
// ( Write Buffer n )--+
// ```
//
// ## Sharder
// A sharder takes data and for every row/line:
//
// 1. Checks if a matcher matches the row, first matcher wins. If that's the case, the row/line is directly sent to the
// sink set.
// 2. If no matcher matches, the row/line is handled by the hash ring.
//
// ```text
// --->[ matcher 1? ]-{no}---...--->[ matcher n? ]-{no}---+
// | | |
// {yes} {yes} |
// | | |
// V V |
// ( sink set 1 ) ( sink set n ) |
// ^ ^ |
// | | |
// +--------( hash ring )-------+ |
// ^ |
// | |
// +-----------------------------+
// ```
//
// ## Sink Set
// Data is written to all sinks in the set in implementation-defined order. Errors do NOT short-circuit. If an error
// occurs for at least one sink that has `ignore_errors = false`, an error is returned. An empty sink set acts as a NULL
// sink and always succeeds.
//
// **IMPORTANT: Queries are NOT distributed! They are always answered by a single node.**
//
// # Query Routing
// Queries always arrive via gRPC and are forwarded to one sink. The specific sink is selected via an engine that might
// take the following features into account:
//
// - **freshness:** For each sink, what are the latest sequence numbers pulled from the write buffer.
// - **stickiness:** The same client should ideally reach the same sink in subsequent requests to improve caching.
// - **circuit breaking:** If a sink is unhealthy it should be excluded from the candidate list for a while.
//
// ```text
// ( gRPC )-->[ selection engine ]-->( sink 1 )
// | ...
// +---->( sink n )
// ```
message Router {
// Router name.
//
// The name is unique for this node.
string name = 1;
// Sources of write requests.
WriteSources write_sources = 2;
// Write sharder.
//
// NOTE: This only uses the `specific_targets` and `hash_ring` config of the sharder. The other fields are ignored.
//
// TODO(marco): remove the note above once the `ShardConfig` has been cleaned up.
influxdata.iox.management.v1.ShardConfig write_sharder = 3;
// Sinks for write requests.
map<uint32, WriteSinkSet> write_sinks = 4;
// Sinks for query requests.
QuerySinks query_sinks = 5;
// Template that generates a partition key for each row inserted into the database.
//
// This is a temporary config until the partition is moved entirely into the database.
//
// If not specified, a server-side default is used
//
// TODO(marco): remove this
influxdata.iox.management.v1.PartitionTemplate partition_template = 6;
}
// Sources of write request aka new data.
//
// Data is accepted from these sources and a status is provided back to them.
message WriteSources {
// If set writes via gRPC and HTTP are accepted.
//
// You may want to disable this when incoming data should solely be received via write buffer(s).
bool allow_unsequenced_inputs = 2;
// Write buffer connections.
repeated influxdata.iox.management.v1.WriteBufferConnection write_buffers = 3;
}
// Sink of write requests aka new data.
//
// Data is sent to this sink and a status is received from it.
message WriteSink {
// Where the data goes.
oneof sink {
// gRPC-based remote, addressed by its server ID.
uint32 grpc_remote = 1;
// Write buffer connection.
influxdata.iox.management.v1.WriteBufferConnection write_buffer = 2;
}
// If set, errors during writing to this sink are ignored and do NOT lead to an overall failure.
bool ignore_errors = 3;
}
// Set of write sinks.
message WriteSinkSet {
// Sinks within the set.
repeated WriteSink sinks = 1;
}
// Sinks for query requests.
//
// Queries are sent to one of these sinks and the resulting data is received from it.
//
// Note that the query results flow in the opposite direction (aka a query sink is a result source).
message QuerySinks {
// gRPC-based remotes, addressed by their server IDs.
repeated uint32 grpc_remotes = 1;
}
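
To make the write-routing rules described in the comments above concrete, here is a minimal, hedged Rust sketch of the dispatch order and of the sink-set error semantics. None of these types are IOx types: `Matcher`, `HashRing`, and `SinkSetId` are hypothetical stand-ins for the config messages in this file, and matching by table name is purely illustrative.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Hypothetical sink set identifier (the proto keys sink sets by `uint32`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SinkSetId(u32);

/// Hypothetical matcher: a predicate over the table name plus the sink set it routes to.
struct Matcher {
    matches: fn(&str) -> bool,
    sink_set: SinkSetId,
}

/// Hypothetical hash ring mapping a table name onto one of its sink sets.
struct HashRing {
    sink_sets: Vec<SinkSetId>,
}

impl HashRing {
    fn route(&self, table: &str) -> Option<SinkSetId> {
        if self.sink_sets.is_empty() {
            return None;
        }
        let mut hasher = DefaultHasher::new();
        table.hash(&mut hasher);
        let idx = (hasher.finish() % self.sink_sets.len() as u64) as usize;
        Some(self.sink_sets[idx])
    }
}

/// Step 1: the first matching matcher wins. Step 2: everything else goes to the hash ring.
fn route_row(matchers: &[Matcher], ring: &HashRing, table: &str) -> Option<SinkSetId> {
    matchers
        .iter()
        .find(|m| (m.matches)(table))
        .map(|m| m.sink_set)
        .or_else(|| ring.route(table))
}

/// Sink-set semantics: errors never short-circuit, and only a failing sink with
/// `ignore_errors == false` fails the overall write; an empty set always succeeds.
fn write_to_sink_set(results: &[(bool, Result<(), String>)]) -> Result<(), String> {
    let mut first_error = None;
    for (ignore_errors, result) in results {
        if let Err(e) = result {
            if !ignore_errors && first_error.is_none() {
                first_error = Some(e.clone());
            }
        }
    }
    first_error.map_or(Ok(()), Err)
}

fn main() {
    let matchers = [Matcher {
        matches: |table| table.starts_with("cpu"),
        sink_set: SinkSetId(1),
    }];
    let ring = HashRing {
        sink_sets: vec![SinkSetId(2), SinkSetId(3)],
    };
    assert_eq!(route_row(&matchers, &ring, "cpu"), Some(SinkSetId(1)));
    assert!(route_row(&matchers, &ring, "mem").is_some());
    assert!(write_to_sink_set(&[]).is_ok()); // empty sink set acts as a NULL sink
}
```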

View File

@ -0,0 +1,76 @@
syntax = "proto3";
package influxdata.iox.router.v1;
option go_package = "github.com/influxdata/iox/router/v1";
import "influxdata/iox/router/v1/router.proto";
service RouterService {
// List remote IOx servers we know about.
rpc ListRemotes(ListRemotesRequest) returns (ListRemotesResponse);
// Update information about a remote IOx server (upsert).
rpc UpdateRemote(UpdateRemoteRequest) returns (UpdateRemoteResponse);
// Delete a reference to remote IOx server.
rpc DeleteRemote(DeleteRemoteRequest) returns (DeleteRemoteResponse);
// List configured routers.
rpc ListRouter(ListRouterRequest) returns (ListRouterResponse);
// Update router config (upsert).
rpc UpdateRouter(UpdateRouterRequest) returns (UpdateRouterResponse);
// Delete router.
rpc DeleteRouter(DeleteRouterRequest) returns (DeleteRouterResponse);
}
message ListRemotesRequest {}
message ListRemotesResponse {
repeated Remote remotes = 1;
}
// This resource represents a remote IOx server.
message Remote {
// The server ID associated with a remote IOx server.
uint32 id = 1;
// The address of the remote IOx server gRPC endpoint.
string connection_string = 2;
}
// Updates information about a remote IOx server.
//
// If a remote for a given `id` already exists, it is updated in place.
message UpdateRemoteRequest {
// If omitted, the remote associated with `id` will be removed.
Remote remote = 1;
// TODO(#917): add an optional flag to test the connection or not before adding it.
}
message UpdateRemoteResponse {}
message ListRouterRequest {}
message ListRouterResponse {
repeated Router routers = 1;
}
message DeleteRemoteRequest{
uint32 id = 1;
}
message DeleteRemoteResponse {}
message UpdateRouterRequest {
Router router = 1;
}
message UpdateRouterResponse {}
message DeleteRouterRequest {
string router_name = 1;
}
message DeleteRouterResponse {}
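
For completeness, a hedged sketch of calling the new service from Rust. The module path and client type assume tonic's usual code-generation layout (`router_service_client::RouterServiceClient` under the package module); this diff does not show where the generated code is exposed, so treat the paths, address, and port below as assumptions.

```rust
// Hedged sketch only: generated module paths, address, and port are assumptions.
use generated_types::influxdata::iox::router::v1::{
    router_service_client::RouterServiceClient, ListRouterRequest,
};

async fn list_routers() -> Result<(), Box<dyn std::error::Error>> {
    let mut client = RouterServiceClient::connect("http://127.0.0.1:8082").await?;
    let response = client.list_router(ListRouterRequest {}).await?;
    for router in response.into_inner().routers {
        println!("router: {}", router.name);
    }
    Ok(())
}
```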

View File

@ -1,6 +1,7 @@
use crate::{
google::{FieldViolation, FieldViolationExt, FromFieldOpt},
influxdata::iox::management::v1 as management,
DecodeError, EncodeError,
};
use data_types::{
database_rules::{
@ -14,7 +15,6 @@ use std::{
num::NonZeroU32,
time::Duration,
};
use thiserror::Error;
mod lifecycle;
mod partition;
@ -122,42 +122,19 @@ impl TryFrom<management::RoutingConfig> for RoutingConfig {
}
}
/// Wrapper around a `prost` error so that
/// users of this crate do not have a direct dependency
/// on the prost crate.
#[derive(Debug, Error)]
pub enum ProstError {
#[error("failed to encode protobuf: {0}")]
EncodeError(#[from] prost::EncodeError),
#[error("failed to decode protobuf: {0}")]
DecodeError(#[from] prost::DecodeError),
}
/// Decode database rules that were encoded using `encode_persisted_database_rules`
pub fn decode_persisted_database_rules(
bytes: prost::bytes::Bytes,
) -> Result<management::PersistedDatabaseRules, ProstError> {
Ok(prost::Message::decode(bytes)?)
) -> Result<management::PersistedDatabaseRules, DecodeError> {
prost::Message::decode(bytes)
}
/// TEMPORARY FOR TRANSITION PURPOSES - if decoding rules file as `PersistedDatabaseRules` (which
/// includes the database UUID) fails, use this to try instead to decode as `DatabaseRules`. Then
/// next time the database rules are updated, the rules file will be writted as
/// `PersistedDatabaseRules`.
pub fn decode_database_rules(
bytes: prost::bytes::Bytes,
) -> Result<management::DatabaseRules, ProstError> {
Ok(prost::Message::decode(bytes)?)
}
/// Encode database rules into a serialized format suitable for
/// storage in objet store
/// Encode database rules into a serialized format suitable for storage in object store
pub fn encode_persisted_database_rules(
rules: &management::PersistedDatabaseRules,
bytes: &mut prost::bytes::BytesMut,
) -> Result<(), ProstError> {
Ok(prost::Message::encode(rules, bytes)?)
) -> Result<(), EncodeError> {
prost::Message::encode(rules, bytes)
}
impl From<WriteBufferConnection> for management::WriteBufferConnection {

View File

@ -15,6 +15,8 @@ impl DatabaseState {
DatabaseState::DatabaseObjectStoreFound => "DatabaseObjectStoreFound",
DatabaseState::DatabaseObjectStoreLookupError => "DatabaseObjectStoreLookupError",
DatabaseState::NoActiveDatabase => "NoActiveDatabase",
DatabaseState::OwnerInfoLoaded => "OwnerInfoLoaded",
DatabaseState::OwnerInfoLoadError => "OwnerInfoLoadError",
DatabaseState::Unspecified => "Unspecified",
}
}

View File

@ -1,18 +1,13 @@
use crate::influxdata::iox::management::v1 as management;
use data_types::detailed_database::DetailedDatabase;
use data_types::detailed_database::ActiveDatabase;
impl From<DetailedDatabase> for management::DetailedDatabase {
fn from(database: DetailedDatabase) -> Self {
let DetailedDatabase {
name,
generation_id,
deleted_at,
} = database;
impl From<ActiveDatabase> for management::DetailedDatabase {
fn from(database: ActiveDatabase) -> Self {
let ActiveDatabase { name, uuid } = database;
Self {
db_name: name.to_string(),
generation_id: generation_id.inner as u64,
deleted_at: deleted_at.map(Into::into),
uuid: uuid.as_bytes().to_vec(),
}
}
}

View File

@ -152,6 +152,10 @@ pub mod database_state;
pub mod detailed_database;
#[cfg(feature = "data_types_conversions")]
pub mod job;
#[cfg(feature = "data_types_conversions")]
pub mod server_config;
pub use prost::{DecodeError, EncodeError};
#[cfg(test)]
mod tests {

View File

@ -0,0 +1,33 @@
use crate::{influxdata::iox::management::v1 as management, DecodeError, EncodeError};
/// Decode server config that was encoded using `encode_persisted_server_config`
pub fn decode_persisted_server_config(
bytes: prost::bytes::Bytes,
) -> Result<management::ServerConfig, DecodeError> {
prost::Message::decode(bytes)
}
/// Encode server config into a serialized format suitable for storage in object store
pub fn encode_persisted_server_config(
server_config: &management::ServerConfig,
bytes: &mut prost::bytes::BytesMut,
) -> Result<(), EncodeError> {
prost::Message::encode(server_config, bytes)
}
/// Encode server information to be serialized into a database's object store directory and used to
/// identify that database's owning server
pub fn encode_database_owner_info(
owner_info: &management::OwnerInfo,
bytes: &mut prost::bytes::BytesMut,
) -> Result<(), EncodeError> {
prost::Message::encode(owner_info, bytes)
}
/// Encode server information that was encoded using `encode_database_owner_info` to compare
/// with the currently-running server
pub fn decode_database_owner_info(
bytes: prost::bytes::Bytes,
) -> Result<management::OwnerInfo, DecodeError> {
prost::Message::decode(bytes)
}
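
A hedged round-trip sketch for the helpers above. The `generated_types::server_config` path matches how the reworked `dump_catalog` command calls `decode_persisted_server_config` elsewhere in this diff; everything else (the error boxing, the caller) is illustrative only.

```rust
use generated_types::influxdata::iox::management::v1 as management;
use generated_types::server_config::{
    decode_persisted_server_config, encode_persisted_server_config,
};

/// Encode a `ServerConfig` into bytes and decode it back, surfacing the
/// re-exported prost `EncodeError`/`DecodeError` via a boxed error.
fn roundtrip_server_config(
    config: &management::ServerConfig,
) -> Result<management::ServerConfig, Box<dyn std::error::Error>> {
    let mut buf = prost::bytes::BytesMut::new();
    encode_persisted_server_config(config, &mut buf)?;
    Ok(decode_persisted_server_config(buf.freeze())?)
}
```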

View File

@ -2,7 +2,7 @@
name = "grpc-router-test-gen"
version = "0.1.0"
authors = ["Marko Mikulicic <mkm@influxdata.com>"]
edition = "2018"
edition = "2021"
description = "Protobuf used in test for the grpc-router crate; need to be in a separate create because of linter limitations"
[dependencies]

View File

@ -2,7 +2,7 @@
name = "grpc-router"
version = "0.1.0"
authors = ["Marko Mikulicic <mkm@influxdata.com>"]
edition = "2018"
edition = "2021"
[dependencies]
bytes = "1.0"
@ -25,4 +25,4 @@ prost-build = "0.8"
tonic-build = "0.5"
[dev-dependencies]
grpc-router-test-gen = { path = "./grpc-router-test-gen" }
grpc-router-test-gen = { path = "../grpc-router-test-gen" }

View File

@ -2,7 +2,7 @@
name = "influxdb2_client"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
edition = "2021"
[dependencies] # In alphabetical order
bytes = "1.0"

View File

@ -215,6 +215,7 @@ impl TestServer {
Command::new("docker")
.arg("run")
.arg("database")
.arg("--name")
.arg(&container_name)
.arg("--publish")

influxdb_iox/Cargo.toml Normal file
View File

@ -0,0 +1,123 @@
[package]
name = "influxdb_iox"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2021"
default-run = "influxdb_iox"
[dependencies]
# Workspace dependencies, in alphabetical order
datafusion = { path = "../datafusion" }
data_types = { path = "../data_types" }
entry = { path = "../entry" }
generated_types = { path = "../generated_types" }
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format"] }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
internal_types = { path = "../internal_types" }
iox_object_store = { path = "../iox_object_store" }
logfmt = { path = "../logfmt" }
metric = { path = "../metric" }
metric_exporters = { path = "../metric_exporters" }
mutable_buffer = { path = "../mutable_buffer" }
num_cpus = "1.13.0"
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
panic_logging = { path = "../panic_logging" }
parquet_catalog = { path = "../parquet_catalog" }
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }
query = { path = "../query" }
read_buffer = { path = "../read_buffer" }
server = { path = "../server" }
trace = { path = "../trace" }
trace_exporters = { path = "../trace_exporters" }
trace_http = { path = "../trace_http" }
tracker = { path = "../tracker" }
trogging = { path = "../trogging", default-features = false, features = ["structopt"] }
time = { path = "../time" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "6.0", features = ["prettyprint"] }
arrow-flight = "6.0"
async-trait = "0.1"
backtrace = "0.3"
byteorder = "1.3.4"
bytes = "1.0"
chrono = "0.4"
clap = "2.33.1"
csv = "1.1"
dirs = "4.0.0"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3"
hashbrown = "0.11"
http = "0.2.0"
humantime = "2.1.0"
hyper = "0.14"
libc = { version = "0.2" }
log = "0.4"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
parking_lot = "0.11.2"
itertools = "0.10.1"
parquet = "6.0"
pin-project = "1.0"
# used by arrow/datafusion anyway
comfy-table = { version = "4.0", default-features = false }
pprof = { version = "^0.5", default-features = false, features = ["flamegraph", "protobuf"], optional = true }
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.67"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.25"
thiserror = "1.0.30"
tikv-jemalloc-ctl = { version = "0.4.0" }
tokio = { version = "1.11", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tokio-util = { version = "0.6.3" }
tonic = "0.5.0"
tonic-health = "0.4.0"
tonic-reflection = "0.2.0"
tower = "0.4"
uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
[dev-dependencies]
# Workspace dependencies, in alphabetical order
arrow_util = { path = "../arrow_util" }
entry = { path = "../entry" }
influxdb2_client = { path = "../influxdb2_client" }
influxdb_storage_client = { path = "../influxdb_storage_client" }
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight"] }
test_helpers = { path = "../test_helpers" }
parking_lot = "0.11.2"
write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
assert_cmd = "2.0.2"
flate2 = "1.0"
hex = "0.4.2"
predicates = "2.0.3"
rand = "0.8.3"
reqwest = "0.11"
tempfile = "3.1.0"
[features]
default = ["jemalloc_replacing_malloc"]
azure = ["object_store/azure"] # Optional Azure Object store support
gcp = ["object_store/gcp"] # Optional GCP object store support
aws = ["object_store/aws"] # Optional AWS / S3 object store support
# pprof is an optional feature for pprof support
# heappy is an optional feature; not on by default as it adds
# runtime overhead on all allocations (calls to malloc).
# Cargo cannot currently implement mutually exclusive features so let's force every build
# to pick either heappy or jemalloc_replacing_malloc feature at least until we figure out something better.
jemalloc_replacing_malloc = ["tikv-jemalloc-sys"]

View File

@ -1,6 +1,7 @@
//! This module implements the `database` CLI command
use chrono::{DateTime, Utc};
use crate::TABLE_STYLE_SINGLE_LINE_BORDERS;
use comfy_table::{Cell, Table};
use influxdb_iox_client::{
connection::Connection,
flight,
@ -11,13 +12,10 @@ use influxdb_iox_client::{
},
write::{self, WriteError},
};
use prettytable::{format, Cell, Row, Table};
use std::{
convert::TryInto, fs::File, io::Read, num::NonZeroU64, path::PathBuf, str::FromStr,
time::Duration,
};
use std::{fs::File, io::Read, num::NonZeroU64, path::PathBuf, str::FromStr, time::Duration};
use structopt::StructOpt;
use thiserror::Error;
use uuid::Uuid;
mod chunk;
mod partition;
@ -139,12 +137,7 @@ struct Create {
/// Get list of databases
#[derive(Debug, StructOpt)]
struct List {
/// Whether to list databases marked as deleted instead, to restore or permanently delete.
#[structopt(long)]
deleted: bool,
/// Whether to list detailed information, including generation IDs, about all databases,
/// whether they are active or marked as deleted.
/// Whether to list detailed information about the databases along with their names.
#[structopt(long)]
detailed: bool,
}
@ -193,14 +186,14 @@ struct Delete {
name: String,
}
/// Restore a deleted database generation
/// Restore a deleted database
#[derive(Debug, StructOpt)]
struct Restore {
/// The generation ID of the database to restore
generation_id: usize,
/// The name of the database to delete
/// The name to give the database upon restoring it
name: String,
/// The UUID of the database to restore
uuid: String,
}
/// All possible subcommands for database
@ -224,7 +217,7 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
let mut client = management::Client::new(connection);
#[allow(deprecated)]
let rules = DatabaseRules {
name: command.name,
name: command.name.clone(),
lifecycle_rules: Some(LifecycleRules {
buffer_size_soft: command.buffer_size_soft as _,
buffer_size_hard: command.buffer_size_hard as _,
@ -258,46 +251,35 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
..Default::default()
};
client.create_database(rules).await?;
let uuid = client.create_database(rules).await?;
println!("Ok");
println!("Created database {} ({})", command.name, uuid);
}
Command::List(list) => {
let mut client = management::Client::new(connection);
if list.deleted || list.detailed {
let databases = if list.deleted {
client.list_deleted_databases().await?
} else {
client.list_detailed_databases().await?
};
if list.detailed {
let databases = client.list_detailed_databases().await?;
let mut table = Table::new();
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
table.set_titles(Row::new(vec![
Cell::new("Deleted at"),
Cell::new("Generation ID"),
Cell::new("Name"),
]));
if !databases.is_empty() {
let mut table = Table::new();
table.load_preset(TABLE_STYLE_SINGLE_LINE_BORDERS);
table.set_header(vec![Cell::new("Name"), Cell::new("UUID")]);
for database in databases {
let deleted_at = database
.deleted_at
.and_then(|t| {
let dt: Result<DateTime<Utc>, _> = t.try_into();
dt.ok().map(|d| d.to_string())
})
.unwrap_or_else(String::new);
table.add_row(Row::new(vec![
Cell::new(&deleted_at),
Cell::new(&database.generation_id.to_string()),
Cell::new(&database.db_name),
]));
for database in databases {
let uuid = Uuid::from_slice(&database.uuid)
.map(|u| u.to_string())
.unwrap_or_else(|_| String::from("<UUID parsing failed>"));
table.add_row(vec![Cell::new(&database.db_name), Cell::new(&uuid)]);
}
print!("{}", table);
}
print!("{}", table);
} else {
let names = client.list_database_names().await?;
println!("{}", names.join("\n"))
if !names.is_empty() {
println!("{}", names.join("\n"))
}
}
}
Command::Get(get) => {
@ -362,15 +344,16 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
}
Command::Delete(command) => {
let mut client = management::Client::new(connection);
client.delete_database(&command.name).await?;
let uuid = client.delete_database(&command.name).await?;
println!("Deleted database {}", command.name);
println!("{}", uuid);
}
Command::Restore(command) => {
let mut client = management::Client::new(connection);
client
.restore_database(&command.name, command.generation_id)
.restore_database(&command.name, &command.uuid)
.await?;
println!("Restored database {}", command.name);
println!("Restored database {} ({})", command.name, command.uuid);
}
}

View File

@ -1,59 +1,47 @@
use data_types::DatabaseName;
use crate::structopt_blocks::{object_store::ObjectStoreConfig, server_id::ServerIdConfig};
use iox_object_store::IoxObjectStore;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt, Snafu};
use std::{convert::TryFrom, sync::Arc};
use structopt::StructOpt;
use crate::{object_store::ObjectStoreConfig, server_id::ServerIdConfig};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Cannot parse object store config: {}", source))]
ObjectStoreParsing {
source: crate::object_store::ParseError,
source: crate::structopt_blocks::object_store::ParseError,
},
#[snafu(display("No server ID provided"))]
NoServerId,
#[snafu(display("Invalid database name: {}", source))]
InvalidDbName {
source: data_types::DatabaseNameError,
#[snafu(display("Can't read server config from object storage: {}", source))]
CantReadServerConfig { source: object_store::Error },
#[snafu(display("Error deserializing server config from protobuf: {}", source))]
CantDeserializeServerConfig {
source: generated_types::DecodeError,
},
#[snafu(display("Can't find a database with this name on this server"))]
CantFindDatabase,
#[snafu(display("Cannot open IOx object store: {}", source))]
IoxObjectStoreFailure {
source: iox_object_store::IoxObjectStoreError,
},
#[snafu(display("Cannot find existing IOx object store"))]
NoIoxObjectStore,
#[snafu(display("Cannot dump catalog: {}", source))]
DumpCatalogFailure {
source: parquet_file::catalog::dump::Error,
source: parquet_catalog::dump::Error,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Interrogate internal database data
#[derive(Debug, StructOpt)]
pub struct Config {
#[structopt(subcommand)]
command: Command,
}
#[derive(Debug, StructOpt)]
enum Command {
/// Dump preserved catalog.
DumpCatalog(DumpCatalog),
}
/// Dump preserved catalog.
#[derive(Debug, StructOpt)]
struct DumpCatalog {
pub struct Config {
// object store config
#[structopt(flatten)]
object_store_config: ObjectStoreConfig,
@ -71,11 +59,12 @@ struct DumpCatalog {
}
#[derive(Debug, StructOpt)]
struct DumpOptions {
/// Show debug output of `DecodedIoxParquetMetaData` if decoding succeeds, show the decoding error otherwise.
pub struct DumpOptions {
/// Show debug output of `DecodedIoxParquetMetaData` if decoding succeeds, show the decoding
/// error otherwise.
///
/// Since this contains the entire Apache Parquet metadata object this is quite verbose and is usually not
/// recommended.
/// Since this contains the entire Apache Parquet metadata object this is quite verbose and is
/// usually not recommended.
#[structopt(long = "--show-parquet-metadata")]
show_parquet_metadata: bool,
@ -94,16 +83,17 @@ struct DumpOptions {
#[structopt(long = "--show-statistics")]
show_statistics: bool,
/// Show unparsed `IoxParquetMetaData` -- which are Apache Thrift bytes -- as part of the transaction actions.
/// Show unparsed `IoxParquetMetaData` -- which are Apache Thrift bytes -- as part of the
/// transaction actions.
///
/// Since this binary data is usually quite hard to read, it is recommended to set this to `false` which will
/// replace the actual bytes with `b"metadata omitted"`. Use the other toggles to instead show the content of the
/// Apache Thrift message.
/// Since this binary data is usually quite hard to read, it is recommended to set this to
/// `false` which will replace the actual bytes with `b"metadata omitted"`. Use the other
/// toggles to instead show the content of the Apache Thrift message.
#[structopt(long = "--show-unparsed-metadata")]
show_unparsed_metadata: bool,
}
impl From<DumpOptions> for parquet_file::catalog::dump::DumpOptions {
impl From<DumpOptions> for parquet_catalog::dump::DumpOptions {
fn from(options: DumpOptions) -> Self {
Self {
show_parquet_metadata: options.show_parquet_metadata,
@ -116,29 +106,32 @@ impl From<DumpOptions> for parquet_file::catalog::dump::DumpOptions {
}
pub async fn command(config: Config) -> Result<()> {
match config.command {
Command::DumpCatalog(dump_catalog) => {
let object_store = ObjectStore::try_from(&dump_catalog.object_store_config)
.context(ObjectStoreParsing)?;
let database_name =
DatabaseName::try_from(dump_catalog.db_name).context(InvalidDbName)?;
let server_id = dump_catalog
.server_id_config
.server_id
.context(NoServerId)?;
let iox_object_store =
IoxObjectStore::find_existing(Arc::new(object_store), server_id, &database_name)
.await
.context(IoxObjectStoreFailure)?
.context(NoIoxObjectStore)?;
let object_store =
Arc::new(ObjectStore::try_from(&config.object_store_config).context(ObjectStoreParsing)?);
let server_id = config.server_id_config.server_id.context(NoServerId)?;
let server_config_bytes = IoxObjectStore::get_server_config_file(&object_store, server_id)
.await
.context(CantReadServerConfig)?;
let mut writer = std::io::stdout();
let options = dump_catalog.dump_options.into();
parquet_file::catalog::dump::dump(&iox_object_store, &mut writer, options)
.await
.context(DumpCatalogFailure)?;
}
}
let server_config =
generated_types::server_config::decode_persisted_server_config(server_config_bytes)
.context(CantDeserializeServerConfig)?;
let database_location = server_config
.databases
.get(&config.db_name)
.context(CantFindDatabase)?;
let iox_object_store =
IoxObjectStore::load_at_root_path(Arc::clone(&object_store), server_id, database_location)
.await
.context(IoxObjectStoreFailure)?;
let mut writer = std::io::stdout();
let options = config.dump_options.into();
parquet_catalog::dump::dump(&iox_object_store, &mut writer, options)
.await
.context(DumpCatalogFailure)?;
Ok(())
}

View File

@ -0,0 +1,41 @@
use snafu::{ResultExt, Snafu};
use structopt::StructOpt;
mod dump_catalog;
mod print_cpu;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error in dump-catalog subcommand: {}", source))]
DumpCatalogError { source: dump_catalog::Error },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Interrogate internal database data
#[derive(Debug, StructOpt)]
pub struct Config {
#[structopt(subcommand)]
command: Command,
}
#[derive(Debug, StructOpt)]
enum Command {
/// Dump preserved catalog.
DumpCatalog(dump_catalog::Config),
/// Prints what CPU features are used by the compiler by default.
PrintCpu,
}
pub async fn command(config: Config) -> Result<()> {
match config.command {
Command::DumpCatalog(dump_catalog) => dump_catalog::command(dump_catalog)
.await
.context(DumpCatalogError),
Command::PrintCpu => {
print_cpu::main();
Ok(())
}
}
}

View File

@ -1,4 +1,3 @@
#![recursion_limit = "512"]
/// Prints what CPU features are used by the compiler by default.
///
/// Script from:
@ -29,7 +28,7 @@ macro_rules! print_if_feature_enabled {
}
}
fn main() {
pub fn main() {
println!("rustc is using the following target options");
print_if_feature_enabled!(

View File

@ -0,0 +1,106 @@
//! Implementation of command line option for running server
use std::sync::Arc;
use crate::{
influxdb_ioxd::{
self,
server_type::{
common_state::{CommonServerState, CommonServerStateError},
database::{
setup::{make_application, make_server},
DatabaseServerType,
},
},
},
structopt_blocks::{boolean_flag::BooleanFlag, run_config::RunConfig},
};
use structopt::StructOpt;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("Run: {0}")]
Run(#[from] influxdb_ioxd::Error),
#[error("Cannot setup server: {0}")]
Setup(#[from] crate::influxdb_ioxd::server_type::database::setup::Error),
#[error("Invalid config: {0}")]
InvalidConfig(#[from] CommonServerStateError),
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, StructOpt)]
#[structopt(
name = "run",
about = "Runs in database mode",
long_about = "Run the IOx database server.\n\nThe configuration options below can be \
set either with the command line flags or with the specified environment \
variable. If there is a file named '.env' in the current working directory, \
it is sourced before loading the configuration.
Configuration is loaded from the following sources (highest precedence first):
- command line arguments
- user set environment variables
- .env file contents
- pre-configured default values"
)]
pub struct Config {
#[structopt(flatten)]
pub(crate) run_config: RunConfig,
/// The number of threads to use for all worker pools.
///
/// IOx uses a pool with `--num-threads` threads *each* for
/// 1. Handling API requests
/// 2. Running queries.
/// 3. Reorganizing data (e.g. compacting chunks)
///
/// If not specified, defaults to the number of cores on the system
#[structopt(long = "--num-worker-threads", env = "INFLUXDB_IOX_NUM_WORKER_THREADS")]
pub num_worker_threads: Option<usize>,
// TODO(marco): Remove once the database-run-mode (aka the `server` crate) cannot handle routing anymore and we're
// fully migrated to the new router code.
/// When IOx nodes need to talk to remote peers they consult an internal remote address
/// mapping. This mapping is populated via API calls. If the mapping doesn't produce
/// a result, this config entry allows generating a hostname from a template:
/// occurrences of the "{id}" substring will be replaced with the remote Server ID.
///
/// Example: http://node-{id}.ioxmydomain.com:8082
#[structopt(long = "--remote-template", env = "INFLUXDB_IOX_REMOTE_TEMPLATE")]
pub remote_template: Option<String>,
/// Automatically wipe the preserved catalog on error
#[structopt(
long = "--wipe-catalog-on-error",
env = "INFLUXDB_IOX_WIPE_CATALOG_ON_ERROR",
// TODO: Don't automatically wipe on error (#1522)
default_value = "yes"
)]
pub wipe_catalog_on_error: BooleanFlag,
/// Skip replaying the write buffer and seek to high watermark instead.
#[structopt(
long = "--skip-replay",
env = "INFLUXDB_IOX_SKIP_REPLAY",
default_value = "no"
)]
pub skip_replay_and_seek_instead: BooleanFlag,
}
pub async fn command(config: Config) -> Result<()> {
let common_state = CommonServerState::from_config(config.run_config.clone())?;
let application = make_application(&config, common_state.trace_collector()).await?;
let app_server = make_server(Arc::clone(&application), &config);
let server_type = Arc::new(DatabaseServerType::new(
Arc::clone(&application),
Arc::clone(&app_server),
&common_state,
));
Ok(influxdb_ioxd::main(common_state, server_type).await?)
}

View File

@ -0,0 +1,53 @@
use snafu::{ResultExt, Snafu};
use structopt::StructOpt;
use crate::structopt_blocks::run_config::RunConfig;
pub mod database;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error in database subcommand: {}", source))]
DatabaseError { source: database::Error },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, StructOpt)]
pub struct Config {
// TODO(marco) remove this
/// Config for database mode, for backwards compatibility reasons.
#[structopt(flatten)]
database_config: database::Config,
#[structopt(subcommand)]
command: Option<Command>,
}
impl Config {
pub fn run_config(&self) -> &RunConfig {
match &self.command {
None => &self.database_config.run_config,
Some(Command::Database(config)) => &config.run_config,
}
}
}
#[derive(Debug, StructOpt)]
enum Command {
Database(database::Config),
}
pub async fn command(config: Config) -> Result<()> {
match config.command {
None => {
println!(
"WARNING: Not specifying the run-mode is deprecated. Defaulting to 'database'."
);
database::command(config.database_config)
.await
.context(DatabaseError)
}
Some(Command::Database(config)) => database::command(config).await.context(DatabaseError),
}
}

View File

@ -1,9 +1,9 @@
use crate::TABLE_STYLE_SINGLE_LINE_BORDERS;
use comfy_table::{Cell, Table};
use influxdb_iox_client::{connection::Connection, management};
use structopt::StructOpt;
use thiserror::Error;
use prettytable::{format, Cell, Row, Table};
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Error)]
pub enum Error {
@ -51,17 +51,14 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
println!("no remotes configured");
} else {
let mut table = Table::new();
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
table.set_titles(Row::new(vec![
Cell::new("ID"),
Cell::new("Connection string"),
]));
table.load_preset(TABLE_STYLE_SINGLE_LINE_BORDERS);
table.set_header(vec![Cell::new("ID"), Cell::new("Connection string")]);
for i in remotes {
table.add_row(Row::new(vec![
table.add_row(vec![
Cell::new(&format!("{}", i.id)),
Cell::new(&i.connection_string),
]));
]);
}
print!("{}", table);
}

View File

@ -17,7 +17,7 @@ pub fn init_logs_and_tracing(
log_verbose_count: u8,
config: &crate::commands::run::Config,
) -> Result<TroggingGuard, trogging::Error> {
let mut logging_config = config.logging_config.clone();
let mut logging_config = config.run_config().logging_config.clone();
// Handle the case if -v/-vv is specified both before and after the server
// command

View File

@ -0,0 +1,298 @@
use crate::influxdb_ioxd::server_type::{common_state::CommonServerState, ServerType};
use futures::{future::FusedFuture, pin_mut, FutureExt};
use hyper::server::conn::AddrIncoming;
use observability_deps::tracing::{error, info};
use panic_logging::SendPanicsToTracing;
use snafu::{ResultExt, Snafu};
use std::{net::SocketAddr, sync::Arc};
use trace_http::ctx::TraceHeaderParser;
mod http;
mod jemalloc;
mod planner;
pub(crate) mod rpc;
pub(crate) mod server_type;
pub(crate) mod serving_readiness;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Unable to bind to listen for HTTP requests on {}: {}", addr, source))]
StartListeningHttp {
addr: SocketAddr,
source: hyper::Error,
},
#[snafu(display("Unable to bind to listen for gRPC requests on {}: {}", addr, source))]
StartListeningGrpc {
addr: SocketAddr,
source: std::io::Error,
},
#[snafu(display("Error serving HTTP: {}", source))]
ServingHttp { source: hyper::Error },
#[snafu(display("Error serving RPC: {}", source))]
ServingRpc { source: server_type::RpcError },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// On unix platforms we want to intercept SIGINT and SIGTERM
/// This method returns if either is signalled
#[cfg(unix)]
async fn wait_for_signal() {
use tokio::signal::unix::{signal, SignalKind};
let mut term = signal(SignalKind::terminate()).expect("failed to register signal handler");
let mut int = signal(SignalKind::interrupt()).expect("failed to register signal handler");
tokio::select! {
_ = term.recv() => info!("Received SIGTERM"),
_ = int.recv() => info!("Received SIGINT"),
}
}
#[cfg(windows)]
/// ctrl_c is the cross-platform way to intercept the equivalent of SIGINT
/// This method returns if this occurs
async fn wait_for_signal() {
let _ = tokio::signal::ctrl_c().await;
}
#[cfg(all(not(feature = "heappy"), not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"system".to_string()
}
#[cfg(all(feature = "heappy", not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"heappy".to_string()
}
#[cfg(all(not(feature = "heappy"), feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
tikv_jemalloc_ctl::config::malloc_conf::mib()
.unwrap()
.read()
.unwrap()
.to_string()
}
#[cfg(all(feature = "heappy", feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
compile_error!("must use exactly one memory allocator")
}
/// This is the entry point for the IOx server.
///
/// The precise server type depends on `T`. This entry point ensures that the given `server_type` is started using best
/// practice, e.g. that we print the GIT-hash and malloc-configs, that a panic handler is installed, etc.
///
/// Due to the invasive nature of the setup routine, this should not be used during unit tests.
pub async fn main<T>(common_state: CommonServerState, server_type: Arc<T>) -> Result<()>
where
T: ServerType,
{
let git_hash = option_env!("GIT_HASH").unwrap_or("UNKNOWN");
let num_cpus = num_cpus::get();
let build_malloc_conf = build_malloc_conf();
info!(
git_hash,
num_cpus,
%build_malloc_conf,
"InfluxDB IOx server starting",
);
if (common_state.run_config().grpc_bind_address == common_state.run_config().http_bind_address)
&& (common_state.run_config().grpc_bind_address.port() != 0)
{
error!(
grpc_bind_address=%common_state.run_config().grpc_bind_address,
http_bind_address=%common_state.run_config().http_bind_address,
"grpc and http bind addresses must differ",
);
std::process::exit(1);
}
// Install custom panic handler and forget about it.
//
// This leaks the handler and prevents it from ever being dropped during the
// lifetime of the program - this is actually a good thing, as it prevents
// the panic handler from being removed while unwinding a panic (which in
// turn, causes a panic - see #548)
let f = SendPanicsToTracing::new();
std::mem::forget(f);
// Register jemalloc metrics
server_type
.metric_registry()
.register_instrument("jemalloc_metrics", jemalloc::JemallocMetrics::new);
let grpc_listener = grpc_listener(common_state.run_config().grpc_bind_address.into()).await?;
let http_listener = http_listener(common_state.run_config().http_bind_address.into()).await?;
let trace_exporter = common_state.trace_exporter();
let r = serve(common_state, grpc_listener, http_listener, server_type).await;
if let Some(trace_exporter) = trace_exporter {
if let Err(e) = trace_exporter.drain().await {
error!(%e, "error draining trace exporter");
}
}
r
}
pub async fn grpc_listener(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
let listener = tokio::net::TcpListener::bind(addr)
.await
.context(StartListeningGrpc { addr })?;
match listener.local_addr() {
Ok(local_addr) => info!(%local_addr, "bound gRPC listener"),
Err(_) => info!(%addr, "bound gRPC listener"),
}
Ok(listener)
}
pub async fn http_listener(addr: SocketAddr) -> Result<AddrIncoming> {
let listener = AddrIncoming::bind(&addr).context(StartListeningHttp { addr })?;
info!(bind_addr=%listener.local_addr(), "bound HTTP listener");
Ok(listener)
}
/// Instantiates the gRPC and HTTP listeners and returns a Future that completes when
/// these listeners, the Server, Databases, etc... have all exited.
///
/// This is effectively the "main loop" for influxdb_iox
async fn serve<T>(
common_state: CommonServerState,
grpc_listener: tokio::net::TcpListener,
http_listener: AddrIncoming,
server_type: Arc<T>,
) -> Result<()>
where
T: ServerType,
{
// Construct a token to trigger shutdown of API services
let frontend_shutdown = tokio_util::sync::CancellationToken::new();
let trace_header_parser = TraceHeaderParser::new()
.with_jaeger_trace_context_header_name(
&common_state
.run_config()
.tracing_config
.traces_jaeger_trace_context_header_name,
)
.with_jaeger_debug_name(
&common_state
.run_config()
.tracing_config
.traces_jaeger_debug_name,
);
// Construct and start up gRPC server
let grpc_server = rpc::serve(
grpc_listener,
Arc::clone(&server_type),
trace_header_parser.clone(),
frontend_shutdown.clone(),
common_state.serving_readiness().clone(),
)
.fuse();
info!("gRPC server listening");
let http_server = http::serve(
http_listener,
Arc::clone(&server_type),
frontend_shutdown.clone(),
trace_header_parser,
)
.fuse();
info!("HTTP server listening");
// Purposefully use log not tokio-tracing to ensure correctly hooked up
log::info!("InfluxDB IOx server ready");
// Get IOx background worker task
let server_worker = Arc::clone(&server_type).background_worker().fuse();
// Shutdown signal
let signal = wait_for_signal().fuse();
// There are two different select macros - tokio::select and futures::select
//
// tokio::select takes ownership of the passed future "moving" it into the
// select block. This works well when not running select inside a loop, or
// when using a future that can be dropped and recreated, often the case
// with tokio's futures e.g. `channel.recv()`
//
// futures::select is more flexible as it doesn't take ownership of the provided
// future. However, to safely provide this it imposes some additional
// requirements
//
// All passed futures must implement FusedFuture - it is not safe to poll a future
// that has returned Poll::Ready(_). A FusedFuture has an is_terminated()
// method that indicates if it is safe to poll - e.g. false if it has
// returned Poll::Ready(_). futures::select uses this to implement its
// functionality. futures::FutureExt adds a fuse() method that
// wraps an arbitrary future and makes it a FusedFuture
//
// The additional requirement of futures::select is that if the future passed
// outlives the select block, it must be Unpin or already Pinned
// pin_mut constructs a Pin<&mut T> from a T by preventing moving the T
// from the current stack frame and constructing a Pin<&mut T> to it
pin_mut!(signal);
pin_mut!(server_worker);
pin_mut!(grpc_server);
pin_mut!(http_server);
// Return the first error encountered
let mut res = Ok(());
// Graceful shutdown can be triggered by sending SIGINT or SIGTERM to the
// process, or by a background task exiting - most likely with an error
//
// Graceful shutdown should then proceed in the following order
// 1. Stop accepting new HTTP and gRPC requests and drain existing connections
// 2. Trigger shutdown of internal background worker loops
//
// This is important to ensure background tasks, such as polling the tracker
// registry, don't exit before the HTTP and gRPC requests that depend on them have completed
while !grpc_server.is_terminated() && !http_server.is_terminated() {
futures::select! {
_ = signal => info!("Shutdown requested"),
_ = server_worker => {
info!("server worker shutdown prematurely");
},
result = grpc_server => match result {
Ok(_) => info!("gRPC server shutdown"),
Err(error) => {
error!(%error, "gRPC server error");
res = res.and(Err(Error::ServingRpc{source: error}))
}
},
result = http_server => match result {
Ok(_) => info!("HTTP server shutdown"),
Err(error) => {
error!(%error, "HTTP server error");
res = res.and(Err(Error::ServingHttp{source: error}))
}
},
}
frontend_shutdown.cancel()
}
info!("frontend shutdown completed");
server_type.shutdown_background_worker();
if !server_worker.is_terminated() {
server_worker.await;
}
info!("backend shutdown completed");
res
}
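
The fuse/pin/select dance above is easy to get wrong, so here is a standalone, minimal sketch of the idiom the comments describe (not IOx code; it assumes the `futures` crate plus `tokio` with the `rt`, `macros`, `signal`, and `time` features):

```rust
use futures::{future::FutureExt, pin_mut, select};

#[tokio::main]
async fn main() {
    // A long-running future stands in for the gRPC/HTTP server futures.
    let server = async {
        tokio::time::sleep(std::time::Duration::from_secs(3600)).await;
    }
    .fuse();
    // Ctrl-C stands in for the SIGINT/SIGTERM handling above.
    let shutdown = tokio::signal::ctrl_c().fuse();

    // `futures::select!` needs `FusedFuture` + pinned futures, hence `fuse()` + `pin_mut!`.
    pin_mut!(server);
    pin_mut!(shutdown);

    select! {
        _ = server => println!("server future finished"),
        _ = shutdown => println!("shutdown requested"),
    }
}
```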

View File

@ -1,5 +1,5 @@
use hashbrown::HashMap;
use metric::{Attributes, Metric, U64Counter};
use metric::{Attributes, Metric, U64Counter, U64Histogram, U64HistogramOptions};
use parking_lot::{MappedMutexGuard, Mutex, MutexGuard};
/// Line protocol ingest metrics
@ -14,6 +14,9 @@ pub struct LineProtocolMetrics {
/// The number of LP bytes ingested
ingest_bytes: Metric<U64Counter>,
/// Distribution of LP batch sizes.
ingest_batch_size_bytes: Metric<U64Histogram>,
/// Database metrics keyed by database name
databases: Mutex<HashMap<String, LineProtocolDatabaseMetrics>>,
}
@ -38,6 +41,12 @@ struct LineProtocolDatabaseMetrics {
/// The number of LP bytes ingested unsuccessfully
ingest_bytes_error: U64Counter,
/// Distribution of LP batch sizes ingested successfully
ingest_batch_size_bytes_ok: U64Histogram,
/// Distribution of LP batch sizes ingested unsuccessfully
ingest_batch_size_bytes_error: U64Histogram,
}
impl LineProtocolMetrics {
@ -47,6 +56,28 @@ impl LineProtocolMetrics {
ingest_fields: registry
.register_metric("ingest_fields", "total LP field values ingested"),
ingest_bytes: registry.register_metric("ingest_bytes", "total LP bytes ingested"),
ingest_batch_size_bytes: registry.register_metric_with_options(
"ingest_batch_size_bytes",
"distribution of ingested LP batch sizes",
|| {
U64HistogramOptions::new([
1024,
16 * 1024,
32 * 1024,
128 * 1024,
256 * 1024,
512 * 1024,
768 * 1024,
1024 * 1024,
4 * 1024 * 1024,
8 * 1024 * 1024,
16 * 1024 * 1024,
24 * 1024 * 1024,
32 * 1024 * 1024,
u64::MAX,
])
},
),
databases: Default::default(),
}
}
@ -66,11 +97,13 @@ impl LineProtocolMetrics {
metrics.ingest_lines_ok.inc(lines as u64);
metrics.ingest_fields_ok.inc(fields as u64);
metrics.ingest_bytes_ok.inc(bytes as u64);
metrics.ingest_batch_size_bytes_ok.record(bytes as u64);
}
false => {
metrics.ingest_lines_error.inc(lines as u64);
metrics.ingest_fields_error.inc(fields as u64);
metrics.ingest_bytes_error.inc(bytes as u64);
metrics.ingest_batch_size_bytes_error.record(bytes as u64);
}
}
}
@ -97,11 +130,15 @@ impl LineProtocolDatabaseMetrics {
let ingest_lines_ok = metrics.ingest_lines.recorder(attributes.clone());
let ingest_fields_ok = metrics.ingest_fields.recorder(attributes.clone());
let ingest_bytes_ok = metrics.ingest_bytes.recorder(attributes.clone());
let ingest_batch_size_bytes_ok =
metrics.ingest_batch_size_bytes.recorder(attributes.clone());
attributes.insert("status", "error");
let ingest_lines_error = metrics.ingest_lines.recorder(attributes.clone());
let ingest_fields_error = metrics.ingest_fields.recorder(attributes.clone());
let ingest_bytes_error = metrics.ingest_bytes.recorder(attributes.clone());
let ingest_batch_size_bytes_error =
metrics.ingest_batch_size_bytes.recorder(attributes.clone());
Self {
ingest_lines_ok,
@ -110,6 +147,8 @@ impl LineProtocolDatabaseMetrics {
ingest_fields_error,
ingest_bytes_ok,
ingest_bytes_error,
ingest_batch_size_bytes_ok,
ingest_batch_size_bytes_error,
}
}
}

View File

@ -0,0 +1,318 @@
use std::{convert::Infallible, num::NonZeroI32, sync::Arc};
use hyper::{
http::HeaderValue,
server::conn::{AddrIncoming, AddrStream},
Body, Method, Request, Response,
};
use observability_deps::tracing::{debug, error};
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use tokio_util::sync::CancellationToken;
use tower::Layer;
use trace_http::{ctx::TraceHeaderParser, tower::TraceLayer};
use crate::influxdb_ioxd::server_type::{RouteError, ServerType};
#[cfg(feature = "heappy")]
mod heappy;
#[cfg(feature = "pprof")]
mod pprof;
pub mod metrics;
#[cfg(test)]
pub mod test_utils;
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Snafu)]
pub enum ApplicationError {
/// Error for when we could not parse the http query uri (e.g.
/// `?foo=bar&bar=baz`)
#[snafu(display("Invalid query string in HTTP URI '{}': {}", query_string, source))]
InvalidQueryString {
query_string: String,
source: serde_urlencoded::de::Error,
},
#[snafu(display("PProf error: {}", source))]
PProf {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[cfg(feature = "heappy")]
#[snafu(display("Heappy error: {}", source))]
HeappyError { source: heappy::Error },
#[snafu(display("Protobuf error: {}", source))]
Prost { source: prost::EncodeError },
#[snafu(display("Protobuf error: {}", source))]
ProstIO { source: std::io::Error },
#[snafu(display("Empty flamegraph"))]
EmptyFlamegraph,
#[snafu(display("heappy support is not compiled"))]
HeappyIsNotCompiled,
#[snafu(display("pprof support is not compiled"))]
PProfIsNotCompiled,
#[snafu(display("Route error from run mode: {}", source))]
RunModeRouteError { source: Box<dyn RouteError> },
}
impl RouteError for ApplicationError {
fn response(&self) -> Response<Body> {
match self {
Self::InvalidQueryString { .. } => self.bad_request(),
Self::PProf { .. } => self.internal_error(),
Self::Prost { .. } => self.internal_error(),
Self::ProstIO { .. } => self.internal_error(),
Self::EmptyFlamegraph => self.no_content(),
Self::HeappyIsNotCompiled => self.internal_error(),
Self::PProfIsNotCompiled => self.internal_error(),
#[cfg(feature = "heappy")]
Self::HeappyError { .. } => self.internal_error(),
Self::RunModeRouteError { source } => source.response(),
}
}
}
pub async fn serve<M>(
addr: AddrIncoming,
server_type: Arc<M>,
shutdown: CancellationToken,
trace_header_parser: TraceHeaderParser,
) -> Result<(), hyper::Error>
where
M: ServerType,
{
let metric_registry = server_type.metric_registry();
let trace_collector = server_type.trace_collector();
let trace_layer = TraceLayer::new(trace_header_parser, metric_registry, trace_collector, false);
hyper::Server::builder(addr)
.serve(hyper::service::make_service_fn(|_conn: &AddrStream| {
let server_type = Arc::clone(&server_type);
let service = hyper::service::service_fn(move |request: Request<_>| {
route_request(Arc::clone(&server_type), request)
});
let service = trace_layer.layer(service);
futures::future::ready(Ok::<_, Infallible>(service))
}))
.with_graceful_shutdown(shutdown.cancelled())
.await
}
async fn route_request<M>(
server_type: Arc<M>,
mut req: Request<Body>,
) -> Result<Response<Body>, Infallible>
where
M: ServerType,
{
// we don't need the authorization header and we don't want to accidentally log it.
req.headers_mut().remove("authorization");
debug!(request = ?req,"Processing request");
let method = req.method().clone();
let uri = req.uri().clone();
let content_length = req.headers().get("content-length").cloned();
let response = match (method.clone(), uri.path()) {
(Method::GET, "/health") => health(),
(Method::GET, "/metrics") => handle_metrics(server_type.as_ref()),
(Method::GET, "/debug/pprof") => pprof_home(req).await,
(Method::GET, "/debug/pprof/profile") => pprof_profile(req).await,
(Method::GET, "/debug/pprof/allocs") => pprof_heappy_profile(req).await,
_ => server_type
.route_http_request(req)
.await
.map_err(|e| Box::new(e) as _)
.context(RunModeRouteError),
};
// TODO: Move logging to TraceLayer
match response {
Ok(response) => {
debug!(?response, "Successfully processed request");
Ok(response)
}
Err(error) => {
error!(%error, %method, %uri, ?content_length, "Error while handling request");
Ok(error.response())
}
}
}
fn health() -> Result<Response<Body>, ApplicationError> {
let response_body = "OK";
Ok(Response::new(Body::from(response_body.to_string())))
}
fn handle_metrics<M>(server_type: &M) -> Result<Response<Body>, ApplicationError>
where
M: ServerType,
{
let mut body: Vec<u8> = Default::default();
let mut reporter = metric_exporters::PrometheusTextEncoder::new(&mut body);
server_type.metric_registry().report(&mut reporter);
Ok(Response::new(Body::from(body)))
}
async fn pprof_home(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let default_host = HeaderValue::from_static("localhost");
let host = req
.headers()
.get("host")
.unwrap_or(&default_host)
.to_str()
.unwrap_or_default();
let profile_cmd = format!(
"/debug/pprof/profile?seconds={}",
PProfArgs::default_seconds()
);
let allocs_cmd = format!(
"/debug/pprof/allocs?seconds={}",
PProfAllocsArgs::default_seconds()
);
Ok(Response::new(Body::from(format!(
r#"<a href="{}">http://{}{}</a><br><a href="{}">http://{}{}</a>"#,
profile_cmd, host, profile_cmd, allocs_cmd, host, allocs_cmd,
))))
}
#[derive(Debug, Deserialize)]
struct PProfArgs {
#[serde(default = "PProfArgs::default_seconds")]
seconds: u64,
#[serde(default = "PProfArgs::default_frequency")]
frequency: NonZeroI32,
}
impl PProfArgs {
fn default_seconds() -> u64 {
30
}
// 99Hz to avoid coinciding with special periods
fn default_frequency() -> NonZeroI32 {
NonZeroI32::new(99).unwrap()
}
}
#[derive(Debug, Deserialize)]
struct PProfAllocsArgs {
#[serde(default = "PProfAllocsArgs::default_seconds")]
seconds: u64,
// The sampling interval is the number of bytes that have to be cumulatively allocated for a sample to be taken.
//
// For example, if the sampling interval is 99 and you're doing a million 40-byte allocations,
// the allocations profile will account for roughly 16MB instead of 40MB.
// Heappy will adjust the estimate for sampled recordings, but for now that feature is not yet implemented.
#[serde(default = "PProfAllocsArgs::default_interval")]
interval: NonZeroI32,
}
impl PProfAllocsArgs {
fn default_seconds() -> u64 {
30
}
// 1 means: sample every allocation.
fn default_interval() -> NonZeroI32 {
NonZeroI32::new(1).unwrap()
}
}
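
Putting the defaults above together, the profiling endpoints wired up in `route_request` accept the following query parameters (paths and defaults are taken from this file; the host and port depend on the configured HTTP bind address):

```text
GET /debug/pprof                                    # HTML index linking to both profiles
GET /debug/pprof/profile?seconds=30&frequency=99    # CPU profile (requires the `pprof` feature)
GET /debug/pprof/allocs?seconds=30&interval=1       # heap profile (requires the `heappy` feature)
```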
#[cfg(feature = "pprof")]
async fn pprof_profile(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
use ::pprof::protos::Message;
let query_string = req.uri().query().unwrap_or_default();
let query: PProfArgs =
serde_urlencoded::from_str(query_string).context(InvalidQueryString { query_string })?;
let report = self::pprof::dump_rsprof(query.seconds, query.frequency.get())
.await
.map_err(|e| Box::new(e) as _)
.context(PProf)?;
let mut body: Vec<u8> = Vec::new();
// render flamegraph when opening in the browser
// otherwise render as protobuf; works great with: go tool pprof http://..../debug/pprof/profile
if req
.headers()
.get_all("Accept")
.iter()
.flat_map(|i| i.to_str().unwrap_or_default().split(','))
.any(|i| i == "text/html" || i == "image/svg+xml")
{
report
.flamegraph(&mut body)
.map_err(|e| Box::new(e) as _)
.context(PProf)?;
if body.is_empty() {
return EmptyFlamegraph.fail();
}
} else {
let profile = report
.pprof()
.map_err(|e| Box::new(e) as _)
.context(PProf)?;
profile.encode(&mut body).context(Prost)?;
}
Ok(Response::new(Body::from(body)))
}
#[cfg(not(feature = "pprof"))]
async fn pprof_profile(_req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
PProfIsNotCompiled {}.fail()
}
// If heappy support is enabled, call it
#[cfg(feature = "heappy")]
async fn pprof_heappy_profile(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let query_string = req.uri().query().unwrap_or_default();
let query: PProfAllocsArgs =
serde_urlencoded::from_str(query_string).context(InvalidQueryString { query_string })?;
let report = self::heappy::dump_heappy_rsprof(query.seconds, query.interval.get())
.await
.context(HeappyError)?;
let mut body: Vec<u8> = Vec::new();
// render flamegraph when opening in the browser
// otherwise render as protobuf;
// works great with: go tool pprof http://..../debug/pprof/allocs
if req
.headers()
.get_all("Accept")
.iter()
.flat_map(|i| i.to_str().unwrap_or_default().split(','))
.any(|i| i == "text/html" || i == "image/svg+xml")
{
report.flamegraph(&mut body);
if body.is_empty() {
return EmptyFlamegraph.fail();
}
} else {
report.write_pprof(&mut body).context(ProstIO)?
}
Ok(Response::new(Body::from(body)))
}
// Return error if heappy not enabled
#[cfg(not(feature = "heappy"))]
async fn pprof_heappy_profile(_req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
HeappyIsNotCompiled {}.fail()
}

View File

@ -0,0 +1,144 @@
use std::{
fmt::Debug,
net::{IpAddr, Ipv4Addr, SocketAddr},
sync::Arc,
};
use http::header::CONTENT_TYPE;
use hyper::{server::conn::AddrIncoming, StatusCode};
use serde::de::DeserializeOwned;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use crate::influxdb_ioxd::{http::serve, server_type::ServerType};
/// checks a http response against expected results
pub async fn check_response(
description: &str,
response: Result<reqwest::Response, reqwest::Error>,
expected_status: StatusCode,
expected_body: Option<&str>,
) {
// Print the response so if the test fails, we have a log of
// what went wrong
println!("{} response: {:?}", description, response);
if let Ok(response) = response {
let status = response.status();
let body = response
.text()
.await
.expect("Converting request body to string");
assert_eq!(status, expected_status);
if let Some(expected_body) = expected_body {
assert!(
body.contains(expected_body),
"Could not find expected in body.\n\nExpected:\n{}\n\nBody:\n{}",
expected_body,
body
);
}
} else {
panic!("Unexpected error response: {:?}", response);
}
}
#[allow(dead_code)]
pub async fn check_json_response<T: DeserializeOwned + Eq + Debug>(
client: &reqwest::Client,
url: &str,
expected_status: StatusCode,
) -> T {
let response = client.get(url).send().await;
// Print the response so if the test fails, we have a log of
// what went wrong
println!("{} response: {:?}", url, response);
if let Ok(response) = response {
let status = response.status();
let body: T = response
.json()
.await
.expect("Converting request body to string");
assert_eq!(status, expected_status);
body
} else {
panic!("Unexpected error response: {:?}", response);
}
}
pub fn get_content_type(response: &Result<reqwest::Response, reqwest::Error>) -> String {
if let Ok(response) = response {
response
.headers()
.get(CONTENT_TYPE)
.map(|v| v.to_str().unwrap())
.unwrap_or("")
.to_string()
} else {
"".to_string()
}
}
pub struct TestServer<M>
where
M: ServerType,
{
join_handle: JoinHandle<()>,
url: String,
server_type: Arc<M>,
}
impl<M> TestServer<M>
where
M: ServerType,
{
pub fn new(server_type: Arc<M>) -> Self {
// NB: specify port 0 to let the OS pick the port.
let bind_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 0);
let addr = AddrIncoming::bind(&bind_addr).expect("failed to bind server");
let url = format!("http://{}", addr.local_addr());
let trace_header_parser = trace_http::ctx::TraceHeaderParser::new()
.with_jaeger_trace_context_header_name("uber-trace-id");
let server_type_captured = Arc::clone(&server_type);
let join_handle = tokio::task::spawn(async {
serve(
addr,
server_type_captured,
CancellationToken::new(),
trace_header_parser,
)
.await
.unwrap();
});
println!("Started server at {}", url);
Self {
join_handle,
url,
server_type,
}
}
pub fn url(&self) -> &str {
&self.url
}
pub fn server_type(&self) -> &Arc<M> {
&self.server_type
}
}
impl<M> Drop for TestServer<M>
where
M: ServerType,
{
fn drop(&mut self) {
self.join_handle.abort();
}
}

View File

@ -0,0 +1,195 @@
use std::sync::Arc;
use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
use tonic::transport::NamedService;
use tonic_health::server::HealthReporter;
use trace_http::ctx::TraceHeaderParser;
use crate::influxdb_ioxd::{
server_type::{RpcError, ServerType},
serving_readiness::ServingReadiness,
};
pub mod error;
pub(crate) mod testing;
/// Returns the name of the gRPC service S.
pub fn service_name<S: NamedService>(_: &S) -> &'static str {
S::NAME
}
#[derive(Debug)]
pub struct RpcBuilderInput {
pub socket: TcpListener,
pub trace_header_parser: TraceHeaderParser,
pub shutdown: CancellationToken,
pub serving_readiness: ServingReadiness,
}
#[derive(Debug)]
pub struct RpcBuilder<T> {
pub inner: T,
pub health_reporter: HealthReporter,
pub shutdown: CancellationToken,
pub socket: TcpListener,
pub serving_readiness: ServingReadiness,
}
/// Adds a gRPC service to the builder, and registers it with the
/// health reporter
macro_rules! add_service {
($builder:ident, $svc:expr) => {
let $builder = {
// `inner` might be required to be `mut` or not depending if we're acting on:
// - a `Server`, no service added yet, no `mut` required
// - a `Router`, some service was added already, `mut` required
#[allow(unused_mut)]
{
use $crate::influxdb_ioxd::rpc::{service_name, RpcBuilder};
let RpcBuilder {
mut inner,
mut health_reporter,
shutdown,
socket,
serving_readiness,
} = $builder;
let service = $svc;
let status = tonic_health::ServingStatus::Serving;
health_reporter
.set_service_status(service_name(&service), status)
.await;
let inner = inner.add_service(service);
RpcBuilder {
inner,
health_reporter,
shutdown,
socket,
serving_readiness,
}
}
};
};
}
pub(crate) use add_service;
/// Adds a gRPC service to the builder gated behind the serving
/// readiness check, and registers it with the health reporter
macro_rules! add_gated_service {
($builder:ident, $svc:expr) => {
let $builder = {
let service = $svc;
let interceptor = $builder.serving_readiness.clone().into_interceptor();
let service = tonic::codegen::InterceptedService::new(service, interceptor);
add_service!($builder, service);
$builder
};
};
}
pub(crate) use add_gated_service;
/// Creates a [`RpcBuilder`] from [`RpcBuilderInput`].
///
/// The resulting builder can be used w/ [`add_service`] and [`add_gated_service`]. After adding all services it should
/// be used w/ [`serve_builder`].
macro_rules! setup_builder {
($input:ident, $server_type:ident) => {{
use $crate::influxdb_ioxd::{
rpc::{add_service, testing, RpcBuilder},
server_type::ServerType,
};
let RpcBuilderInput {
socket,
trace_header_parser,
shutdown,
serving_readiness,
} = $input;
let (health_reporter, health_service) = tonic_health::server::health_reporter();
let reflection_service = tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(generated_types::FILE_DESCRIPTOR_SET)
.build()
.expect("gRPC reflection data broken");
let builder = tonic::transport::Server::builder();
let builder = builder.layer(trace_http::tower::TraceLayer::new(
trace_header_parser,
$server_type.metric_registry(),
$server_type.trace_collector(),
true,
));
let builder = RpcBuilder {
inner: builder,
health_reporter,
shutdown,
socket,
serving_readiness,
};
// important that this one is NOT gated so that it can answer health requests
add_service!(builder, health_service);
add_service!(builder, reflection_service);
add_service!(builder, testing::make_server());
builder
}};
}
pub(crate) use setup_builder;
/// Serve a server constructed using [`RpcBuilder`].
macro_rules! serve_builder {
($builder:ident) => {{
use tokio_stream::wrappers::TcpListenerStream;
use $crate::influxdb_ioxd::rpc::RpcBuilder;
let RpcBuilder {
inner,
shutdown,
socket,
..
} = $builder;
let stream = TcpListenerStream::new(socket);
inner
.serve_with_incoming_shutdown(stream, shutdown.cancelled())
.await?;
}};
}
pub(crate) use serve_builder;
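
Taken together, `setup_builder!`/`serve_builder!` reduce to the tonic idiom below: serve an incoming `TcpListenerStream` until a `CancellationToken` fires. This sketch keeps only that wiring (the trace layer, reflection service, readiness interceptor, and IOx services are omitted), so it illustrates the mechanism rather than reproducing the real builder.

```rust
use tokio::net::TcpListener;
use tokio_stream::wrappers::TcpListenerStream;
use tokio_util::sync::CancellationToken;

async fn serve_until_cancelled(
    socket: TcpListener,
    shutdown: CancellationToken,
) -> Result<(), tonic::transport::Error> {
    let (_health_reporter, health_service) = tonic_health::server::health_reporter();

    tonic::transport::Server::builder()
        // The real builder layers tracing middleware and adds all IOx services here.
        .add_service(health_service)
        // Stop accepting connections and drain once `shutdown` is cancelled.
        .serve_with_incoming_shutdown(TcpListenerStream::new(socket), shutdown.cancelled())
        .await
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let socket = TcpListener::bind("127.0.0.1:0").await?;
    let shutdown = CancellationToken::new();
    shutdown.cancel(); // cancel immediately so the example returns
    serve_until_cancelled(socket, shutdown).await?;
    Ok(())
}
```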
/// Instantiate a gRPC server listening on the specified address and
/// implementing the IOx, Storage, and Flight gRPC interfaces on top of the
/// underlying hyper server instance. Resolves when the server has
/// shut down.
pub async fn serve<T>(
socket: TcpListener,
server_type: Arc<T>,
trace_header_parser: TraceHeaderParser,
shutdown: CancellationToken,
serving_readiness: ServingReadiness,
) -> Result<(), RpcError>
where
T: ServerType,
{
let builder_input = RpcBuilderInput {
socket,
trace_header_parser,
shutdown,
serving_readiness,
};
server_type.server_grpc(builder_input).await
}

View File

@ -67,9 +67,7 @@ pub fn default_server_error_handler(error: server::Error) -> tonic::Status {
Error::DatabaseInit { source } => {
tonic::Status::invalid_argument(format!("Cannot initialize database: {}", source))
}
e @ Error::StoreSequencedEntryFailures { .. } => {
tonic::Status::invalid_argument(e.to_string())
}
e @ Error::StoreWriteErrors { .. } => tonic::Status::invalid_argument(e.to_string()),
error => {
error!(?error, "Unexpected error");
InternalError {}.into()
@ -130,18 +128,11 @@ pub fn default_database_error_handler(error: server::database::Error) -> tonic::
error!(%source, "Unexpected error deleting database");
InternalError {}.into()
}
Error::NoActiveDatabaseToDelete { db_name } => NotFound {
resource_type: "database".to_string(),
resource_name: db_name,
..Default::default()
}
.into(),
Error::CannotRestoreActiveDatabase { .. } => {
Error::CannotDeleteInactiveDatabase { .. } => {
tonic::Status::failed_precondition(error.to_string())
}
Error::CannotRestoreDatabaseInObjectStorage { source } => {
error!(%source, "Unexpected error restoring database");
InternalError {}.into()
Error::CannotRestoreActiveDatabase { .. } => {
tonic::Status::failed_precondition(error.to_string())
}
}
}

View File

@ -0,0 +1,64 @@
use std::sync::Arc;
use snafu::{ResultExt, Snafu};
use trace::TraceCollector;
use crate::{
influxdb_ioxd::serving_readiness::ServingReadiness, structopt_blocks::run_config::RunConfig,
};
#[derive(Debug, Snafu)]
pub enum CommonServerStateError {
#[snafu(display("Cannot create tracing pipeline: {}", source))]
Tracing { source: trace_exporters::Error },
}
/// Common state used by all server types (e.g. `Database` and `Router`)
#[derive(Debug)]
pub struct CommonServerState {
run_config: RunConfig,
serving_readiness: ServingReadiness,
trace_exporter: Option<Arc<trace_exporters::export::AsyncExporter>>,
}
impl CommonServerState {
pub fn from_config(run_config: RunConfig) -> Result<Self, CommonServerStateError> {
let serving_readiness = run_config.initial_serving_state.clone().into();
let trace_exporter = run_config.tracing_config.build().context(Tracing)?;
Ok(Self {
run_config,
serving_readiness,
trace_exporter,
})
}
#[cfg(test)]
pub fn for_testing() -> Self {
use structopt::StructOpt;
Self::from_config(
RunConfig::from_iter_safe(["not_used".to_string()].into_iter())
.expect("default parsing should work"),
)
.expect("default configs should work")
}
pub fn run_config(&self) -> &RunConfig {
&self.run_config
}
pub fn serving_readiness(&self) -> &ServingReadiness {
&self.serving_readiness
}
pub fn trace_exporter(&self) -> Option<Arc<trace_exporters::export::AsyncExporter>> {
self.trace_exporter.clone()
}
pub fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>> {
self.trace_exporter
.clone()
.map(|x| -> Arc<dyn TraceCollector> { x })
}
}

View File

@ -1,389 +1,140 @@
use crate::{
commands::run::Config,
object_store::{check_object_store, warn_about_inmem_store},
};
use futures::{future::FusedFuture, pin_mut, FutureExt};
use hyper::server::conn::AddrIncoming;
use object_store::{self, ObjectStore};
use observability_deps::tracing::{error, info, warn};
use panic_logging::SendPanicsToTracing;
use server::{
connection::ConnectionManagerImpl as ConnectionManager, ApplicationState, RemoteTemplate,
Server as AppServer, ServerConfig,
};
use snafu::{ResultExt, Snafu};
use std::{convert::TryFrom, net::SocketAddr, sync::Arc};
use std::sync::Arc;
use async_trait::async_trait;
use futures::{future::FusedFuture, FutureExt};
use hyper::{Body, Request, Response};
use metric::Registry;
use observability_deps::tracing::{error, info};
use server::{connection::ConnectionManager, ApplicationState, Server};
use tokio_util::sync::CancellationToken;
use trace::TraceCollector;
use trace_http::ctx::TraceHeaderParser;
use crate::influxdb_ioxd::{
http::metrics::LineProtocolMetrics,
rpc::RpcBuilderInput,
server_type::{RpcError, ServerType},
serving_readiness::ServingReadiness,
};
mod http;
mod jemalloc;
mod planner;
mod rpc;
pub(crate) mod serving_readiness;
pub mod setup;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Unable to bind to listen for HTTP requests on {}: {}", addr, source))]
StartListeningHttp {
addr: SocketAddr,
source: hyper::Error,
},
pub use self::http::ApplicationError;
#[snafu(display("Unable to bind to listen for gRPC requests on {}: {}", addr, source))]
StartListeningGrpc {
addr: SocketAddr,
source: std::io::Error,
},
use super::common_state::CommonServerState;
#[snafu(display("Error serving HTTP: {}", source))]
ServingHttp { source: hyper::Error },
#[snafu(display("Error serving RPC: {}", source))]
ServingRpc { source: rpc::Error },
#[snafu(display("Cannot parse object store config: {}", source))]
ObjectStoreParsing {
source: crate::object_store::ParseError,
},
#[snafu(display("Cannot check object store config: {}", source))]
ObjectStoreCheck {
source: crate::object_store::CheckError,
},
#[snafu(display("Cannot create tracing pipeline: {}", source))]
Tracing { source: trace_exporters::Error },
#[derive(Debug)]
pub struct DatabaseServerType<M>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
pub application: Arc<ApplicationState>,
pub server: Arc<Server<M>>,
pub lp_metrics: Arc<LineProtocolMetrics>,
pub max_request_size: usize,
pub serving_readiness: ServingReadiness,
shutdown: CancellationToken,
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
impl<M> DatabaseServerType<M>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
pub fn new(
application: Arc<ApplicationState>,
server: Arc<Server<M>>,
common_state: &CommonServerState,
) -> Self {
let lp_metrics = Arc::new(LineProtocolMetrics::new(
application.metric_registry().as_ref(),
));
/// On unix platforms we want to intercept SIGINT and SIGTERM.
/// This method returns if either is signalled.
#[cfg(unix)]
async fn wait_for_signal() {
use tokio::signal::unix::{signal, SignalKind};
let mut term = signal(SignalKind::terminate()).expect("failed to register signal handler");
let mut int = signal(SignalKind::interrupt()).expect("failed to register signal handler");
tokio::select! {
_ = term.recv() => info!("Received SIGTERM"),
_ = int.recv() => info!("Received SIGINT"),
}
}
#[cfg(windows)]
/// ctrl_c is the cross-platform way to intercept the equivalent of SIGINT
/// This method returns if this occurs
async fn wait_for_signal() {
let _ = tokio::signal::ctrl_c().await;
}
async fn make_application(config: &Config) -> Result<Arc<ApplicationState>> {
warn_about_inmem_store(&config.object_store_config);
let object_store =
ObjectStore::try_from(&config.object_store_config).context(ObjectStoreParsing)?;
check_object_store(&object_store)
.await
.context(ObjectStoreCheck)?;
let object_storage = Arc::new(object_store);
Ok(Arc::new(ApplicationState::new(
object_storage,
config.num_worker_threads,
)))
}
fn make_server(
application: Arc<ApplicationState>,
config: &Config,
) -> Arc<AppServer<ConnectionManager>> {
let server_config = ServerConfig {
remote_template: config.remote_template.clone().map(RemoteTemplate::new),
wipe_catalog_on_error: config.wipe_catalog_on_error.into(),
skip_replay_and_seek_instead: config.skip_replay_and_seek_instead.into(),
};
if config.grpc_bind_address == config.http_bind_address && config.grpc_bind_address.port() != 0
{
error!(
%config.grpc_bind_address,
%config.http_bind_address,
"grpc and http bind addresses must differ",
);
std::process::exit(1);
}
let connection_manager = ConnectionManager::new();
let app_server = Arc::new(AppServer::new(
connection_manager,
application,
server_config,
));
// if this ID isn't set the server won't be usable until this is set via an API
// call
if let Some(id) = config.server_id_config.server_id {
app_server.set_id(id).expect("server id already set");
} else {
warn!("server ID not set. ID must be set via the INFLUXDB_IOX_ID config or API before writing or querying data.");
}
app_server
}
#[cfg(all(not(feature = "heappy"), not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"system".to_string()
}
#[cfg(all(feature = "heappy", not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"heappy".to_string()
}
#[cfg(all(not(feature = "heappy"), feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
tikv_jemalloc_ctl::config::malloc_conf::mib()
.unwrap()
.read()
.unwrap()
.to_string()
}
#[cfg(all(feature = "heappy", feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
compile_error!("must use exactly one memory allocator")
}
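
The four `cfg`-gated `build_malloc_conf` variants above select exactly one allocator description at compile time and turn an invalid feature combination into a build failure. A hedged sketch of the same pattern with hypothetical features `alloc_a` and `alloc_b` (not real IOx features):

```rust
// With no features enabled this prints "system"; enabling both `alloc_a` and
// `alloc_b` fails to compile, mirroring build_malloc_conf above.
#[cfg(all(not(feature = "alloc_a"), not(feature = "alloc_b")))]
fn allocator_name() -> &'static str {
    "system"
}

#[cfg(all(feature = "alloc_a", not(feature = "alloc_b")))]
fn allocator_name() -> &'static str {
    "alloc_a"
}

#[cfg(all(not(feature = "alloc_a"), feature = "alloc_b"))]
fn allocator_name() -> &'static str {
    "alloc_b"
}

#[cfg(all(feature = "alloc_a", feature = "alloc_b"))]
compile_error!("features `alloc_a` and `alloc_b` are mutually exclusive");

fn main() {
    println!("allocator: {}", allocator_name());
}
```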
/// This is the entry point for the IOx server. `config` represents
/// command line arguments, if any.
pub async fn main(config: Config) -> Result<()> {
let git_hash = option_env!("GIT_HASH").unwrap_or("UNKNOWN");
let num_cpus = num_cpus::get();
let build_malloc_conf = build_malloc_conf();
info!(
git_hash,
num_cpus,
%build_malloc_conf,
"InfluxDB IOx server starting",
);
// Install custom panic handler and forget about it.
//
// This leaks the handler and prevents it from ever being dropped during the
// lifetime of the program - this is actually a good thing, as it prevents
// the panic handler from being removed while unwinding a panic (which in
// turn, causes a panic - see #548)
let f = SendPanicsToTracing::new();
std::mem::forget(f);
let application = make_application(&config).await?;
// Register jemalloc metrics
application
.metric_registry()
.register_instrument("jemalloc_metrics", jemalloc::JemallocMetrics::new);
let app_server = make_server(Arc::clone(&application), &config);
let grpc_listener = grpc_listener(config.grpc_bind_address).await?;
let http_listener = http_listener(config.http_bind_address).await?;
let async_exporter = config.tracing_config.build().context(Tracing)?;
let trace_collector = async_exporter
.clone()
.map(|x| -> Arc<dyn TraceCollector> { x });
let r = serve(
config,
application,
grpc_listener,
http_listener,
trace_collector,
app_server,
)
.await;
if let Some(async_exporter) = async_exporter {
if let Err(e) = async_exporter.drain().await {
error!(%e, "error draining trace exporter");
Self {
application,
server,
lp_metrics,
max_request_size: common_state.run_config().max_http_request_size,
serving_readiness: common_state.serving_readiness().clone(),
shutdown: CancellationToken::new(),
}
}
r
}
async fn grpc_listener(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
let listener = tokio::net::TcpListener::bind(addr)
.await
.context(StartListeningGrpc { addr })?;
#[async_trait]
impl<M> ServerType for DatabaseServerType<M>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
type RouteError = ApplicationError;
match listener.local_addr() {
Ok(local_addr) => info!(%local_addr, "bound gRPC listener"),
Err(_) => info!(%addr, "bound gRPC listener"),
fn metric_registry(&self) -> Arc<Registry> {
Arc::clone(self.application.metric_registry())
}
Ok(listener)
}
fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>> {
self.application.trace_collector().clone()
}
async fn http_listener(addr: SocketAddr) -> Result<AddrIncoming> {
let listener = AddrIncoming::bind(&addr).context(StartListeningHttp { addr })?;
info!(bind_addr=%listener.local_addr(), "bound HTTP listener");
async fn route_http_request(
&self,
req: Request<Body>,
) -> Result<Response<Body>, Self::RouteError> {
self::http::route_request(self, req).await
}
Ok(listener)
}
async fn server_grpc(self: Arc<Self>, builder_input: RpcBuilderInput) -> Result<(), RpcError> {
self::rpc::server_grpc(self, builder_input).await
}
/// Instantiates the gRPC and HTTP listeners and returns a Future that completes when
/// these listeners, the Server, Databases, etc... have all exited.
///
/// This is effectively the "main loop" for influxdb_iox
async fn serve(
config: Config,
application: Arc<ApplicationState>,
grpc_listener: tokio::net::TcpListener,
http_listener: AddrIncoming,
trace_collector: Option<Arc<dyn TraceCollector>>,
app_server: Arc<AppServer<ConnectionManager>>,
) -> Result<()> {
// Construct a token to trigger shutdown of API services
let frontend_shutdown = tokio_util::sync::CancellationToken::new();
async fn background_worker(self: Arc<Self>) {
let server_worker = self.server.join().fuse();
futures::pin_mut!(server_worker);
let trace_header_parser = TraceHeaderParser::new()
.with_jaeger_trace_context_header_name(
config
.tracing_config
.traces_jaeger_trace_context_header_name,
)
.with_jaeger_debug_name(config.tracing_config.traces_jaeger_debug_name);
// Construct and start up gRPC server
let grpc_server = rpc::serve(
grpc_listener,
Arc::clone(&application),
Arc::clone(&app_server),
trace_header_parser.clone(),
trace_collector.clone(),
frontend_shutdown.clone(),
config.initial_serving_state.into(),
)
.fuse();
info!("gRPC server listening");
let max_http_request_size = config.max_http_request_size;
let http_server = http::serve(
http_listener,
Arc::clone(&application),
Arc::clone(&app_server),
frontend_shutdown.clone(),
max_http_request_size,
trace_header_parser,
trace_collector,
)
.fuse();
info!("HTTP server listening");
// Purposefully use log not tokio-tracing to ensure correctly hooked up
log::info!("InfluxDB IOx server ready");
// Get IOx background worker task
let server_worker = app_server.join().fuse();
// Shutdown signal
let signal = wait_for_signal().fuse();
// There are two different select macros - tokio::select and futures::select
//
// tokio::select takes ownership of the passed future "moving" it into the
// select block. This works well when not running select inside a loop, or
// when using a future that can be dropped and recreated, often the case
// with tokio's futures e.g. `channel.recv()`
//
// futures::select is more flexible as it doesn't take ownership of the provided
// future. However, to safely provide this it imposes some additional
// requirements
//
// All passed futures must implement FusedFuture - it is invalid to poll a future
// that has returned Poll::Ready(_). A FusedFuture has an is_terminated()
// method that indicates if it is safe to poll - e.g. false if it has
// returned Poll::Ready(_). futures::select uses this to implement its
// functionality. futures::FutureExt adds a fuse() method that
// wraps an arbitrary future and makes it a FusedFuture
//
// The additional requirement of futures::select is that if the future passed
// outlives the select block, it must be Unpin or already Pinned
// pin_mut constructs a Pin<&mut T> from a T by preventing moving the T
// from the current stack frame and constructing a Pin<&mut T> to it
pin_mut!(signal);
pin_mut!(server_worker);
pin_mut!(grpc_server);
pin_mut!(http_server);
// Return the first error encountered
let mut res = Ok(());
// Graceful shutdown can be triggered by sending SIGINT or SIGTERM to the
// process, or by a background task exiting - most likely with an error
//
// Graceful shutdown should then proceed in the following order
// 1. Stop accepting new HTTP and gRPC requests and drain existing connections
// 2. Trigger shutdown of internal background workers loops
//
// This is important to ensure background tasks, such as polling the tracker
// registry, don't exit before HTTP and gRPC requests dependent on them
while !grpc_server.is_terminated() && !http_server.is_terminated() {
futures::select! {
_ = signal => info!("Shutdown requested"),
_ = server_worker => {
info!("server worker shutdown prematurely");
},
result = grpc_server => match result {
Ok(_) => info!("gRPC server shutdown"),
Err(error) => {
error!(%error, "gRPC server error");
res = res.and(Err(Error::ServingRpc{source: error}))
}
},
result = http_server => match result {
Ok(_) => info!("HTTP server shutdown"),
Err(error) => {
error!(%error, "HTTP server error");
res = res.and(Err(Error::ServingHttp{source: error}))
}
},
_ = server_worker => {},
_ = self.shutdown.cancelled().fuse() => {},
}
frontend_shutdown.cancel()
}
self.server.shutdown();
info!("frontend shutdown completed");
app_server.shutdown();
if !server_worker.is_terminated() {
match server_worker.await {
Ok(_) => info!("server worker shutdown"),
Err(error) => error!(%error, "server worker error"),
if !server_worker.is_terminated() {
match server_worker.await {
Ok(_) => info!("server worker shutdown"),
Err(error) => error!(%error, "server worker error"),
}
}
info!("server completed shutting down");
self.application.join();
info!("shared application state completed shutting down");
}
info!("server completed shutting down");
application.join();
info!("shared application state completed shutting down");
res
fn shutdown_background_worker(&self) {
self.server.shutdown();
self.application.join();
}
}
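
The long comment in the removed `serve` loop above explains why `futures::select!` needs `FusedFuture` and pinned futures; the new `background_worker` relies on the same rules. A minimal, self-contained sketch of that pattern (plain tokio/futures only, no IOx types; the sleep stands in for the server worker):

```rust
use futures::{pin_mut, FutureExt};
use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let shutdown = CancellationToken::new();

    // `fuse()` turns these into FusedFutures, so select! can consult
    // `is_terminated()` and never poll a completed future again.
    let worker = tokio::time::sleep(std::time::Duration::from_millis(10)).fuse();
    let cancelled = shutdown.cancelled().fuse();

    // Pin both futures to the stack so the loop can poll them by reference.
    pin_mut!(worker);
    pin_mut!(cancelled);

    loop {
        futures::select! {
            _ = worker => {
                println!("background worker finished");
                break;
            }
            _ = cancelled => {
                println!("shutdown requested");
                break;
            }
        }
    }
}
```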
#[cfg(test)]
mod tests {
use crate::{
commands::run::database::Config,
influxdb_ioxd::{
grpc_listener, http_listener, serve,
server_type::database::setup::{make_application, make_server},
},
structopt_blocks::run_config::RunConfig,
};
use super::*;
use ::http::{header::HeaderName, HeaderValue};
use data_types::{database_rules::DatabaseRules, DatabaseName};
use influxdb_iox_client::connection::Connection;
use server::rules::ProvidedDatabaseRules;
use std::{convert::TryInto, num::NonZeroU64};
use futures::pin_mut;
use influxdb_iox_client::{connection::Connection, flight::PerformQuery};
use server::{connection::ConnectionManagerImpl, rules::ProvidedDatabaseRules};
use std::{convert::TryInto, net::SocketAddr, num::NonZeroU64};
use structopt::StructOpt;
use tokio::task::JoinHandle;
use trace::{
@ -400,28 +151,28 @@ mod tests {
"--grpc-bind",
"127.0.0.1:0",
]);
config.server_id_config.server_id = server_id.map(|x| x.try_into().unwrap());
config.run_config.server_id_config.server_id = server_id.map(|x| x.try_into().unwrap());
config
}
async fn test_serve(
config: Config,
config: RunConfig,
application: Arc<ApplicationState>,
server: Arc<AppServer<ConnectionManager>>,
server: Arc<Server<ConnectionManagerImpl>>,
) {
let grpc_listener = grpc_listener(config.grpc_bind_address).await.unwrap();
let http_listener = http_listener(config.grpc_bind_address).await.unwrap();
let grpc_listener = grpc_listener(config.grpc_bind_address.into())
.await
.unwrap();
let http_listener = http_listener(config.grpc_bind_address.into())
.await
.unwrap();
serve(
config,
application,
grpc_listener,
http_listener,
None,
server,
)
.await
.unwrap()
let common_state = CommonServerState::from_config(config).unwrap();
let server_type = Arc::new(DatabaseServerType::new(application, server, &common_state));
serve(common_state, grpc_listener, http_listener, server_type)
.await
.unwrap()
}
#[tokio::test]
@ -430,12 +181,12 @@ mod tests {
// Create a server and wait for it to initialize
let config = test_config(Some(23));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
// Start serving
let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse();
let serve_fut = test_serve(config.run_config, application, Arc::clone(&server)).fuse();
pin_mut!(serve_fut);
// Nothing to trigger termination, so serve future should continue running
@ -458,10 +209,10 @@ mod tests {
async fn test_server_shutdown_uninit() {
// Create a server but don't set a server id
let config = test_config(None);
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse();
let serve_fut = test_serve(config.run_config, application, Arc::clone(&server)).fuse();
pin_mut!(serve_fut);
// Nothing should have triggered shutdown so serve shouldn't finish
@ -489,11 +240,11 @@ mod tests {
async fn test_server_panic() {
// Create a server and wait for it to initialize
let config = test_config(Some(999999999));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse();
let serve_fut = test_serve(config.run_config, application, Arc::clone(&server)).fuse();
pin_mut!(serve_fut);
// Nothing should have triggered shutdown so serve shouldn't finish
@ -516,7 +267,7 @@ mod tests {
async fn test_database_panic() {
// Create a server and wait for it to initialize
let config = test_config(Some(23));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
@ -529,7 +280,12 @@ mod tests {
let other_db = server.database(&other_db_name).unwrap();
let serve_fut = test_serve(config, Arc::clone(&application), Arc::clone(&server)).fuse();
let serve_fut = test_serve(
config.run_config,
Arc::clone(&application),
Arc::clone(&server),
)
.fuse();
pin_mut!(serve_fut);
// Nothing should have triggered shutdown so serve shouldn't finish
@ -593,27 +349,33 @@ mod tests {
collector: &Arc<T>,
) -> (
SocketAddr,
Arc<AppServer<ConnectionManager>>,
JoinHandle<Result<()>>,
Arc<Server<ConnectionManagerImpl>>,
JoinHandle<crate::influxdb_ioxd::Result<()>>,
) {
let config = test_config(Some(23));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, Some(Arc::<T>::clone(collector)))
.await
.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
let grpc_listener = grpc_listener(config.grpc_bind_address).await.unwrap();
let http_listener = http_listener(config.grpc_bind_address).await.unwrap();
let grpc_listener = grpc_listener(config.run_config.grpc_bind_address.into())
.await
.unwrap();
let http_listener = http_listener(config.run_config.grpc_bind_address.into())
.await
.unwrap();
let addr = grpc_listener.local_addr().unwrap();
let fut = serve(
config,
let common_state = CommonServerState::from_config(config.run_config.clone()).unwrap();
let server_type = Arc::new(DatabaseServerType::new(
application,
grpc_listener,
http_listener,
Some(Arc::<T>::clone(collector)),
Arc::clone(&server),
);
&common_state,
));
let fut = serve(common_state, grpc_listener, http_listener, server_type);
let join = tokio::spawn(fut);
(addr, server, join)
@ -690,6 +452,11 @@ mod tests {
join.await.unwrap().unwrap();
}
/// Ensure that the query is fully executed.
async fn consume_query(mut query: PerformQuery) {
while query.next().await.unwrap().is_some() {}
}
#[tokio::test]
async fn test_query_tracing() {
let collector = Arc::new(RingBufferTraceCollector::new(100));
@ -721,10 +488,13 @@ mod tests {
.unwrap();
let mut flight = influxdb_iox_client::flight::Client::new(conn.clone());
flight
.perform_query(db_info.db_name(), "select * from cpu;")
.await
.unwrap();
consume_query(
flight
.perform_query(db_info.db_name(), "select * from cpu;")
.await
.unwrap(),
)
.await;
flight
.perform_query("nonexistent", "select * from cpu;")
@ -774,8 +544,7 @@ mod tests {
let prepare_sql_span = child(sql_span, "prepare_sql").unwrap();
child(prepare_sql_span, "prepare_plan").unwrap();
let collect_span = child(ctx_span, "collect").unwrap();
let execute_span = child(collect_span, "execute_stream_partitioned").unwrap();
let execute_span = child(ctx_span, "execute_stream_partitioned").unwrap();
let coalesce_span = child(execute_span, "CoalescePartitionsEx").unwrap();
// validate spans from DataFusion ExecutionPlan are present

View File

@ -1,5 +1,6 @@
//! Implements the native gRPC IOx query API using Arrow Flight
use std::fmt::Debug;
use std::task::Poll;
use std::{pin::Pin, sync::Arc};
use arrow::{
@ -13,19 +14,20 @@ use arrow_flight::{
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, SchemaAsIpc, SchemaResult, Ticket,
};
use futures::Stream;
use datafusion::physical_plan::ExecutionPlan;
use futures::{SinkExt, Stream, StreamExt};
use pin_project::{pin_project, pinned_drop};
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use tokio::task::JoinHandle;
use tonic::{Request, Response, Streaming};
use data_types::{DatabaseName, DatabaseNameError};
use observability_deps::tracing::{info, warn};
use query::exec::ExecutionContextProvider;
use query::exec::{ExecutionContextProvider, IOxExecutionContext};
use server::{connection::ConnectionManager, Server};
use crate::influxdb_ioxd::rpc::error::default_server_error_handler;
use super::super::planner::Planner;
use crate::influxdb_ioxd::{planner::Planner, rpc::error::default_server_error_handler};
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Snafu)]
@ -65,7 +67,7 @@ pub enum Error {
#[snafu(display("Error while planning query: {}", source))]
Planning {
source: super::super::planner::Error,
source: crate::influxdb_ioxd::planner::Error,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -153,8 +155,6 @@ where
Err(tonic::Status::unimplemented("Not yet implemented"))
}
// TODO: Stream results back directly by using `execute` instead of `collect`
// https://docs.rs/datafusion/3.0.0/datafusion/physical_plan/trait.ExecutionPlan.html#tymethod.execute
async fn do_get(
&self,
request: Request<Ticket>,
@ -182,32 +182,7 @@ where
.await
.context(Planning)?;
// execute the query
let results = ctx
.collect(Arc::clone(&physical_plan))
.await
.map_err(|e| Box::new(e) as _)
.context(Query {
database_name: &read_info.database_name,
})?;
let options = arrow::ipc::writer::IpcWriteOptions::default();
let schema = Arc::new(optimize_schema(&physical_plan.schema()));
let schema_flight_data = SchemaAsIpc::new(&schema, &options).into();
let mut flights = vec![schema_flight_data];
for batch in results {
let batch = optimize_record_batch(&batch, Arc::clone(&schema))?;
let (flight_dictionaries, flight_batch) =
arrow_flight::utils::flight_data_from_arrow_batch(&batch, &options);
flights.extend(flight_dictionaries);
flights.push(flight_batch);
}
let output = futures::stream::iter(flights.into_iter().map(Ok));
let output = GetStream::new(ctx, physical_plan, read_info.database_name).await?;
Ok(Response::new(Box::pin(output) as Self::DoGetStream))
}
@ -268,6 +243,132 @@ where
}
}
#[pin_project(PinnedDrop)]
struct GetStream {
#[pin]
rx: futures::channel::mpsc::Receiver<Result<FlightData, tonic::Status>>,
join_handle: JoinHandle<()>,
done: bool,
}
impl GetStream {
async fn new(
ctx: IOxExecutionContext,
physical_plan: Arc<dyn ExecutionPlan>,
database_name: String,
) -> Result<Self, tonic::Status> {
// setup channel
let (mut tx, rx) = futures::channel::mpsc::channel::<Result<FlightData, tonic::Status>>(1);
// get schema
let schema = Arc::new(optimize_schema(&physical_plan.schema()));
// setup stream
let options = arrow::ipc::writer::IpcWriteOptions::default();
let schema_flight_data = SchemaAsIpc::new(&schema, &options).into();
let mut stream_record_batches = ctx
.execute_stream(Arc::clone(&physical_plan))
.await
.map_err(|e| Box::new(e) as _)
.context(Query {
database_name: &database_name,
})?;
let join_handle = tokio::spawn(async move {
if tx.send(Ok(schema_flight_data)).await.is_err() {
// receiver gone
return;
}
while let Some(batch_or_err) = stream_record_batches.next().await {
match batch_or_err {
Ok(batch) => {
match optimize_record_batch(&batch, Arc::clone(&schema)) {
Ok(batch) => {
let (flight_dictionaries, flight_batch) =
arrow_flight::utils::flight_data_from_arrow_batch(
&batch, &options,
);
for dict in flight_dictionaries {
if tx.send(Ok(dict)).await.is_err() {
// receiver is gone
return;
}
}
if tx.send(Ok(flight_batch)).await.is_err() {
// receiver is gone
return;
}
}
Err(e) => {
// failure sending here is OK because we're cutting the stream anyways
tx.send(Err(e.into())).await.ok();
// end stream
return;
}
}
}
Err(e) => {
// failure sending here is OK because we're cutting the stream anyways
tx.send(Err(Error::Query {
database_name: database_name.clone(),
source: Box::new(e),
}
.into()))
.await
.ok();
// end stream
return;
}
}
}
});
Ok(Self {
rx,
join_handle,
done: false,
})
}
}
#[pinned_drop]
impl PinnedDrop for GetStream {
fn drop(self: Pin<&mut Self>) {
self.join_handle.abort();
}
}
impl Stream for GetStream {
type Item = Result<FlightData, tonic::Status>;
fn poll_next(
self: Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Option<Self::Item>> {
let this = self.project();
if *this.done {
Poll::Ready(None)
} else {
match this.rx.poll_next(cx) {
Poll::Ready(None) => {
*this.done = true;
Poll::Ready(None)
}
e @ Poll::Ready(Some(Err(_))) => {
*this.done = true;
e
}
other => other,
}
}
}
}
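
`GetStream` above couples a spawned producer task to the gRPC response stream through a bounded channel of capacity 1, so query execution is backpressured by the client, and it aborts the producer if the client goes away. A stripped-down sketch of that producer/consumer shape (tokio + futures only; plain integers stand in for `FlightData`):

```rust
use futures::{channel::mpsc, SinkExt, StreamExt};

#[tokio::main]
async fn main() {
    // Capacity 1: the producer can run at most one item ahead of the consumer.
    let (mut tx, mut rx) = mpsc::channel::<Result<i32, String>>(1);

    let producer = tokio::spawn(async move {
        for i in 0..5 {
            // `send` waits for the consumer to make room; if the receiver is
            // gone we stop producing, mirroring the early returns above.
            if tx.send(Ok(i)).await.is_err() {
                return;
            }
        }
    });

    // The receiver half is what GetStream exposes through its Stream impl.
    while let Some(item) = rx.next().await {
        println!("got {:?}", item);
    }

    // GetStream additionally aborts the producer in its PinnedDrop impl; here
    // the producer simply finishes once the channel is drained.
    producer.await.expect("producer task panicked");
}
```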
/// Some batches are small slices of the underlying arrays.
/// At this stage we only know the number of rows in the record batch
/// and the sizes in bytes of the backing buffers of the column arrays.

View File

@ -1,16 +1,16 @@
use std::convert::TryFrom;
use std::fmt::Debug;
use std::sync::Arc;
use data_types::chunk_metadata::ChunkId;
use data_types::{server_id::ServerId, DatabaseName};
use generated_types::google::{AlreadyExists, FieldViolation, FieldViolationExt, NotFound};
use generated_types::influxdata::iox::management::v1::{Error as ProtobufError, *};
use data_types::{chunk_metadata::ChunkId, server_id::ServerId, DatabaseName};
use generated_types::{
google::{AlreadyExists, FieldViolation, FieldViolationExt, NotFound},
influxdata::iox::management::v1::{Error as ProtobufError, *},
};
use predicate::delete_predicate::DeletePredicate;
use query::QueryDatabase;
use server::rules::ProvidedDatabaseRules;
use server::{connection::ConnectionManager, ApplicationState, Error, Server};
use server::{
connection::ConnectionManager, rules::ProvidedDatabaseRules, ApplicationState, Error, Server,
};
use std::{convert::TryFrom, fmt::Debug, str::FromStr, sync::Arc};
use tonic::{Request, Response, Status};
use uuid::Uuid;
struct ManagementService<M: ConnectionManager> {
application: Arc<ApplicationState>,
@ -18,7 +18,7 @@ struct ManagementService<M: ConnectionManager> {
serving_readiness: ServingReadiness,
}
use super::error::{
use crate::influxdb_ioxd::rpc::error::{
default_database_error_handler, default_db_error_handler, default_server_error_handler,
};
use crate::influxdb_ioxd::serving_readiness::ServingReadiness;
@ -128,18 +128,28 @@ where
description: e.to_string(),
})?;
match self.server.create_database(provided_rules).await {
Ok(_) => Ok(Response::new(CreateDatabaseResponse {})),
Err(Error::DatabaseAlreadyExists { db_name }) => {
return Err(AlreadyExists {
let database = self
.server
.create_database(provided_rules)
.await
.map_err(|e| match e {
Error::DatabaseAlreadyExists { db_name } => AlreadyExists {
resource_type: "database".to_string(),
resource_name: db_name,
..Default::default()
}
.into())
}
Err(e) => Err(default_server_error_handler(e)),
}
.into(),
_ => default_server_error_handler(e),
})?;
let uuid = database
.provided_rules()
.expect("Database should be initialized or an error should have been returned")
.uuid();
Ok(Response::new(CreateDatabaseResponse {
uuid: uuid.as_bytes().to_vec(),
}))
}
async fn update_database(
@ -157,10 +167,9 @@ where
description: e.to_string(),
})?;
let db_name = provided_rules.db_name().clone();
let updated_rules = self
.server
.update_db_rules(&db_name, provided_rules)
.update_db_rules(provided_rules)
.await
.map_err(default_server_error_handler)?;
@ -175,12 +184,15 @@ where
) -> Result<Response<DeleteDatabaseResponse>, Status> {
let db_name = DatabaseName::new(request.into_inner().db_name).field("db_name")?;
self.server
let uuid = self
.server
.delete_database(&db_name)
.await
.map_err(default_server_error_handler)?;
Ok(Response::new(DeleteDatabaseResponse {}))
Ok(Response::new(DeleteDatabaseResponse {
uuid: uuid.as_bytes().to_vec(),
}))
}
async fn restore_database(
@ -189,34 +201,16 @@ where
) -> Result<Response<RestoreDatabaseResponse>, Status> {
let request = request.into_inner();
let db_name = DatabaseName::new(request.db_name).field("db_name")?;
let generation_id = request.generation_id;
let uuid = Uuid::from_str(&request.uuid).field("uuid")?;
self.server
.restore_database(&db_name, generation_id)
.restore_database(&db_name, uuid)
.await
.map_err(default_server_error_handler)?;
Ok(Response::new(RestoreDatabaseResponse {}))
}
async fn list_deleted_databases(
&self,
_: Request<ListDeletedDatabasesRequest>,
) -> Result<Response<ListDeletedDatabasesResponse>, Status> {
let deleted_databases = self
.server
.list_deleted_databases()
.await
.map_err(default_server_error_handler)?
.into_iter()
.map(Into::into)
.collect();
Ok(Response::new(ListDeletedDatabasesResponse {
deleted_databases,
}))
}
async fn list_detailed_databases(
&self,
_: Request<ListDetailedDatabasesRequest>,

View File

@ -0,0 +1,57 @@
use std::sync::Arc;
use server::connection::ConnectionManager;
use crate::influxdb_ioxd::{
rpc::{add_gated_service, add_service, serve_builder, setup_builder, RpcBuilderInput},
server_type::{database::DatabaseServerType, RpcError},
};
mod flight;
mod management;
mod operations;
mod storage;
mod write;
mod write_pb;
pub async fn server_grpc<M>(
server_type: Arc<DatabaseServerType<M>>,
builder_input: RpcBuilderInput,
) -> Result<(), RpcError>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
let builder = setup_builder!(builder_input, server_type);
add_gated_service!(
builder,
storage::make_server(Arc::clone(&server_type.server),)
);
add_gated_service!(
builder,
flight::make_server(Arc::clone(&server_type.server))
);
add_gated_service!(builder, write::make_server(Arc::clone(&server_type.server)));
add_gated_service!(
builder,
write_pb::make_server(Arc::clone(&server_type.server))
);
// It is also important that this is not behind a readiness check (as it is
// used to change the check!)
add_service!(
builder,
management::make_server(
Arc::clone(&server_type.application),
Arc::clone(&server_type.server),
server_type.serving_readiness.clone(),
)
);
add_service!(
builder,
operations::make_server(Arc::clone(server_type.application.job_registry()))
);
serve_builder!(builder);
Ok(())
}

View File

@ -0,0 +1,543 @@
//! This module contains code to translate from InfluxDB IOx data
//! formats into the formats needed by gRPC
use std::{collections::BTreeSet, fmt, sync::Arc};
use arrow::datatypes::DataType as ArrowDataType;
use observability_deps::tracing::trace;
use query::exec::{
fieldlist::FieldList,
seriesset::series::{self, Either},
};
use generated_types::{
measurement_fields_response::{FieldType, MessageField},
read_response::{
frame::Data, BooleanPointsFrame, DataType, FloatPointsFrame, Frame, GroupFrame,
IntegerPointsFrame, SeriesFrame, StringPointsFrame, UnsignedPointsFrame,
},
MeasurementFieldsResponse, ReadResponse, Tag,
};
use super::{TAG_KEY_FIELD, TAG_KEY_MEASUREMENT};
use snafu::Snafu;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error converting series set to gRPC: {}", source))]
ConvertingSeries {
source: query::exec::seriesset::series::Error,
},
#[snafu(display("Unsupported field data type in gRPC data translation: {}", data_type))]
UnsupportedFieldType { data_type: ArrowDataType },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Convert a set of tag_keys into a form suitable for gRPC transport,
/// adding the special 0x00 (_m) and 0xff (_f) tag keys
///
/// Namely, a Vec<Vec<u8>>, including the measurement and field names
pub fn tag_keys_to_byte_vecs(tag_keys: Arc<BTreeSet<String>>) -> Vec<Vec<u8>> {
// special case measurement (0x00) and field (0xff)
// ensuring they are in the correct sort order (first and last, respectively)
let mut byte_vecs = Vec::with_capacity(2 + tag_keys.len());
byte_vecs.push(TAG_KEY_MEASUREMENT.to_vec()); // Shown as _m == _measurement
tag_keys.iter().for_each(|name| {
byte_vecs.push(name.bytes().collect());
});
byte_vecs.push(TAG_KEY_FIELD.to_vec()); // Shown as _f == _field
byte_vecs
}
/// Convert `Series` and `Group`s into a form suitable for gRPC transport:
///
/// ```
/// (GroupFrame) potentially
///
/// (SeriesFrame for field1)
/// (*Points for field1)
/// (SeriesFrame for field2)
/// (*Points for field2)
/// (....)
/// (SeriesFrame for field1)
/// (*Points for field1)
/// (SeriesFrame for field2)
/// (*Points for field2)
/// (....)
/// ```
///
/// The specific type of (*Points) depends on the type of field column.
pub fn series_or_groups_to_read_response(series_or_groups: Vec<Either>) -> ReadResponse {
let mut frames = vec![];
for series_or_group in series_or_groups {
match series_or_group {
Either::Series(series) => {
series_to_frames(&mut frames, series);
}
Either::Group(group) => {
frames.push(group_to_frame(group));
}
}
}
trace!(frames=%DisplayableFrames::new(&frames), "Response gRPC frames");
ReadResponse { frames }
}
/// Converts a `Series` into frames for GRPC transport
fn series_to_frames(frames: &mut Vec<Frame>, series: series::Series) {
let series::Series { tags, data } = series;
let (data_type, data_frame) = match data {
series::Data::FloatPoints { timestamps, values } => (
DataType::Float,
Data::FloatPoints(FloatPointsFrame { timestamps, values }),
),
series::Data::IntegerPoints { timestamps, values } => (
DataType::Integer,
Data::IntegerPoints(IntegerPointsFrame { timestamps, values }),
),
series::Data::UnsignedPoints { timestamps, values } => (
DataType::Unsigned,
Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values }),
),
series::Data::BooleanPoints { timestamps, values } => (
DataType::Boolean,
Data::BooleanPoints(BooleanPointsFrame { timestamps, values }),
),
series::Data::StringPoints { timestamps, values } => (
DataType::String,
Data::StringPoints(StringPointsFrame { timestamps, values }),
),
};
let series_frame = Data::Series(SeriesFrame {
tags: convert_tags(tags),
data_type: data_type.into(),
});
frames.push(Frame {
data: Some(series_frame),
});
frames.push(Frame {
data: Some(data_frame),
});
}
/// Converts a [`series::Group`] into a storage gRPC `GroupFrame`
/// format that can be returned to the client.
fn group_to_frame(group: series::Group) -> Frame {
let series::Group {
tag_keys,
partition_key_vals,
} = group;
let group_frame = GroupFrame {
tag_keys: arcs_to_bytes(tag_keys),
partition_key_vals: arcs_to_bytes(partition_key_vals),
};
let data = Data::Group(group_frame);
Frame { data: Some(data) }
}
/// Convert the tag=value pairs from Arc<str> to Vec<u8> for gRPC transport
fn convert_tags(tags: Vec<series::Tag>) -> Vec<Tag> {
tags.into_iter()
.map(|series::Tag { key, value }| Tag {
key: key.bytes().collect(),
value: value.bytes().collect(),
})
.collect()
}
fn arcs_to_bytes(s: Vec<Arc<str>>) -> Vec<Vec<u8>> {
s.into_iter().map(|s| s.bytes().collect()).collect()
}
/// Translates FieldList into the gRPC format
pub fn fieldlist_to_measurement_fields_response(
fieldlist: FieldList,
) -> Result<MeasurementFieldsResponse> {
let fields = fieldlist
.fields
.into_iter()
.map(|f| {
Ok(MessageField {
key: f.name,
r#type: datatype_to_measurement_field_enum(&f.data_type)? as i32,
timestamp: f.last_timestamp,
})
})
.collect::<Result<Vec<_>>>()?;
Ok(MeasurementFieldsResponse { fields })
}
fn datatype_to_measurement_field_enum(data_type: &ArrowDataType) -> Result<FieldType> {
match data_type {
ArrowDataType::Float64 => Ok(FieldType::Float),
ArrowDataType::Int64 => Ok(FieldType::Integer),
ArrowDataType::UInt64 => Ok(FieldType::Unsigned),
ArrowDataType::Utf8 => Ok(FieldType::String),
ArrowDataType::Boolean => Ok(FieldType::Boolean),
_ => UnsupportedFieldType {
data_type: data_type.clone(),
}
.fail(),
}
}
/// Wrapper structure that implements [`std::fmt::Display`] for a slice
/// of `Frame`s
struct DisplayableFrames<'a> {
frames: &'a [Frame],
}
impl<'a> DisplayableFrames<'a> {
fn new(frames: &'a [Frame]) -> Self {
Self { frames }
}
}
impl<'a> fmt::Display for DisplayableFrames<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.frames.iter().try_for_each(|frame| {
format_frame(frame, f)?;
writeln!(f)
})
}
}
fn format_frame(frame: &Frame, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data = &frame.data;
match data {
Some(Data::Series(SeriesFrame { tags, data_type })) => write!(
f,
"SeriesFrame, tags: {}, type: {:?}",
dump_tags(tags),
data_type
),
Some(Data::FloatPoints(FloatPointsFrame { timestamps, values })) => write!(
f,
"FloatPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::IntegerPoints(IntegerPointsFrame { timestamps, values })) => write!(
f,
"IntegerPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })) => write!(
f,
"UnsignedPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::BooleanPoints(BooleanPointsFrame { timestamps, values })) => write!(
f,
"BooleanPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::StringPoints(StringPointsFrame { timestamps, values })) => write!(
f,
"StringPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::Group(GroupFrame {
tag_keys,
partition_key_vals,
})) => write!(
f,
"GroupFrame, tag_keys: {}, partition_key_vals: {}",
dump_u8_vec(tag_keys),
dump_u8_vec(partition_key_vals)
),
None => write!(f, "<NO data field>"),
}
}
fn dump_values<T>(v: &[T]) -> String
where
T: std::fmt::Display,
{
v.iter()
.map(|item| format!("{}", item))
.collect::<Vec<_>>()
.join(",")
}
fn dump_u8_vec(encoded_strings: &[Vec<u8>]) -> String {
encoded_strings
.iter()
.map(|b| String::from_utf8_lossy(b))
.collect::<Vec<_>>()
.join(",")
}
fn dump_tags(tags: &[Tag]) -> String {
tags.iter()
.map(|tag| {
format!(
"{}={}",
String::from_utf8_lossy(&tag.key),
String::from_utf8_lossy(&tag.value),
)
})
.collect::<Vec<_>>()
.join(",")
}
#[cfg(test)]
mod tests {
use std::convert::TryInto;
use arrow::{
array::{
ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray,
TimestampNanosecondArray, UInt64Array,
},
datatypes::DataType as ArrowDataType,
record_batch::RecordBatch,
};
use query::exec::{
field::FieldIndexes,
fieldlist::Field,
seriesset::{
series::{Group, Series},
SeriesSet,
},
};
use super::*;
#[test]
fn test_tag_keys_to_byte_vecs() {
fn convert_keys(tag_keys: &[&str]) -> Vec<Vec<u8>> {
let tag_keys = tag_keys
.iter()
.map(|s| s.to_string())
.collect::<BTreeSet<_>>();
tag_keys_to_byte_vecs(Arc::new(tag_keys))
}
assert_eq!(convert_keys(&[]), vec![[0].to_vec(), [255].to_vec()]);
assert_eq!(
convert_keys(&["key_a"]),
vec![[0].to_vec(), b"key_a".to_vec(), [255].to_vec()]
);
assert_eq!(
convert_keys(&["key_a", "key_b"]),
vec![
[0].to_vec(),
b"key_a".to_vec(),
b"key_b".to_vec(),
[255].to_vec()
]
);
}
#[test]
fn test_series_set_conversion() {
let series_set = SeriesSet {
table_name: Arc::from("the_table"),
tags: vec![(Arc::from("tag1"), Arc::from("val1"))],
field_indexes: FieldIndexes::from_timestamp_and_value_indexes(5, &[0, 1, 2, 3, 4]),
start_row: 1,
num_rows: 2,
batch: make_record_batch(),
};
let series: Vec<Series> = series_set
.try_into()
.expect("Correctly converted series set");
let series: Vec<Either> = series.into_iter().map(|s| s.into()).collect();
let response = series_or_groups_to_read_response(series);
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=string_field,_measurement=the_table,tag1=val1, type: 4",
"StringPointsFrame, timestamps: [2000, 3000], values: bar,baz",
"SeriesFrame, tags: _field=int_field,_measurement=the_table,tag1=val1, type: 1",
"IntegerPointsFrame, timestamps: [2000, 3000], values: \"2,3\"",
"SeriesFrame, tags: _field=uint_field,_measurement=the_table,tag1=val1, type: 2",
"UnsignedPointsFrame, timestamps: [2000, 3000], values: \"22,33\"",
"SeriesFrame, tags: _field=float_field,_measurement=the_table,tag1=val1, type: 0",
"FloatPointsFrame, timestamps: [2000, 3000], values: \"20.1,30.1\"",
"SeriesFrame, tags: _field=boolean_field,_measurement=the_table,tag1=val1, type: 3",
"BooleanPointsFrame, timestamps: [2000, 3000], values: false,true",
];
assert_eq!(
dumped_frames, expected_frames,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected_frames, dumped_frames
);
}
#[test]
fn test_group_group_conversion() {
let group = Group {
tag_keys: vec![
Arc::from("_field"),
Arc::from("_measurement"),
Arc::from("tag1"),
Arc::from("tag2"),
],
partition_key_vals: vec![Arc::from("val1"), Arc::from("val2")],
};
let response = series_or_groups_to_read_response(vec![group.into()]);
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"GroupFrame, tag_keys: _field,_measurement,tag1,tag2, partition_key_vals: val1,val2",
];
assert_eq!(
dumped_frames, expected_frames,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected_frames, dumped_frames
);
}
#[test]
fn test_field_list_conversion() {
let input = FieldList {
fields: vec![
Field {
name: "float".into(),
data_type: ArrowDataType::Float64,
last_timestamp: 1000,
},
Field {
name: "int".into(),
data_type: ArrowDataType::Int64,
last_timestamp: 2000,
},
Field {
name: "uint".into(),
data_type: ArrowDataType::UInt64,
last_timestamp: 3000,
},
Field {
name: "string".into(),
data_type: ArrowDataType::Utf8,
last_timestamp: 4000,
},
Field {
name: "bool".into(),
data_type: ArrowDataType::Boolean,
last_timestamp: 5000,
},
],
};
let expected = MeasurementFieldsResponse {
fields: vec![
MessageField {
key: "float".into(),
r#type: FieldType::Float as i32,
timestamp: 1000,
},
MessageField {
key: "int".into(),
r#type: FieldType::Integer as i32,
timestamp: 2000,
},
MessageField {
key: "uint".into(),
r#type: FieldType::Unsigned as i32,
timestamp: 3000,
},
MessageField {
key: "string".into(),
r#type: FieldType::String as i32,
timestamp: 4000,
},
MessageField {
key: "bool".into(),
r#type: FieldType::Boolean as i32,
timestamp: 5000,
},
],
};
let actual = fieldlist_to_measurement_fields_response(input).unwrap();
assert_eq!(
actual, expected,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected, actual
);
}
#[test]
fn test_field_list_conversion_error() {
let input = FieldList {
fields: vec![Field {
name: "unsupported".into(),
data_type: ArrowDataType::Int8,
last_timestamp: 1000,
}],
};
let result = fieldlist_to_measurement_fields_response(input);
match result {
Ok(r) => panic!("Unexpected success: {:?}", r),
Err(e) => {
let expected = "Unsupported field data type in gRPC data translation: Int8";
let actual = format!("{}", e);
assert!(
actual.contains(expected),
"Could not find expected '{}' in actual '{}'",
expected,
actual
);
}
}
}
fn make_record_batch() -> RecordBatch {
let string_array: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "foo"]));
let int_array: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3, 4]));
let uint_array: ArrayRef = Arc::new(UInt64Array::from(vec![11, 22, 33, 44]));
let float_array: ArrayRef = Arc::new(Float64Array::from(vec![10.1, 20.1, 30.1, 40.1]));
let bool_array: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true, false]));
let timestamp_array: ArrayRef = Arc::new(TimestampNanosecondArray::from_vec(
vec![1000, 2000, 3000, 4000],
None,
));
RecordBatch::try_from_iter_with_nullable(vec![
("string_field", string_array, true),
("int_field", int_array, true),
("uint_field", uint_array, true),
("float_field", float_array, true),
("boolean_field", bool_array, true),
("time", timestamp_array, true),
])
.expect("created new record batch")
}
fn dump_frames(frames: &[Frame]) -> Vec<String> {
DisplayableFrames::new(frames)
.to_string()
.trim()
.split('\n')
.map(|s| s.to_string())
.collect()
}
}

View File

@ -21,15 +21,15 @@ use generated_types::{
use observability_deps::tracing::{error, info};
use predicate::predicate::PredicateBuilder;
use query::exec::{
fieldlist::FieldList, seriesset::Error as SeriesSetError, ExecutionContextProvider,
fieldlist::FieldList, seriesset::converter::Error as SeriesSetError, ExecutionContextProvider,
};
use server::DatabaseStore;
use crate::influxdb_ioxd::{
planner::Planner,
rpc::storage::{
server_type::database::rpc::storage::{
data::{
fieldlist_to_measurement_fields_response, series_set_item_to_read_response,
fieldlist_to_measurement_fields_response, series_or_groups_to_read_response,
tag_keys_to_byte_vecs,
},
expr::{self, AddRpcNode, GroupByAndAggregate, Loggable, SpecialTagKeys},
@ -130,18 +130,12 @@ pub enum Error {
source: super::expr::Error,
},
#[snafu(display("Error computing series: {}", source))]
ComputingSeriesSet { source: SeriesSetError },
#[snafu(display("Error converting tag_key to UTF-8 in tag_values request, tag_key value '{}': {}", String::from_utf8_lossy(source.as_bytes()), source))]
ConvertingTagKeyInTagValues { source: std::string::FromUtf8Error },
#[snafu(display("Error computing groups series: {}", source))]
ComputingGroupedSeriesSet { source: SeriesSetError },
#[snafu(display("Error converting time series into gRPC response: {}", source))]
ConvertingSeriesSet { source: super::data::Error },
#[snafu(display("Converting field information series into gRPC response: {}", source))]
ConvertingFieldList { source: super::data::Error },
@ -195,10 +189,8 @@ impl Error {
Self::ConvertingReadGroupAggregate { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingReadGroupType { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingWindowAggregate { .. } => Status::invalid_argument(self.to_string()),
Self::ComputingSeriesSet { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingTagKeyInTagValues { .. } => Status::invalid_argument(self.to_string()),
Self::ComputingGroupedSeriesSet { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingSeriesSet { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingFieldList { .. } => Status::invalid_argument(self.to_string()),
Self::SendingResults { .. } => Status::internal(self.to_string()),
Self::InternalHintsFieldNotSupported { .. } => Status::internal(self.to_string()),
@ -905,8 +897,8 @@ where
.context(PlanningFilteringSeries { db_name })?;
// Execute the plans.
let ss_items = ctx
.to_series_set(series_plan)
let series_or_groups = ctx
.to_series_and_groups(series_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(FilteringSeries {
@ -914,11 +906,9 @@ where
})
.log_if_error("Running series set plan")?;
// Convert results into API responses
ss_items
.into_iter()
.map(|series_set| series_set_item_to_read_response(series_set).context(ConvertingSeriesSet))
.collect::<Result<Vec<ReadResponse>, Error>>()
let response = series_or_groups_to_read_response(series_or_groups);
Ok(vec![response])
}
/// Launch async tasks that send the result of executing read_group to `tx`
@ -971,8 +961,8 @@ where
// if big queries are causing a significant latency in TTFB.
// Execute the plans
let ss_items = ctx
.to_series_set(grouped_series_set_plan)
let series_or_groups = ctx
.to_series_and_groups(grouped_series_set_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(GroupingSeries {
@ -980,11 +970,9 @@ where
})
.log_if_error("Running Grouped SeriesSet Plan")?;
// Convert plans to API responses
ss_items
.into_iter()
.map(|series_set| series_set_item_to_read_response(series_set).context(ConvertingSeriesSet))
.collect::<Result<Vec<ReadResponse>, Error>>()
let response = series_or_groups_to_read_response(series_or_groups);
Ok(vec![response])
}
/// Return field names, restricted via optional measurement, timestamp and
@ -1128,11 +1116,11 @@ mod tests {
let chunk0 = TestChunk::new("h2o")
.with_id(0)
.with_predicate_match(PredicateMatch::AtLeastOne);
.with_predicate_match(PredicateMatch::AtLeastOneNonNullField);
let chunk1 = TestChunk::new("o2")
.with_id(1)
.with_predicate_match(PredicateMatch::AtLeastOne);
.with_predicate_match(PredicateMatch::AtLeastOneNonNullField);
fixture
.test_storage
@ -1486,7 +1474,8 @@ mod tests {
tag_key: [0].into(),
};
let chunk = TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOne);
let chunk =
TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOneNonNullField);
fixture
.test_storage
@ -1736,7 +1725,8 @@ mod tests {
// Note we don't include the actual line / column in the
// expected panic message to avoid needing to update the test
// whenever the source code file changed.
let expected_error = "panicked at 'This is a test panic', src/influxdb_ioxd/rpc/testing.rs";
let expected_error =
"panicked at 'This is a test panic', influxdb_iox/src/influxdb_ioxd/rpc/testing.rs";
assert_contains!(captured_logs, expected_error);
// Ensure that panics don't exhaust the tokio executor by
@ -1841,6 +1831,7 @@ mod tests {
let chunk = TestChunk::new("TheMeasurement")
.with_time_column()
.with_i64_field_column("my field")
.with_tag_column("state")
.with_one_row_of_data();
@ -1869,7 +1860,11 @@ mod tests {
let frames = fixture.storage_client.read_group(request).await.unwrap();
assert_eq!(frames.len(), 1);
// three frames:
// GroupFrame
// SeriesFrame (tag=state, field=my field)
// DataFrame
assert_eq!(frames.len(), 3);
grpc_request_metric_has_count(&fixture, "ReadGroup", "ok", 1);
}
@ -2287,9 +2282,11 @@ mod tests {
true,
))
.add_service(crate::influxdb_ioxd::rpc::testing::make_server())
.add_service(crate::influxdb_ioxd::rpc::storage::make_server(Arc::clone(
&test_storage,
)));
.add_service(
crate::influxdb_ioxd::server_type::database::rpc::storage::make_server(
Arc::clone(&test_storage),
),
);
let server = async move {
let stream = TcpListenerStream::new(socket);

View File

@ -14,7 +14,7 @@ use influxdb_line_protocol::parse_lines;
use observability_deps::tracing::debug;
use server::{connection::ConnectionManager, Server};
use super::error::default_server_error_handler;
use crate::influxdb_ioxd::rpc::error::default_server_error_handler;
/// Implementation of the write service
struct WriteService<M: ConnectionManager> {
@ -30,6 +30,7 @@ where
&self,
request: tonic::Request<WriteRequest>,
) -> Result<tonic::Response<WriteResponse>, tonic::Status> {
let span_ctx = request.extensions().get().cloned();
let request = request.into_inner();
// The time, in nanoseconds since the epoch, to assign to any points that don't
@ -57,7 +58,7 @@ where
debug!(%db_name, %lp_chars, lp_line_count, body_size=lp_data.len(), num_fields, "Writing lines into database");
self.server
.write_lines(&db_name, &lines, default_time)
.write_lines(&db_name, &lines, default_time, span_ctx)
.await
.map_err(default_server_error_handler)?;
@ -69,6 +70,7 @@ where
&self,
request: tonic::Request<WriteEntryRequest>,
) -> Result<tonic::Response<WriteEntryResponse>, tonic::Status> {
let span_ctx = request.extensions().get().cloned();
let request = request.into_inner();
let db_name = DatabaseName::new(&request.db_name).field("db_name")?;
@ -79,7 +81,7 @@ where
let entry = entry::Entry::try_from(request.entry).field("entry")?;
self.server
.write_entry_local(&db_name, entry)
.write_entry_local(&db_name, entry, span_ctx)
.await
.map_err(default_server_error_handler)?;

View File

@ -1,10 +1,11 @@
use super::error::default_server_error_handler;
use generated_types::google::FieldViolation;
use generated_types::influxdata::pbdata::v1::*;
use server::{connection::ConnectionManager, Server};
use std::fmt::Debug;
use std::sync::Arc;
use crate::influxdb_ioxd::rpc::error::default_server_error_handler;
struct PBWriteService<M: ConnectionManager> {
server: Arc<Server<M>>,
}
@ -18,13 +19,14 @@ where
&self,
request: tonic::Request<WriteRequest>,
) -> Result<tonic::Response<WriteResponse>, tonic::Status> {
let span_ctx = request.extensions().get().cloned();
let database_batch = request
.into_inner()
.database_batch
.ok_or_else(|| FieldViolation::required("database_batch"))?;
self.server
.write_pb(database_batch)
.write_pb(database_batch, span_ctx)
.await
.map_err(default_server_error_handler)?;

View File

@ -0,0 +1,72 @@
use std::sync::Arc;
use object_store::ObjectStore;
use observability_deps::tracing::warn;
use server::{
connection::ConnectionManagerImpl, ApplicationState, RemoteTemplate, Server, ServerConfig,
};
use snafu::{ResultExt, Snafu};
use trace::TraceCollector;
use crate::{
commands::run::database::Config,
structopt_blocks::object_store::{check_object_store, warn_about_inmem_store},
};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Cannot parse object store config: {}", source))]
ObjectStoreParsing {
source: crate::structopt_blocks::object_store::ParseError,
},
#[snafu(display("Cannot check object store config: {}", source))]
ObjectStoreCheck {
source: crate::structopt_blocks::object_store::CheckError,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
pub async fn make_application(
config: &Config,
trace_collector: Option<Arc<dyn TraceCollector>>,
) -> Result<Arc<ApplicationState>> {
warn_about_inmem_store(&config.run_config.object_store_config);
let object_store = ObjectStore::try_from(&config.run_config.object_store_config)
.context(ObjectStoreParsing)?;
check_object_store(&object_store)
.await
.context(ObjectStoreCheck)?;
let object_storage = Arc::new(object_store);
Ok(Arc::new(ApplicationState::new(
object_storage,
config.num_worker_threads,
trace_collector,
)))
}
pub fn make_server(
application: Arc<ApplicationState>,
config: &Config,
) -> Arc<Server<ConnectionManagerImpl>> {
let server_config = ServerConfig {
remote_template: config.remote_template.clone().map(RemoteTemplate::new),
wipe_catalog_on_error: config.wipe_catalog_on_error.into(),
skip_replay_and_seek_instead: config.skip_replay_and_seek_instead.into(),
};
let connection_manager = ConnectionManagerImpl::new();
let app_server = Arc::new(Server::new(connection_manager, application, server_config));
// if this ID isn't set the server won't be usable until this is set via an API
// call
if let Some(id) = config.run_config.server_id_config.server_id {
app_server.set_id(id).expect("server id already set");
} else {
warn!("server ID not set. ID must be set via the INFLUXDB_IOX_ID config or API before writing or querying data.");
}
app_server
}
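For orientation, a hedged sketch of how these two helpers are expected to be called in sequence during `run database` startup; the surrounding HTTP/gRPC listener wiring is elided and the function name is made up:

// Hypothetical startup wiring; `config` comes from CLI parsing and
// `trace_collector` from tracing initialization.
async fn startup(
    config: &Config,
    trace_collector: Option<Arc<dyn TraceCollector>>,
) -> Result<()> {
    let application = make_application(config, trace_collector).await?;
    let app_server = make_server(Arc::clone(&application), config);
    // ... hand `application` and `app_server` to the HTTP and gRPC servers ...
    Ok(())
}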

View File

@ -0,0 +1,128 @@
use std::sync::Arc;
use async_trait::async_trait;
use hyper::{Body, Request, Response, StatusCode};
use metric::Registry;
use snafu::Snafu;
use trace::TraceCollector;
use super::rpc::RpcBuilderInput;
pub mod common_state;
pub mod database;
/// Constants used in API error codes.
///
/// Expressing this as an enum prevents reuse of discriminants, and as they're
/// effectively consts this uses UPPER_SNAKE_CASE.
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Debug, PartialEq)]
pub enum ApiErrorCode {
/// An unknown/unhandled error
UNKNOWN = 100,
/// The database name in the request is invalid.
DB_INVALID_NAME = 101,
/// The database referenced already exists.
DB_ALREADY_EXISTS = 102,
/// The database referenced does not exist.
DB_NOT_FOUND = 103,
}
impl From<ApiErrorCode> for u32 {
fn from(v: ApiErrorCode) -> Self {
v as Self
}
}
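As a small illustration, the `From` impl yields the numeric code carried in HTTP error bodies; the values come straight from the enum above:

// Minimal illustration of the conversion; discriminants as defined above.
fn example_error_codes() {
    assert_eq!(u32::from(ApiErrorCode::UNKNOWN), 100);
    let code: u32 = ApiErrorCode::DB_NOT_FOUND.into();
    assert_eq!(code, 103);
}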
pub trait RouteError: std::error::Error + snafu::AsErrorSource {
fn response(&self) -> Response<Body>;
fn bad_request(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::BAD_REQUEST)
.body(self.body())
.unwrap()
}
fn internal_error(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::INTERNAL_SERVER_ERROR)
.body(self.body())
.unwrap()
}
fn not_found(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::NOT_FOUND)
.body(Body::empty())
.unwrap()
}
fn no_content(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::NO_CONTENT)
.body(self.body())
.unwrap()
}
fn body(&self) -> Body {
let json =
serde_json::json!({"error": self.to_string(), "error_code": self.api_error_code()})
.to_string();
Body::from(json)
}
/// Map the error type into an API error code.
fn api_error_code(&self) -> u32 {
ApiErrorCode::UNKNOWN.into()
}
}
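Only `response` lacks a default, so a concrete server type mostly picks the right helper per error variant. A hedged sketch for a hypothetical error type (the real implementations live next to their HTTP handlers):

// Hypothetical error type, not part of IOx; shown only to illustrate the trait.
#[derive(Debug, Snafu)]
enum ExampleRouteError {
    #[snafu(display("database {} not found", name))]
    DatabaseNotFound { name: String },
}

impl RouteError for ExampleRouteError {
    fn response(&self) -> Response<Body> {
        match self {
            Self::DatabaseNotFound { .. } => self.not_found(),
        }
    }

    fn api_error_code(&self) -> u32 {
        match self {
            Self::DatabaseNotFound { .. } => ApiErrorCode::DB_NOT_FOUND.into(),
        }
    }
}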
#[derive(Debug, Snafu)]
pub enum RpcError {
#[snafu(display("gRPC transport error: {}{}", source, details))]
TransportError {
source: tonic::transport::Error,
details: String,
},
}
// Custom impl to include underlying source (not included in tonic
// transport error)
impl From<tonic::transport::Error> for RpcError {
fn from(source: tonic::transport::Error) -> Self {
use std::error::Error;
let details = source
.source()
.map(|e| format!(" ({})", e))
.unwrap_or_else(|| "".to_string());
Self::TransportError { source, details }
}
}
#[async_trait]
pub trait ServerType: std::fmt::Debug + Send + Sync + 'static {
type RouteError: RouteError;
fn metric_registry(&self) -> Arc<Registry>;
fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>>;
/// Route given HTTP request.
///
/// Note that this is only called if none of the shared, common routes (e.g. `/health`) match.
async fn route_http_request(
&self,
req: Request<Body>,
) -> Result<Response<Body>, Self::RouteError>;
async fn server_grpc(self: Arc<Self>, builder_input: RpcBuilderInput) -> Result<(), RpcError>;
async fn background_worker(self: Arc<Self>);
fn shutdown_background_worker(&self);
}
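To show the shape of the contract, a deliberately abbreviated, hypothetical implementer (the real one for the database server type lives in `server_type::database`; the error type reuses the sketch after `RouteError` above):

#[derive(Debug)]
struct ExampleServerType {
    metrics: Arc<Registry>,
}

#[async_trait]
impl ServerType for ExampleServerType {
    type RouteError = ExampleRouteError;

    fn metric_registry(&self) -> Arc<Registry> {
        Arc::clone(&self.metrics)
    }

    fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>> {
        None
    }

    async fn route_http_request(
        &self,
        _req: Request<Body>,
    ) -> Result<Response<Body>, Self::RouteError> {
        // A real server type would dispatch on the request path here.
        Ok(Response::new(Body::empty()))
    }

    async fn server_grpc(self: Arc<Self>, _builder_input: RpcBuilderInput) -> Result<(), RpcError> {
        // A real server type would register its gRPC services on the builder.
        Ok(())
    }

    async fn background_worker(self: Arc<Self>) {
        // Long-running maintenance work would go here.
    }

    fn shutdown_background_worker(&self) {}
}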

View File

@ -1,4 +1,5 @@
//! Entrypoint of InfluxDB IOx binary
#![recursion_limit = "512"] // required for print_cpu
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
#![warn(
missing_debug_implementations,
@ -8,18 +9,15 @@
clippy::future_not_send
)]
use crate::commands::tracing::{init_logs_and_tracing, init_simple_logs, TroggingGuard};
use dotenv::dotenv;
use influxdb_iox_client::connection::Builder;
use observability_deps::tracing::warn;
use once_cell::sync::Lazy;
use std::str::FromStr;
use structopt::StructOpt;
use tokio::runtime::Runtime;
use commands::tracing::{init_logs_and_tracing, init_simple_logs};
use observability_deps::tracing::warn;
use crate::commands::tracing::TroggingGuard;
use influxdb_iox_client::connection::Builder;
use std::str::FromStr;
mod commands {
pub mod database;
pub mod debug;
@ -31,8 +29,7 @@ mod commands {
pub mod tracing;
}
mod object_store;
mod server_id;
mod structopt_blocks;
pub mod influxdb_ioxd;
@ -48,6 +45,19 @@ static VERSION_STRING: Lazy<String> = Lazy::new(|| {
)
});
/// A comfy_table style that uses single ASCII lines for all borders with plusses at intersections.
///
/// Example:
///
/// ```text
/// +------+--------------------------------------+
/// | Name | UUID                                 |
/// +------+--------------------------------------+
/// | bar  | ccc2b8bc-f25d-4341-9b64-b9cfe50d26de |
/// | foo  | 3317ff2b-bbab-43ae-8c63-f0e9ea2f3bdb |
/// +------+--------------------------------------+
/// ```
const TABLE_STYLE_SINGLE_LINE_BORDERS: &str = "||--+-++| ++++++";
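A hedged sketch of how the constant is meant to be used with `comfy_table`; the header and rows reuse the illustrative values from the doc comment and the function name is made up:

// Assumes the `comfy_table` dependency; the data is purely illustrative.
fn render_example_table() -> String {
    let mut table = comfy_table::Table::new();
    table.load_preset(TABLE_STYLE_SINGLE_LINE_BORDERS);
    table.set_header(vec!["Name", "UUID"]);
    table.add_row(vec!["bar", "ccc2b8bc-f25d-4341-9b64-b9cfe50d26de"]);
    table.add_row(vec!["foo", "3317ff2b-bbab-43ae-8c63-f0e9ea2f3bdb"]);
    table.to_string()
}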
#[cfg(all(feature = "heappy", feature = "jemalloc_replacing_malloc"))]
compile_error!("heappy and jemalloc_replacing_malloc features are mutually exclusive");
@ -60,19 +70,19 @@ compile_error!("heappy and jemalloc_replacing_malloc features are mutually exclu
Examples:
# Run the InfluxDB IOx server:
influxdb_iox run
influxdb_iox run database
# Run the interactive SQL prompt
influxdb_iox sql
# Display all server settings
influxdb_iox run --help
influxdb_iox run database --help
# Run the InfluxDB IOx server with extra verbose logging
influxdb_iox run -v
influxdb_iox run database -v
# Run InfluxDB IOx with full debug logging specified with RUST_LOG
RUST_LOG=debug influxdb_iox run
RUST_LOG=debug influxdb_iox run database
Commands are generally structured in the form:
<type of object> <action> <arguments>

View File

@ -0,0 +1,48 @@
/// Boolean flag that works with environment variables.
///
/// Workaround for <https://github.com/TeXitoi/structopt/issues/428>
#[derive(Debug, Clone, Copy)]
pub enum BooleanFlag {
True,
False,
}
impl std::str::FromStr for BooleanFlag {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_lowercase().as_str() {
"yes" | "y" | "true" | "t" | "1" => Ok(Self::True),
"no" | "n" | "false" | "f" | "0" => Ok(Self::False),
_ => Err(format!(
"Invalid boolean flag '{}'. Valid options: yes, no, y, n, true, false, t, f, 1, 0",
s
)),
}
}
}
impl From<BooleanFlag> for bool {
fn from(yes_no: BooleanFlag) -> Self {
matches!(yes_no, BooleanFlag::True)
}
}
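In practice the flag is declared as a structopt field and converted back to a plain `bool` with `.into()`, as `make_server` does above. A hedged sketch; the flag and env var names are made up for illustration:

use structopt::StructOpt;

// Hypothetical config block; only the wiring of `BooleanFlag` is the point.
#[derive(Debug, StructOpt)]
struct ExampleConfig {
    /// Wipe the catalog on error instead of refusing to start.
    #[structopt(long, env = "EXAMPLE_WIPE_CATALOG_ON_ERROR", default_value = "no")]
    wipe_catalog_on_error: BooleanFlag,
}

Downstream code then recovers a plain boolean via `bool::from(config.wipe_catalog_on_error)` or `.into()`.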
#[cfg(test)]
mod tests {
use std::str::FromStr;
use super::*;
#[test]
fn test_parsing() {
assert!(bool::from(BooleanFlag::from_str("yes").unwrap()));
assert!(bool::from(BooleanFlag::from_str("Yes").unwrap()));
assert!(bool::from(BooleanFlag::from_str("YES").unwrap()));
assert!(!bool::from(BooleanFlag::from_str("No").unwrap()));
assert!(!bool::from(BooleanFlag::from_str("FaLse").unwrap()));
BooleanFlag::from_str("foo").unwrap_err();
}
}

View File

@ -0,0 +1,8 @@
//! Building blocks for [`structopt`]-driven configs.
//!
//! They can easily be reused via `#[structopt(flatten)]`.
pub mod boolean_flag;
pub mod object_store;
pub mod run_config;
pub mod server_id;
pub mod socket_addr;
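As a sketch of the intended composition; the struct and type names below are assumptions standing in for whatever each submodule actually exports:

use structopt::StructOpt;

// Assumed re-exports from the submodules above; the exact paths and type
// names are hypothetical.
use super::{object_store::ObjectStoreConfig, server_id::ServerIdConfig};

// Hypothetical command config that embeds two of the blocks via flattening,
// so their flags and env vars appear directly on that command.
#[derive(Debug, StructOpt)]
struct ExampleRunConfig {
    #[structopt(flatten)]
    object_store_config: ObjectStoreConfig,

    #[structopt(flatten)]
    server_id_config: ServerIdConfig,
}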

Some files were not shown because too many files have changed in this diff.