Merge branch 'main' into crepererum/issue1382-c
commit f028a356f4
@@ -3164,6 +3164,7 @@ name = "read_buffer"
 version = "0.1.0"
 dependencies = [
  "arrow 0.1.0",
+ "arrow_util",
  "criterion",
  "croaring",
  "data_types",
@@ -21,6 +21,13 @@ impl BitSet {
         Self::default()
     }
 
+    /// Creates a new BitSet with `count` unset bits.
+    pub fn with_size(count: usize) -> Self {
+        let mut bitset = Self::default();
+        bitset.append_unset(count);
+        bitset
+    }
+
     /// Appends `count` unset bits
     pub fn append_unset(&mut self, count: usize) {
         self.len += count;
@@ -12,6 +12,7 @@ edition = "2018"
 
 [dependencies] # In alphabetical order
 arrow = { path = "../arrow" }
+arrow_util = { path = "../arrow_util" }
 croaring = "0.4.5"
 data_types = { path = "../data_types" }
 datafusion = { path = "../datafusion" }
@@ -46,5 +47,5 @@ name = "string"
 harness = false
 
 [[bench]]
-name = "row_group"
+name = "read"
 harness = false
@@ -0,0 +1,9 @@
+use criterion::{criterion_group, criterion_main};
+
+mod read_filter;
+mod read_group;
+use read_filter::read_filter;
+use read_group::read_group;
+
+criterion_group!(benches, read_filter, read_group);
+criterion_main!(benches);
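Note: the new bench entry point above registers both benchmark modules in a single Criterion harness; combined with the `[[bench]] name = "read"` / `harness = false` entry added to Cargo.toml, `cargo bench --bench read` should run the read_filter and read_group groups together.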
@@ -0,0 +1,314 @@
+use criterion::{BenchmarkId, Criterion, Throughput};
+use rand::distributions::Alphanumeric;
+use rand::prelude::*;
+use rand::Rng;
+use rand_distr::{Distribution, Normal};
+
+use internal_types::selection::Selection;
+use packers::{sorter, Packers};
+use read_buffer::{
+    benchmarks::{Column, ColumnType, RowGroup},
+    Chunk,
+};
+use read_buffer::{BinaryExpr, Predicate};
+
+const ONE_MS: i64 = 1_000_000;
+
+pub fn read_filter(c: &mut Criterion) {
+    let mut rng = rand::thread_rng();
+
+    let mut chunk = Chunk::new(read_buffer::ChunkMetrics::new_unregistered());
+    let row_group = generate_row_group(200_000, &mut rng);
+    read_buffer::benchmarks::upsert_table_with_row_group(&mut chunk, "table", row_group);
+
+    read_filter_no_pred_vary_proj(c, &chunk);
+    read_filter_with_pred_vary_proj(c, &chunk);
+}
+
+// These benchmarks track the performance of read_filter without any predicate
+// but varying the size of projection (columns) requested
+fn read_filter_no_pred_vary_proj(c: &mut Criterion, chunk: &Chunk) {
+    let mut group = c.benchmark_group("read_filter/no_pred");
+
+    // All these projections involve the same number of rows but with varying
+    // cardinalities.
+    let projections = vec![
+        (Selection::Some(&["user_id"]), 200_000),
+        (Selection::Some(&["node_id"]), 2_000),
+        (Selection::Some(&["cluster"]), 200),
+        (Selection::Some(&["env"]), 2),
+    ];
+
+    for (projection, exp_card) in projections {
+        // benchmark measures the throughput of group creation.
+        group.throughput(Throughput::Elements(200_000));
+
+        group.bench_with_input(
+            BenchmarkId::from_parameter(format!("cardinality_{:?}_rows_{:?}", exp_card, 200_000)),
+            &exp_card,
+            |b, _| {
+                b.iter(|| {
+                    let result = chunk
+                        .read_filter("table", Predicate::default(), projection)
+                        .unwrap();
+                    let rbs = result.collect::<Vec<_>>();
+                    assert_eq!(rbs.len(), 1);
+                    assert_eq!(rbs[0].num_rows(), 200_000);
+                    assert_eq!(rbs[0].num_columns(), 1);
+                });
+            },
+        );
+    }
+    group.finish();
+}
+
+// These benchmarks track the performance of read_filter with different predicates
+fn read_filter_with_pred_vary_proj(c: &mut Criterion, chunk: &Chunk) {
+    let mut group = c.benchmark_group("read_filter/with_pred");
+
+    // these predicates vary the number of rows returned
+    let predicates = vec![(
+        Predicate::with_time_range(
+            &[BinaryExpr::from(("env", "=", "env-1"))],
+            i64::MIN,
+            i64::MAX,
+        ),
+        100_000,
+    )];
+
+    for (predicate, exp_rows) in predicates {
+        // benchmark measures the throughput of group creation.
+        group.throughput(Throughput::Elements(exp_rows as u64));
+
+        group.bench_with_input(
+            BenchmarkId::from_parameter(format!("rows_{:?}", exp_rows)),
+            &exp_rows,
+            |b, _| {
+                b.iter(|| {
+                    let result = chunk
+                        .read_filter("table", predicate.clone(), Selection::All)
+                        .unwrap();
+                    let rbs = result.collect::<Vec<_>>();
+                    assert_eq!(rbs.len(), 1);
+                    assert!(rbs[0].num_rows() > 0); // data randomly generated so row numbers not exact
+                    assert_eq!(rbs[0].num_columns(), 11);
+                });
+            },
+        );
+    }
+    group.finish();
+}
+
+// TODO(edd): figure out how to DRY this into a single place in `benches` "crate".
+//
+// This generates a `RowGroup` with a known schema, ~known column cardinalities
+// and variable number of rows.
+//
+// The schema and cardinalities are in-line with a tracing data use-case.
+fn generate_row_group(rows: usize, rng: &mut ThreadRng) -> RowGroup {
+    let mut timestamp = 1351700038292387000_i64;
+    let spans_per_trace = 10;
+
+    let mut column_packers: Vec<Packers> = vec![
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // env (card 2)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // data_centre (card 20)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // cluster (card 200)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // user_id (card 200,000)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // request_id (card 2,000,000)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // node_id (card 2,000)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // pod_id (card 20,000)
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // trace_id (card "rows / 10")
+        Packers::from(Vec::<Option<String>>::with_capacity(rows)), // span_id (card "rows")
+        Packers::from(Vec::<Option<i64>>::with_capacity(rows)),    // duration
+        Packers::from(Vec::<Option<i64>>::with_capacity(rows)),    // time
+    ];
+
+    let n = rows / spans_per_trace;
+    for _ in 0..n {
+        column_packers =
+            generate_trace_for_row_group(spans_per_trace, timestamp, column_packers, rng);
+
+        // next trace is ~10 seconds in the future
+        timestamp += 10_000 * ONE_MS;
+    }
+
+    // sort the packers according to lowest to highest cardinality excluding
+    // columns that are likely to be unique.
+    //
+    // - env, data_centre, cluster, node_id, pod_id, user_id, request_id, time
+    sorter::sort(&mut column_packers, &[0, 1, 2, 5, 6, 3, 4, 10]).unwrap();
+
+    // create columns
+    let columns = vec![
+        (
+            "env".to_string(),
+            ColumnType::Tag(Column::from(column_packers[0].str_packer().values())),
+        ),
+        (
+            "data_centre".to_string(),
+            ColumnType::Tag(Column::from(column_packers[1].str_packer().values())),
+        ),
+        (
+            "cluster".to_string(),
+            ColumnType::Tag(Column::from(column_packers[2].str_packer().values())),
+        ),
+        (
+            "user_id".to_string(),
+            ColumnType::Tag(Column::from(column_packers[3].str_packer().values())),
+        ),
+        (
+            "request_id".to_string(),
+            ColumnType::Tag(Column::from(column_packers[4].str_packer().values())),
+        ),
+        (
+            "node_id".to_string(),
+            ColumnType::Tag(Column::from(column_packers[5].str_packer().values())),
+        ),
+        (
+            "pod_id".to_string(),
+            ColumnType::Tag(Column::from(column_packers[6].str_packer().values())),
+        ),
+        (
+            "trace_id".to_string(),
+            ColumnType::Tag(Column::from(column_packers[7].str_packer().values())),
+        ),
+        (
+            "span_id".to_string(),
+            ColumnType::Tag(Column::from(column_packers[8].str_packer().values())),
+        ),
+        (
+            "duration".to_string(),
+            ColumnType::Field(Column::from(
+                column_packers[9].i64_packer().some_values().as_slice(),
+            )),
+        ),
+        (
+            "time".to_string(),
+            ColumnType::Time(Column::from(
+                column_packers[10].i64_packer().some_values().as_slice(),
+            )),
+        ),
+    ];
+
+    RowGroup::new(rows as u32, columns)
+}
+
+fn generate_trace_for_row_group(
+    spans_per_trace: usize,
+    timestamp: i64,
+    mut column_packers: Vec<Packers>,
+    rng: &mut ThreadRng,
+) -> Vec<Packers> {
+    let env_idx = 0;
+    let data_centre_idx = 1;
+    let cluster_idx = 2;
+    let user_id_idx = 3;
+    let request_id_idx = 4;
+    let node_id_idx = 5;
+    let pod_id_idx = 6;
+    let trace_id_idx = 7;
+    let span_id_idx = 8;
+    let duration_idx = 9;
+    let time_idx = 10;
+
+    let env_value = rng.gen_range(0_u8..2);
+    let env = format!("env-{:?}", env_value); // cardinality of 2.
+
+    let data_centre_value = rng.gen_range(0_u8..10);
+    let data_centre = format!("data_centre-{:?}-{:?}", env_value, data_centre_value); // cardinality of 2 * 10 = 20
+
+    let cluster_value = rng.gen_range(0_u8..10);
+    let cluster = format!(
+        "cluster-{:?}-{:?}-{:?}",
+        env_value,
+        data_centre_value,
+        cluster_value // cardinality of 2 * 10 * 10 = 200
+    );
+
+    // user id is dependent on the cluster
+    let user_id_value = rng.gen_range(0_u32..1000);
+    let user_id = format!(
+        "uid-{:?}-{:?}-{:?}-{:?}",
+        env_value,
+        data_centre_value,
+        cluster_value,
+        user_id_value // cardinality of 2 * 10 * 10 * 1000 = 200,000
+    );
+
+    let request_id_value = rng.gen_range(0_u32..10);
+    let request_id = format!(
+        "rid-{:?}-{:?}-{:?}-{:?}-{:?}",
+        env_value,
+        data_centre_value,
+        cluster_value,
+        user_id_value,
+        request_id_value // cardinality of 2 * 10 * 10 * 1000 * 10 = 2,000,000
+    );
+
+    let trace_id = rng
+        .sample_iter(&Alphanumeric)
+        .map(char::from)
+        .take(8)
+        .collect::<String>();
+
+    // the trace should move across hosts, which in this setup would be nodes
+    // and pods.
+    let normal = Normal::new(10.0, 5.0).unwrap();
+    let node_id_prefix = format!("{}-{}-{}", env_value, data_centre_value, cluster_value,);
+    for _ in 0..spans_per_trace {
+        // these values are not the same for each span so need to be generated
+        // separately.
+        let node_id = rng.gen_range(0..10); // cardinality is 2 * 10 * 10 * 10 = 2,000
+
+        column_packers[pod_id_idx].str_packer_mut().push(format!(
+            "pod_id-{}-{}-{}",
+            node_id_prefix,
+            node_id,
+            rng.gen_range(0..10) // cardinality is 2 * 10 * 10 * 10 * 10 = 20,000
+        ));
+
+        column_packers[node_id_idx]
+            .str_packer_mut()
+            .push(format!("node_id-{}-{}", node_id_prefix, node_id));
+
+        // randomly generate a span_id
+        column_packers[span_id_idx].str_packer_mut().push(
+            rng.sample_iter(&Alphanumeric)
+                .map(char::from)
+                .take(8)
+                .collect::<String>(),
+        );
+
+        // randomly generate some duration times in milliseconds.
+        column_packers[duration_idx].i64_packer_mut().push(
+            (normal.sample(rng) * ONE_MS as f64)
+                .max(ONE_MS as f64) // minimum duration is 1ms
+                .round() as i64,
+        );
+    }
+
+    column_packers[env_idx]
+        .str_packer_mut()
+        .fill_with(env, spans_per_trace);
+    column_packers[data_centre_idx]
+        .str_packer_mut()
+        .fill_with(data_centre, spans_per_trace);
+    column_packers[cluster_idx]
+        .str_packer_mut()
+        .fill_with(cluster, spans_per_trace);
+    column_packers[user_id_idx]
+        .str_packer_mut()
+        .fill_with(user_id, spans_per_trace);
+    column_packers[request_id_idx]
+        .str_packer_mut()
+        .fill_with(request_id, spans_per_trace);
+    column_packers[trace_id_idx]
+        .str_packer_mut()
+        .fill_with(trace_id, spans_per_trace);
+
+    column_packers[time_idx]
+        .i64_packer_mut()
+        .fill_with(timestamp, spans_per_trace);
+
+    column_packers
+}
@@ -1,4 +1,4 @@
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use criterion::{BenchmarkId, Criterion, Throughput};
 use rand::distributions::Alphanumeric;
 use rand::prelude::*;
 use rand::Rng;
@@ -10,7 +10,7 @@ use read_buffer::{AggregateType, Predicate};
 
 const ONE_MS: i64 = 1_000_000;
 
-fn read_group(c: &mut Criterion) {
+pub fn read_group(c: &mut Criterion) {
     let mut rng = rand::thread_rng();
 
     let row_group = generate_row_group(500_000, &mut rng);
@@ -455,6 +455,3 @@ fn generate_trace_for_row_group(
 
     column_packers
 }
-
-criterion_group!(benches, read_group);
-criterion_main!(benches);
@@ -788,28 +788,35 @@ mod test {
         let got_column = rb.column(rb.schema().index_of(col_name).unwrap());
 
         match exp {
-            Values::String(exp_data) => match got_column.data_type() {
-                DataType::Utf8 => {
-                    let arr = got_column.as_any().downcast_ref::<StringArray>().unwrap();
-                    assert_eq!(&arr.iter().collect::<Vec<_>>(), exp_data);
-                }
+            Values::Dictionary(keys, values) => match got_column.data_type() {
                 DataType::Dictionary(key, value)
                     if key.as_ref() == &DataType::Int32 && value.as_ref() == &DataType::Utf8 =>
                 {
+                    // Record batch stores keys as i32
+                    let keys = keys
+                        .iter()
+                        .map(|&x| i32::try_from(x).unwrap())
+                        .collect::<Vec<_>>();
+
                     let dictionary = got_column
                         .as_any()
                         .downcast_ref::<DictionaryArray<Int32Type>>()
                         .unwrap();
-                    let values = dictionary.values();
-                    let values = values.as_any().downcast_ref::<StringArray>().unwrap();
+                    let rb_values = dictionary.values();
+                    let rb_values = rb_values.as_any().downcast_ref::<StringArray>().unwrap();
 
-                    let hydrated: Vec<_> = dictionary
-                        .keys()
-                        .iter()
-                        .map(|key| key.map(|key| values.value(key as _)))
-                        .collect();
+                    // Ensure string values are same
+                    assert!(rb_values.iter().zip(values.iter()).all(|(a, b)| &a == b));
 
-                    assert_eq!(&hydrated, exp_data)
+                    let rb_keys = dictionary.keys().values();
+                    assert_eq!(rb_keys, keys.as_slice());
+                }
+                d => panic!("Unexpected type {:?}", d),
+            },
+            Values::String(exp_data) => match got_column.data_type() {
+                DataType::Utf8 => {
+                    let arr = got_column.as_any().downcast_ref::<StringArray>().unwrap();
+                    assert_eq!(&arr.iter().collect::<Vec<_>>(), exp_data);
                 }
                 d => panic!("Unexpected type {:?}", d),
             },
@@ -1278,11 +1285,12 @@ mod test {
            .read_filter("Coolverine", predicate, Selection::All)
            .unwrap();
 
-        let exp_env_values = Values::String(vec![Some("us-west")]);
-        let exp_region_values = Values::String(vec![Some("west")]);
+        let exp_env_values = Values::Dictionary(vec![0], vec![Some("us-west")]);
+        let exp_region_values = Values::Dictionary(vec![0], vec![Some("west")]);
         let exp_counter_values = Values::F64(vec![1.2]);
         let exp_sketchy_sensor_values = Values::I64N(vec![None]);
         let exp_active_values = Values::Bool(vec![Some(true)]);
+        let exp_msg_values = Values::String(vec![Some("message a")]);
 
         let first_row_group = itr.next().unwrap();
         assert_rb_column_equals(&first_row_group, "env", &exp_env_values);
@@ -1294,11 +1302,7 @@ mod test {
            &exp_sketchy_sensor_values,
         );
         assert_rb_column_equals(&first_row_group, "active", &exp_active_values);
-        assert_rb_column_equals(
-            &first_row_group,
-            "msg",
-            &Values::String(vec![Some("message a")]),
-        );
+        assert_rb_column_equals(&first_row_group, "msg", &exp_msg_values);
         assert_rb_column_equals(&first_row_group, "time", &Values::I64(vec![100])); // first row from first record batch
 
         let second_row_group = itr.next().unwrap();
@@ -246,6 +246,26 @@ impl Column {
         }
     }
 
+    /// All values present at the provided logical row ids materialised in a
+    /// dictionary format.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called on a non-string columnar encoding.
+    pub fn values_as_dictionary(&self, row_ids: &[u32]) -> Values<'_> {
+        assert!(
+            row_ids.len() as u32 <= self.num_rows(),
+            "too many row ids {:?} provided for column with {:?} rows",
+            row_ids.len(),
+            self.num_rows()
+        );
+
+        if let Self::String(_, data) = &self {
+            return data.values_as_dictionary(row_ids);
+        }
+        panic!("unsupported encoding type {}", self)
+    }
+
     /// All logical values in the column.
     pub fn all_values(&self) -> Values<'_> {
         match &self {
@@ -258,6 +278,18 @@ impl Column {
         }
     }
 
+    /// All logical values in the column returned in a dictionary encoded format.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called on a non-string columnar encoding.
+    pub fn all_values_as_dictionary(&self) -> Values<'_> {
+        if let Self::String(_, data) = &self {
+            return data.all_values_as_dictionary();
+        }
+        panic!("unsupported encoding type {}", self)
+    }
+
     /// The value present at the provided logical row id.
     pub fn decode_id(&self, encoded_id: u32) -> Value<'_> {
         match &self {
@@ -1636,10 +1668,27 @@ mod test {
         let col = Column::from(&[0.0, 1.1, 20.2, 22.3, 100.1324][..]);
         assert_eq!(col.values(&[1, 3]), Values::F64(vec![1.1, 22.3]));
 
-        let col = Column::from(&[Some("a"), Some("b"), None, Some("c")][..]);
+        let col = Column::from(&[Some("a"), Some("b"), None, Some("c"), Some("b")][..]);
         assert_eq!(
-            col.values(&[1, 2, 3]),
-            Values::String(vec![Some("b"), None, Some("c")])
+            col.values(&[1, 2, 3, 4]),
+            Values::String(vec![Some("b"), None, Some("c"), Some("b")])
         );
     }
 
+    #[test]
+    fn values_as_dictionary() {
+        let col = Column::from(&[Some("a"), Some("b"), None, Some("c"), Some("b")][..]);
+        //
+        // Stored in dictionary like:
+        //
+        // dict: {NULL: 0, a: 1, b: 2, c: 3}
+        // values: [1, 2, 0, 3, 2]
+        assert_eq!(
+            col.values_as_dictionary(&[1, 2, 3, 4]),
+            Values::Dictionary(
+                vec![1, 0, 2, 1], // encoded IDs for [b, NULL, c, b]
+                vec![None, Some("b"), Some("c")] // dictionary
+            )
+        );
+    }
 
@@ -5,7 +5,7 @@ use either::Either;
 
 use super::cmp;
 use super::encoding::string::{dictionary, rle};
-use super::encoding::string::{Dictionary, Encoding, RLE};
+use super::encoding::string::{Dictionary, Encoding, NULL_ID, RLE};
 use crate::column::{RowIDs, Statistics, Value, Values};
 
 // Edd's totally made up magic constant. This determines whether we would use
@@ -152,7 +152,7 @@ impl StringEncoding {
         }
     }
 
-    /// All values present at the provided logical row ids.
+    /// All values present at the provided logical row IDs.
     ///
     /// TODO(edd): perf - pooling of destination vectors.
     pub fn values(&self, row_ids: &[u32]) -> Values<'_> {
@@ -162,6 +162,75 @@ impl StringEncoding {
         }
     }
 
+    /// Returns all values present at the provided logical row IDs as a
+    /// dictionary encoded `Values` format.
+    pub fn values_as_dictionary(&self, row_ids: &[u32]) -> Values<'_> {
+        //
+        // Example:
+        //
+        // Suppose you have column encoded like this:
+        //
+        // values: NULL, "alpha", "beta", "gamma"
+        // encoded: 1, 1, 2, 0, 3 (alpha, alpha, beta, NULL, gamma)
+        //
+        // And only the rows: {0, 1, 3, 4} are required.
+        //
+        // The column encoding will return the following encoded values
+        //
+        // encoded: 1, 1, 0, 3 (alpha, alpha, NULL, gamma)
+        //
+        // Because the dictionary has likely changed, the encoded values need
+        // to be transformed into a new domain `[0, encoded.len())` so that they
+        // become:
+        //
+        // keys: [1, 1, 0, 2]
+        // values: [None, Some("alpha"), Some("gamma")]
+        let mut keys = self.encoded_values(row_ids, vec![]);
+
+        // build a mapping from encoded value to new ordinal position.
+        let mut ordinal_mapping = hashbrown::HashMap::new();
+        for key in &keys {
+            ordinal_mapping.insert(*key, u32::default()); // don't know final ordinal position yet
+        }
+
+        // create new ordinal offsets - the encoded values need to be shifted
+        // into a new domain `[0, ordinal_mapping.len())` which is the length
+        // of the new dictionary.
+        let mut ordinal_mapping_keys = ordinal_mapping
+            .keys()
+            .into_iter()
+            .cloned()
+            .collect::<Vec<_>>();
+        ordinal_mapping_keys.sort_unstable();
+
+        for (i, key) in ordinal_mapping_keys.iter().enumerate() {
+            // now we can insert the new ordinal position of the encoded in key
+            // in the final values vector.
+            ordinal_mapping.insert(*key, i as u32);
+        }
+
+        // Rewrite all the encoded values into the new domain.
+        for key in keys.iter_mut() {
+            *key = *ordinal_mapping.get(key).unwrap();
+        }
+
+        // now generate the values vector, which will contain the sorted set of
+        // string values
+        let mut values = match &self {
+            Self::RleDictionary(c) => ordinal_mapping_keys
+                .iter()
+                .map(|id| c.decode_id(*id))
+                .collect::<Vec<_>>(),
+            Self::Dictionary(c) => ordinal_mapping_keys
+                .iter()
+                .map(|id| c.decode_id(*id))
+                .collect::<Vec<_>>(),
+        };
+        values.sort_unstable();
+
+        Values::Dictionary(keys, values)
+    }
+
     /// All values in the column.
     ///
     /// TODO(edd): perf - pooling of destination vectors.
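The key re-mapping performed by `values_as_dictionary` above can be pictured with a small standalone sketch (illustrative only; `remap` is a hypothetical helper, not part of the diff): encoded keys are rewritten into a dense `[0, n)` domain ordered like the sorted set of referenced dictionary IDs.

// Sketch of the ordinal re-mapping described in the method comments above.
fn remap(mut keys: Vec<u32>) -> (Vec<u32>, Vec<u32>) {
    // distinct encoded IDs, sorted, become the new dictionary ordering
    let mut distinct = keys.clone();
    distinct.sort_unstable();
    distinct.dedup();
    // rewrite each key as its position within the distinct set
    for k in keys.iter_mut() {
        *k = distinct.iter().position(|d| d == k).unwrap() as u32;
    }
    (keys, distinct)
}

fn main() {
    // mirrors the example in the comments: encoded values [1, 1, 0, 3]
    let (keys, dict_ids) = remap(vec![1, 1, 0, 3]);
    assert_eq!(keys, vec![1, 1, 0, 2]);
    assert_eq!(dict_ids, vec![0, 1, 3]); // 0 = NULL, then "alpha", "gamma"
}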
@@ -172,6 +241,48 @@ impl StringEncoding {
         }
     }
 
+    /// Returns all values as a dictionary encoded `Values` format.
+    pub fn all_values_as_dictionary(&self) -> Values<'_> {
+        let mut keys = self.all_encoded_values(vec![]);
+
+        let values = if self.contains_null() {
+            // The column's ordered set of values including None because that is a
+            // reserved encoded key (`0`).
+            let mut values = vec![None];
+            match &self {
+                Self::RleDictionary(c) => {
+                    values.extend(c.dictionary().into_iter().map(|s| Some(s.as_str())));
+                }
+                Self::Dictionary(c) => {
+                    values.extend(c.dictionary().into_iter().map(|s| Some(s.as_str())));
+                }
+            };
+            values
+        } else {
+            // since column doesn't contain null we need to shift all the encoded
+            // values down
+            assert_eq!(NULL_ID, 0);
+            for key in keys.iter_mut() {
+                *key -= 1;
+            }
+
+            match &self {
+                Self::RleDictionary(c) => c
+                    .dictionary()
+                    .into_iter()
+                    .map(|s| Some(s.as_str()))
+                    .collect::<Vec<_>>(),
+                Self::Dictionary(c) => c
+                    .dictionary()
+                    .into_iter()
+                    .map(|s| Some(s.as_str()))
+                    .collect::<Vec<_>>(),
+            }
+        };
+
+        Values::Dictionary(keys, values)
+    }
+
     /// Returns the logical value for the specified encoded representation.
     pub fn decode_id(&self, encoded_id: u32) -> Value<'_> {
         match &self {
@@ -487,3 +598,156 @@ impl From<&[&str]> for StringEncoding {
         }
     }
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    // tests both `values_as_dictionary` and `all_values_as_dictionary`
+    fn values_as_dictionary() {
+        let set = vec!["apple", "beta", "orange", "pear"];
+        let data = vec![
+            Some("apple"),
+            Some("apple"),
+            Some("pear"),
+            None,
+            None,
+            Some("orange"),
+            Some("beta"),
+        ];
+
+        let mut rle = RLE::with_dictionary(
+            set.iter()
+                .cloned()
+                .map(String::from)
+                .collect::<BTreeSet<String>>(),
+        );
+        for v in data.iter().map(|x| x.map(String::from)) {
+            rle.push_additional(v, 1);
+        }
+
+        let mut dict = Dictionary::with_dictionary(
+            set.into_iter()
+                .map(String::from)
+                .collect::<BTreeSet<String>>(),
+        );
+        for v in data.iter().map(|x| x.map(String::from)) {
+            dict.push_additional(v, 1);
+        }
+
+        let encodings = vec![
+            StringEncoding::RleDictionary(rle),
+            StringEncoding::Dictionary(dict),
+        ];
+
+        for enc in encodings {
+            _values_as_dictionary(&enc);
+            _all_values_as_dictionary(&enc);
+        }
+
+        // example without NULL values
+        let data = vec![
+            Some("apple"),
+            Some("apple"),
+            Some("beta"),
+            Some("orange"),
+            Some("pear"),
+        ];
+
+        let encodings = vec![
+            StringEncoding::RleDictionary(RLE::from(data.clone())),
+            StringEncoding::Dictionary(Dictionary::from(data)),
+        ];
+
+        for enc in encodings {
+            let exp_keys = vec![0, 0, 1, 2, 3];
+            let exp_values = vec![Some("apple"), Some("beta"), Some("orange"), Some("pear")];
+
+            let values = enc.all_values_as_dictionary();
+            if let Values::Dictionary(got_keys, got_values) = values {
+                assert_eq!(got_keys, exp_keys, "key comparison for {} failed", enc);
+                assert_eq!(
+                    got_values, exp_values,
+                    "values comparison for {} failed",
+                    enc
+                );
+            } else {
+                panic!("invalid Values format returned, got {:?}", values);
+            }
+        }
+    }
+
+    fn _values_as_dictionary(enc: &StringEncoding) {
+        // column is: [apple, apple, pear, NULL, NULL, orange, beta]
+
+        // Since the Read Buffer only accepts row IDs in order we only need to
+        // cover ascending rows in these tests.
+        let cases = vec![
+            (
+                &[0, 3, 4][..], // apple NULL, NULL
+                (vec![1, 0, 0], vec![None, Some("apple")]),
+            ),
+            (
+                &[6], // beta
+                (vec![0], vec![Some("beta")]),
+            ),
+            (
+                &[0, 3, 5][..], // apple NULL, orange
+                (vec![1, 0, 2], vec![None, Some("apple"), Some("orange")]),
+            ),
+            (
+                &[0, 1, 2, 3, 4, 5, 6], // apple, apple, pear, NULL, NULL, orange, beta
+                (
+                    vec![1, 1, 4, 0, 0, 3, 2],
+                    vec![
+                        None,
+                        Some("apple"),
+                        Some("beta"),
+                        Some("orange"),
+                        Some("pear"),
+                    ],
+                ),
+            ),
+        ];
+
+        for (row_ids, (exp_keys, exp_values)) in cases {
+            let values = enc.values_as_dictionary(row_ids);
+            if let Values::Dictionary(got_keys, got_values) = values {
+                assert_eq!(got_keys, exp_keys, "key comparison for {} failed", enc);
+                assert_eq!(
+                    got_values, exp_values,
+                    "values comparison for {} failed",
+                    enc
+                );
+            } else {
+                panic!("invalid Values format returned, got {:?}", values);
+            }
+        }
+    }
+
+    fn _all_values_as_dictionary(enc: &StringEncoding) {
+        // column is: [apple, apple, pear, NULL, NULL, orange, beta]
+
+        let exp_keys = vec![1, 1, 4, 0, 0, 3, 2];
+        let exp_values = vec![
+            None,
+            Some("apple"),
+            Some("beta"),
+            Some("orange"),
+            Some("pear"),
+        ];
+
+        let values = enc.all_values_as_dictionary();
+        if let Values::Dictionary(got_keys, got_values) = values {
+            assert_eq!(got_keys, exp_keys, "key comparison for {} failed", enc);
+            assert_eq!(
+                got_values, exp_values,
+                "values comparison for {} failed",
+                enc
+            );
+        } else {
+            panic!("invalid Values format returned, got {:?}", values);
+        }
+    }
+}
@@ -25,6 +25,15 @@ pub mod benchmarks {
         cmp::Operator, encoding::scalar::Fixed, encoding::scalar::FixedNull, encoding::string,
         Column, RowIDs,
     };
 
     pub use crate::row_group::{ColumnType, RowGroup};
+    use crate::Chunk;
+
+    // Allow external benchmarks to use this crate-only test method
+    pub fn upsert_table_with_row_group(
+        chunk: &mut Chunk,
+        table_name: impl Into<String>,
+        row_group: RowGroup,
+    ) {
+        chunk.upsert_table_with_row_group(table_name, row_group)
+    }
 }
@@ -7,7 +7,6 @@ use std::{
     sync::Arc,
 };
 
-use arrow::array;
 use hashbrown::{hash_map, HashMap};
 use itertools::Itertools;
 use snafu::{ResultExt, Snafu};
@@ -19,6 +18,7 @@ use crate::value::{
     AggregateVec, EncodedValues, OwnedValue, Scalar, Value, Values, ValuesIterator,
 };
 use arrow::{
+    array,
     array::ArrayRef,
     datatypes::{DataType, TimeUnit},
     record_batch::RecordBatch,
@@ -256,37 +256,40 @@ impl RowGroup {
 
         // apply predicates to determine candidate rows.
         let row_ids = self.row_ids_from_predicate(predicates);
-        let col_data = self.materialise_rows(columns, row_ids);
+        let col_data = self.materialise_rows(&schema, row_ids);
         ReadFilterResult {
             schema,
             data: col_data,
         }
     }
 
-    fn materialise_rows(&self, names: &[ColumnName<'_>], row_ids: RowIDsOption) -> Vec<Values<'_>> {
-        let mut col_data = Vec::with_capacity(names.len());
+    fn materialise_rows(&self, schema: &ResultSchema, row_ids: RowIDsOption) -> Vec<Values<'_>> {
+        let mut col_data = Vec::with_capacity(schema.len());
         match row_ids {
             RowIDsOption::None(_) => col_data, // nothing to materialise
             RowIDsOption::Some(row_ids) => {
                 // TODO(edd): causes an allocation. Implement a way to pass a
                 // pooled buffer to the croaring Bitmap API.
                 let row_ids = row_ids.to_vec();
-                for &name in names {
-                    let (_, col) = self.column_name_and_column(name);
-                    col_data.push(col.values(row_ids.as_slice()));
+                for (ct, _) in &schema.select_columns {
+                    let (_, col) = self.column_name_and_column(ct.as_str());
+                    if let schema::ColumnType::Tag(_) = ct {
+                        col_data.push(col.values_as_dictionary(row_ids.as_slice()));
+                    } else {
+                        col_data.push(col.values(row_ids.as_slice()));
+                    }
                 }
                 col_data
             }
 
             RowIDsOption::All(_) => {
-                // TODO(edd): Perf - add specialised method to get all
-                // materialised values from a column without having to
-                // materialise a vector of row ids.......
-                let row_ids = (0..self.rows()).collect::<Vec<_>>();
-                for &name in names {
-                    let (_, col) = self.column_name_and_column(name);
-                    col_data.push(col.values(row_ids.as_slice()));
+                for (ct, _) in &schema.select_columns {
+                    let (_, col) = self.column_name_and_column(ct.as_str());
+                    if let schema::ColumnType::Tag(_) = ct {
+                        col_data.push(col.all_values_as_dictionary());
+                    } else {
+                        col_data.push(col.all_values());
+                    }
                 }
                 col_data
             }
@@ -1684,27 +1687,6 @@ impl TryFrom<ReadFilterResult<'_>> for RecordBatch {
                    };
                }
 
-                if let Some(InfluxColumnType::Tag) = schema.field(i).0 {
-                    return match values {
-                        Values::String(values) => {
-                            // TODO: Preserve dictionary encoding
-                            Ok(
-                                Arc::new(
-                                    values
-                                        .into_iter()
-                                        .collect::<arrow::array::DictionaryArray<
-                                            arrow::datatypes::Int32Type,
-                                        >>(),
-                                ) as _,
-                            )
-                        }
-                        t => UnsupportedOperation {
-                            msg: format!("cannot convert {:?} to DictionaryArray", t),
-                        }
-                        .fail(),
-                    };
-                }
-
                Ok(arrow::array::ArrayRef::from(values))
            })
            .collect::<Result<Vec<_>, _>>()?;
@@ -2473,6 +2455,55 @@ west,4
         assert!(results.is_empty());
     }
 
+    #[test]
+    fn read_filter_dictionaries() {
+        let mut columns = vec![];
+        let tc = ColumnType::Time(Column::from(&[1_i64, 2, 3, 4, 5, 6][..]));
+        columns.push(("time".to_string(), tc));
+
+        // Tag column that will be dictionary encoded when materialised
+        let rc = ColumnType::Tag(Column::from(
+            &["west", "west", "east", "west", "south", "north"][..],
+        ));
+        columns.push(("region".to_string(), rc));
+
+        // Field column that will be stored as a string array when materialised
+        let mc = ColumnType::Field(Column::from(
+            &["GET", "POST", "POST", "POST", "PUT", "GET"][..],
+        ));
+        columns.push(("method".to_string(), mc));
+
+        let row_group = RowGroup::new(6, columns);
+
+        let cases = vec![
+            (
+                vec!["method", "region", "time"],
+                Predicate::default(),
+                "method,region,time
+GET,west,1
+POST,west,2
+POST,east,3
+POST,west,4
+PUT,south,5
+GET,north,6
+",
+            ),
+            (
+                vec!["method", "region", "time"],
+                Predicate::with_time_range(&[], -1, 3),
+                "method,region,time
+GET,west,1
+POST,west,2
+",
+            ),
+        ];
+
+        for (cols, predicates, expected) in cases {
+            let results = row_group.read_filter(&cols, &predicates);
+            assert_eq!(format!("{:?}", &results), expected);
+        }
+    }
+
     #[test]
     fn read_aggregate() {
         let mut columns = vec![];
@@ -1349,6 +1349,11 @@ pub enum Values<'a> {
     // UTF-8 valid unicode strings
     String(Vec<Option<&'a str>>),
 
+    // A dictionary mapping between a vector of dictionary integer keys and the
+    // string values they refer to.
+    // NOTE the strings are always sorted
+    Dictionary(Vec<u32>, Vec<Option<&'a str>>),
+
     // Scalar types
     I64(Vec<i64>),
     U64(Vec<u64>),
@@ -1368,6 +1373,7 @@ impl<'a> Values<'a> {
     pub fn len(&self) -> usize {
         match &self {
             Self::String(c) => c.len(),
+            Self::Dictionary(c, _) => c.len(),
             Self::I64(c) => c.len(),
             Self::U64(c) => c.len(),
             Self::F64(c) => c.len(),
@@ -1386,6 +1392,7 @@ impl<'a> Values<'a> {
     pub fn is_null(&self, i: usize) -> bool {
         match &self {
             Self::String(c) => c[i].is_none(),
+            Self::Dictionary(keys, values) => values[keys[i] as usize].is_none(),
             Self::F64(_) => false,
             Self::I64(_) => false,
             Self::U64(_) => false,
@@ -1403,6 +1410,10 @@ impl<'a> Values<'a> {
                 Some(v) => Value::String(v),
                 None => Value::Null,
             },
+            Self::Dictionary(keys, values) => match values[keys[i] as usize] {
+                Some(v) => Value::String(v),
+                None => Value::Null,
+            },
             Self::F64(c) => Value::Scalar(Scalar::F64(c[i])),
             Self::I64(c) => Value::Scalar(Scalar::I64(c[i])),
             Self::U64(c) => Value::Scalar(Scalar::U64(c[i])),
@@ -1460,6 +1471,7 @@ impl<'a> Values<'a> {
     fn value_str(&self, i: usize) -> &'a str {
         match &self {
             Values::String(c) => c[i].unwrap(),
+            Values::Dictionary(keys, values) => values[keys[i] as usize].unwrap(),
             _ => panic!("value cannot be returned as &str"),
         }
     }
@@ -1481,11 +1493,71 @@ impl<'a> Values<'a> {
     }
 }
 
+use arrow::{
+    array::{Array, ArrayDataBuilder, DictionaryArray},
+    buffer::Buffer,
+    datatypes::{DataType, Int32Type},
+};
+use arrow_util::bitset::BitSet;
+use std::iter::FromIterator;
+
 /// Moves ownership of Values into an arrow `ArrayRef`.
 impl From<Values<'_>> for arrow::array::ArrayRef {
     fn from(values: Values<'_>) -> Self {
         match values {
             Values::String(values) => Arc::new(arrow::array::StringArray::from(values)),
+            Values::Dictionary(mut keys, values) => {
+                // check for NULL values, setting null positions
+                // on the null bitmap if there is at least one NULL
+                // value.
+                let null_bitmap = if matches!(values.first(), Some(None)) {
+                    let mut bitset = BitSet::with_size(keys.len());
+                    for (i, v) in keys.iter_mut().enumerate() {
+                        if *v as usize != 0 {
+                            bitset.set(i); // valid value
+                        }
+
+                        // because Arrow Dictionary arrays do not maintain a
+                        // None/NULL entry in the string values array we need to
+                        // shift the encoded key down so it maps correctly to
+                        // the values array. The encoded key for NULL entries is
+                        // never used (it's undefined) so we can keep those
+                        // encoded keys set to 0.
+                        if *v > 0 {
+                            *v -= 1;
+                        }
+                    }
+                    Some(bitset)
+                } else {
+                    None
+                };
+
+                // If there is a null bitmap we need to remove the None entry
+                // from the string values array since Arrow doesn't maintain
+                // NULL entries in a dictionary's value array.
+                let values_arr = if null_bitmap.is_some() {
+                    // drop NULL value entry as this is not stored in Arrow's
+                    // dictionary values array.
+                    assert!(values[0].is_none());
+                    arrow::array::StringArray::from_iter(values.into_iter().skip(1))
+                } else {
+                    arrow::array::StringArray::from(values)
+                };
+
+                let mut builder = ArrayDataBuilder::new(DataType::Dictionary(
+                    Box::new(DataType::Int32),
+                    Box::new(DataType::Utf8),
+                ))
+                .len(keys.len())
+                .add_buffer(Buffer::from_iter(keys))
+                .add_child_data(values_arr.data().clone());
+
+                if let Some(bm) = null_bitmap {
+                    builder = builder.null_bit_buffer(bm.to_arrow());
+                }
+
+                Arc::new(DictionaryArray::<Int32Type>::from(builder.build()))
+            }
             Values::I64(values) => Arc::new(arrow::array::Int64Array::from(values)),
             Values::U64(values) => Arc::new(arrow::array::UInt64Array::from(values)),
             Values::F64(values) => Arc::new(arrow::array::Float64Array::from(values)),
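A minimal sketch of the key-shifting idea used in the `Values::Dictionary` arm above (illustrative; `shift_keys_for_arrow` is a hypothetical helper, not part of the diff): because Arrow keeps no NULL slot in the dictionary's values array, valid keys are shifted down by one and a validity bitmap records which rows were NULL.

// Sketch only: mirrors the shift + bitmap logic, without the Arrow builder.
fn shift_keys_for_arrow(keys: &mut [u32]) -> Vec<bool> {
    let mut validity = vec![false; keys.len()];
    for (i, k) in keys.iter_mut().enumerate() {
        if *k != 0 {
            validity[i] = true; // non-NULL row
            *k -= 1; // re-point at the values array without its NULL slot
        }
    }
    validity
}

fn main() {
    let mut keys = vec![0, 1, 2, 0];
    let validity = shift_keys_for_arrow(&mut keys);
    assert_eq!(keys, vec![0, 0, 1, 0]);
    assert_eq!(validity, vec![false, true, true, false]);
}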
@@ -1593,6 +1665,7 @@ impl EncodedValues {
 #[cfg(test)]
 mod test {
     use super::*;
+    use arrow::array::ArrayRef;
 
     #[test]
     fn aggregate_vec_update() {
@@ -1783,4 +1856,72 @@ mod test {
         let v1 = OwnedValue::ByteArray(vec![2, 44, 252]);
         assert_eq!(v1.size(), 35);
     }
+
+    #[test]
+    fn from_dictionary_arrow() {
+        let values = Values::Dictionary(
+            vec![0, 1, 2, 0, 1, 2, 2],
+            vec![Some("bones"), Some("just"), Some("planet telex")],
+        );
+
+        let arr = ArrayRef::from(values);
+        // no null values in Arrow dictionary array
+        assert_eq!(arr.null_count(), 0);
+        assert!((0..7).into_iter().all(|i| !arr.is_null(i)));
+
+        // Should produce the same the array as when created from an iterator
+        // of strings.
+        let exp_dict_arr = vec![
+            Some("bones"),
+            Some("just"),
+            Some("planet telex"),
+            Some("bones"),
+            Some("just"),
+            Some("planet telex"),
+            Some("planet telex"),
+        ]
+        .into_iter()
+        .collect::<arrow::array::DictionaryArray<arrow::datatypes::Int32Type>>();
+
+        let as_dict_arr = arr
+            .as_any()
+            .downcast_ref::<arrow::array::DictionaryArray<arrow::datatypes::Int32Type>>()
+            .unwrap();
+        assert_eq!(as_dict_arr.keys(), exp_dict_arr.keys());
+
+        // Now let's try with some NULL entries.
+        let values = Values::Dictionary(
+            vec![0, 1, 2, 0, 1, 2, 2],
+            vec![None, Some("just"), Some("planet telex")],
+        );
+
+        let arr = ArrayRef::from(values);
+        assert_eq!(arr.null_count(), 2);
+        for (i, exp) in vec![true, false, false, true, false, false, false]
+            .iter()
+            .enumerate()
+        {
+            assert_eq!(arr.is_null(i), *exp);
+        }
+
+        // Should produce the same the array as when created from an iterator
+        // of strings.
+        let exp_dict_arr = vec![
+            None,
+            Some("just"),
+            Some("planet telex"),
+            None,
+            Some("just"),
+            Some("planet telex"),
+            Some("planet telex"),
+        ]
+        .into_iter()
+        .collect::<arrow::array::DictionaryArray<arrow::datatypes::Int32Type>>();
+
+        let as_dict_arr = arr
+            .as_any()
+            .downcast_ref::<arrow::array::DictionaryArray<arrow::datatypes::Int32Type>>()
+            .unwrap();
+        assert_eq!(as_dict_arr.keys(), exp_dict_arr.keys());
+    }
 }
@@ -48,10 +48,14 @@ impl<E> From<Error> for UpdateError<E> {
 }
 
 impl Config {
-    pub(crate) fn new(jobs: Arc<JobRegistry>, metric_registry: Arc<MetricRegistry>) -> Self {
+    pub(crate) fn new(
+        jobs: Arc<JobRegistry>,
+        metric_registry: Arc<MetricRegistry>,
+        remote_template: Option<RemoteTemplate>,
+    ) -> Self {
         Self {
             shutdown: Default::default(),
-            state: Default::default(),
+            state: RwLock::new(ConfigState::new(remote_template)),
             jobs,
             metric_registry,
         }
@@ -120,7 +124,11 @@ impl Config {
 
     pub(crate) fn resolve_remote(&self, id: ServerId) -> Option<GRpcConnectionString> {
         let state = self.state.read().expect("mutex poisoned");
-        state.remotes.get(&id).cloned()
+        state
+            .remotes
+            .get(&id)
+            .cloned()
+            .or_else(|| state.remote_template.as_ref().map(|t| t.get(&id)))
     }
 
     fn commit(
@@ -233,6 +241,36 @@ struct ConfigState {
     databases: BTreeMap<DatabaseName<'static>, DatabaseState>,
     /// Map between remote IOx server IDs and management API connection strings.
     remotes: BTreeMap<ServerId, GRpcConnectionString>,
+    /// Static map between remote server IDs and hostnames based on a template
+    remote_template: Option<RemoteTemplate>,
+}
+
+impl ConfigState {
+    fn new(remote_template: Option<RemoteTemplate>) -> Self {
+        Self {
+            remote_template,
+            ..Default::default()
+        }
+    }
+}
+
+/// A RemoteTemplate string is a remote connection template string.
+/// Occurrences of the substring "{id}" in the template will be replaced
+/// by the server ID.
+#[derive(Debug)]
+pub struct RemoteTemplate {
+    template: String,
+}
+
+impl RemoteTemplate {
+    pub fn new(template: impl Into<String>) -> Self {
+        let template = template.into();
+        Self { template }
+    }
+
+    fn get(&self, id: &ServerId) -> GRpcConnectionString {
+        self.template.replace("{id}", &format!("{}", id.get_u32()))
+    }
 }
 
 #[derive(Debug)]
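The template resolution added above is plain string substitution; a standalone sketch (illustrative only, using a hypothetical free function rather than the `RemoteTemplate` type):

fn resolve(template: &str, id: u32) -> String {
    // "{id}" is replaced with the numeric server ID
    template.replace("{id}", &id.to_string())
}

fn main() {
    assert_eq!(
        resolve("http://iox-query-{id}:8082", 42),
        "http://iox-query-42:8082"
    );
}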
@@ -316,12 +354,17 @@ mod test {
     use crate::db::load_or_create_preserved_catalog;
 
     use super::*;
+    use std::num::NonZeroU32;
 
     #[tokio::test]
     async fn create_db() {
         let name = DatabaseName::new("foo").unwrap();
         let metric_registry = Arc::new(metrics::MetricRegistry::new());
-        let config = Config::new(Arc::new(JobRegistry::new()), Arc::clone(&metric_registry));
+        let config = Config::new(
+            Arc::new(JobRegistry::new()),
+            Arc::clone(&metric_registry),
+            None,
+        );
         let rules = DatabaseRules::new(name.clone());
 
         {
@@ -363,7 +406,11 @@ mod test {
     async fn test_db_drop() {
         let name = DatabaseName::new("foo").unwrap();
         let metric_registry = Arc::new(metrics::MetricRegistry::new());
-        let config = Config::new(Arc::new(JobRegistry::new()), Arc::clone(&metric_registry));
+        let config = Config::new(
+            Arc::new(JobRegistry::new()),
+            Arc::clone(&metric_registry),
+            None,
+        );
         let rules = DatabaseRules::new(name.clone());
 
         let db_reservation = config.create_db(rules).unwrap();
@@ -412,4 +459,28 @@ mod test {
 
         assert_eq!(rules_path, expected_path);
     }
+
+    #[test]
+    fn resolve_remote() {
+        let metric_registry = Arc::new(metrics::MetricRegistry::new());
+        let config = Config::new(
+            Arc::new(JobRegistry::new()),
+            Arc::clone(&metric_registry),
+            Some(RemoteTemplate::new("http://iox-query-{id}:8082")),
+        );
+
+        let server_id = ServerId::new(NonZeroU32::new(42).unwrap());
+        let remote = config.resolve_remote(server_id);
+        assert_eq!(
+            remote,
+            Some(GRpcConnectionString::from("http://iox-query-42:8082"))
+        );
+
+        let server_id = ServerId::new(NonZeroU32::new(24).unwrap());
+        let remote = config.resolve_remote(server_id);
+        assert_eq!(
+            remote,
+            Some(GRpcConnectionString::from("http://iox-query-24:8082"))
+        );
+    }
 }
@@ -75,7 +75,7 @@ use bytes::BytesMut;
 use cached::proc_macro::cached;
 use db::load_or_create_preserved_catalog;
 use futures::stream::TryStreamExt;
-use observability_deps::tracing::{error, info, warn};
+use observability_deps::tracing::{debug, error, info, warn};
 use parking_lot::Mutex;
 use snafu::{OptionExt, ResultExt, Snafu};
 
@@ -93,12 +93,14 @@ use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
 use query::{exec::Executor, DatabaseStore};
 use tracker::{TaskId, TaskRegistration, TaskRegistryWithHistory, TaskTracker, TrackedFutureExt};
 
+pub use crate::config::RemoteTemplate;
 use crate::{
     config::{
         object_store_path_for_database_config, Config, GRpcConnectionString, DB_RULES_FILE_NAME,
     },
     db::Db,
 };
+use cached::Return;
 use data_types::database_rules::{NodeGroup, Shard, ShardId};
 use generated_types::database_rules::{decode_database_rules, encode_database_rules};
 use influxdb_iox_client::{connection::Builder, write};
@@ -109,7 +111,6 @@ pub mod buffer;
 mod config;
 pub mod db;
 mod query_tests;
-pub mod snapshot;
 
 // This module exposes `query_tests` outside of the crate so that it may be used
 // in benchmarks. Do not import this module for non-benchmark purposes!
@@ -228,15 +229,22 @@ pub struct ServerConfig {
     object_store: Arc<ObjectStore>,
 
     metric_registry: Arc<MetricRegistry>,
 
+    remote_template: Option<RemoteTemplate>,
 }
 
 impl ServerConfig {
     /// Create a new config using the specified store.
-    pub fn new(object_store: Arc<ObjectStore>, metric_registry: Arc<MetricRegistry>) -> Self {
+    pub fn new(
+        object_store: Arc<ObjectStore>,
+        metric_registry: Arc<MetricRegistry>,
+        remote_template: Option<RemoteTemplate>,
+    ) -> Self {
         Self {
             num_worker_threads: None,
             object_store,
             metric_registry,
+            remote_template,
         }
     }
 
@@ -390,12 +398,17 @@ impl<M: ConnectionManager> Server<M> {
             object_store,
             // to test the metrics provide a different registry to the `ServerConfig`.
             metric_registry,
+            remote_template,
         } = config;
         let num_worker_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
 
         Self {
             id: Default::default(),
-            config: Arc::new(Config::new(Arc::clone(&jobs), Arc::clone(&metric_registry))),
+            config: Arc::new(Config::new(
+                Arc::clone(&jobs),
+                Arc::clone(&metric_registry),
+                remote_template,
+            )),
             store: object_store,
             connection_manager: Arc::new(connection_manager),
             exec: Arc::new(Executor::new(num_worker_threads)),
@@ -937,23 +950,25 @@ impl ConnectionManager for ConnectionManagerImpl {
         &self,
         connect: &str,
     ) -> Result<Arc<Self::RemoteServer>, ConnectionManagerError> {
-        cached_remote_server(connect.to_string()).await
+        let ret = cached_remote_server(connect.to_string()).await?;
+        debug!(was_cached=%ret.was_cached, %connect, "getting remote connection");
+        Ok(ret.value)
     }
 }
 
 // cannot be an associated function
 // argument need to have static lifetime because they become caching keys
-#[cached(result = true)]
+#[cached(result = true, with_cached_flag = true)]
 async fn cached_remote_server(
     connect: String,
-) -> Result<Arc<RemoteServerImpl>, ConnectionManagerError> {
+) -> Result<Return<Arc<RemoteServerImpl>>, ConnectionManagerError> {
     let connection = Builder::default()
         .build(&connect)
         .await
         .map_err(|e| Box::new(e) as _)
         .context(RemoteServerConnectError)?;
     let client = write::Client::new(connection);
-    Ok(Arc::new(RemoteServerImpl { client }))
+    Ok(Return::new(Arc::new(RemoteServerImpl { client })))
 }
 
 /// An implementation for communicating with other IOx servers. This should
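For illustration, a minimal sketch of the `cached` crate pattern used above: `result = true, with_cached_flag = true` makes the function return `cached::Return`, whose `was_cached` flag reports whether the value came from the cache. The function below is a made-up example, not the server's code:

use cached::proc_macro::cached;
use cached::Return;

// Only Ok values are cached (result = true); the macro sets `was_cached`
// to true when the value is served from the cache instead of this body.
#[cached(result = true, with_cached_flag = true)]
async fn lookup(connect: String) -> Result<Return<String>, String> {
    Ok(Return::new(format!("connection-for-{}", connect)))
}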
@@ -1055,11 +1070,7 @@ mod tests {
         let test_registry = metrics::TestMetricRegistry::new(Arc::clone(&registry));
         (
             test_registry,
-            ServerConfig::new(
-                Arc::new(object_store),
-                registry, // new registry ensures test isolation of metrics
-            )
-            .with_num_worker_threads(1),
+            ServerConfig::new(Arc::new(object_store), registry, None).with_num_worker_threads(1),
         )
     }
 
@@ -1158,8 +1169,8 @@ mod tests {
         store.list_with_delimiter(&store.new_path()).await.unwrap();
 
         let manager = TestConnectionManager::new();
-        let config2 =
-            ServerConfig::new(store, Arc::new(MetricRegistry::new())).with_num_worker_threads(1);
+        let config2 = ServerConfig::new(store, Arc::new(MetricRegistry::new()), Option::None)
+            .with_num_worker_threads(1);
         let server2 = Server::new(manager, config2);
         server2.set_id(ServerId::try_from(1).unwrap()).unwrap();
         server2.load_database_configs().await.unwrap();
@@ -1,328 +0,0 @@
-use data_types::partition_metadata::{PartitionSummary, TableSummary};
-use internal_types::selection::Selection;
-use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
-use query::{predicate::EMPTY_PREDICATE, PartitionChunk};
-
-use std::sync::Arc;
-
-use bytes::Bytes;
-use observability_deps::tracing::{error, info};
-use parking_lot::Mutex;
-use snafu::{ResultExt, Snafu};
-use tokio::sync::oneshot;
-use uuid::Uuid;
-
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[snafu(display("Partition error creating snapshot: {}", source))]
-    PartitionError {
-        source: Box<dyn std::error::Error + Send + Sync>,
-    },
-
-    #[snafu(display("Table position out of bounds: {}", position))]
-    TablePositionOutOfBounds { position: usize },
-
-    #[snafu(display("Error generating json response: {}", source))]
-    JsonGenerationError { source: serde_json::Error },
-
-    #[snafu(display("Error opening Parquet Writer: {}", source))]
-    ParquetStreamToByte {
-        source: parquet_file::storage::Error,
-    },
-
-    #[snafu(display("Error writing to object store: {}", source))]
-    WritingToObjectStore { source: object_store::Error },
-
-    #[snafu(display("Error reading batches while writing to '{}': {}", file_name, source))]
-    ReadingBatches {
-        file_name: String,
-        source: arrow::error::ArrowError,
-    },
-
-    #[snafu(display("Stopped early"))]
-    StoppedEarly,
-}
-
-pub type Result<T, E = Error> = std::result::Result<T, E>;
-
-/// Code for snapshotting a database chunk to a Parquet
-/// file in object storage.
-#[derive(Debug)]
-pub struct Snapshot<T>
-where
-    T: Send + Sync + 'static + PartitionChunk,
-{
-    pub id: Uuid,
-    pub partition_summary: PartitionSummary,
-    pub metadata_path: object_store::path::Path,
-    pub data_path: object_store::path::Path,
-    store: Arc<ObjectStore>,
-    chunk: Arc<T>,
-    status: Mutex<Status>,
-}
-
-impl<T> Snapshot<T>
-where
-    T: Send + Sync + 'static + PartitionChunk,
-{
-    fn new(
-        partition_key: impl Into<String>,
-        metadata_path: object_store::path::Path,
-        data_path: object_store::path::Path,
-        store: Arc<ObjectStore>,
-        partition: Arc<T>,
-        table: TableSummary,
-    ) -> Self {
-        let status = Status::new(TableState::NotStarted);
-
-        Self {
-            id: Uuid::new_v4(),
-            partition_summary: PartitionSummary {
-                key: partition_key.into(),
-                tables: vec![table],
-            },
-            metadata_path,
-            data_path,
-            store,
-            chunk: partition,
-            status: Mutex::new(status),
-        }
-    }
-
-    fn mark_table_running(&self) {
-        let mut status = self.status.lock();
-        if status.table_state == TableState::NotStarted {
-            status.table_state = TableState::Running;
-        }
-    }
-
-    fn mark_table_finished(&self) {
-        let mut status = self.status.lock();
-        status.table_state = TableState::Finished;
-    }
-
-    fn mark_meta_written(&self) {
-        let mut status = self.status.lock();
-        status.meta_written = true;
-    }
-
-    pub fn finished(&self) -> bool {
-        let status = self.status.lock();
-
-        matches!(status.table_state, TableState::Finished)
-    }
-
-    fn should_stop(&self) -> bool {
-        let status = self.status.lock();
-        status.stop_on_next_update
-    }
-
-    async fn run(&self, notify: Option<oneshot::Sender<()>>) -> Result<()> {
-        self.mark_table_running();
-
-        // get all the data in this chunk:
-        let table_name = self.partition_summary.tables[0].name.as_ref();
-
-        let stream = self
-            .chunk
-            .read_filter(table_name, &EMPTY_PREDICATE, Selection::All)
-            .map_err(|e| Box::new(e) as _)
-            .context(PartitionError)?;
-
-        let schema = stream.schema();
-
-        let mut location = self.data_path.clone();
-        let file_name = format!("{}.parquet", table_name);
-        location.set_file_name(&file_name);
-        let data = parquet_file::storage::Storage::parquet_stream_to_bytes(stream, schema)
-            .await
-            .context(ParquetStreamToByte)?;
-        self.write_to_object_store(data, &location).await?;
-        self.mark_table_finished();
-
-        if self.should_stop() {
-            return StoppedEarly.fail();
-        }
-
-        let mut partition_meta_path = self.metadata_path.clone();
-        let key = format!("{}.json", &self.partition_summary.key);
-        partition_meta_path.set_file_name(&key);
-        let json_data = serde_json::to_vec(&self.partition_summary).context(JsonGenerationError)?;
-        let data = Bytes::from(json_data);
-        let len = data.len();
-        let stream_data = std::io::Result::Ok(data);
-        self.store
-            .put(
-                &partition_meta_path,
-                futures::stream::once(async move { stream_data }),
-                Some(len),
-            )
-            .await
-            .context(WritingToObjectStore)?;
-
-        self.mark_meta_written();
-
-        if let Some(notify) = notify {
-            if let Err(e) = notify.send(()) {
-                error!("error sending notify: {:?}", e);
-            }
-        }
-
-        Ok(())
-    }
-    async fn write_to_object_store(
-        &self,
-        data: Vec<u8>,
-        file_name: &object_store::path::Path,
-    ) -> Result<()> {
-        let len = data.len();
-        let data = Bytes::from(data);
-        let stream_data = Result::Ok(data);
-
-        self.store
-            .put(
-                &file_name,
-                futures::stream::once(async move { stream_data }),
-                Some(len),
-            )
-            .await
-            .context(WritingToObjectStore)
-    }
-
-    fn set_error(&self, e: Error) {
-        let mut status = self.status.lock();
-        status.error = Some(e);
-    }
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub enum TableState {
-    NotStarted,
-    Running,
-    Finished,
-}
-
-#[derive(Debug)]
-pub struct Status {
-    table_state: TableState,
-    meta_written: bool,
-    stop_on_next_update: bool,
-    error: Option<Error>,
-}
-
-impl Status {
-    fn new(table_state: TableState) -> Self {
-        Self {
-            table_state,
-            meta_written: false,
-            stop_on_next_update: false,
-            error: None,
-        }
-    }
-}
-
-pub fn snapshot_chunk<T>(
-    metadata_path: object_store::path::Path,
-    data_path: object_store::path::Path,
-    store: Arc<ObjectStore>,
-    partition_key: &str,
-    chunk: Arc<T>,
-    table_stats: TableSummary,
-    notify: Option<oneshot::Sender<()>>,
-) -> Result<Arc<Snapshot<T>>>
-where
-    T: Send + Sync + 'static + PartitionChunk,
-{
-    let snapshot = Snapshot::new(
-        partition_key.to_string(),
-        metadata_path,
-        data_path,
-        store,
-        chunk,
-        table_stats,
-    );
-    let snapshot = Arc::new(snapshot);
-
-    let return_snapshot = Arc::clone(&snapshot);
-
-    tokio::spawn(async move {
-        info!(
-            "starting snapshot of {} to {}",
-            &snapshot.partition_summary.key,
-            &snapshot.data_path.display()
-        );
-        if let Err(e) = snapshot.run(notify).await {
-            error!("error running snapshot: {:?}", e);
-            snapshot.set_error(e);
-        }
-    });
-
-    Ok(return_snapshot)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{db::test_helpers::write_lp, query_tests::utils::TestDb};
-    use futures::TryStreamExt;
-    use object_store::memory::InMemory;
-    use query::{predicate::Predicate, Database};
-
-    #[tokio::test]
-    async fn snapshot() {
-        let lp = r#"
-            cpu,host=A,region=west user=23.2,system=55.1 1
-            cpu,host=A,region=west user=3.2,system=50.1 10
-            cpu,host=B,region=east user=10.0,system=74.1 1
-        "#;
-
-        let db = TestDb::builder()
-            .object_store(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
-            .build()
-            .await
-            .db;
-        write_lp(&db, &lp);
-
-        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
-        let (tx, rx) = tokio::sync::oneshot::channel();
-        let mut metadata_path = store.new_path();
-        metadata_path.push_dir("meta");
-
-        let mut data_path = store.new_path();
-        data_path.push_dir("data");
-
-        let chunk = Arc::clone(&db.chunks(&Predicate::default())[0]);
-        let table_summary = db
-            .table_summary("1970-01-01T00", "cpu", chunk.id())
-            .unwrap();
-
-        let snapshot = snapshot_chunk(
-            metadata_path.clone(),
-            data_path,
-            Arc::clone(&store),
-            "testaroo",
-            chunk,
-            table_summary,
-            Some(tx),
-        )
-        .unwrap();
-
-        rx.await.unwrap();
-
-        let mut location = metadata_path;
-        location.set_file_name("testaroo.json");
-
-        let summary = store
-            .get(&location)
-            .await
-            .unwrap()
-            .map_ok(|b| bytes::BytesMut::from(&b[..]))
-            .try_concat()
-            .await
-            .unwrap();
-
-        let meta: PartitionSummary = serde_json::from_slice(&*summary).unwrap();
-        assert_eq!(meta, snapshot.partition_summary);
-        assert!(snapshot.finished());
-    }
-}
@@ -397,6 +397,15 @@ Possible values (case insensitive):
     /// environments.
     #[structopt(long = "--azure-storage-access-key", env = "AZURE_STORAGE_ACCESS_KEY")]
    pub azure_storage_access_key: Option<String>,
+
+    /// When IOx nodes need to talk to remote peers they consult an internal remote address
+    /// mapping. This mapping is populated via API calls. If the mapping doesn't produce
+    /// a result, this config entry allows to generate a hostname from a template:
+    /// occurrences of the "{id}" substring will be replaced with the remote Server ID.
+    ///
+    /// Example: http://node-{id}.ioxmydomain.com:8082
+    #[structopt(long = "--remote-template", env = "INFLUXDB_IOX_REMOTE_TEMPLATE")]
+    pub remote_template: Option<String>,
 }
 
 pub async fn command(config: Config) -> Result<()> {
@@ -7,7 +7,7 @@ use object_store::{
 use observability_deps::tracing::{self, error, info, warn, Instrument};
 use panic_logging::SendPanicsToTracing;
 use server::{
-    ConnectionManagerImpl as ConnectionManager, Server as AppServer,
+    ConnectionManagerImpl as ConnectionManager, RemoteTemplate, Server as AppServer,
     ServerConfig as AppServerConfig,
 };
 use snafu::{ResultExt, Snafu};
@@ -123,7 +123,8 @@ pub async fn main(config: Config) -> Result<()> {
     let object_store = ObjectStore::try_from(&config)?;
     let object_storage = Arc::new(object_store);
     let metric_registry = Arc::new(metrics::MetricRegistry::new());
-    let server_config = AppServerConfig::new(object_storage, metric_registry);
+    let remote_template = config.remote_template.map(RemoteTemplate::new);
+    let server_config = AppServerConfig::new(object_storage, metric_registry, remote_template);
 
     let server_config = if let Some(n) = config.num_worker_threads {
         info!(
@@ -18,8 +18,7 @@ use data_types::{
 };
 use influxdb_iox_client::format::QueryOutputFormat;
 use influxdb_line_protocol::parse_lines;
-use object_store::ObjectStoreApi;
-use query::{Database, PartitionChunk};
+use query::Database;
 use server::{ConnectionManager, Server as AppServer};
 
 // External crates
@@ -361,7 +360,6 @@ where
         .get("/metrics", handle_metrics::<M>)
         .get("/iox/api/v1/databases/:name/query", query::<M>)
         .get("/api/v1/partitions", list_partitions::<M>)
-        .post("/api/v1/snapshot", snapshot_partition::<M>)
         .get("/debug/pprof", pprof_home::<M>)
         .get("/debug/pprof/profile", pprof_profile::<M>)
         // Specify the error handler to handle any errors caused by
@@ -737,78 +735,6 @@ struct SnapshotInfo {
     table_name: String,
 }
 
-#[tracing::instrument(level = "debug")]
-async fn snapshot_partition<M: ConnectionManager + Send + Sync + Debug + 'static>(
-    req: Request<Body>,
-) -> Result<Response<Body>, ApplicationError> {
-    use object_store::path::ObjectStorePath;
-
-    let path = req.uri().path().to_string();
-    let server = Arc::clone(&req.data::<Arc<AppServer<M>>>().expect("server state"));
-    // TODO - catch error conditions
-    let obs = server.metrics.http_requests.observation();
-    let query = req.uri().query().context(ExpectedQueryString {})?;
-
-    let snapshot: SnapshotInfo = serde_urlencoded::from_str(query).context(InvalidQueryString {
-        query_string: query,
-    })?;
-
-    let db_name =
-        org_and_bucket_to_database(&snapshot.org, &snapshot.bucket).context(BucketMappingError)?;
-
-    let metric_kv = vec![
-        KeyValue::new("db_name", db_name.to_string()),
-        KeyValue::new("path", path),
-    ];
-
-    // TODO: refactor the rest of this out of the http route and into the server
-    // crate.
-    let db = server.db(&db_name).context(BucketNotFound {
-        org: &snapshot.org,
-        bucket: &snapshot.bucket,
-    })?;
-
-    let store = Arc::clone(&server.store);
-
-    let mut metadata_path = store.new_path();
-    metadata_path.push_dir(&db_name.to_string());
-    let mut data_path = metadata_path.clone();
-    metadata_path.push_dir("meta");
-    data_path.push_all_dirs(&["data", &snapshot.partition]);
-
-    let partition_key = &snapshot.partition;
-    let table_name = &snapshot.table_name;
-    if let Some(chunk) = db
-        .rollover_partition(partition_key, table_name)
-        .await
-        .unwrap()
-    {
-        let table_stats = db
-            .table_summary(partition_key, table_name, chunk.id())
-            .unwrap();
-        let snapshot = server::snapshot::snapshot_chunk(
-            metadata_path,
-            data_path,
-            store,
-            partition_key,
-            chunk,
-            table_stats,
-            None,
-        )
-        .unwrap();
-
-        obs.ok_with_labels(&metric_kv);
-        let ret = format!("{}", snapshot.id);
-        Ok(Response::new(Body::from(ret)))
-    } else {
-        Err(ApplicationError::NoSnapshot {
-            db_name: db_name.to_string(),
-            partition: partition_key.to_string(),
-            table_name: table_name.to_string(),
-        })
-    }
-}
-
 #[tracing::instrument(level = "debug")]
 async fn pprof_home<M: ConnectionManager + Send + Sync + Debug + 'static>(
     req: Request<Body>,
@@ -923,7 +849,6 @@ mod tests {
     use std::{
         convert::TryFrom,
         net::{IpAddr, Ipv4Addr, SocketAddr},
-        num::NonZeroU32,
     };
 
     use arrow::record_batch::RecordBatch;
@@ -943,6 +868,7 @@ mod tests {
             AppServerConfig::new(
                 Arc::new(ObjectStore::new_in_memory(InMemory::new())),
                 registry,
+                None,
             )
             .with_num_worker_threads(1),
         )
@@ -1320,53 +1246,6 @@ mod tests {
             .await;
     }
 
-    #[tokio::test]
-    async fn test_snapshot() {
-        let (_, config) = config();
-        let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config));
-        app_server
-            .set_id(ServerId::new(NonZeroU32::new(1).unwrap()))
-            .unwrap();
-        app_server
-            .create_database(
-                DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
-                app_server.require_id().unwrap(),
-            )
-            .await
-            .unwrap();
-        let server_url = test_server(Arc::clone(&app_server));
-
-        let client = Client::new();
-
-        let lp_data = "h2o_temperature,location=santa_monica,state=CA surface_degrees=65.2,bottom_degrees=50.4 1617286224000000000";
-
-        // send write data
-        let bucket_name = "MyBucket";
-        let org_name = "MyOrg";
-        let response = client
-            .post(&format!(
-                "{}/api/v2/write?bucket={}&org={}",
-                server_url, bucket_name, org_name
-            ))
-            .body(lp_data)
-            .send()
-            .await;
-
-        check_response("write", response, StatusCode::NO_CONTENT, Some("")).await;
-
-        // issue first snapshot => OK
-        let url = format!(
-            "{}/api/v1/snapshot?bucket={}&org={}&partition=&table_name=h2o_temperature",
-            server_url, bucket_name, org_name
-        );
-        let response = client.post(&url).body(lp_data).send().await;
-        check_response("snapshot", response, StatusCode::OK, None).await;
-
-        // second snapshot results in "not modified"
-        let response = client.post(&url).body(lp_data).send().await;
-        check_response("snapshot", response, StatusCode::NOT_MODIFIED, None).await;
-    }
-
     fn get_content_type(response: &Result<Response, reqwest::Error>) -> String {
         if let Ok(response) = response {
             response