Merge branch 'main' into cn/read-buffer-cache

pull/24376/head
kodiakhq[bot] 2022-06-06 12:52:48 +00:00 committed by GitHub
commit 412309e7b1
17 changed files with 132 additions and 187 deletions

View File

@ -8,7 +8,7 @@ description = "Re-exports datafusion at a specific version"
[dependencies] [dependencies]
# Rename to workaround doctest bug # Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypo functions or avro) # Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="8ddd99c8432fdac2c236040973f984a4146f18b7", default-features = false, package = "datafusion" } upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="8ddd99c8432fdac2c236040973f984a4146f18b7", default-features = false, package = "datafusion" }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="8ddd99c8432fdac2c236040973f984a4146f18b7" } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="8ddd99c8432fdac2c236040973f984a4146f18b7" }
workspace-hack = { path = "../workspace-hack"} workspace-hack = { path = "../workspace-hack"}

View File

@ -1,8 +1,9 @@
# InfluxDB IOx Documentation # InfluxDB IOx Documentation
Please see the main [README](../README.md) for user facing documentation.
This directory contains internal design documentation of potential This directory contains internal design documentation of potential
interest for those who wish to understand how the code works. It is interest for those who wish to understand how the code works.
not intended to be general user facing documentation
## IOx Tech Talks ## IOx Tech Talks

View File

@ -14,21 +14,21 @@ Some examples
```bash ```bash
# Default verbosity # Default verbosity
$ ./influxdb_iox run database $ ./influxdb_iox run all-in-one
# More verbose # More verbose
$ ./influxdb_iox run database -v $ ./influxdb_iox run all-in-one -v
# Even more verbose # Even more verbose
$ ./influxdb_iox run database -vv $ ./influxdb_iox run all-in-one -vv
# Everything!! # Everything!!
$ ./influxdb_iox run database --log-filter trace $ ./influxdb_iox run all-in-one --log-filter trace
# Default info, but debug within http module # Default info, but debug within http module
$ ./influxdb_iox run database --log-filter info,influxdb_iox::influxdb_ioxd::http=debug $ ./influxdb_iox run all-in-one --log-filter info,influxdb_iox::influxdb_ioxd::http=debug
``` ```
Additionally, the output format can be controlled with `--log-format` Additionally, the output format can be controlled with `--log-format`
```bash ```bash
$ ./influxdb_iox run database --log-filter debug --log-format logfmt $ ./influxdb_iox run all-in-one --log-filter debug --log-format logfmt
``` ```
## Developer Guide ## Developer Guide
@ -69,7 +69,7 @@ will strip out all trace level callsites from the release binary.
### Format ### Format
IOx supports logging in many formats. For a list run `influxdb_iox run database --help` and view the help output IOx supports logging in many formats. For a list run `influxdb_iox run --help` and view the help output
for `--log-format`. for `--log-format`.
<sup>1.</sup> This span propagation uses thread-local storage and therefore does not automatically carry across <sup>1.</sup> This span propagation uses thread-local storage and therefore does not automatically carry across
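To make the footnote concrete, here is a minimal sketch using the `tracing` crate directly (general `tracing` usage, not code from this repository; requires the `tracing` and `tracing-subscriber` crates): a span lives in thread-local storage, so a spawned thread only logs inside it if the span is cloned, moved in, and entered explicitly.

```rust
use tracing::{info, info_span};

fn main() {
    // Simple stdout subscriber so the log lines are visible.
    tracing_subscriber::fmt().init();

    let span = info_span!("request", id = 42);
    let _guard = span.enter();
    info!("logged inside the span on the original thread");

    // The spawned thread does NOT inherit the span automatically; it must be
    // cloned, moved in, and entered by hand.
    let worker_span = span.clone();
    std::thread::spawn(move || {
        let _guard = worker_span.enter();
        info!("span carried across the thread boundary explicitly");
    })
    .join()
    .unwrap();
}
```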

View File

@ -6,7 +6,7 @@ Here are useful metrics
### Requests to IOx Server including Routers and Query Servers ### Requests to IOx Server including Routers and Query Servers
| Metric name | Code Name | Description | | Metric name | Code Name | Description |
| --- | --- | --- | | --- | --- | --- |
| http_requests_total | http_requests | Total number of HTTP requests | | http_requests_total | http_requests | Total number of HTTP requests |
| gRPC_requests_total | requests | Total number of gRPC requests | | gRPC_requests_total | requests | Total number of gRPC requests |
| http_request_duration_seconds| ? | Time to finish a request | | http_request_duration_seconds| ? | Time to finish a request |
@ -17,7 +17,7 @@ Here are useful metrics
### Line Protocol Data ingested into Routers ### Line Protocol Data ingested into Routers
| Metric name | Code Name | Description | | Metric name | Code Name | Description |
| --- | --- | --- | | --- | --- | --- |
| ingest_points_total | ingest_lines_total | Total number of lines ingested | | ingest_points_total | ingest_lines_total | Total number of lines ingested |
| ingest_fields_total | ingest_fields_total | Total number of fields (columns) ingested | | ingest_fields_total | ingest_fields_total | Total number of fields (columns) ingested |
| ingest_points_bytes_total | ingest_points_bytes_total | Total number of bytes ingested | | ingest_points_bytes_total | ingest_points_bytes_total | Total number of bytes ingested |
@ -26,9 +26,9 @@ Here are useful metrics
### Chunks ### Chunks
| Metric name | Code Name | Description | | Metric name | Code Name | Description |
| --- | --- | --- | | --- | --- | --- |
| catalog_chunks_mem_usage_bytes | memory_metrics | Total memory usage by chunks (MUB, RUB, OS statistics) | | catalog_chunks_mem_usage_bytes | memory_metrics | Total memory usage by chunks |
| catalog_loaded_chunks | chunk_storage | Total number of chunks (MUB, RUB, RUBandOS) for each table | | catalog_loaded_chunks | chunk_storage | Total number of chunks for each table |
| catalog_loaded_rows | row_count | Total number of rows (MUB, RUB, RUBandOS) for each table | | catalog_loaded_rows | row_count | Total number of rows for each table |
| catalog_lock_total | ? | ? | | catalog_lock_total | ? | ? |
| catalog_lock_wait_seconds_total | ? | ? | | catalog_lock_wait_seconds_total | ? | ? |
| ? | partition_lock_tracker | ? | | ? | partition_lock_tracker | ? |
@ -55,7 +55,7 @@ Here are useful metrics
| Metric name | Code Name | Description | | Metric name | Code Name | Description |
| --- | --- | --- | | --- | --- | --- |
| write_buffer_ingest_requests_total | red | Total number of write requests | | write_buffer_ingest_requests_total | red | Total number of write requests |
| write_buffer_read_bytes_total | bytes_read | Total number of write requested bytes | | write_buffer_read_bytes_total | bytes_read | Total number of write requested bytes |
| write_buffer_last_sequence_number | last_sequence_number | sequence number of last write request | | write_buffer_last_sequence_number | last_sequence_number | sequence number of last write request |
| write_buffer_sequence_number_lag | sequence_number_lag | The difference between the last sequence number available (e.g. Kafka offset) and (= minus) the last consumed sequence number | | write_buffer_sequence_number_lag | sequence_number_lag | The difference between the last sequence number available (e.g. Kafka offset) and (= minus) the last consumed sequence number |
| write_buffer_last_min_ts | last_min_ts | Minimum timestamp of last write as unix timestamp in nanoseconds | | write_buffer_last_min_ts | last_min_ts | Minimum timestamp of last write as unix timestamp in nanoseconds |
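As a concrete reading of the lag row above (an illustrative sketch, not code from this repository), the metric is simply the last available sequence number minus the last consumed one:

```rust
/// Illustrative only: how the `write_buffer_sequence_number_lag` value described
/// above is derived from the two sequence-number readings.
fn sequence_number_lag(last_available: u64, last_consumed: u64) -> u64 {
    // Saturating subtraction guards against a consumer that briefly reports a
    // sequence number ahead of the last known available one.
    last_available.saturating_sub(last_consumed)
}

fn main() {
    // e.g. Kafka high watermark 1_000, last consumed offset 940 => lag of 60
    println!("lag = {}", sequence_number_lag(1_000, 940));
}
```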

View File

@ -42,74 +42,6 @@ You can also see more logging using the `LOG_FILTER` variable. For example:
LOG_FILTER=debug,sqlx=warn,h2=warn TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgresql://localhost:5432/alamb cargo test --test end_to_end LOG_FILTER=debug,sqlx=warn,h2=warn TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgresql://localhost:5432/alamb cargo test --test end_to_end
``` ```
## Running the IOx server from source
### Starting the server
You can run IOx locally with a command like this (replacing `--data-dir` with your preferred location)
```shell
cargo run -- run -v --object-store=file --data-dir=$HOME/.influxdb_iox --server-id=42
```
### Loading data
In another terminal window, try loading some data. These commands will create a database called `parquet_db` and load the contents of `tests/fixtures/lineproto/metrics.lp` into it
```shell
cd influxdb_iox
./target/debug/influxdb_iox database create parquet_db
./target/debug/influxdb_iox database write parquet_db tests/fixtures/lineproto/metrics.lp
```
### Editing configuration
You can interactively edit the configuration of the IOx instance with a command like this:
```shell
./scripts/edit_db_rules localhost:8082 parquet_db
```
Which will bring up your editor with a file that looks like this. Any changes you make to the file will be sent to IOx as its new config.
In this case, these settings will cause data to be persisted to parquet almost immediately
```json
{
"rules": {
"name": "parquet_db",
"partitionTemplate": {
"parts": [
{
"time": "%Y-%m-%d %H:00:00"
}
]
},
"lifecycleRules": {
"bufferSizeSoft": "52428800",
"bufferSizeHard": "104857600",
"dropNonPersisted": true,
"immutable": false,
"persist": true,
"workerBackoffMillis": "1000",
"catalogTransactionsUntilCheckpoint": "100",
"lateArriveWindowSeconds": 1,
"persistRowThreshold": "1",
"persistAgeThresholdSeconds": 1,
"mubRowThreshold": "1",
"parquetCacheLimit": "0",
"maxActiveCompactionsCpuFraction": 1
},
"workerCleanupAvgSleep": "500s"
}
}
```
### Examining Parquet Files
You can use tools such as `parquet-tools` to examine the parquet files created by IOx. For example, the following command will show the contents of the `disk` table when persisted as parquet (note the actual filename will be different):
```shell
parquet-tools meta /Users/alamb/.influxdb_iox/42/parquet_db/data/disk/2020-06-11\ 16\:00\:00/1.4b1a7805-d6de-495e-844b-32fa452147c7.parquet
```
## Object storage ## Object storage
### To run the tests or not run the tests ### To run the tests or not run the tests

View File

@ -68,7 +68,7 @@ The simplest way to use the massif output is to use [massif-visualizer]:
![massif-visualizer screenshot](./images/screenshot_massif_visualizer.jpeg) ![massif-visualizer screenshot](./images/screenshot_massif_visualizer.jpeg)
[heappy]: https://github.com/mkmik/heappy [heappy]: https://github.com/mkmik/heappy
[jemalloc]: ttps://github.com/jemalloc/jemalloc [jemalloc]: https://github.com/jemalloc/jemalloc
[lazycell]: https://crates.io/crates/lazycell [lazycell]: https://crates.io/crates/lazycell
[Massif]: https://valgrind.org/docs/manual/ms-manual.html [Massif]: https://valgrind.org/docs/manual/ms-manual.html
[massif-visualizer]: https://github.com/KDE/massif-visualizer [massif-visualizer]: https://github.com/KDE/massif-visualizer

View File

@ -77,7 +77,7 @@ pub fn agent_pre_generated(c: &mut Criterion) {
let spec: DataSpec = toml::from_str(r#" let spec: DataSpec = toml::from_str(r#"
name = "storage_cardinality_example" name = "storage_cardinality_example"
# Values are automatically generated before the agents are intialized. They generate tag key/value pairs # Values are automatically generated before the agents are initialized. They generate tag key/value pairs
# with the name of the value as the tag key and the evaluated template as the value. These pairs # with the name of the value as the tag key and the evaluated template as the value. These pairs
# are Arc wrapped so they can be shared across tagsets and used in the agents as pre-generated data. # are Arc wrapped so they can be shared across tagsets and used in the agents as pre-generated data.
[[values]] [[values]]
@ -122,7 +122,7 @@ template = "{{id}}"
cardinality = 10 cardinality = 10
# makes a tagset so every bucket appears in every partition. The other tags are descriptive and don't # makes a tagset so every bucket appears in every partition. The other tags are descriptive and don't
# increase the cardiality beyond count(bucket) * count(partition). Later this example will use the # increase the cardinality beyond count(bucket) * count(partition). Later this example will use the
# agent and measurement generation to take this base tagset and increase cardinality on a per-agent basis. # agent and measurement generation to take this base tagset and increase cardinality on a per-agent basis.
[[tag_sets]] [[tag_sets]]
name = "bucket_set" name = "bucket_set"

View File

@ -137,7 +137,7 @@ f64_range = [0.0, 1.0]
[[database_writers]] [[database_writers]]
agents = [ agents = [
{name = "high", sampling_interval = "10s", count = 10}, # 5,000 meassurmeents {name = "high", sampling_interval = "10s", count = 10}, # 5,000 measurements
{name = "medium", sampling_interval = "10s", count = 20}, # 20,000 measurements {name = "medium", sampling_interval = "10s", count = 20}, # 20,000 measurements
{name = "low", sampling_interval = "10s", count = 20} # 200,000 measurements {name = "low", sampling_interval = "10s", count = 20} # 200,000 measurements
] ]

View File

@ -84,7 +84,7 @@ name = "mem"
i64_range = [0, 10000000] i64_range = [0, 10000000]
[[agents.measurements.fields]] [[agents.measurements.fields]]
name = "avaiable_percent" name = "available_percent"
f64_range = [0.0, 100.0] f64_range = [0.0, 100.0]
[[agents.measurements.fields]] [[agents.measurements.fields]]

View File

@ -33,7 +33,7 @@
name = "full_example" name = "full_example"
# Values are automatically generated before the agents are intialized. They generate tag key/value pairs # Values are automatically generated before the agents are initialized. They generate tag key/value pairs
# with the name of the value as the tag key and the evaluated template as the value. These pairs # with the name of the value as the tag key and the evaluated template as the value. These pairs
# can be shared across tagsets and used in the agents as pre-generated data. # can be shared across tagsets and used in the agents as pre-generated data.
[[values]] [[values]]
@ -55,7 +55,7 @@ template = "foo_{{guid}}_{{id}}_{{random 5}}_{{format-time \"%Y-%m-%d\"}}"
name = "t1" name = "t1"
template = "t1_{{id}}" template = "t1_{{id}}"
cardinality = 3 cardinality = 3
# each t1 genereated will reference one of t3 and one of foo_bar. As each t1 is generated # each t1 generated will reference one of t3 and one of foo_bar. As each t1 is generated
# it will loop through the t3 and foo_bar collections. So the 3rd t1 that is generated will # it will loop through the t3 and foo_bar collections. So the 3rd t1 that is generated will
# reference the first t3 and foo_bar # reference the first t3 and foo_bar
has_one = ["t3", "foo_bar"] has_one = ["t3", "foo_bar"]
@ -85,7 +85,7 @@ name = "example"
# for_each specifies how to iterate through the values to generate tagsets. If you want to # for_each specifies how to iterate through the values to generate tagsets. If you want to
# use values that belong_to others or are a has_one, specify their parent first. For values # use values that belong_to others or are a has_one, specify their parent first. For values
# without relationships, you'll get a combined cardinality of each multiplied by the other. # without relationships, you'll get a combined cardinality of each multiplied by the other.
# In this eaxmple we get cardinality of card(t1) * card(foo_bar) * card(other). The has_one # In this example we get cardinality of card(t1) * card(foo_bar) * card(other). The has_one
# members of t1 don't increase cardinality. # members of t1 don't increase cardinality.
for_each = [ for_each = [
"t1", "t1",

View File

@ -1,6 +1,6 @@
name = "storage_cardinality_example" name = "storage_cardinality_example"
# Values are automatically generated before the agents are intialized. They generate tag key/value pairs # Values are automatically generated before the agents are initialized. They generate tag key/value pairs
# with the name of the value as the tag key and the evaluated template as the value. These pairs # with the name of the value as the tag key and the evaluated template as the value. These pairs
# are Arc wrapped so they can be shared across tagsets and used in the agents as pre-generated data. # are Arc wrapped so they can be shared across tagsets and used in the agents as pre-generated data.
[[values]] [[values]]
@ -45,7 +45,7 @@ template = "{{id}}"
cardinality = 10 cardinality = 10
# makes a tagset so every bucket appears in every partition. The other tags are descriptive and don't # makes a tagset so every bucket appears in every partition. The other tags are descriptive and don't
# increase the cardiality beyond count(bucket) * count(partition). Later this example will use the # increase the cardinality beyond count(bucket) * count(partition). Later this example will use the
# agent and measurement generation to take this base tagset and increase cardinality on a per-agent basis. # agent and measurement generation to take this base tagset and increase cardinality on a per-agent basis.
[[tag_sets]] [[tag_sets]]
name = "bucket_set" name = "bucket_set"

View File

@ -241,7 +241,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
selection: Selection<'_>, selection: Selection<'_>,
) -> Result<SendableRecordBatchStream, QueryChunkError>; ) -> Result<SendableRecordBatchStream, QueryChunkError>;
/// Returns chunk type which is either MUB, RUB, OS /// Returns chunk type. Useful in tests and debug logs.
fn chunk_type(&self) -> &str; fn chunk_type(&self) -> &str;
/// Order of this chunk relative to other overlapping chunks. /// Order of this chunk relative to other overlapping chunks.

View File

@ -932,17 +932,19 @@ impl Deduplicater {
/// Return a sort plan for a given chunk /// Return a sort plan for a given chunk
/// This plan is applied for every chunk to read data from chunk /// This plan is applied for every chunk to read data from chunk
/// The plan will look like this. Reading bottom up: /// The plan will look like this. Reading bottom up:
/// 1. First we scan the data in IOxReadFilterNode which represents ///
/// a custom implemented scan of MUB, RUB, OS. Both Select Predicate of /// 1. First we scan the data in IOxReadFilterNode which represents a custom implemented scan
/// the query and Delete Predicates of the chunk is pushed down /// of the chunk. Both Select Predicate of the query and Delete Predicates of the chunk is
/// here to eliminate as much data as early as possible but it is not guaranteed /// pushed down here to eliminate as much data as early as possible but it is not
/// all filters are applied because only certain expressions work /// guaranteed all filters are applied because only certain expressions work at this low
/// at this low chunk scan level. /// chunk scan level. Delete Predicates are tombstone of deleted data that will be
/// Delete Predicates are tombstone of deleted data that will be eliminated at read time. /// eliminated at read time.
/// 2. If the chunk has Delete Predicates, the FilterExec will be added to filter data out /// 2. If the chunk has Delete Predicates, the FilterExec will be added to filter data out.
/// We apply delete predicate filter at this low level because the Delete Predicates are chunk specific. /// We apply delete predicate filter at this low level because the Delete Predicates are
/// 3. Then SortExec is added if there is a request to sort this chunk at this stage /// chunk specific.
/// See the description of function build_scan_plan to see why the sort may be needed /// 3. Then SortExec is added if there is a request to sort this chunk at this stage.
/// See the description of function build_scan_plan to see why the sort may be needed.
///
/// ```text /// ```text
/// ┌─────────────────┐ /// ┌─────────────────┐
/// │ ProjectionExec │ /// │ ProjectionExec │
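The (truncated) diagram above sits on top of the three steps just described. A conceptual sketch of that scan → filter → sort ordering, using placeholder plan nodes rather than the real `IOxReadFilterNode`/`FilterExec`/`SortExec` operators:

```rust
/// Placeholder plan nodes; not the real DataFusion execution plan types.
#[derive(Debug)]
enum PlanNode {
    Scan { chunk: &'static str },
    Filter { input: Box<PlanNode>, delete_predicates: usize },
    Sort { input: Box<PlanNode>, sort_key: Vec<&'static str> },
}

fn build_chunk_plan(delete_predicates: usize, needs_sort: bool) -> PlanNode {
    // 1. scan the chunk (select + delete predicates pushed down where possible)
    let mut plan = PlanNode::Scan { chunk: "chunk-1" };
    // 2. add a filter stage if the chunk carries delete predicates
    if delete_predicates > 0 {
        plan = PlanNode::Filter { input: Box::new(plan), delete_predicates };
    }
    // 3. add a sort stage if this chunk must be sorted for deduplication
    if needs_sort {
        plan = PlanNode::Sort { input: Box::new(plan), sort_key: vec!["tag1", "time"] };
    }
    plan
}

fn main() {
    println!("{:#?}", build_chunk_plan(1, true));
}
```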

View File

@ -13,8 +13,8 @@ use data_types::{DeleteExpr, DeletePredicate, Op, Scalar, TimestampRange};
// when they happen // when they happen
#[derive(Debug)] #[derive(Debug)]
/// Setup for delete query test with one table and one chunk moved from MUB to RUB to OS /// Setup for delete query test with one table and one chunk. All data will be soft deleted in this
/// All data will be soft deleted in this setup /// setup.
pub struct OneDeleteSimpleExprOneChunkDeleteAll {} pub struct OneDeleteSimpleExprOneChunkDeleteAll {}
#[async_trait] #[async_trait]
impl DbSetup for OneDeleteSimpleExprOneChunkDeleteAll { impl DbSetup for OneDeleteSimpleExprOneChunkDeleteAll {
@ -31,13 +31,12 @@ impl DbSetup for OneDeleteSimpleExprOneChunkDeleteAll {
exprs: vec![], exprs: vec![],
}; };
// this returns 15 scenarios
all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await
} }
} }
#[derive(Debug)] #[derive(Debug)]
/// Setup for delete query test with one table and one chunk moved from MUB to RUB to OS /// Setup for delete query test with one table and one chunk
pub struct OneDeleteSimpleExprOneChunk {} pub struct OneDeleteSimpleExprOneChunk {}
#[async_trait] #[async_trait]
impl DbSetup for OneDeleteSimpleExprOneChunk { impl DbSetup for OneDeleteSimpleExprOneChunk {
@ -58,14 +57,12 @@ impl DbSetup for OneDeleteSimpleExprOneChunk {
)], )],
}; };
// this returns 15 scenarios
all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await
} }
} }
#[derive(Debug)] #[derive(Debug)]
/// Setup for many scenario move chunk from from MUB to RUB to OS /// Setup for many scenarios moving the chunk to different stages. No delete in this case.
/// No delete in this case
pub struct NoDeleteOneChunk {} pub struct NoDeleteOneChunk {}
#[async_trait] #[async_trait]
impl DbSetup for NoDeleteOneChunk { impl DbSetup for NoDeleteOneChunk {
@ -80,13 +77,12 @@ impl DbSetup for NoDeleteOneChunk {
"cpu,foo=me bar=1 40", "cpu,foo=me bar=1 40",
]; ];
// this returns 15 scenarios
all_scenarios_for_one_chunk(vec![], vec![], lp_lines, table_name, partition_key).await all_scenarios_for_one_chunk(vec![], vec![], lp_lines, table_name, partition_key).await
} }
} }
#[derive(Debug)] #[derive(Debug)]
/// Setup for multi-expression delete query test with one table and one chunk moved from MUB to RUB to OS /// Setup for multi-expression delete query test with one table and one chunk
pub struct OneDeleteMultiExprsOneChunk {} pub struct OneDeleteMultiExprsOneChunk {}
#[async_trait] #[async_trait]
impl DbSetup for OneDeleteMultiExprsOneChunk { impl DbSetup for OneDeleteMultiExprsOneChunk {
@ -109,20 +105,19 @@ impl DbSetup for OneDeleteMultiExprsOneChunk {
], ],
}; };
// this returns 15 scenarios
all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await
} }
} }
#[derive(Debug)] #[derive(Debug)]
/// Setup for multi-expression delete query test with one table and one chunk moved from MUB to RUB to OS /// Setup for multi-expression delete query test with one table and one chunk. Two deletes at
/// Two deletes at different chunk stages /// different chunk stages.
pub struct TwoDeletesMultiExprsOneChunk {} pub struct TwoDeletesMultiExprsOneChunk {}
#[async_trait] #[async_trait]
impl DbSetup for TwoDeletesMultiExprsOneChunk { impl DbSetup for TwoDeletesMultiExprsOneChunk {
async fn make(&self) -> Vec<DbScenario> { async fn make(&self) -> Vec<DbScenario> {
// The main purpose of these scenarios is the multi-expression delete predicate is added in MUB and // The main purpose of these scenarios is the multi-expression delete predicate is added in
// is moved with chunk moving. Then one more delete after moving // the ingester and is moved with chunk moving. Then one more delete after moving.
// General setup for all scenarios // General setup for all scenarios
let partition_key = "1970-01-01T00"; let partition_key = "1970-01-01T00";

View File

@ -40,7 +40,6 @@ impl DbSetup for OneMeasurementRealisticTimes {
"cpu,region=west user=21.0 1626809430000000000", "cpu,region=west user=21.0 1626809430000000000",
]; ];
// return all possible scenarios a chunk: MUB open, MUB frozen, RUB, RUB & OS, OS
all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu", partition_key).await all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu", partition_key).await
} }
} }
@ -59,7 +58,6 @@ impl DbSetup for OneMeasurementNoTags {
"h2o level=200.0 300", "h2o level=200.0 300",
]; ];
// return all possible scenarios a chunk: MUB open, MUB frozen, RUB, RUB & OS, OS
all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "h2o", partition_key).await all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "h2o", partition_key).await
} }
} }
@ -81,7 +79,6 @@ impl DbSetup for OneMeasurementManyNullTags {
"h2o,state=NY,city=NYC,borough=Brooklyn temp=61.0 600", "h2o,state=NY,city=NYC,borough=Brooklyn temp=61.0 600",
]; ];
// return all possible scenarios a chunk: MUB open, MUB frozen, RUB, RUB & OS, OS
all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu", partition_key).await all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu", partition_key).await
} }
} }
@ -176,7 +173,6 @@ impl DbSetup for TwoMeasurements {
"disk,region=east bytes=99i 200", "disk,region=east bytes=99i 200",
]; ];
// return all possible scenarios a chunk: MUB open, MUB frozen, RUB, RUB & OS, OS
all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu", partition_key).await all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu", partition_key).await
} }
} }
@ -208,7 +204,8 @@ impl DbSetup for TwoMeasurementsWithDelete {
)], )],
}; };
// return all possible combination scenarios of a chunk stage and when the delete predicates are applied // return all possible combination scenarios of a chunk stage and when the delete
// predicates are applied
all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await all_scenarios_for_one_chunk(vec![&pred], vec![], lp_lines, table_name, partition_key).await
} }
} }
@ -246,7 +243,8 @@ impl DbSetup for TwoMeasurementsWithDeleteAll {
exprs: vec![], exprs: vec![],
}; };
// return all possible combination scenarios of a chunk stage and when the delete predicates are applied // return all possible combination scenarios of a chunk stage and when the delete
// predicates are applied
all_scenarios_for_one_chunk( all_scenarios_for_one_chunk(
vec![&pred1], vec![&pred1],
vec![&pred2], vec![&pred2],
@ -489,7 +487,8 @@ impl DbSetup for ManyFieldsSeveralChunks {
// c4: parquet stage & overlap with c1 // c4: parquet stage & overlap with c1
let lp_lines4 = vec![ let lp_lines4 = vec![
"h2o,state=MA,city=Boston temp=88.6 230", "h2o,state=MA,city=Boston temp=88.6 230",
"h2o,state=MA,city=Boston other_temp=80 250", // duplicate with a row in c1 but more recent => this row is kept "h2o,state=MA,city=Boston other_temp=80 250", // duplicate with a row in c1 but more
// recent => this row is kept
]; ];
let c4 = ChunkData { let c4 = ChunkData {
lp_lines: lp_lines4, lp_lines: lp_lines4,
@ -559,8 +558,9 @@ impl DbSetup for OneMeasurementFourChunksWithDuplicates {
// . time range: 150 - 300 // . time range: 150 - 300
// . no duplicates in its own chunk // . no duplicates in its own chunk
let lp_lines2 = vec![ let lp_lines2 = vec![
"h2o,state=MA,city=Bedford max_temp=78.75,area=742u 150", // new field (area) and update available NULL (max_temp) // new field (area) and update available NULL (max_temp)
"h2o,state=MA,city=Boston min_temp=65.4 250", // update min_temp from NULL "h2o,state=MA,city=Bedford max_temp=78.75,area=742u 150",
"h2o,state=MA,city=Boston min_temp=65.4 250", // update min_temp from NULL
"h2o,state=MA,city=Reading min_temp=53.4, 250", "h2o,state=MA,city=Reading min_temp=53.4, 250",
"h2o,state=CA,city=SF min_temp=79.0,max_temp=87.2,area=500u 300", "h2o,state=CA,city=SF min_temp=79.0,max_temp=87.2,area=500u 300",
"h2o,state=CA,city=SJ min_temp=78.5,max_temp=88.0 300", "h2o,state=CA,city=SJ min_temp=78.5,max_temp=88.0 300",
@ -696,7 +696,7 @@ impl DbSetup for EndToEndTest {
]; ];
let partition_key = "1970-01-01T00"; let partition_key = "1970-01-01T00";
// return all possible scenarios a chunk: MUB open, MUB frozen, RUB, RUB & OS, OS
all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu_load_short", partition_key).await all_scenarios_for_one_chunk(vec![], vec![], lp_lines, "cpu_load_short", partition_key).await
} }
} }
@ -759,7 +759,7 @@ impl DbSetup for TwoMeasurementsMultiSeries {
"o2,state=MA,city=Boston temp=53.4,reading=51 250", // to row 4 "o2,state=MA,city=Boston temp=53.4,reading=51 250", // to row 4
]; ];
// Swap around data is not inserted in series order // Swap around data is not inserted in series order
lp_lines.swap(0, 2); lp_lines.swap(0, 2);
lp_lines.swap(4, 5); lp_lines.swap(4, 5);
@ -783,7 +783,7 @@ impl DbSetup for TwoMeasurementsMultiSeriesWithDelete {
"o2,state=MA,city=Boston temp=53.4,reading=51 250", // to row 4 "o2,state=MA,city=Boston temp=53.4,reading=51 250", // to row 4
]; ];
// Swap around data is not inserted in series order // Swap around data is not inserted in series order
lp_lines.swap(0, 2); lp_lines.swap(0, 2);
lp_lines.swap(4, 5); lp_lines.swap(4, 5);
@ -822,7 +822,7 @@ impl DbSetup for TwoMeasurementsMultiSeriesWithDeleteAll {
"o2,state=MA,city=Boston temp=53.4,reading=51 250", // to row 4 "o2,state=MA,city=Boston temp=53.4,reading=51 250", // to row 4
]; ];
// Swap around data is not inserted in series order // Swap around data is not inserted in series order
lp_lines.swap(0, 2); lp_lines.swap(0, 2);
lp_lines.swap(4, 5); lp_lines.swap(4, 5);
@ -978,9 +978,8 @@ impl DbSetup for OneMeasurementNoTagsWithDelete {
} }
} }
/// This will create many scenarios (at least 15), some have a chunk with /// This will create many scenarios: some have a chunk with soft deleted data, some have no chunks
/// soft deleted data, some have no chunks because there is no point to /// because there is no point to create compacted chunks with all deleted data.
/// create a RUB for one or many compacted MUB with all deleted data.
pub struct OneMeasurementNoTagsWithDeleteAllWithAndWithoutChunk {} pub struct OneMeasurementNoTagsWithDeleteAllWithAndWithoutChunk {}
#[async_trait] #[async_trait]
impl DbSetup for OneMeasurementNoTagsWithDeleteAllWithAndWithoutChunk { impl DbSetup for OneMeasurementNoTagsWithDeleteAllWithAndWithoutChunk {
@ -995,8 +994,8 @@ impl DbSetup for OneMeasurementNoTagsWithDeleteAllWithAndWithoutChunk {
exprs: vec![], exprs: vec![],
}; };
// Apply predicate before the chunk is moved if any. There will be // Apply predicate before the chunk is moved if any. There will be scenarios without chunks
// scenario without chunks as a consequence of not-compacting-deleted-data // as a consequence of not-compacting-deleted-data
all_scenarios_for_one_chunk( all_scenarios_for_one_chunk(
vec![&pred], vec![&pred],
vec![], vec![],

View File

@ -44,7 +44,7 @@ use std::{
sync::Mutex, sync::Mutex,
}; };
// Structs, enums, and functions used to exhaust all test scenarios of chunk life cycle // Structs, enums, and functions used to exhaust all test scenarios of chunk lifecycle
// & when delete predicates are applied // & when delete predicates are applied
// STRUCTs & ENUMs // STRUCTs & ENUMs
@ -55,9 +55,9 @@ pub struct ChunkData<'a, 'b> {
/// which stage this chunk will be created. /// which stage this chunk will be created.
/// ///
/// If not set, this chunk will be created in [all](ChunkStage::all) stages. This can be helpful when the test /// If not set, this chunk will be created in [all](ChunkStage::all) stages. This can be
/// scenario is not specific to the chunk stage. If this is used for multiple chunks, then all stage permutations /// helpful when the test scenario is not specific to the chunk stage. If this is used for
/// will be generated. /// multiple chunks, then all stage permutations will be generated.
pub chunk_stage: Option<ChunkStage>, pub chunk_stage: Option<ChunkStage>,
/// Delete predicates /// Delete predicates
@ -80,7 +80,8 @@ impl<'a, 'b> ChunkData<'a, 'b> {
} }
} }
/// Replace [`DeleteTime::Begin`] and [`DeleteTime::End`] with values that correspond to the linked [`ChunkStage`]. /// Replace [`DeleteTime::Begin`] and [`DeleteTime::End`] with values that correspond to the
/// linked [`ChunkStage`].
fn replace_begin_and_end_delete_times(self) -> Self { fn replace_begin_and_end_delete_times(self) -> Self {
Self { Self {
preds: self preds: self
@ -100,7 +101,7 @@ impl<'a, 'b> ChunkData<'a, 'b> {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChunkStage { pub enum ChunkStage {
/// In parquet file. /// In parquet file, persisted by the ingester. Now managed by the querier.
Parquet, Parquet,
/// In ingester. /// In ingester.
@ -119,10 +120,12 @@ impl Display for ChunkStage {
impl PartialOrd for ChunkStage { impl PartialOrd for ChunkStage {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> { fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
match (self, other) { match (self, other) {
// allow multiple parquet chunks (for the same partition). sequence numbers will be used for ordering. // allow multiple parquet chunks (for the same partition). sequence numbers will be
// used for ordering.
(Self::Parquet, Self::Parquet) => Some(Ordering::Equal), (Self::Parquet, Self::Parquet) => Some(Ordering::Equal),
// "parquet" chunks are older (i.e. come earlier) than chunks that still life in the ingester // "parquet" chunks are older (i.e. come earlier) than chunks that still life in the
// ingester
(Self::Parquet, Self::Ingester) => Some(Ordering::Less), (Self::Parquet, Self::Ingester) => Some(Ordering::Less),
(Self::Ingester, Self::Parquet) => Some(Ordering::Greater), (Self::Ingester, Self::Parquet) => Some(Ordering::Greater),
@ -149,7 +152,8 @@ pub struct Pred<'a> {
} }
impl<'a> Pred<'a> { impl<'a> Pred<'a> {
/// Replace [`DeleteTime::Begin`] and [`DeleteTime::End`] with values that correspond to the linked [`ChunkStage`]. /// Replace [`DeleteTime::Begin`] and [`DeleteTime::End`] with values that correspond to the
/// linked [`ChunkStage`].
fn replace_begin_and_end_delete_times(self, stage: ChunkStage) -> Self { fn replace_begin_and_end_delete_times(self, stage: ChunkStage) -> Self {
Self { Self {
delete_time: self.delete_time.replace_begin_and_end_delete_times(stage), delete_time: self.delete_time.replace_begin_and_end_delete_times(stage),
@ -168,9 +172,11 @@ impl<'a> Pred<'a> {
/// Describes when a delete predicate was applied. /// Describes when a delete predicate was applied.
/// ///
/// # Ordering /// # Ordering
/// Compared to [`ChunkStage`], the ordering here may seem a bit confusing. While the latest payload / LP data ///
/// resides in the ingester and is not yet available as a parquet file, the latest tombstones apply to parquet files and /// Compared to [`ChunkStage`], the ordering here may seem a bit confusing. While the latest
/// were (past tense!) NOT applied while the LP data was in the ingester. /// payload / LP data resides in the ingester and is not yet available as a parquet file, the
/// latest tombstones apply to parquet files and were (past tense!) NOT applied while the LP data
/// was in the ingester.
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
pub enum DeleteTime { pub enum DeleteTime {
/// Special delete time which marks the first time that could be used from deletion. /// Special delete time which marks the first time that could be used from deletion.
@ -182,11 +188,13 @@ pub enum DeleteTime {
Ingester { Ingester {
/// Flag if the tombstone also exists in the catalog. /// Flag if the tombstone also exists in the catalog.
/// ///
/// If this is set to `false`, then the tombstone was applied by the ingester but does not exist in the catalog /// If this is set to `false`, then the tombstone was applied by the ingester but does not
/// any longer. This can be because: /// exist in the catalog any longer. This can be because:
/// ///
/// - the ingester decided that it doesn't need to be added to the catalog (this is currently/2022-04-21 not implemented!) /// - the ingester decided that it doesn't need to be added to the catalog (this is
/// - the compactor pruned the tombstone from the catalog because there are zero affected parquet files /// currently/2022-04-21 not implemented!)
/// - the compactor pruned the tombstone from the catalog because there are zero affected
/// parquet files
also_in_catalog: bool, also_in_catalog: bool,
}, },
@ -223,7 +231,8 @@ impl DeleteTime {
} }
} }
/// Replace [`DeleteTime::Begin`] and [`DeleteTime::End`] with values that correspond to the linked [`ChunkStage`]. /// Replace [`DeleteTime::Begin`] and [`DeleteTime::End`] with values that correspond to the
/// linked [`ChunkStage`].
fn replace_begin_and_end_delete_times(self, stage: ChunkStage) -> Self { fn replace_begin_and_end_delete_times(self, stage: ChunkStage) -> Self {
match self { match self {
Self::Begin => Self::begin_for(stage), Self::Begin => Self::begin_for(stage),
@ -266,14 +275,14 @@ impl Display for DeleteTime {
// -------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------
/// All scenarios chunk stages and their life cycle moves for given set of delete predicates. /// All scenarios of chunk stages and their lifecycle moves for a given set of delete predicates.
/// If the delete predicates are empty, all scenarios of different chunk stages will be returned. /// If the delete predicates are empty, all scenarios of different chunk stages will be returned.
pub async fn all_scenarios_for_one_chunk( pub async fn all_scenarios_for_one_chunk(
// These delete predicates are applied at all stages of the chunk life cycle // These delete predicates are applied at all stages of the chunk lifecycle
chunk_stage_preds: Vec<&DeletePredicate>, chunk_stage_preds: Vec<&DeletePredicate>,
// These delete predicates are applied to all chunks at their final stages // These delete predicates are applied to all chunks at their final stages
at_end_preds: Vec<&DeletePredicate>, at_end_preds: Vec<&DeletePredicate>,
// Input data, formatted as line protocol. One chunk will be created for each measurement // Input data, formatted as line protocol. One chunk will be created for each measurement
// (table) that appears in the input // (table) that appears in the input
lp_lines: Vec<&str>, lp_lines: Vec<&str>,
// Table to which the delete predicates will be applied // Table to which the delete predicates will be applied
@ -284,10 +293,9 @@ pub async fn all_scenarios_for_one_chunk(
let mut scenarios = vec![]; let mut scenarios = vec![];
// Go over chunk stages // Go over chunk stages
for chunk_stage in ChunkStage::all() { for chunk_stage in ChunkStage::all() {
// Apply delete chunk_stage_preds to this chunk stage at // Apply delete chunk_stage_preds to this chunk stage at all stages at and before that in
// all stages at and before that in the life cycle to the chunk // the lifecycle of the chunk. But we only need to get all delete times if
// But only need to get all delete times if chunk_stage_preds is not empty, // chunk_stage_preds is not empty, otherwise, produce only one scenario of each chunk stage
// otherwise, produce only one scenario of each chunk stage
let mut delete_times = vec![DeleteTime::begin_for(chunk_stage)]; let mut delete_times = vec![DeleteTime::begin_for(chunk_stage)];
if !chunk_stage_preds.is_empty() { if !chunk_stage_preds.is_empty() {
delete_times = DeleteTime::all_from_and_before(chunk_stage) delete_times = DeleteTime::all_from_and_before(chunk_stage)
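A simplified, self-contained sketch of the enumeration this hunk describes (the stage and delete-time names are illustrative, not the real test types): each chunk stage is paired with every delete time at or before it, and an empty predicate list collapses to a single scenario per stage.

```rust
#[derive(Debug, Clone, Copy)]
enum Stage {
    Ingester,
    Parquet,
}

#[derive(Debug, Clone, Copy)]
enum DeleteTime {
    Ingester,
    Parquet,
    End,
}

/// All delete times that are valid at or before the given chunk stage.
fn delete_times_up_to(stage: Stage) -> Vec<DeleteTime> {
    match stage {
        Stage::Ingester => vec![DeleteTime::Ingester, DeleteTime::End],
        Stage::Parquet => vec![DeleteTime::Ingester, DeleteTime::Parquet, DeleteTime::End],
    }
}

fn main() {
    let has_preds = true;
    for stage in [Stage::Ingester, Stage::Parquet] {
        let times = if has_preds {
            delete_times_up_to(stage)
        } else {
            vec![DeleteTime::End] // one scenario per stage when there is nothing to delete
        };
        for t in times {
            println!("scenario: stage={stage:?}, delete applied at {t:?}");
        }
    }
}
```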
@ -325,9 +333,9 @@ pub async fn all_scenarios_for_one_chunk(
scenarios scenarios
} }
/// Build a chunk that may move with life cycle before/after deletes /// Build a chunk that may move with lifecycle before/after deletes. Note that the only chunk in
/// Note that the only chunk in this function can be moved to different stages and delete predicates /// this function can be moved to different stages, and delete predicates can be applied at
/// can be applied at different stages when the chunk is moved. /// different stages when the chunk is moved.
async fn make_chunk_with_deletes_at_different_stages( async fn make_chunk_with_deletes_at_different_stages(
lp_lines: Vec<&str>, lp_lines: Vec<&str>,
chunk_stage: ChunkStage, chunk_stage: ChunkStage,
@ -350,12 +358,7 @@ async fn make_chunk_with_deletes_at_different_stages(
DbScenario { scenario_name, db } DbScenario { scenario_name, db }
} }
/// This function loads two chunks of lp data into 4 different scenarios /// Load two chunks of lp data into different chunk scenarios.
///
/// Data in single open mutable buffer chunk
/// Data in one open mutable buffer chunk, one closed mutable chunk
/// Data in one open mutable buffer chunk, one read buffer chunk
/// Data in one two read buffer chunks,
pub async fn make_two_chunk_scenarios( pub async fn make_two_chunk_scenarios(
partition_key: &str, partition_key: &str,
data1: &str, data1: &str,
@ -480,7 +483,10 @@ async fn make_chunk(mock_ingester: &mut MockIngester, chunk: ChunkData<'_, '_>)
panic!("Cannot have delete time '{other}' for ingester chunk") panic!("Cannot have delete time '{other}' for ingester chunk")
} }
DeleteTime::Begin | DeleteTime::End => { DeleteTime::Begin | DeleteTime::End => {
unreachable!("Begin/end cases should have been replaced with concrete instances at this point") unreachable!(
"Begin/end cases should have been replaced \
with concrete instances at this point"
)
} }
} }
} }
@ -507,7 +513,8 @@ async fn make_chunk(mock_ingester: &mut MockIngester, chunk: ChunkData<'_, '_>)
.await; .await;
mock_ingester.buffer_operation(op).await; mock_ingester.buffer_operation(op).await;
// tombstones are created immediately, need to remember their ID to handle deletion later // tombstones are created immediately, need to remember their ID to
// handle deletion later
let mut tombstone_id = None; let mut tombstone_id = None;
for id in mock_ingester.tombstone_ids(delete_table_name).await { for id in mock_ingester.tombstone_ids(delete_table_name).await {
if !ids_pre.contains(&id) { if !ids_pre.contains(&id) {
@ -521,7 +528,10 @@ async fn make_chunk(mock_ingester: &mut MockIngester, chunk: ChunkData<'_, '_>)
// will be attached AFTER the chunk was created // will be attached AFTER the chunk was created
} }
DeleteTime::Begin | DeleteTime::End => { DeleteTime::Begin | DeleteTime::End => {
unreachable!("Begin/end cases should have been replaced with concrete instances at this point") unreachable!(
"Begin/end cases should have been replaced \
with concrete instances at this point"
)
} }
} }
} }
@ -568,7 +578,10 @@ async fn make_chunk(mock_ingester: &mut MockIngester, chunk: ChunkData<'_, '_>)
mock_ingester.buffer_operation(op).await; mock_ingester.buffer_operation(op).await;
} }
DeleteTime::Begin | DeleteTime::End => { DeleteTime::Begin | DeleteTime::End => {
unreachable!("Begin/end cases should have been replaced with concrete instances at this point") unreachable!(
"Begin/end cases should have been replaced \
with concrete instances at this point"
)
} }
} }
} }
@ -600,8 +613,8 @@ async fn make_chunk(mock_ingester: &mut MockIngester, chunk: ChunkData<'_, '_>)
/// Ingester that can be controlled specifically for query tests. /// Ingester that can be controlled specifically for query tests.
/// ///
/// This uses as much ingester code as possible but allows more direct control over aspects like lifecycle and /// This uses as much ingester code as possible but allows more direct control over aspects like
/// partitioning. /// lifecycle and partitioning.
#[derive(Debug)] #[derive(Debug)]
struct MockIngester { struct MockIngester {
/// Test catalog state. /// Test catalog state.
@ -618,9 +631,10 @@ struct MockIngester {
/// Memory of partition keys for certain sequence numbers. /// Memory of partition keys for certain sequence numbers.
/// ///
/// This is currently required because [`DmlWrite`] does not carry partition information so we need to do that. In /// This is currently required because [`DmlWrite`] does not carry partition information so we
/// production this is not required because the router and the ingester use the same partition logic, but we need /// need to do that. In production this is not required because the router and the ingester use
/// direct control over the partition key for the query tests. /// the same partition logic, but we need direct control over the partition key for the query
/// tests.
partition_keys: HashMap<SequenceNumber, String>, partition_keys: HashMap<SequenceNumber, String>,
/// Ingester state. /// Ingester state.
@ -671,7 +685,8 @@ impl MockIngester {
/// ///
/// This will never persist. /// This will never persist.
/// ///
/// Takes `&self mut` because our partitioning implementation does not work with concurrent access. /// Takes `&self mut` because our partitioning implementation does not work with concurrent
/// access.
async fn buffer_operation(&mut self, dml_operation: DmlOperation) { async fn buffer_operation(&mut self, dml_operation: DmlOperation) {
let lifecycle_handle = NoopLifecycleHandle {}; let lifecycle_handle = NoopLifecycleHandle {};
@ -828,7 +843,8 @@ impl MockIngester {
/// Finalizes the ingester and creates a querier namespace that can be used for query tests. /// Finalizes the ingester and creates a querier namespace that can be used for query tests.
/// ///
/// The querier namespace will hold a simulated connection to the ingester to be able to query unpersisted data. /// The querier namespace will hold a simulated connection to the ingester to be able to query
/// unpersisted data.
async fn into_query_namespace(self) -> Arc<QuerierNamespace> { async fn into_query_namespace(self) -> Arc<QuerierNamespace> {
let mut repos = self.catalog.catalog.repositories().await; let mut repos = self.catalog.catalog.repositories().await;
let schema = Arc::new( let schema = Arc::new(
@ -912,8 +928,8 @@ impl IngesterFlightClient for MockIngester {
_ingester_address: Arc<str>, _ingester_address: Arc<str>,
request: IngesterQueryRequest, request: IngesterQueryRequest,
) -> Result<Box<dyn IngesterFlightClientQueryData>, IngesterFlightClientError> { ) -> Result<Box<dyn IngesterFlightClientQueryData>, IngesterFlightClientError> {
// NOTE: we MUST NOT unwrap errors here because some query tests assert error behavior (e.g. passing predicates // NOTE: we MUST NOT unwrap errors here because some query tests assert error behavior
// of wrong types) // (e.g. passing predicates of wrong types)
let response = prepare_data_to_querier(&self.ingester_data, &request) let response = prepare_data_to_querier(&self.ingester_data, &request)
.await .await
.map_err(|e| IngesterFlightClientError::Flight { .map_err(|e| IngesterFlightClientError::Flight {
@ -943,8 +959,8 @@ impl IngesterFlightClient for MockIngester {
} }
} }
/// Helper struct to present [`IngesterQueryResponse`] (produced by the ingester) as a [`IngesterFlightClientQueryData`] /// Helper struct to present [`IngesterQueryResponse`] (produced by the ingester) as a
/// (used by the querier) without doing any real gRPC IO. /// [`IngesterFlightClientQueryData`] (used by the querier) without doing any real gRPC IO.
#[derive(Debug)] #[derive(Debug)]
struct QueryDataAdapter { struct QueryDataAdapter {
response: IngesterQueryResponse, response: IngesterQueryResponse,

View File

@ -71,7 +71,7 @@ async fn run_table_schema_test_case<D>(
} }
fn is_unsorted_chunk_type(chunk: &dyn QueryChunk) -> bool { fn is_unsorted_chunk_type(chunk: &dyn QueryChunk) -> bool {
(chunk.chunk_type() == "MUB") || (chunk.chunk_type() == "IngesterPartition") chunk.chunk_type() == "IngesterPartition"
} }
#[tokio::test] #[tokio::test]