Merge remote-tracking branch 'origin/main' into (fix)issue-835-wrong-http-status-write-path

pull/24376/head
Carol (Nichols || Goulding) 2021-03-24 13:57:37 -04:00
commit db028a412d
172 changed files with 15889 additions and 4822 deletions

4
.cargo/config Normal file

@ -0,0 +1,4 @@
[target.x86_64-unknown-linux-gnu]
rustflags = [
"-C", "link-arg=-fuse-ld=lld",
]

3
.gitattributes vendored Normal file

@ -0,0 +1,3 @@
generated_types/protos/google/ linguist-generated=true
generated_types/protos/grpc/ linguist-generated=true
generated_types/src/wal_generated.rs linguist-generated=true


@ -19,6 +19,11 @@ on: [pull_request]
name: ci
env:
# Disable full debug symbol generation to speed up CI build
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
jobs:
build:


@ -186,3 +186,20 @@ cargo clippy --all-targets --workspace -- -D warnings
[`rustfmt`]: https://github.com/rust-lang/rustfmt
[`clippy`]: https://github.com/rust-lang/rust-clippy
## Upgrading the `flatbuffers` crate
IOx uses Flatbuffers for its write-ahead log. The structure is defined in
[`generated_types/protos/wal.fbs`]. We have then used the `flatc` Flatbuffers compiler to generate
the corresponding Rust code in [`generated_types/src/wal_generated.rs`], which is checked in to the
repository.
The checked-in code is compatible with the `flatbuffers` crate version in the `Cargo.lock` file. If
you upgrade the version of the `flatbuffers` crate that IOx depends on, the generated code will need
to be updated as well.
Instructions for updating the generated code are in [`docs/regenerating_flatbuffers.md`].
[`generated_types/protos/wal.fbs`]: generated_types/protos/wal.fbs
[`generated_types/src/wal_generated.rs`]: generated_types/src/wal_generated.rs
[`docs/regenerating_flatbuffers.md`]: docs/regenerating_flatbuffers.md

454
Cargo.lock generated

File diff suppressed because it is too large


@ -4,10 +4,12 @@ version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
default-run = "influxdb_iox"
readme = "README.md"
[workspace] # In alphabetical order
members = [
"arrow_deps",
"catalog",
"data_types",
"generated_types",
"google_types",
@ -16,6 +18,7 @@ members = [
"influxdb_tsm",
"influxdb2_client",
"ingest",
"internal_types",
"logfmt",
"mem_qe",
"mutable_buffer",
@ -40,9 +43,10 @@ debug = true
arrow_deps = { path = "arrow_deps" }
data_types = { path = "data_types" }
generated_types = { path = "generated_types" }
influxdb_iox_client = { path = "influxdb_iox_client" }
influxdb_iox_client = { path = "influxdb_iox_client", features = ["format"] }
influxdb_line_protocol = { path = "influxdb_line_protocol" }
influxdb_tsm = { path = "influxdb_tsm" }
internal_types = { path = "internal_types" }
ingest = { path = "ingest" }
logfmt = { path = "logfmt" }
mem_qe = { path = "mem_qe" }
@ -63,13 +67,15 @@ clap = "2.33.1"
csv = "1.1"
dirs = "3.0.1"
dotenv = "0.15.0"
env_logger = "0.7.1"
env_logger = "0.8.3"
flate2 = "1.0"
futures = "0.3.1"
http = "0.2.0"
hyper = "0.14"
opentelemetry = { version = "0.12", default-features = false, features = ["trace", "tokio-support"] }
opentelemetry-jaeger = { version = "0.11", features = ["tokio"] }
# used by arrow/datafusion anyway
prettytable-rs = "0.8"
prost = "0.7"
# Forked to upgrade hyper and tokio
routerify = { git = "https://github.com/influxdata/routerify", rev = "274e250" }
@ -79,8 +85,9 @@ serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.21"
thiserror = "1.0.23"
tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "parking_lot"] }
tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tokio-util = { version = "0.6.3" }
tonic = "0.4.0"
tonic-health = "0.3.0"
tracing = { version = "0.1", features = ["release_max_level_debug"] }
@ -93,6 +100,8 @@ tracing-subscriber = { version = "0.2.15", features = ["parking_lot"] }
influxdb2_client = { path = "influxdb2_client" }
influxdb_iox_client = { path = "influxdb_iox_client", features = ["flight"] }
test_helpers = { path = "test_helpers" }
once_cell = { version = "1.4.0", features = ["parking_lot"] }
parking_lot = "0.11.1"
# Crates.io dependencies, in alphabetical order
assert_cmd = "1.0.0"
@ -100,14 +109,10 @@ criterion = "0.3"
flate2 = "1.0"
hex = "0.4.2"
predicates = "1.0.4"
rand = "0.7.2"
rand = "0.8.3"
reqwest = "0.11"
tempfile = "3.1.0"
[[bin]]
name = "cpu_feature_check"
path = "src/cpu_feature_check/main.rs"
[[bench]]
name = "encoders"
harness = false


@ -20,7 +20,7 @@ RUN \
FROM debian:buster-slim
RUN apt-get update \
&& apt-get install -y libssl1.1 libgcc1 libc6 --no-install-recommends \
&& apt-get install -y libssl1.1 libgcc1 libc6 ca-certificates --no-install-recommends \
&& rm -rf /var/lib/{apt,dpkg,cache,log}
RUN groupadd -g 1500 rust \
@ -36,3 +36,5 @@ COPY --from=build /root/influxdb_iox /usr/bin/influxdb_iox
EXPOSE 8080 8082
ENTRYPOINT ["/usr/bin/influxdb_iox"]
CMD ["run"]

147
README.md

@ -28,8 +28,7 @@ We're also hosting monthly tech talks and community office hours on the project
## Quick Start
To compile and run InfluxDB IOx from source, you'll need a Rust compiler and a `flatc` FlatBuffers
compiler.
To compile and run InfluxDB IOx from source, you'll need a Rust compiler and `clang`.
### Build a Docker Image
@ -80,36 +79,6 @@ rustc --version
and you should see a nightly version of Rust!
### Installing `flatc`
InfluxDB IOx uses the [FlatBuffer] serialization format for its write-ahead log. The [`flatc`
compiler] reads the schema in `generated_types/wal.fbs` and generates the corresponding Rust code.
Install `flatc` >= 1.12.0 with one of these methods as appropriate to your operating system:
* Using a [Windows binary release]
* Using the [`flatbuffers` package for conda]
* Using the [`flatbuffers` package for Arch Linux]
* Using the [`flatbuffers` package for Homebrew]
Once you have installed the packages, you should be able to run:
```shell
flatc --version
```
and see the version displayed.
You won't have to run `flatc` directly; once it's available, Rust's Cargo build tool manages the
compilation process by calling `flatc` for you.
[FlatBuffer]: https://google.github.io/flatbuffers/
[`flatc` compiler]: https://google.github.io/flatbuffers/flatbuffers_guide_using_schema_compiler.html
[Windows binary release]: https://github.com/google/flatbuffers/releases
[`flatbuffers` package for conda]: https://anaconda.org/conda-forge/flatbuffers
[`flatbuffers` package for Arch Linux]: https://www.archlinux.org/packages/community/x86_64/flatbuffers/
[`flatbuffers` package for Homebrew]: https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/flatbuffers.rb
### Installing `clang`
An installation of `clang` is required to build the [`croaring`] dependency - if
@ -133,17 +102,16 @@ takes its configuration as environment variables.
You can see a list of the current configuration values by running `influxdb_iox
--help`, as well as the specific subcommand config options such as `influxdb_iox
server --help`.
run --help`.
If you would rather specify config via a file, you can do so using a
`.env` formatted file in the working directory. You can use the
provided [example](docs/env.example) as a template if you want:
```bash
```shell
cp docs/env.example .env
```
### Compiling and Starting the Server
InfluxDB IOx is built using Cargo, Rust's package manager and build tool.
@ -163,7 +131,7 @@ which will create a binary in `target/debug` that you can run with:
You can compile and run with one command by using:
```shell
cargo run
cargo run -- server
```
When compiling for performance testing, build in release mode by using:
@ -175,13 +143,13 @@ cargo build --release
which will create the corresponding binary in `target/release`:
```shell
./target/release/influxdb_iox
./target/release/influxdb_iox run
```
Similarly, you can do this in one step with:
```shell
cargo run --release
cargo run --release -- server
```
The server will, by default, start an HTTP API server on port `8080` and a gRPC server on port
@ -190,34 +158,60 @@ The server will, by default, start an HTTP API server on port `8080` and a gRPC
### Writing and Reading Data
Each IOx instance requires a writer ID.
This can be set three ways:
This can be set in one of four ways:
- set an environment variable `INFLUXDB_IOX_ID=42`
- set a flag `--writer-id 42`
- send an HTTP PUT request:
```
curl --request PUT \
--url http://localhost:8080/iox/api/v1/id \
--header 'Content-Type: application/json' \
--data '{
"id": 42
}'
- use the API (not covered here)
- use the CLI
```shell
influxdb_iox writer set 42
```
To write data, you need a destination database.
This is set via HTTP PUT, identifying the database by org `company` and bucket `sensors`:
```
curl --request PUT \
--url http://localhost:8080/iox/api/v1/databases/company_sensors \
--header 'Content-Type: application/json' \
--data '{
}'
To write data, you need to create a database. You can do so via the API or using the CLI. For example, to create a database called `company_sensors` with a 100MB mutable buffer, use this command:
```shell
influxdb_iox database create company_sensors -m 100
```
Data can be stored in InfluxDB IOx by sending it in [line protocol] format to the `/api/v2/write`
endpoint. Data is stored by organization and bucket names. Here's an example using [`curl`] with
the organization name `company` and the bucket name `sensors` that will send the data in the
`tests/fixtures/lineproto/metrics.lp` file in this repository, assuming that you're running the
server on the default port:
Data can be stored in InfluxDB IOx by sending it in [line protocol]
format to the `/api/v2/write` endpoint or by using the CLI. For example,
the following command sends the data in the
`tests/fixtures/lineproto/metrics.lp` file in this repository to the
`company_sensors` database, assuming that you're running the server on
the default port:
```shell
influxdb_iox database write company_sensors tests/fixtures/lineproto/metrics.lp
```
To query data stored in the `company_sensors` database:
```shell
influxdb_iox database query company_sensors "SELECT * FROM cpu LIMIT 10"
```
### Using the CLI
To ease deployment, IOx is packaged as a combined binary which has
commands to start the IOx server as well as a CLI interface for
interacting with and configuring such servers.
The CLI itself is documented via extensive built-in help, which you can
access by running `influxdb_iox --help`.
### InfluxDB 2.0 compatibility
InfluxDB IOx allows seamless interoperability with InfluxDB 2.0.
InfluxDB 2.0 stores data in organizations and buckets, but InfluxDB IOx
stores data in named databases. IOx maps `organization` and `bucket`
to a database named with the two parts separated by an underscore
(`_`): `organization_bucket`.
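For illustration only, the mapping can be sketched as a tiny Rust function (a hypothetical helper written for this example, not an actual IOx API):

```rust
/// Hypothetical sketch: IOx derives the database name by joining the
/// organization and bucket names with an underscore.
fn org_bucket_to_database(org: &str, bucket: &str) -> String {
    format!("{}_{}", org, bucket)
}

fn main() {
    // "company" + "sensors" maps to the "company_sensors" database created above
    assert_eq!(org_bucket_to_database("company", "sensors"), "company_sensors");
}
```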
Here's an example using the [`curl`] command to send the same data into
the `company_sensors` database using the InfluxDB 2.0 `/api/v2/write`
API:
```shell
curl -v "http://127.0.0.1:8080/api/v2/write?org=company&bucket=sensors" --data-binary @tests/fixtures/lineproto/metrics.lp
@ -226,29 +220,46 @@ curl -v "http://127.0.0.1:8080/api/v2/write?org=company&bucket=sensors" --data-b
[line protocol]: https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/
[`curl`]: https://curl.se/
To query stored data, use the `/api/v2/read` endpoint with a SQL query. This example will return
all data in the `company` organization's `sensors` bucket for the `processes` measurement:
```shell
curl -v -G -d 'org=company' -d 'bucket=sensors' --data-urlencode 'sql_query=select * from processes' "http://127.0.0.1:8080/api/v2/read"
```
### Health Checks
The HTTP API exposes a healthcheck endpoint at `/health`
```shell
```console
$ curl http://127.0.0.1:8080/health
OK
```
The gRPC API implements the [gRPC Health Checking Protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md). This can be tested with [grpc-health-probe](https://github.com/grpc-ecosystem/grpc-health-probe)
```shell
```console
$ grpc_health_probe -addr 127.0.0.1:8082 -service influxdata.platform.storage.Storage
status: SERVING
```
### Manually calling the gRPC API
If you want to manually invoke one of the gRPC APIs, you can use any gRPC CLI client;
a good one is [grpcurl](https://github.com/fullstorydev/grpcurl).
Tonic (the gRPC server library we're using) currently doesn't have support for gRPC reflection,
hence you must pass all `.proto` files to your client. You can find a convenient `grpcurl` wrapper
that does that in the `scripts` directory:
```console
$ ./scripts/grpcurl -plaintext 127.0.0.1:8082 list
grpc.health.v1.Health
influxdata.iox.management.v1.ManagementService
influxdata.platform.storage.IOxTesting
influxdata.platform.storage.Storage
$ ./scripts/grpcurl -plaintext 127.0.0.1:8082 influxdata.iox.management.v1.ManagementService.ListDatabases
{
"names": [
"foobar_weather"
]
}
```
## Contributing
We welcome community contributions from anyone!


@ -8,14 +8,14 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev
# The version can be found here: https://github.com/apache/arrow/commit/4f6adc700d1cebc50a0594b5aa671f64491cc20e
# The version can be found here: https://github.com/apache/arrow/commit/6208a79739d0228ecc566fa8436ee61068452212
#
arrow = { git = "https://github.com/apache/arrow.git", rev = "4f6adc700d1cebc50a0594b5aa671f64491cc20e" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "4f6adc700d1cebc50a0594b5aa671f64491cc20e" }
arrow = { git = "https://github.com/apache/arrow.git", rev = "6208a79739d0228ecc566fa8436ee61068452212" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "6208a79739d0228ecc566fa8436ee61068452212" }
# Turn off optional datafusion features (function packages)
datafusion = { git = "https://github.com/apache/arrow.git", rev = "4f6adc700d1cebc50a0594b5aa671f64491cc20e", default-features = false }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "6208a79739d0228ecc566fa8436ee61068452212", default-features = false }
# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "4f6adc700d1cebc50a0594b5aa671f64491cc20e", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
parquet = { git = "https://github.com/apache/arrow.git", rev = "6208a79739d0228ecc566fa8436ee61068452212", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }


@ -44,7 +44,7 @@ pub fn sort_record_batch(batch: RecordBatch) -> RecordBatch {
})
.collect();
let sort_output = lexsort(&sort_input).expect("Sorting to complete");
let sort_output = lexsort(&sort_input, None).expect("Sorting to complete");
RecordBatch::try_new(batch.schema(), sort_output).unwrap()
}


@ -204,7 +204,7 @@ fn integer_encode_random(c: &mut Criterion) {
&LARGER_BATCH_SIZES,
|batch_size| {
(1..batch_size)
.map(|_| rand::thread_rng().gen_range(0, 100))
.map(|_| rand::thread_rng().gen_range(0..100))
.collect()
},
influxdb_tsm::encoders::integer::encode,
@ -323,7 +323,7 @@ fn integer_decode_random(c: &mut Criterion) {
&LARGER_BATCH_SIZES,
|batch_size| {
let decoded: Vec<i64> = (1..batch_size)
.map(|_| rand::thread_rng().gen_range(0, 100))
.map(|_| rand::thread_rng().gen_range(0..100))
.collect();
let mut encoded = vec![];
influxdb_tsm::encoders::integer::encode(&decoded, &mut encoded).unwrap();


@ -1,5 +1,4 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use data_types::schema::Schema;
use influxdb_line_protocol::parse_lines;
use ingest::{
parquet::{
@ -8,6 +7,7 @@ use ingest::{
},
ConversionSettings, LineProtocolConverter,
};
use internal_types::schema::Schema;
use packers::{Error as TableError, IOxTableWriter, IOxTableWriterSource};
use std::time::Duration;


@ -52,7 +52,7 @@ fn i64_vec_with_nulls(size: usize, null_percent: usize) -> Vec<Option<u64>> {
let mut a = Vec::with_capacity(size);
// insert 10% null values
for _ in 0..size {
if rng.gen_range(0, null_percent) == 0 {
if rng.gen_range(0..null_percent) == 0 {
a.push(None);
} else {
a.push(Some(1_u64));


@ -2,12 +2,13 @@ version: v1beta1
build:
roots:
- generated_types/protos/
excludes:
- generated_types/protos/com
- generated_types/protos/influxdata/platform
- generated_types/protos/grpc
lint:
ignore:
- google
- grpc
- com/github/influxdata/idpe/storage/read
- influxdata/platform
use:
- DEFAULT
- STYLE_DEFAULT

11
catalog/Cargo.toml Normal file

@ -0,0 +1,11 @@
[package]
name = "catalog"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
description = "InfluxDB IOx Metadata catalog implementation"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
snafu = "0.6"

69
catalog/src/chunk.rs Normal file

@ -0,0 +1,69 @@
use std::sync::Arc;
/// The state of a chunk
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum ChunkState {
/// Chunk can accept new writes
Open,
/// Chunk can still accept new writes, but will likely be closed soon
Closing,
/// Chunk is closed for new writes and has become read only
Closed,
/// Chunk is closed for new writes, and is actively moving to the read
/// buffer
Moving,
/// Chunk has been completely loaded in the read buffer
Moved,
}
/// The catalog representation of a Chunk in IOx. Note that a chunk
/// may exist in several physical locations at any given time (e.g. in
/// mutable buffer and in read buffer)
#[derive(Debug, PartialEq)]
pub struct Chunk {
/// What partition does the chunk belong to?
partition_key: Arc<String>,
/// The ID of the chunk
id: u32,
/// The state of this chunk
state: ChunkState,
/* TODO: Additional fields
* such as object_store_path, etc */
}
impl Chunk {
/// Create a new chunk in the Open state
pub(crate) fn new(partition_key: impl Into<String>, id: u32) -> Self {
let partition_key = Arc::new(partition_key.into());
Self {
partition_key,
id,
state: ChunkState::Open,
}
}
pub fn id(&self) -> u32 {
self.id
}
pub fn key(&self) -> &str {
self.partition_key.as_ref()
}
pub fn state(&self) -> ChunkState {
self.state
}
pub fn set_state(&mut self, state: ChunkState) {
// TODO add state transition validation here?
self.state = state;
}
}

361
catalog/src/lib.rs Normal file

@ -0,0 +1,361 @@
//! This module contains the implementation of the InfluxDB IOx Metadata catalog
#![deny(rust_2018_idioms)]
#![warn(
missing_debug_implementations,
clippy::explicit_iter_loop,
clippy::use_self,
clippy::clone_on_ref_ptr
)]
use std::collections::{btree_map::Entry, BTreeMap};
use snafu::{OptionExt, Snafu};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("unknown partition: {}", partition_key))]
UnknownPartition { partition_key: String },
#[snafu(display("unknown chunk: {}:{}", partition_key, chunk_id))]
UnknownChunk {
partition_key: String,
chunk_id: u32,
},
#[snafu(display("partition already exists: {}", partition_key))]
PartitionAlreadyExists { partition_key: String },
#[snafu(display("chunk already exists: {}:{}", partition_key, chunk_id))]
ChunkAlreadyExists {
partition_key: String,
chunk_id: u32,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
pub mod chunk;
pub mod partition;
use chunk::Chunk;
use partition::Partition;
/// InfluxDB IOx Metadata Catalog
///
/// The Catalog stores information such as which chunks exist, what
/// state they are in, and what objects on object store are used, etc.
///
/// The catalog is also responsible for (eventually) persisting this
/// information as well as ensuring that references between different
/// objects remain valid (e.g. that the `partition_key` field of each
/// `Chunk` refers to a valid partition).
#[derive(Default, Debug)]
pub struct Catalog {
/// key is partition_key
partitions: BTreeMap<String, Partition>,
}
impl Catalog {
pub fn new() -> Self {
Self {
..Default::default()
}
}
/// Return an immutable chunk reference given the specified partition and
/// chunk id
pub fn chunk(&self, partition_key: impl AsRef<str>, chunk_id: u32) -> Result<&Chunk> {
let partition_key = partition_key.as_ref();
self.valid_partition(partition_key)?.chunk(chunk_id)
}
/// Return a mutable chunk reference given the specified partition and
/// chunk id
pub fn chunk_mut(
&mut self,
partition_key: impl AsRef<str>,
chunk_id: u32,
) -> Result<&mut Chunk> {
let partition_key = partition_key.as_ref();
self.valid_partition_mut(partition_key)?.chunk_mut(chunk_id)
}
/// Creates a new `Chunk` with id `id` within a specified Partition.
pub fn create_chunk(&mut self, partition_key: impl AsRef<str>, chunk_id: u32) -> Result<()> {
let partition_key = partition_key.as_ref();
self.valid_partition_mut(partition_key)?
.create_chunk(chunk_id)
}
/// Removes the specified `Chunk` from the catalog
pub fn drop_chunk(&mut self, partition_key: impl AsRef<str>, chunk_id: u32) -> Result<()> {
let partition_key = partition_key.as_ref();
self.valid_partition_mut(partition_key)?
.drop_chunk(chunk_id)
}
/// List all `Chunk`s in this database
pub fn chunks(&self) -> impl Iterator<Item = &Chunk> {
self.partitions.values().flat_map(|p| p.chunks())
}
/// List all `Chunk`s in a particular partition
pub fn partition_chunks(
&self,
partition_key: impl AsRef<str>,
) -> Result<impl Iterator<Item = &Chunk>> {
let partition_key = partition_key.as_ref();
let iter = self.valid_partition(partition_key)?.chunks();
Ok(iter)
}
// List all partitions in this database
pub fn partitions(&self) -> impl Iterator<Item = &Partition> {
self.partitions.values()
}
// Get a specific partition by name, returning `None` if there is no such
// partition
pub fn partition(&self, partition_key: impl AsRef<str>) -> Option<&Partition> {
let partition_key = partition_key.as_ref();
self.partitions.get(partition_key)
}
// Create a new partition in the catalog, returning an error if it already
// exists
pub fn create_partition(&mut self, partition_key: impl Into<String>) -> Result<()> {
let partition_key = partition_key.into();
let entry = self.partitions.entry(partition_key);
match entry {
Entry::Vacant(entry) => {
let partition = Partition::new(entry.key());
entry.insert(partition);
Ok(())
}
Entry::Occupied(entry) => PartitionAlreadyExists {
partition_key: entry.key(),
}
.fail(),
}
}
/// Internal helper to return the specified partition or an error
/// if there is no such partition
fn valid_partition(&self, partition_key: &str) -> Result<&Partition> {
self.partitions
.get(partition_key)
.context(UnknownPartition { partition_key })
}
/// Internal helper to return the specified partition as a mutable
/// reference or an error if there is no such partition
fn valid_partition_mut(&mut self, partition_key: &str) -> Result<&mut Partition> {
self.partitions
.get_mut(partition_key)
.context(UnknownPartition { partition_key })
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn partition_create() {
let mut catalog = Catalog::new();
catalog.create_partition("p1").unwrap();
let err = catalog.create_partition("p1").unwrap_err();
assert_eq!(err.to_string(), "partition already exists: p1");
}
#[test]
fn partition_get() {
let mut catalog = Catalog::new();
catalog.create_partition("p1").unwrap();
catalog.create_partition("p2").unwrap();
let p1 = catalog.partition("p1").unwrap();
assert_eq!(p1.key(), "p1");
let p2 = catalog.partition("p2").unwrap();
assert_eq!(p2.key(), "p2");
let p3 = catalog.partition("p3");
assert!(p3.is_none());
}
#[test]
fn partition_list() {
let mut catalog = Catalog::new();
assert_eq!(catalog.partitions().count(), 0);
catalog.create_partition("p1").unwrap();
catalog.create_partition("p2").unwrap();
catalog.create_partition("p3").unwrap();
let mut partition_keys: Vec<String> =
catalog.partitions().map(|p| p.key().into()).collect();
partition_keys.sort_unstable();
assert_eq!(partition_keys, vec!["p1", "p2", "p3"]);
}
#[test]
fn chunk_create_no_partition() {
let mut catalog = Catalog::new();
let err = catalog
.create_chunk("non existent partition", 0)
.unwrap_err();
assert_eq!(err.to_string(), "unknown partition: non existent partition");
}
#[test]
fn chunk_create() {
let mut catalog = Catalog::new();
catalog.create_partition("p1").unwrap();
catalog.create_chunk("p1", 0).unwrap();
catalog.create_chunk("p1", 1).unwrap();
let c1_0 = catalog.chunk("p1", 0).unwrap();
assert_eq!(c1_0.key(), "p1");
assert_eq!(c1_0.id(), 0);
let c1_0 = catalog.chunk_mut("p1", 0).unwrap();
assert_eq!(c1_0.key(), "p1");
assert_eq!(c1_0.id(), 0);
let c1_1 = catalog.chunk("p1", 1).unwrap();
assert_eq!(c1_1.key(), "p1");
assert_eq!(c1_1.id(), 1);
let err = catalog.chunk("p3", 0).unwrap_err();
assert_eq!(err.to_string(), "unknown partition: p3");
let err = catalog.chunk("p1", 100).unwrap_err();
assert_eq!(err.to_string(), "unknown chunk: p1:100");
}
#[test]
fn chunk_create_dupe() {
let mut catalog = Catalog::new();
catalog.create_partition("p1").unwrap();
catalog.create_chunk("p1", 0).unwrap();
let res = catalog.create_chunk("p1", 0).unwrap_err();
assert_eq!(res.to_string(), "chunk already exists: p1:0");
}
#[test]
fn chunk_list() {
let mut catalog = Catalog::new();
assert_eq!(catalog.chunks().count(), 0);
catalog.create_partition("p1").unwrap();
catalog.create_chunk("p1", 0).unwrap();
catalog.create_chunk("p1", 1).unwrap();
catalog.create_partition("p2").unwrap();
catalog.create_chunk("p2", 100).unwrap();
assert_eq!(
chunk_strings(&catalog),
vec!["Chunk p1:0", "Chunk p1:1", "Chunk p2:100"]
);
assert_eq!(
partition_chunk_strings(&catalog, "p1"),
vec!["Chunk p1:0", "Chunk p1:1"]
);
assert_eq!(
partition_chunk_strings(&catalog, "p2"),
vec!["Chunk p2:100"]
);
}
#[test]
fn chunk_list_err() {
let catalog = Catalog::new();
match catalog.partition_chunks("p3") {
Err(err) => assert_eq!(err.to_string(), "unknown partition: p3"),
Ok(_) => panic!("unexpected success"),
};
}
fn chunk_strings(catalog: &Catalog) -> Vec<String> {
let mut chunks: Vec<String> = catalog
.chunks()
.map(|c| format!("Chunk {}:{}", c.key(), c.id()))
.collect();
chunks.sort_unstable();
chunks
}
fn partition_chunk_strings(catalog: &Catalog, partition_key: &str) -> Vec<String> {
let mut chunks: Vec<String> = catalog
.partition_chunks(partition_key)
.unwrap()
.map(|c| format!("Chunk {}:{}", c.key(), c.id()))
.collect();
chunks.sort_unstable();
chunks
}
#[test]
fn chunk_drop() {
let mut catalog = Catalog::new();
catalog.create_partition("p1").unwrap();
catalog.create_chunk("p1", 0).unwrap();
catalog.create_chunk("p1", 1).unwrap();
catalog.create_partition("p2").unwrap();
catalog.create_chunk("p2", 0).unwrap();
assert_eq!(catalog.chunks().count(), 3);
catalog.drop_chunk("p1", 1).unwrap();
catalog.chunk("p1", 1).unwrap_err(); // chunk is gone
assert_eq!(catalog.chunks().count(), 2);
catalog.drop_chunk("p2", 0).unwrap();
catalog.chunk("p2", 0).unwrap_err(); // chunk is gone
assert_eq!(catalog.chunks().count(), 1);
}
#[test]
fn chunk_drop_non_existent_partition() {
let mut catalog = Catalog::new();
let err = catalog.drop_chunk("p3", 0).unwrap_err();
assert_eq!(err.to_string(), "unknown partition: p3");
}
#[test]
fn chunk_drop_non_existent_chunk() {
let mut catalog = Catalog::new();
catalog.create_partition("p3").unwrap();
let err = catalog.drop_chunk("p3", 0).unwrap_err();
assert_eq!(err.to_string(), "unknown chunk: p3:0");
}
#[test]
fn chunk_recreate_dropped() {
let mut catalog = Catalog::new();
catalog.create_partition("p1").unwrap();
catalog.create_chunk("p1", 0).unwrap();
catalog.create_chunk("p1", 1).unwrap();
assert_eq!(catalog.chunks().count(), 2);
catalog.drop_chunk("p1", 0).unwrap();
assert_eq!(catalog.chunks().count(), 1);
// should be ok to recreate
catalog.create_chunk("p1", 0).unwrap();
assert_eq!(catalog.chunks().count(), 2);
}
}
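As a quick sketch of how this new catalog API is meant to be driven from elsewhere in the workspace (the partition key and chunk IDs are made up, and the state transition shown is only illustrative since `set_state` does not yet validate transitions):

```rust
use catalog::{chunk::ChunkState, Catalog, Error};

fn main() -> Result<(), Error> {
    let mut catalog = Catalog::new();

    // Partitions are created first; chunks are then created within them.
    catalog.create_partition("2021-03-24")?;
    catalog.create_chunk("2021-03-24", 0)?;

    // Chunks start out Open; mark this one as moving to the read buffer.
    catalog.chunk_mut("2021-03-24", 0)?.set_state(ChunkState::Moving);

    // Walk everything the catalog knows about.
    for chunk in catalog.chunks() {
        println!("{}:{} is {:?}", chunk.key(), chunk.id(), chunk.state());
    }
    Ok(())
}
```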

108
catalog/src/partition.rs Normal file

@ -0,0 +1,108 @@
//! The catalog representation of a Partition
use crate::chunk::Chunk;
use std::collections::{btree_map::Entry, BTreeMap};
use super::{ChunkAlreadyExists, Result, UnknownChunk};
use snafu::OptionExt;
/// IOx Catalog Partition
///
/// A partition contains multiple Chunks.
#[derive(Debug, Default)]
pub struct Partition {
/// The partition key
key: String,
/// The chunks that make up this partition, indexed by id
chunks: BTreeMap<u32, Chunk>,
}
impl Partition {
/// Return the partition_key of this Partition
pub fn key(&self) -> &str {
&self.key
}
}
impl Partition {
/// Create a new partition catalog object.
///
/// This function is not pub because `Partition`s should be
/// created using the interfaces on [`Catalog`] and not
/// instantiated directly.
pub(crate) fn new(key: impl Into<String>) -> Self {
let key = key.into();
Self {
key,
..Default::default()
}
}
/// Create a new Chunk
///
/// This function is not pub because `Chunk`s should be created
/// using the interfaces on [`Catalog`] and not instantiated
/// directly.
pub(crate) fn create_chunk(&mut self, chunk_id: u32) -> Result<()> {
let entry = self.chunks.entry(chunk_id);
match entry {
Entry::Vacant(entry) => {
entry.insert(Chunk::new(&self.key, chunk_id));
Ok(())
}
Entry::Occupied(_) => ChunkAlreadyExists {
partition_key: self.key(),
chunk_id,
}
.fail(),
}
}
/// Drop the specified chunk
///
/// This function is not pub because `Chunk`s should be dropped
/// using the interfaces on [`Catalog`] and not removed
/// directly.
pub(crate) fn drop_chunk(&mut self, chunk_id: u32) -> Result<()> {
match self.chunks.remove(&chunk_id) {
Some(_) => Ok(()),
None => UnknownChunk {
partition_key: self.key(),
chunk_id,
}
.fail(),
}
}
/// Return an immutable chunk reference by chunk id
///
/// This function is not pub because `Chunk`s should be
/// accessed using the interfaces on [`Catalog`]
pub(crate) fn chunk(&self, chunk_id: u32) -> Result<&Chunk> {
self.chunks.get(&chunk_id).context(UnknownChunk {
partition_key: self.key(),
chunk_id,
})
}
/// Return a mutable chunk reference by chunk id
///
/// This function is not pub because `Chunk`s should be
/// accessed using the interfaces on [`Catalog`]
pub(crate) fn chunk_mut(&mut self, chunk_id: u32) -> Result<&mut Chunk> {
self.chunks.get_mut(&chunk_id).context(UnknownChunk {
partition_key: &self.key,
chunk_id,
})
}
/// Return an iterator over chunks
///
/// This function is not pub because `Chunk`s should be
/// accessed using the interfaces on [`Catalog`]
pub(crate) fn chunks(&self) -> impl Iterator<Item = &Chunk> {
self.chunks.values()
}
}

View File

@ -2,24 +2,22 @@
name = "data_types"
version = "0.1.0"
authors = ["pauldix <paul@pauldix.net>"]
description = "InfluxDB IOx data_types, shared between IOx instances and IOx clients"
edition = "2018"
readme = "README.md"
[dependencies] # In alphabetical order
arrow_deps = { path = "../arrow_deps" }
chrono = { version = "0.4", features = ["serde"] }
crc32fast = "1.2.0"
flatbuffers = "0.6"
generated_types = { path = "../generated_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
percent-encoding = "2.1.0"
prost = "0.7"
regex = "1.4"
serde = "1.0"
serde_regex = "1.1"
snafu = "0.6"
tonic = { version = "0.4.0" }
tracing = "0.1"
[dev-dependencies] # In alphabetical order
criterion = "0.3"
test_helpers = { path = "../test_helpers" }
[[bench]]
name = "benchmark"
harness = false

5
data_types/README.md Normal file

@ -0,0 +1,5 @@
# Data Types
This crate contains types that are designed for external consumption (in `influxdb_iox_client` and other "client" facing uses).
*Client facing* in this case means exposed via the management API or CLI, where changing the structs may require additional coordination or organization with clients.

176
data_types/src/chunk.rs Normal file

@ -0,0 +1,176 @@
//! Module contains a representation of chunk metadata
use std::{convert::TryFrom, sync::Arc};
use crate::field_validation::FromField;
use generated_types::{google::FieldViolation, influxdata::iox::management::v1 as management};
use serde::{Deserialize, Serialize};
/// Which storage system is a chunk located in?
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize)]
pub enum ChunkStorage {
/// The chunk is still open for new writes, in the Mutable Buffer
OpenMutableBuffer,
/// The chunk is no longer open for writes, in the Mutable Buffer
ClosedMutableBuffer,
/// The chunk is in the Read Buffer (where it can not be mutated)
ReadBuffer,
/// The chunk is stored in Object Storage (where it can not be mutated)
ObjectStore,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize)]
/// Represents metadata about a chunk in a database.
/// A chunk can contain one or more tables.
pub struct ChunkSummary {
/// The partition key of this chunk
pub partition_key: Arc<String>,
/// The id of this chunk
pub id: u32,
/// How is this chunk stored?
pub storage: ChunkStorage,
/// The total estimated size of this chunk, in bytes
pub estimated_bytes: usize,
}
/// Conversion code to management API chunk structure
impl From<ChunkSummary> for management::Chunk {
fn from(summary: ChunkSummary) -> Self {
let ChunkSummary {
partition_key,
id,
storage,
estimated_bytes,
} = summary;
let storage: management::ChunkStorage = storage.into();
let storage = storage.into(); // convert to i32
let estimated_bytes = estimated_bytes as u64;
let partition_key = match Arc::try_unwrap(partition_key) {
// no one else has a reference so take the string
Ok(partition_key) => partition_key,
// some other reference exists to this string, so clone it
Err(partition_key) => partition_key.as_ref().clone(),
};
Self {
partition_key,
id,
storage,
estimated_bytes,
}
}
}
impl From<ChunkStorage> for management::ChunkStorage {
fn from(storage: ChunkStorage) -> Self {
match storage {
ChunkStorage::OpenMutableBuffer => Self::OpenMutableBuffer,
ChunkStorage::ClosedMutableBuffer => Self::ClosedMutableBuffer,
ChunkStorage::ReadBuffer => Self::ReadBuffer,
ChunkStorage::ObjectStore => Self::ObjectStore,
}
}
}
/// Conversion code from management API chunk structure
impl TryFrom<management::Chunk> for ChunkSummary {
type Error = FieldViolation;
fn try_from(proto: management::Chunk) -> Result<Self, Self::Error> {
// Use prost enum conversion
let storage = proto.storage().scope("storage")?;
let management::Chunk {
partition_key,
id,
estimated_bytes,
..
} = proto;
let estimated_bytes = estimated_bytes as usize;
let partition_key = Arc::new(partition_key);
Ok(Self {
partition_key,
id,
storage,
estimated_bytes,
})
}
}
impl TryFrom<management::ChunkStorage> for ChunkStorage {
type Error = FieldViolation;
fn try_from(proto: management::ChunkStorage) -> Result<Self, Self::Error> {
match proto {
management::ChunkStorage::OpenMutableBuffer => Ok(Self::OpenMutableBuffer),
management::ChunkStorage::ClosedMutableBuffer => Ok(Self::ClosedMutableBuffer),
management::ChunkStorage::ReadBuffer => Ok(Self::ReadBuffer),
management::ChunkStorage::ObjectStore => Ok(Self::ObjectStore),
management::ChunkStorage::Unspecified => Err(FieldViolation::required("")),
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn valid_proto_to_summary() {
let proto = management::Chunk {
partition_key: "foo".to_string(),
id: 42,
estimated_bytes: 1234,
storage: management::ChunkStorage::ObjectStore.into(),
};
let summary = ChunkSummary::try_from(proto).expect("conversion successful");
let expected = ChunkSummary {
partition_key: Arc::new("foo".to_string()),
id: 42,
estimated_bytes: 1234,
storage: ChunkStorage::ObjectStore,
};
assert_eq!(
summary, expected,
"Actual:\n\n{:?}\n\nExpected:\n\n{:?}\n\n",
summary, expected
);
}
#[test]
fn valid_summary_to_proto() {
let summary = ChunkSummary {
partition_key: Arc::new("foo".to_string()),
id: 42,
estimated_bytes: 1234,
storage: ChunkStorage::ObjectStore,
};
let proto = management::Chunk::try_from(summary).expect("conversion successful");
let expected = management::Chunk {
partition_key: "foo".to_string(),
id: 42,
estimated_bytes: 1234,
storage: management::ChunkStorage::ObjectStore.into(),
};
assert_eq!(
proto, expected,
"Actual:\n\n{:?}\n\nExpected:\n\n{:?}\n\n",
proto, expected
);
}
}


@ -1,6 +1,7 @@
use std::convert::{TryFrom, TryInto};
use chrono::{DateTime, TimeZone, Utc};
use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::Snafu;
@ -33,73 +34,11 @@ pub struct DatabaseRules {
/// database call, so an empty default is fine.
#[serde(default)]
pub name: String, // TODO: Use DatabaseName here
/// Template that generates a partition key for each row inserted into the
/// db
#[serde(default)]
pub partition_template: PartitionTemplate,
/// The set of host groups that data should be replicated to. Which host a
/// write goes to within a host group is determined by consistent hashing of
/// the partition key. We'd use this to create a host group per
/// availability zone, so you might have 5 availability zones with 2
/// hosts in each. Replication will ensure that N of those zones get a
/// write. For each zone, only a single host needs to get the write.
/// Replication is for ensuring a write exists across multiple hosts
/// before returning success. Its purpose is to ensure write durability,
/// rather than write availability for query (this is covered by
/// subscriptions).
#[serde(default)]
pub replication: Vec<HostGroupId>,
/// The minimum number of host groups to replicate a write to before success
/// is returned. This can be overridden on a per request basis.
/// Replication will continue to write to the other host groups in the
/// background.
#[serde(default)]
pub replication_count: u8,
/// How long the replication queue can get before either rejecting writes or
/// dropping missed writes. The queue is kept in memory on a
/// per-database basis. A queue size of zero means it will only try to
/// replicate synchronously and drop any failures.
#[serde(default)]
pub replication_queue_max_size: usize,
/// `subscriptions` are used for query servers to get data via either push
/// or pull as it arrives. They are separate from replication as they
/// have a different purpose. They're for query servers or other clients
/// that want to subscribe to some subset of data being written in. This
/// could either be specific partitions, ranges of partitions, tables, or
/// rows matching some predicate. This is step #3 from the diagram.
#[serde(default)]
pub subscriptions: Vec<Subscription>,
/// If set to `true`, this server should answer queries from one or more
/// of its local write buffer and any read-only partitions that it knows
/// about. In this case, results will be merged with any others from the
/// remote groups or read-only partitions.
#[serde(default)]
pub query_local: bool,
/// Set `primary_query_group` to a host group if remote servers should be
/// issued queries for this database. All hosts in the group should be
/// queried with this server acting as the coordinator that merges
/// results together. If a specific host in the group is unavailable,
/// another host in the same position from a secondary group should be
/// queried. For example, imagine we've partitioned the data in this DB into
/// 4 partitions and we are replicating the data across 3 availability
/// zones. We have 4 hosts in each of those AZs, thus they each have 1
/// partition. We'd set the primary group to be the 4 hosts in the same
/// AZ as this one, and the secondary groups as the hosts in the other 2
/// AZs.
#[serde(default)]
pub primary_query_group: Option<HostGroupId>,
#[serde(default)]
pub secondary_query_groups: Vec<HostGroupId>,
/// Use `read_only_partitions` when a server should answer queries for
/// partitions that come from object storage. This can be used to start
/// up a new query server to handle queries by pointing it at a
/// collection of partitions and then telling it to also pull
/// data from the replication servers (writes that haven't been snapshotted
/// into a partition).
#[serde(default)]
pub read_only_partitions: Vec<PartitionId>,
/// When set this will buffer WAL writes in memory based on the
/// configuration.
@ -113,6 +52,16 @@ pub struct DatabaseRules {
/// in object storage.
#[serde(default = "MutableBufferConfig::default_option")]
pub mutable_buffer_config: Option<MutableBufferConfig>,
/// An optional config to split writes into different "shards". A shard
/// is a logical concept, but the usage is meant to split data into
/// mutually exclusive areas. The rough order of organization is:
/// database -> shard -> partition -> chunk. For example, you could shard
/// based on table name and assign to 1 of 10 shards. Within each
/// shard you would have partitions, which would likely be based off time.
/// This makes it possible to horizontally scale out writes.
#[serde(default)]
pub shard_config: Option<ShardConfig>,
}
impl DatabaseRules {
@ -149,28 +98,9 @@ impl Partitioner for DatabaseRules {
impl From<DatabaseRules> for management::DatabaseRules {
fn from(rules: DatabaseRules) -> Self {
let subscriptions: Vec<management::subscription_config::Subscription> =
rules.subscriptions.into_iter().map(Into::into).collect();
let replication_config = management::ReplicationConfig {
replications: rules.replication,
replication_count: rules.replication_count as _,
replication_queue_max_size: rules.replication_queue_max_size as _,
};
let query_config = management::QueryConfig {
query_local: rules.query_local,
primary: rules.primary_query_group.unwrap_or_default(),
secondaries: rules.secondary_query_groups,
read_only_partitions: rules.read_only_partitions,
};
Self {
name: rules.name,
partition_template: Some(rules.partition_template.into()),
replication_config: Some(replication_config),
subscription_config: Some(management::SubscriptionConfig { subscriptions }),
query_config: Some(query_config),
wal_buffer_config: rules.wal_buffer_config.map(Into::into),
mutable_buffer_config: rules.mutable_buffer_config.map(Into::into),
}
@ -183,15 +113,6 @@ impl TryFrom<management::DatabaseRules> for DatabaseRules {
fn try_from(proto: management::DatabaseRules) -> Result<Self, Self::Error> {
DatabaseName::new(&proto.name).field("name")?;
let subscriptions = proto
.subscription_config
.map(|s| {
s.subscriptions
.vec_field("subscription_config.subscriptions")
})
.transpose()?
.unwrap_or_default();
let wal_buffer_config = proto.wal_buffer_config.optional("wal_buffer_config")?;
let mutable_buffer_config = proto
@ -203,22 +124,12 @@ impl TryFrom<management::DatabaseRules> for DatabaseRules {
.optional("partition_template")?
.unwrap_or_default();
let query = proto.query_config.unwrap_or_default();
let replication = proto.replication_config.unwrap_or_default();
Ok(Self {
name: proto.name,
partition_template,
replication: replication.replications,
replication_count: replication.replication_count as _,
replication_queue_max_size: replication.replication_queue_max_size as _,
subscriptions,
query_local: query.query_local,
primary_query_group: query.primary.optional(),
secondary_query_groups: query.secondaries,
read_only_partitions: query.read_only_partitions,
wal_buffer_config,
mutable_buffer_config,
shard_config: None,
})
}
}
@ -718,10 +629,19 @@ impl TryFrom<management::PartitionTemplate> for PartitionTemplate {
/// part of a partition key.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
pub enum TemplatePart {
/// The name of a table
Table,
/// The value in a named column
Column(String),
/// Applies a `strftime` format to the "time" column.
///
/// For example, a time format of "%Y-%m-%d %H:%M:%S" will produce
/// partition key parts such as "2021-03-14 12:25:21" and
/// "2021-04-14 12:24:21"
TimeFormat(String),
/// Applies a regex to the value in a string column
RegexCapture(RegexCapture),
/// Applies a `strftime` pattern to some column other than "time"
StrftimeColumn(StrftimeColumn),
}
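To make the format concrete, here is a small illustrative sketch using the `chrono` crate (already a dependency of `data_types`) showing what such a `strftime` pattern renders to:

```rust
use chrono::{TimeZone, Utc};

fn main() {
    // A TimeFormat / StrftimeColumn pattern of "%Y-%m-%d %H:%M:%S" turns a
    // timestamp into a partition key part like "2021-03-14 12:25:21".
    let ts = Utc.ymd(2021, 3, 14).and_hms(12, 25, 21);
    assert_eq!(
        ts.format("%Y-%m-%d %H:%M:%S").to_string(),
        "2021-03-14 12:25:21"
    );
}
```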
@ -733,8 +653,15 @@ pub struct RegexCapture {
regex: String,
}
/// `StrftimeColumn` can be used to create a time based partition key off some
/// [`StrftimeColumn`] is used to create a time based partition key off some
/// column other than the builtin `time` column.
///
/// The value of the named column is formatted using a `strftime`
/// style string.
///
/// For example, a time format of "%Y-%m-%d %H:%M:%S" will produce
/// partition key parts such as "2021-03-14 12:25:21" and
/// "2021-04-14 12:24:21"
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
pub struct StrftimeColumn {
column: String,
@ -802,66 +729,192 @@ impl TryFrom<management::partition_template::Part> for TemplatePart {
}
}
/// `PartitionId` is the object storage identifier for a specific partition. It
/// should be a path that can be used against an object store to locate all the
/// files and subdirectories for a partition. It takes the form of `/<writer
/// ID>/<database>/<partition key>/`.
pub type PartitionId = String;
pub type WriterId = u32;
/// `Subscription` represents a group of hosts that want to receive data as it
/// arrives. The subscription has a matcher that is used to determine what data
/// will match it, and an optional queue for storing matched writes. Subscribers
/// that receive some subset of an individual replicated write will get a new
/// replicated write, but with the same originating writer ID and sequence
/// number for the consuming subscriber's tracking purposes.
///
/// For pull based subscriptions, the requester will send a matcher, which the
/// receiver will execute against its in-memory WAL.
/// ShardConfig defines rules for assigning a line/row to an individual
/// host or a group of hosts. A shard
/// is a logical concept, but the usage is meant to split data into
/// mutually exclusive areas. The rough order of organization is:
/// database -> shard -> partition -> chunk. For example, you could shard
/// based on table name and assign to 1 of 10 shards. Within each
/// shard you would have partitions, which would likely be based off time.
/// This makes it possible to horizontally scale out writes.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
pub struct Subscription {
pub name: String,
pub host_group_id: HostGroupId,
pub matcher: Matcher,
pub struct ShardConfig {
/// An optional matcher. If there is a match, the route will be evaluated to
/// the given targets, otherwise the hash ring will be evaluated. This is
/// useful for overriding the hashring function on some hot spot. For
/// example, if you use the table name as the input to the hash function
/// and your ring has 4 slots, and two tables that are very hot get
/// assigned to the same slot, you can override that by putting in a
/// specific matcher to pull one of those tables over to a different node.
pub specific_targets: Option<MatcherToTargets>,
/// An optional default hasher which will route to one in a collection of
/// nodes.
pub hash_ring: Option<HashRing>,
/// If set to true the router will ignore any errors sent by the remote
/// targets in this route. That is, the write request will succeed
/// regardless of this route's success.
pub ignore_errors: bool,
}
impl From<Subscription> for management::subscription_config::Subscription {
fn from(s: Subscription) -> Self {
/// Maps a matcher to a specific target group. If the line/row matches,
/// it should be sent to the group.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone, Default)]
pub struct MatcherToTargets {
pub matcher: Matcher,
pub target: NodeGroup,
}
/// A collection of IOx nodes
pub type NodeGroup = Vec<WriterId>;
/// HashRing is a rule for creating a hash key for a row and mapping that to
/// an individual node on a ring.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
pub struct HashRing {
/// If true the table name will be included in the hash key
pub table_name: bool,
/// include the values of these columns in the hash key
pub columns: Vec<String>,
/// ring of node groups. Each group holds a shard
pub node_groups: Vec<NodeGroup>,
}
/// A matcher is used to match routing rules or subscriptions on a row-by-row
/// (or line) basis.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct Matcher {
/// if provided, match if the table name matches against the regex
#[serde(with = "serde_regex")]
pub table_name_regex: Option<Regex>,
// paul: what should we use for predicate matching here against a single row/line?
pub predicate: Option<String>,
}
impl PartialEq for Matcher {
fn eq(&self, other: &Self) -> bool {
// this is kind of janky, but it's only used during tests and should get the job
// done
format!("{:?}{:?}", self.table_name_regex, self.predicate)
== format!("{:?}{:?}", other.table_name_regex, other.predicate)
}
}
impl Eq for Matcher {}
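To show how these new sharding types fit together, here is an illustrative sketch that builds a `ShardConfig` by hand (the module path, table regex, column name, and writer IDs are assumptions made for the example):

```rust
use data_types::database_rules::{HashRing, Matcher, MatcherToTargets, ShardConfig};
use regex::Regex;

fn main() {
    // Pin a hypothetical hot "cpu" table to writer 1 via a specific matcher...
    let specific_targets = Some(MatcherToTargets {
        matcher: Matcher {
            table_name_regex: Some(Regex::new("^cpu$").unwrap()),
            predicate: None,
        },
        target: vec![1],
    });

    // ...and hash every other line across two single-node groups by table
    // name and the value of the "host" column.
    let hash_ring = Some(HashRing {
        table_name: true,
        columns: vec!["host".to_string()],
        node_groups: vec![vec![2], vec![3]],
    });

    let shard_config = ShardConfig {
        specific_targets,
        hash_ring,
        ignore_errors: false,
    };
    println!("{:?}", shard_config);
}
```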
impl From<ShardConfig> for management::ShardConfig {
fn from(shard_config: ShardConfig) -> Self {
Self {
name: s.name,
host_group_id: s.host_group_id,
matcher: Some(s.matcher.into()),
specific_targets: shard_config.specific_targets.map(|i| i.into()),
hash_ring: shard_config.hash_ring.map(|i| i.into()),
ignore_errors: shard_config.ignore_errors,
}
}
}
impl TryFrom<management::subscription_config::Subscription> for Subscription {
impl TryFrom<management::ShardConfig> for ShardConfig {
type Error = FieldViolation;
fn try_from(proto: management::subscription_config::Subscription) -> Result<Self, Self::Error> {
fn try_from(proto: management::ShardConfig) -> Result<Self, Self::Error> {
Ok(Self {
name: proto.name.required("name")?,
host_group_id: proto.host_group_id.required("host_group_id")?,
matcher: proto.matcher.optional("matcher")?.unwrap_or_default(),
specific_targets: proto
.specific_targets
.map(|i| i.try_into())
.map_or(Ok(None), |r| r.map(Some))?,
hash_ring: proto
.hash_ring
.map(|i| i.try_into())
.map_or(Ok(None), |r| r.map(Some))?,
ignore_errors: proto.ignore_errors,
})
}
}
/// `Matcher` specifies the rule against the table name and/or a predicate
/// against the row to determine if it matches the write rule.
#[derive(Debug, Default, Serialize, Deserialize, Eq, PartialEq, Clone)]
pub struct Matcher {
pub tables: MatchTables,
// TODO: make this work with query::Predicate
#[serde(skip_serializing_if = "Option::is_none")]
pub predicate: Option<String>,
/// Returns `None` if `v` matches its default value.
fn none_if_default<T: Default + PartialEq>(v: T) -> Option<T> {
if v == Default::default() {
None
} else {
Some(v)
}
}
impl From<MatcherToTargets> for management::MatcherToTargets {
fn from(matcher_to_targets: MatcherToTargets) -> Self {
Self {
matcher: none_if_default(matcher_to_targets.matcher.into()),
target: none_if_default(from_node_group_for_management_node_group(
matcher_to_targets.target,
)),
}
}
}
impl TryFrom<management::MatcherToTargets> for MatcherToTargets {
type Error = FieldViolation;
fn try_from(proto: management::MatcherToTargets) -> Result<Self, Self::Error> {
Ok(Self {
matcher: proto.matcher.unwrap_or_default().try_into()?,
target: try_from_management_node_group_for_node_group(
proto.target.unwrap_or_default(),
)?,
})
}
}
impl From<HashRing> for management::HashRing {
fn from(hash_ring: HashRing) -> Self {
Self {
table_name: hash_ring.table_name,
columns: hash_ring.columns,
node_groups: hash_ring
.node_groups
.into_iter()
.map(from_node_group_for_management_node_group)
.collect(),
}
}
}
impl TryFrom<management::HashRing> for HashRing {
type Error = FieldViolation;
fn try_from(proto: management::HashRing) -> Result<Self, Self::Error> {
Ok(Self {
table_name: proto.table_name,
columns: proto.columns,
node_groups: proto
.node_groups
.into_iter()
.map(try_from_management_node_group_for_node_group)
.collect::<Result<Vec<_>, _>>()?,
})
}
}
// cannot (and/or don't know how to) add impl From inside prost generated code
fn from_node_group_for_management_node_group(node_group: NodeGroup) -> management::NodeGroup {
management::NodeGroup {
nodes: node_group
.into_iter()
.map(|id| management::node_group::Node { id })
.collect(),
}
}
fn try_from_management_node_group_for_node_group(
proto: management::NodeGroup,
) -> Result<NodeGroup, FieldViolation> {
Ok(proto.nodes.into_iter().map(|i| i.id).collect())
}
impl From<Matcher> for management::Matcher {
fn from(m: Matcher) -> Self {
fn from(matcher: Matcher) -> Self {
Self {
predicate: m.predicate.unwrap_or_default(),
table_matcher: Some(m.tables.into()),
table_name_regex: matcher
.table_name_regex
.map(|r| r.to_string())
.unwrap_or_default(),
predicate: matcher.predicate.unwrap_or_default(),
}
}
}
@ -870,61 +923,31 @@ impl TryFrom<management::Matcher> for Matcher {
type Error = FieldViolation;
fn try_from(proto: management::Matcher) -> Result<Self, Self::Error> {
let table_name_regex = match &proto.table_name_regex as &str {
"" => None,
re => Some(Regex::new(re).map_err(|e| FieldViolation {
field: "table_name_regex".to_string(),
description: e.to_string(),
})?),
};
let predicate = match proto.predicate {
p if p.is_empty() => None,
p => Some(p),
};
Ok(Self {
tables: proto.table_matcher.required("table_matcher")?,
predicate: proto.predicate.optional(),
table_name_regex,
predicate,
})
}
}
/// `MatchTables` looks at the table name of a row to determine if it should
/// match the rule.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
#[serde(rename_all = "camelCase")]
pub enum MatchTables {
#[serde(rename = "*")]
All,
Table(String),
Regex(String),
}
impl Default for MatchTables {
fn default() -> Self {
Self::All
}
}
impl From<MatchTables> for management::matcher::TableMatcher {
fn from(m: MatchTables) -> Self {
match m {
MatchTables::All => Self::All(Empty {}),
MatchTables::Table(table) => Self::Table(table),
MatchTables::Regex(regex) => Self::Regex(regex),
}
}
}
impl TryFrom<management::matcher::TableMatcher> for MatchTables {
type Error = FieldViolation;
fn try_from(proto: management::matcher::TableMatcher) -> Result<Self, Self::Error> {
use management::matcher::TableMatcher;
Ok(match proto {
TableMatcher::All(_) => Self::All,
TableMatcher::Table(table) => Self::Table(table.required("table_matcher.table")?),
TableMatcher::Regex(regex) => Self::Regex(regex.required("table_matcher.regex")?),
})
}
}
pub type HostGroupId = String;
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
pub struct HostGroup {
pub id: HostGroupId,
/// `hosts` is a vector of connection strings for remote hosts.
pub hosts: Vec<String>,
}
/// `PartitionId` is the object storage identifier for a specific partition. It
/// should be a path that can be used against an object store to locate all the
/// files and subdirectories for a partition. It takes the form of `/<writer
/// ID>/<database>/<partition key>/`.
pub type PartitionId = String;
pub type WriterId = u32;
#[cfg(test)]
mod tests {
@ -1107,16 +1130,9 @@ mod tests {
assert_eq!(protobuf.name, back.name);
assert_eq!(rules.partition_template.parts.len(), 0);
assert_eq!(rules.subscriptions.len(), 0);
assert!(rules.primary_query_group.is_none());
assert_eq!(rules.read_only_partitions.len(), 0);
assert_eq!(rules.secondary_query_groups.len(), 0);
// These will be defaulted as optionality not preserved on non-protobuf
// DatabaseRules
assert_eq!(back.replication_config, Some(Default::default()));
assert_eq!(back.subscription_config, Some(Default::default()));
assert_eq!(back.query_config, Some(Default::default()));
assert_eq!(back.partition_template, Some(Default::default()));
// These should be none as preserved on non-protobuf DatabaseRules
@ -1124,65 +1140,6 @@ mod tests {
assert!(back.mutable_buffer_config.is_none());
}
#[test]
fn test_database_rules_query() {
let readonly = vec!["readonly1".to_string(), "readonly2".to_string()];
let secondaries = vec!["secondary1".to_string(), "secondary2".to_string()];
let protobuf = management::DatabaseRules {
name: "database".to_string(),
query_config: Some(management::QueryConfig {
query_local: true,
primary: "primary".to_string(),
secondaries: secondaries.clone(),
read_only_partitions: readonly.clone(),
}),
..Default::default()
};
let rules: DatabaseRules = protobuf.clone().try_into().unwrap();
let back: management::DatabaseRules = rules.clone().into();
assert_eq!(rules.name, protobuf.name);
assert_eq!(protobuf.name, back.name);
assert_eq!(rules.read_only_partitions, readonly);
assert_eq!(rules.primary_query_group, Some("primary".to_string()));
assert_eq!(rules.secondary_query_groups, secondaries);
assert_eq!(rules.subscriptions.len(), 0);
assert_eq!(rules.partition_template.parts.len(), 0);
// Should be the same as was specified
assert_eq!(back.query_config, protobuf.query_config);
assert!(back.wal_buffer_config.is_none());
assert!(back.mutable_buffer_config.is_none());
// These will be defaulted as optionality not preserved on non-protobuf
// DatabaseRules
assert_eq!(back.replication_config, Some(Default::default()));
assert_eq!(back.subscription_config, Some(Default::default()));
assert_eq!(back.partition_template, Some(Default::default()));
}
#[test]
fn test_query_config_default() {
let protobuf = management::DatabaseRules {
name: "database".to_string(),
query_config: Some(Default::default()),
..Default::default()
};
let rules: DatabaseRules = protobuf.clone().try_into().unwrap();
let back: management::DatabaseRules = rules.clone().into();
assert!(rules.primary_query_group.is_none());
assert_eq!(rules.secondary_query_groups.len(), 0);
assert_eq!(rules.read_only_partitions.len(), 0);
assert_eq!(rules.query_local, false);
assert_eq!(protobuf.query_config, back.query_config);
}
#[test]
fn test_partition_template_default() {
let protobuf = management::DatabaseRules {
@ -1317,87 +1274,6 @@ mod tests {
assert_eq!(&err.description, "Duration must be positive");
}
#[test]
fn test_matcher_default() {
let protobuf: management::Matcher = Default::default();
let res: Result<Matcher, _> = protobuf.try_into();
let err = res.expect_err("expected failure");
assert_eq!(&err.field, "table_matcher");
assert_eq!(&err.description, "Field is required");
}
#[test]
fn test_matcher() {
let protobuf = management::Matcher {
predicate: Default::default(),
table_matcher: Some(management::matcher::TableMatcher::Regex(
"regex".to_string(),
)),
};
let matcher: Matcher = protobuf.try_into().unwrap();
assert_eq!(matcher.tables, MatchTables::Regex("regex".to_string()));
assert!(matcher.predicate.is_none());
}
#[test]
fn test_subscription_default() {
let pb_matcher = Some(management::Matcher {
predicate: "predicate1".to_string(),
table_matcher: Some(management::matcher::TableMatcher::Table(
"table".to_string(),
)),
});
let matcher = Matcher {
tables: MatchTables::Table("table".to_string()),
predicate: Some("predicate1".to_string()),
};
let subscription_config = management::SubscriptionConfig {
subscriptions: vec![
management::subscription_config::Subscription {
name: "subscription1".to_string(),
host_group_id: "host group".to_string(),
matcher: pb_matcher.clone(),
},
management::subscription_config::Subscription {
name: "subscription2".to_string(),
host_group_id: "host group".to_string(),
matcher: pb_matcher,
},
],
};
let protobuf = management::DatabaseRules {
name: "database".to_string(),
subscription_config: Some(subscription_config),
..Default::default()
};
let rules: DatabaseRules = protobuf.clone().try_into().unwrap();
let back: management::DatabaseRules = rules.clone().into();
assert_eq!(protobuf.subscription_config, back.subscription_config);
assert_eq!(
rules.subscriptions,
vec![
Subscription {
name: "subscription1".to_string(),
host_group_id: "host group".to_string(),
matcher: matcher.clone()
},
Subscription {
name: "subscription2".to_string(),
host_group_id: "host group".to_string(),
matcher
}
]
)
}
#[test]
fn mutable_buffer_config_default() {
let protobuf: management::MutableBufferConfig = Default::default();
@ -1528,4 +1404,128 @@ mod tests {
assert_eq!(err3.field, "column.column_name");
assert_eq!(err3.description, "Field is required");
}
#[test]
fn test_matcher_default() {
let protobuf = management::Matcher {
..Default::default()
};
let matcher: Matcher = protobuf.clone().try_into().unwrap();
let back: management::Matcher = matcher.clone().into();
assert!(matcher.table_name_regex.is_none());
assert_eq!(protobuf.table_name_regex, back.table_name_regex);
assert_eq!(matcher.predicate, None);
assert_eq!(protobuf.predicate, back.predicate);
}
#[test]
fn test_matcher_regexp() {
let protobuf = management::Matcher {
table_name_regex: "^foo$".into(),
..Default::default()
};
let matcher: Matcher = protobuf.clone().try_into().unwrap();
let back: management::Matcher = matcher.clone().into();
assert_eq!(matcher.table_name_regex.unwrap().to_string(), "^foo$");
assert_eq!(protobuf.table_name_regex, back.table_name_regex);
}
#[test]
fn test_matcher_bad_regexp() {
let protobuf = management::Matcher {
table_name_regex: "*".into(),
..Default::default()
};
let matcher: Result<Matcher, FieldViolation> = protobuf.try_into();
assert!(matcher.is_err());
assert_eq!(matcher.err().unwrap().field, "table_name_regex");
}
#[test]
fn test_hash_ring_default() {
let protobuf = management::HashRing {
..Default::default()
};
let hash_ring: HashRing = protobuf.clone().try_into().unwrap();
let back: management::HashRing = hash_ring.clone().into();
assert_eq!(hash_ring.table_name, false);
assert_eq!(protobuf.table_name, back.table_name);
assert!(hash_ring.columns.is_empty());
assert_eq!(protobuf.columns, back.columns);
assert!(hash_ring.node_groups.is_empty());
assert_eq!(protobuf.node_groups, back.node_groups);
}
#[test]
fn test_hash_ring_nodes() {
let protobuf = management::HashRing {
node_groups: vec![
management::NodeGroup {
nodes: vec![
management::node_group::Node { id: 10 },
management::node_group::Node { id: 11 },
management::node_group::Node { id: 12 },
],
},
management::NodeGroup {
nodes: vec![management::node_group::Node { id: 20 }],
},
],
..Default::default()
};
let hash_ring: HashRing = protobuf.try_into().unwrap();
assert_eq!(hash_ring.node_groups.len(), 2);
assert_eq!(hash_ring.node_groups[0].len(), 3);
assert_eq!(hash_ring.node_groups[1].len(), 1);
}
#[test]
fn test_matcher_to_targets_default() {
let protobuf = management::MatcherToTargets {
..Default::default()
};
let matcher_to_targets: MatcherToTargets = protobuf.clone().try_into().unwrap();
let back: management::MatcherToTargets = matcher_to_targets.clone().into();
assert_eq!(
matcher_to_targets.matcher,
Matcher {
..Default::default()
}
);
assert_eq!(protobuf.matcher, back.matcher);
assert_eq!(matcher_to_targets.target, Vec::<WriterId>::new());
assert_eq!(protobuf.target, back.target);
}
#[test]
fn test_shard_config_default() {
let protobuf = management::ShardConfig {
..Default::default()
};
let shard_config: ShardConfig = protobuf.clone().try_into().unwrap();
let back: management::ShardConfig = shard_config.clone().into();
assert!(shard_config.specific_targets.is_none());
assert_eq!(protobuf.specific_targets, back.specific_targets);
assert!(shard_config.hash_ring.is_none());
assert_eq!(protobuf.hash_ring, back.hash_ring);
assert_eq!(shard_config.ignore_errors, false);
assert_eq!(protobuf.ignore_errors, back.ignore_errors);
}
}

View File

@ -18,9 +18,3 @@ pub struct WalMetadataQuery {
pub struct WalMetadataResponse {
pub segments: Vec<SegmentSummary>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
/// Body of the response to the /databases endpoint.
pub struct ListDatabasesResponse {
pub names: Vec<String>,
}

160
data_types/src/job.rs Normal file
View File

@ -0,0 +1,160 @@
use generated_types::google::{protobuf::Any, FieldViolation, FieldViolationExt};
use generated_types::{
google::longrunning, influxdata::iox::management::v1 as management, protobuf_type_url_eq,
};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
/// Metadata associated with a set of background tasks
/// Used in combination with TrackerRegistry
///
/// TODO: Serde is temporary until prost adds JSON support
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum Job {
Dummy {
nanos: Vec<u64>,
},
/// Persist a WAL segment to object store
PersistSegment {
writer_id: u32,
segment_id: u64,
},
/// Move a chunk from mutable buffer to read buffer
CloseChunk {
db_name: String,
partition_key: String,
chunk_id: u32,
},
}
impl From<Job> for management::operation_metadata::Job {
fn from(job: Job) -> Self {
match job {
Job::Dummy { nanos } => Self::Dummy(management::Dummy { nanos }),
Job::PersistSegment {
writer_id,
segment_id,
} => Self::PersistSegment(management::PersistSegment {
writer_id,
segment_id,
}),
Job::CloseChunk {
db_name,
partition_key,
chunk_id,
} => Self::CloseChunk(management::CloseChunk {
db_name,
partition_key,
chunk_id,
}),
}
}
}
impl From<management::operation_metadata::Job> for Job {
fn from(value: management::operation_metadata::Job) -> Self {
use management::operation_metadata::Job;
match value {
Job::Dummy(management::Dummy { nanos }) => Self::Dummy { nanos },
Job::PersistSegment(management::PersistSegment {
writer_id,
segment_id,
}) => Self::PersistSegment {
writer_id,
segment_id,
},
Job::CloseChunk(management::CloseChunk {
db_name,
partition_key,
chunk_id,
}) => Self::CloseChunk {
db_name,
partition_key,
chunk_id,
},
}
}
}
/// The status of a running operation
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum OperationStatus {
/// A task associated with the operation is running
Running,
/// All tasks associated with the operation have finished
///
/// Note: This does not indicate success or failure, only that
/// no tasks associated with the operation are running
Complete,
/// The operation was cancelled and no associated tasks are running
Cancelled,
/// An operation error was returned
///
/// Note: The tracker system currently will never return this
Errored,
}
/// A group of asynchronous tasks being performed by an IOx server
///
/// TODO: Temporary until prost adds JSON support
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Operation {
/// ID of the running operation
pub id: usize,
/// Number of subtasks for this operation
pub task_count: u64,
/// Number of pending tasks for this operation
pub pending_count: u64,
/// Wall time spent executing this operation
pub wall_time: std::time::Duration,
/// CPU time spent executing this operation
pub cpu_time: std::time::Duration,
/// Additional job metadata
pub job: Option<Job>,
/// The status of the running operation
pub status: OperationStatus,
}
impl TryFrom<longrunning::Operation> for Operation {
type Error = FieldViolation;
fn try_from(operation: longrunning::Operation) -> Result<Self, Self::Error> {
let metadata: Any = operation
.metadata
.ok_or_else(|| FieldViolation::required("metadata"))?;
if !protobuf_type_url_eq(&metadata.type_url, management::OPERATION_METADATA) {
return Err(FieldViolation {
field: "metadata.type_url".to_string(),
description: "Unexpected field type".to_string(),
});
}
let meta: management::OperationMetadata =
prost::Message::decode(metadata.value).field("metadata.value")?;
let status = match &operation.result {
None => OperationStatus::Running,
Some(longrunning::operation::Result::Response(_)) => OperationStatus::Complete,
Some(longrunning::operation::Result::Error(status)) => {
if status.code == tonic::Code::Cancelled as i32 {
OperationStatus::Cancelled
} else {
OperationStatus::Errored
}
}
};
Ok(Self {
id: operation.name.parse().field("name")?,
task_count: meta.task_count,
pending_count: meta.pending_count,
wall_time: std::time::Duration::from_nanos(meta.wall_nanos),
cpu_time: std::time::Duration::from_nanos(meta.cpu_nanos),
job: meta.job.map(Into::into),
status,
})
}
}
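A minimal usage sketch, added for illustration and not part of the original file: the `From` impls above let a `Job` round-trip through its protobuf representation. The concrete field values below are hypothetical.
#[cfg(test)]
mod job_conversion_sketch {
    use super::*;

    #[test]
    fn close_chunk_round_trips_through_protobuf() {
        let job = Job::CloseChunk {
            db_name: "mydb".to_string(),
            partition_key: "2021-03-24".to_string(),
            chunk_id: 7,
        };

        // Convert into the protobuf oneof variant and back again.
        let proto: management::operation_metadata::Job = job.clone().into();
        let back: Job = proto.into();
        assert_eq!(job, back);
    }
}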

View File

@ -10,28 +10,17 @@
clippy::clone_on_ref_ptr
)]
pub use schema::TIME_COLUMN_NAME;
pub use database_name::*;
/// The name of the column containing table names returned by a call to
/// `table_names`.
pub const TABLE_NAMES_COLUMN_NAME: &str = "table";
/// The name of the column containing column names returned by a call to
/// `column_names`.
pub const COLUMN_NAMES_COLUMN_NAME: &str = "column";
pub mod data;
pub mod chunk;
pub mod database_rules;
pub mod error;
pub mod http;
pub mod job;
pub mod names;
pub mod partition_metadata;
pub mod schema;
pub mod selection;
pub mod timestamp;
pub mod wal;
mod database_name;
pub use database_name::*;
pub(crate) mod field_validation;

View File

@ -9,25 +9,9 @@
# docker build -f docker/Dockerfile.ci .
##
# Build any binaries that can be copied into the CI image
# Note: we build flatbuffers from source (pinned to a particular version)
FROM rust:slim-buster AS flatc
ARG flatbuffers_version="v1.12.0"
RUN apt-get update \
&& mkdir -p /usr/share/man/man1 \
&& apt-get install -y \
git make clang cmake llvm \
--no-install-recommends \
&& git clone -b ${flatbuffers_version} -- https://github.com/google/flatbuffers.git /usr/local/src/flatbuffers \
&& cmake -S /usr/local/src/flatbuffers -B /usr/local/src/flatbuffers \
-G "Unix Makefiles" \
-DCMAKE_BUILD_TYPE=Release \
&& make -C /usr/local/src/flatbuffers -j $(nproc) flatc
# Build actual image used for CI pipeline
FROM rust:slim-buster
COPY --from=flatc /usr/local/src/flatbuffers/flatc /usr/bin/flatc
# make Apt non-interactive
RUN echo 'APT::Get::Assume-Yes "true";' > /etc/apt/apt.conf.d/90ci \
&& echo 'DPkg::Options "--force-confnew";' >> /etc/apt/apt.conf.d/90ci
@ -39,8 +23,7 @@ RUN apt-get update \
&& apt-get install -y \
git locales sudo openssh-client ca-certificates tar gzip parallel \
unzip zip bzip2 gnupg curl make pkg-config libssl-dev \
musl musl-dev musl-tools clang llvm \
jq \
jq clang lld \
--no-install-recommends \
&& apt-get clean autoclean \
&& apt-get autoremove --yes \

View File

@ -4,7 +4,7 @@
FROM debian:buster-slim
RUN apt-get update \
&& apt-get install -y libssl1.1 libgcc1 libc6 \
&& apt-get install -y libssl1.1 libgcc1 libc6 ca-certificates --no-install-recommends \
&& rm -rf /var/lib/{apt,dpkg,cache,log}
RUN groupadd -g 1500 rust \
@ -20,3 +20,5 @@ COPY target/release/influxdb_iox /usr/bin/influxdb_iox
EXPOSE 8080 8082
ENTRYPOINT ["influxdb_iox"]
CMD ["run"]

View File

@ -4,6 +4,15 @@ This directory contains internal design documentation of potential
interest for those who wish to understand how the code works. It is
not intended to be general user-facing documentation.
## IOx Tech Talks
We hold monthly Tech Talks that explain the project's technical underpinnings. You can register for the [InfluxDB IOx Tech Talks here](https://www.influxdata.com/community-showcase/influxdb-tech-talks/), or you can find links to previous sessions below:
* December 2020: Rusty Introduction to Apache Arrow [recording](https://www.youtube.com/watch?v=dQFjKa9vKhM)
* Jan 2021: Data Lifecycle in InfluxDB IOx & How it Uses Object Storage for Persistence [recording](https://www.youtube.com/watch?v=KwdPifHC1Gc)
* February 2021: Intro to the InfluxDB IOx Read Buffer [recording](https://www.youtube.com/watch?v=KslD31VNqPU) [slides](https://www.slideshare.net/influxdata/influxdb-iox-tech-talks-intro-to-the-influxdb-iox-read-buffer-a-readoptimized-inmemory-query-execution-engine)
* March 2021: Query Engine Design and the Rust-Based DataFusion in Apache Arrow [recording](https://www.youtube.com/watch?v=K6eCAVEk4kU) [slides](https://www.slideshare.net/influxdata/influxdb-iox-tech-talks-query-engine-design-and-the-rustbased-datafusion-in-apache-arrow-244161934)
## Table of Contents:
* Rust style and Idiom guide: [style_guide.md](style_guide.md)
@ -13,3 +22,5 @@ not intended to be general user facing documentation
* Thoughts on using multiple cores: [multi_core_tasks.md](multi_core_tasks.md)
* [Query Engine Docs](../query/README.md)
* [Testing documentation](testing.md) for developers of IOx
* [Regenerating Flatbuffers code](regenerating_flatbuffers.md) when updating the version of the `flatbuffers` crate

View File

@ -7,7 +7,7 @@
# The full list of available configuration values can be found in
# the command line help (e.g. `env: INFLUXDB_IOX_DB_DIR=`):
#
# ./influxdb_iox server --help
# ./influxdb_iox run --help
#
#
# The identifier for the server. Used for writing to object storage and as

View File

@ -0,0 +1,7 @@
# Regenerating Flatbuffers code
When updating the version of the [flatbuffers](https://crates.io/crates/flatbuffers) Rust crate used as a dependency in the IOx workspace, the generated Rust code in `generated_types/src/wal_generated.rs` also needs to be updated in sync.
To update the generated code, edit `generated_types/regenerate-flatbuffers.sh` and set the `FB_COMMIT` variable at the top of the file to the commit SHA of the same commit in the [flatbuffers repository](https://github.com/google/flatbuffers) where the `flatbuffers` Rust crate version was updated. This ensures we'll be [using the same version of `flatc` that the crate was tested with](https://github.com/google/flatbuffers/issues/6199#issuecomment-714562121).
Then run the `generated_types/regenerate-flatbuffers.sh` script and check in any changes. Verify that the whole project still builds.

View File

@ -21,14 +21,14 @@ of the object stores, the relevant tests will run.
### Configuration differences when running the tests
When running `influxdb_iox server`, you can pick one object store to use. When running the tests,
When running `influxdb_iox run`, you can pick one object store to use. When running the tests,
you can run them against all the possible object stores. There's still only one
`INFLUXDB_IOX_BUCKET` variable, though, so that will set the bucket name for all configured object
stores. Use the same bucket name when setting up the different services.
Other than possibly configuring multiple object stores, configuring the tests to use the object
store services is the same as configuring the server to use an object store service. See the output
of `influxdb_iox server --help` for instructions.
of `influxdb_iox run --help` for instructions.
## InfluxDB IOx Client

2
generated_types/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.flatbuffers

View File

@ -5,12 +5,14 @@ authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
[dependencies] # In alphabetical order
flatbuffers = "0.6.1"
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
# version of the flatbuffers crate
flatbuffers = "0.8"
futures = "0.3.1"
prost = "0.7"
prost-types = "0.7"
tonic = "0.4"
tracing = "0.1"
google_types = { path = "../google_types" }
[build-dependencies] # In alphabetical order

View File

@ -1,10 +1,6 @@
//! Compiles Protocol Buffers and FlatBuffers schema definitions into
//! native Rust types.
//! Compiles Protocol Buffers into native Rust types.
use std::{
path::{Path, PathBuf},
process::Command,
};
use std::path::{Path, PathBuf};
type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;
@ -13,7 +9,6 @@ fn main() -> Result<()> {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");
generate_grpc_types(&root)?;
generate_wal_types(&root)?;
Ok(())
}
@ -28,7 +23,7 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
let storage_path = root.join("influxdata/platform/storage");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let grpc_path = root.join("grpc/health/v1");
let write_path = root.join("influxdata/iox/write/v1");
let proto_files = vec![
storage_path.join("test.proto"),
@ -39,8 +34,16 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
idpe_path.join("source.proto"),
management_path.join("base_types.proto"),
management_path.join("database_rules.proto"),
management_path.join("chunk.proto"),
management_path.join("partition.proto"),
management_path.join("service.proto"),
grpc_path.join("service.proto"),
management_path.join("shard.proto"),
management_path.join("jobs.proto"),
write_path.join("service.proto"),
root.join("grpc/health/v1/service.proto"),
root.join("google/longrunning/operations.proto"),
root.join("google/rpc/error_details.proto"),
root.join("google/rpc/status.proto"),
];
// Tell cargo to recompile if any of these proto files are changed
@ -52,36 +55,10 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
config
.compile_well_known_types()
.extern_path(".google", "::google_types");
.disable_comments(&[".google"])
.extern_path(".google.protobuf", "::google_types::protobuf");
tonic_build::configure().compile_with_config(config, &proto_files, &[root.into()])?;
Ok(())
}
/// Schema used in the WAL
///
/// Creates `wal_generated.rs`
fn generate_wal_types(root: &Path) -> Result<()> {
let wal_file = root.join("wal.fbs");
println!("cargo:rerun-if-changed={}", wal_file.display());
let out_dir: PathBuf = std::env::var_os("OUT_DIR")
.expect("Could not determine `OUT_DIR`")
.into();
let status = Command::new("flatc")
.arg("--rust")
.arg("-o")
.arg(&out_dir)
.arg(wal_file)
.status();
match status {
Ok(status) if !status.success() => panic!("`flatc` failed to compile the .fbs to Rust"),
Ok(_status) => {} // Successfully compiled
Err(err) => panic!("Could not execute `flatc`: {}", err),
}
Ok(())
}

View File

@ -0,0 +1,31 @@
// Copyright (c) 2015, Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.api;
import "google/api/http.proto";
import "google/protobuf/descriptor.proto";
option go_package = "google.golang.org/genproto/googleapis/api/annotations;annotations";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.api";
option objc_class_prefix = "GAPI";
extend google.protobuf.MethodOptions {
// See `HttpRule`.
HttpRule http = 72295728;
}

View File

@ -0,0 +1,99 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.api;
import "google/protobuf/descriptor.proto";
option go_package = "google.golang.org/genproto/googleapis/api/annotations;annotations";
option java_multiple_files = true;
option java_outer_classname = "ClientProto";
option java_package = "com.google.api";
option objc_class_prefix = "GAPI";
extend google.protobuf.MethodOptions {
// A definition of a client library method signature.
//
// In client libraries, each proto RPC corresponds to one or more methods
// which the end user is able to call, and calls the underlying RPC.
// Normally, this method receives a single argument (a struct or instance
// corresponding to the RPC request object). Defining this field will
// add one or more overloads providing flattened or simpler method signatures
// in some languages.
//
// The fields on the method signature are provided as a comma-separated
// string.
//
// For example, the proto RPC and annotation:
//
// rpc CreateSubscription(CreateSubscriptionRequest)
// returns (Subscription) {
// option (google.api.method_signature) = "name,topic";
// }
//
// Would add the following Java overload (in addition to the method accepting
// the request object):
//
// public final Subscription createSubscription(String name, String topic)
//
// The following backwards-compatibility guidelines apply:
//
// * Adding this annotation to an unannotated method is backwards
// compatible.
// * Adding this annotation to a method which already has existing
// method signature annotations is backwards compatible if and only if
// the new method signature annotation is last in the sequence.
// * Modifying or removing an existing method signature annotation is
// a breaking change.
// * Re-ordering existing method signature annotations is a breaking
// change.
repeated string method_signature = 1051;
}
extend google.protobuf.ServiceOptions {
// The hostname for this service.
// This should be specified with no prefix or protocol.
//
// Example:
//
// service Foo {
// option (google.api.default_host) = "foo.googleapi.com";
// ...
// }
string default_host = 1049;
// OAuth scopes needed for the client.
//
// Example:
//
// service Foo {
// option (google.api.oauth_scopes) = \
// "https://www.googleapis.com/auth/cloud-platform";
// ...
// }
//
// If there is more than one scope, use a comma-separated string:
//
// Example:
//
// service Foo {
// option (google.api.oauth_scopes) = \
// "https://www.googleapis.com/auth/cloud-platform,"
// "https://www.googleapis.com/auth/monitoring";
// ...
// }
string oauth_scopes = 1050;
}

View File

@ -0,0 +1,375 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.api;
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/api/annotations;annotations";
option java_multiple_files = true;
option java_outer_classname = "HttpProto";
option java_package = "com.google.api";
option objc_class_prefix = "GAPI";
// Defines the HTTP configuration for an API service. It contains a list of
// [HttpRule][google.api.HttpRule], each specifying the mapping of an RPC method
// to one or more HTTP REST API methods.
message Http {
// A list of HTTP configuration rules that apply to individual API methods.
//
// **NOTE:** All service configuration rules follow "last one wins" order.
repeated HttpRule rules = 1;
// When set to true, URL path parameters will be fully URI-decoded except in
// cases of single segment matches in reserved expansion, where "%2F" will be
// left encoded.
//
// The default behavior is to not decode RFC 6570 reserved characters in multi
// segment matches.
bool fully_decode_reserved_expansion = 2;
}
// # gRPC Transcoding
//
// gRPC Transcoding is a feature for mapping between a gRPC method and one or
// more HTTP REST endpoints. It allows developers to build a single API service
// that supports both gRPC APIs and REST APIs. Many systems, including [Google
// APIs](https://github.com/googleapis/googleapis),
// [Cloud Endpoints](https://cloud.google.com/endpoints), [gRPC
// Gateway](https://github.com/grpc-ecosystem/grpc-gateway),
// and [Envoy](https://github.com/envoyproxy/envoy) proxy support this feature
// and use it for large scale production services.
//
// `HttpRule` defines the schema of the gRPC/REST mapping. The mapping specifies
// how different portions of the gRPC request message are mapped to the URL
// path, URL query parameters, and HTTP request body. It also controls how the
// gRPC response message is mapped to the HTTP response body. `HttpRule` is
// typically specified as an `google.api.http` annotation on the gRPC method.
//
// Each mapping specifies a URL path template and an HTTP method. The path
// template may refer to one or more fields in the gRPC request message, as long
// as each field is a non-repeated field with a primitive (non-message) type.
// The path template controls how fields of the request message are mapped to
// the URL path.
//
// Example:
//
// service Messaging {
// rpc GetMessage(GetMessageRequest) returns (Message) {
// option (google.api.http) = {
// get: "/v1/{name=messages/*}"
// };
// }
// }
// message GetMessageRequest {
// string name = 1; // Mapped to URL path.
// }
// message Message {
// string text = 1; // The resource content.
// }
//
// This enables an HTTP REST to gRPC mapping as below:
//
// HTTP | gRPC
// -----|-----
// `GET /v1/messages/123456` | `GetMessage(name: "messages/123456")`
//
// Any fields in the request message which are not bound by the path template
// automatically become HTTP query parameters if there is no HTTP request body.
// For example:
//
// service Messaging {
// rpc GetMessage(GetMessageRequest) returns (Message) {
// option (google.api.http) = {
// get:"/v1/messages/{message_id}"
// };
// }
// }
// message GetMessageRequest {
// message SubMessage {
// string subfield = 1;
// }
// string message_id = 1; // Mapped to URL path.
// int64 revision = 2; // Mapped to URL query parameter `revision`.
// SubMessage sub = 3; // Mapped to URL query parameter `sub.subfield`.
// }
//
// This enables a HTTP JSON to RPC mapping as below:
//
// HTTP | gRPC
// -----|-----
// `GET /v1/messages/123456?revision=2&sub.subfield=foo` |
// `GetMessage(message_id: "123456" revision: 2 sub: SubMessage(subfield:
// "foo"))`
//
// Note that fields which are mapped to URL query parameters must have a
// primitive type or a repeated primitive type or a non-repeated message type.
// In the case of a repeated type, the parameter can be repeated in the URL
// as `...?param=A&param=B`. In the case of a message type, each field of the
// message is mapped to a separate parameter, such as
// `...?foo.a=A&foo.b=B&foo.c=C`.
//
// For HTTP methods that allow a request body, the `body` field
// specifies the mapping. Consider a REST update method on the
// message resource collection:
//
// service Messaging {
// rpc UpdateMessage(UpdateMessageRequest) returns (Message) {
// option (google.api.http) = {
// patch: "/v1/messages/{message_id}"
// body: "message"
// };
// }
// }
// message UpdateMessageRequest {
// string message_id = 1; // mapped to the URL
// Message message = 2; // mapped to the body
// }
//
// The following HTTP JSON to RPC mapping is enabled, where the
// representation of the JSON in the request body is determined by
// protos JSON encoding:
//
// HTTP | gRPC
// -----|-----
// `PATCH /v1/messages/123456 { "text": "Hi!" }` | `UpdateMessage(message_id:
// "123456" message { text: "Hi!" })`
//
// The special name `*` can be used in the body mapping to define that
// every field not bound by the path template should be mapped to the
// request body. This enables the following alternative definition of
// the update method:
//
// service Messaging {
// rpc UpdateMessage(Message) returns (Message) {
// option (google.api.http) = {
// patch: "/v1/messages/{message_id}"
// body: "*"
// };
// }
// }
// message Message {
// string message_id = 1;
// string text = 2;
// }
//
//
// The following HTTP JSON to RPC mapping is enabled:
//
// HTTP | gRPC
// -----|-----
// `PATCH /v1/messages/123456 { "text": "Hi!" }` | `UpdateMessage(message_id:
// "123456" text: "Hi!")`
//
// Note that when using `*` in the body mapping, it is not possible to
// have HTTP parameters, as all fields not bound by the path end in
// the body. This makes this option more rarely used in practice when
// defining REST APIs. The common usage of `*` is in custom methods
// which don't use the URL at all for transferring data.
//
// It is possible to define multiple HTTP methods for one RPC by using
// the `additional_bindings` option. Example:
//
// service Messaging {
// rpc GetMessage(GetMessageRequest) returns (Message) {
// option (google.api.http) = {
// get: "/v1/messages/{message_id}"
// additional_bindings {
// get: "/v1/users/{user_id}/messages/{message_id}"
// }
// };
// }
// }
// message GetMessageRequest {
// string message_id = 1;
// string user_id = 2;
// }
//
// This enables the following two alternative HTTP JSON to RPC mappings:
//
// HTTP | gRPC
// -----|-----
// `GET /v1/messages/123456` | `GetMessage(message_id: "123456")`
// `GET /v1/users/me/messages/123456` | `GetMessage(user_id: "me" message_id:
// "123456")`
//
// ## Rules for HTTP mapping
//
// 1. Leaf request fields (recursive expansion nested messages in the request
// message) are classified into three categories:
// - Fields referred by the path template. They are passed via the URL path.
// - Fields referred by the [HttpRule.body][google.api.HttpRule.body]. They are passed via the HTTP
// request body.
// - All other fields are passed via the URL query parameters, and the
// parameter name is the field path in the request message. A repeated
// field can be represented as multiple query parameters under the same
// name.
// 2. If [HttpRule.body][google.api.HttpRule.body] is "*", there is no URL query parameter, all fields
// are passed via URL path and HTTP request body.
// 3. If [HttpRule.body][google.api.HttpRule.body] is omitted, there is no HTTP request body, all
// fields are passed via URL path and URL query parameters.
//
// ### Path template syntax
//
// Template = "/" Segments [ Verb ] ;
// Segments = Segment { "/" Segment } ;
// Segment = "*" | "**" | LITERAL | Variable ;
// Variable = "{" FieldPath [ "=" Segments ] "}" ;
// FieldPath = IDENT { "." IDENT } ;
// Verb = ":" LITERAL ;
//
// The syntax `*` matches a single URL path segment. The syntax `**` matches
// zero or more URL path segments, which must be the last part of the URL path
// except the `Verb`.
//
// The syntax `Variable` matches part of the URL path as specified by its
// template. A variable template must not contain other variables. If a variable
// matches a single path segment, its template may be omitted, e.g. `{var}`
// is equivalent to `{var=*}`.
//
// The syntax `LITERAL` matches literal text in the URL path. If the `LITERAL`
// contains any reserved character, such characters should be percent-encoded
// before the matching.
//
// If a variable contains exactly one path segment, such as `"{var}"` or
// `"{var=*}"`, when such a variable is expanded into a URL path on the client
// side, all characters except `[-_.~0-9a-zA-Z]` are percent-encoded. The
// server side does the reverse decoding. Such variables show up in the
// [Discovery
// Document](https://developers.google.com/discovery/v1/reference/apis) as
// `{var}`.
//
// If a variable contains multiple path segments, such as `"{var=foo/*}"`
// or `"{var=**}"`, when such a variable is expanded into a URL path on the
// client side, all characters except `[-_.~/0-9a-zA-Z]` are percent-encoded.
// The server side does the reverse decoding, except "%2F" and "%2f" are left
// unchanged. Such variables show up in the
// [Discovery
// Document](https://developers.google.com/discovery/v1/reference/apis) as
// `{+var}`.
//
// ## Using gRPC API Service Configuration
//
// gRPC API Service Configuration (service config) is a configuration language
// for configuring a gRPC service to become a user-facing product. The
// service config is simply the YAML representation of the `google.api.Service`
// proto message.
//
// As an alternative to annotating your proto file, you can configure gRPC
// transcoding in your service config YAML files. You do this by specifying a
// `HttpRule` that maps the gRPC method to a REST endpoint, achieving the same
// effect as the proto annotation. This can be particularly useful if you
// have a proto that is reused in multiple services. Note that any transcoding
// specified in the service config will override any matching transcoding
// configuration in the proto.
//
// Example:
//
// http:
// rules:
// # Selects a gRPC method and applies HttpRule to it.
// - selector: example.v1.Messaging.GetMessage
// get: /v1/messages/{message_id}/{sub.subfield}
//
// ## Special notes
//
// When gRPC Transcoding is used to map a gRPC to JSON REST endpoints, the
// proto to JSON conversion must follow the [proto3
// specification](https://developers.google.com/protocol-buffers/docs/proto3#json).
//
// While the single segment variable follows the semantics of
// [RFC 6570](https://tools.ietf.org/html/rfc6570) Section 3.2.2 Simple String
// Expansion, the multi segment variable **does not** follow RFC 6570 Section
// 3.2.3 Reserved Expansion. The reason is that the Reserved Expansion
// does not expand special characters like `?` and `#`, which would lead
// to invalid URLs. As the result, gRPC Transcoding uses a custom encoding
// for multi segment variables.
//
// The path variables **must not** refer to any repeated or mapped field,
// because client libraries are not capable of handling such variable expansion.
//
// The path variables **must not** capture the leading "/" character. The reason
// is that the most common use case "{var}" does not capture the leading "/"
// character. For consistency, all path variables must share the same behavior.
//
// Repeated message fields must not be mapped to URL query parameters, because
// no client library can support such complicated mapping.
//
// If an API needs to use a JSON array for request or response body, it can map
// the request or response body to a repeated field. However, some gRPC
// Transcoding implementations may not support this feature.
message HttpRule {
// Selects a method to which this rule applies.
//
// Refer to [selector][google.api.DocumentationRule.selector] for syntax details.
string selector = 1;
// Determines the URL pattern matched by this rule. This pattern can be
// used with any of the {get|put|post|delete|patch} methods. A custom method
// can be defined using the 'custom' field.
oneof pattern {
// Maps to HTTP GET. Used for listing and getting information about
// resources.
string get = 2;
// Maps to HTTP PUT. Used for replacing a resource.
string put = 3;
// Maps to HTTP POST. Used for creating a resource or performing an action.
string post = 4;
// Maps to HTTP DELETE. Used for deleting a resource.
string delete = 5;
// Maps to HTTP PATCH. Used for updating a resource.
string patch = 6;
// The custom pattern is used for specifying an HTTP method that is not
// included in the `pattern` field, such as HEAD, or "*" to leave the
// HTTP method unspecified for this rule. The wild-card rule is useful
// for services that provide content to Web (HTML) clients.
CustomHttpPattern custom = 8;
}
// The name of the request field whose value is mapped to the HTTP request
// body, or `*` for mapping all request fields not captured by the path
// pattern to the HTTP body, or omitted for not having any HTTP request body.
//
// NOTE: the referred field must be present at the top-level of the request
// message type.
string body = 7;
// Optional. The name of the response field whose value is mapped to the HTTP
// response body. When omitted, the entire response message will be used
// as the HTTP response body.
//
// NOTE: The referred field must be present at the top-level of the response
// message type.
string response_body = 12;
// Additional HTTP bindings for the selector. Nested bindings must
// not contain an `additional_bindings` field themselves (that is,
// the nesting may only be one level deep).
repeated HttpRule additional_bindings = 11;
}
// A custom pattern is used for defining custom HTTP verb.
message CustomHttpPattern {
// The name of this custom HTTP verb.
string kind = 1;
// The path matched by this custom verb.
string path = 2;
}

View File

@ -0,0 +1,247 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.longrunning;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/rpc/status.proto";
import "google/protobuf/descriptor.proto";
option cc_enable_arenas = true;
option csharp_namespace = "Google.LongRunning";
option go_package = "google.golang.org/genproto/googleapis/longrunning;longrunning";
option java_multiple_files = true;
option java_outer_classname = "OperationsProto";
option java_package = "com.google.longrunning";
option php_namespace = "Google\\LongRunning";
extend google.protobuf.MethodOptions {
// Additional information regarding long-running operations.
// In particular, this specifies the types that are returned from
// long-running operations.
//
// Required for methods that return `google.longrunning.Operation`; invalid
// otherwise.
google.longrunning.OperationInfo operation_info = 1049;
}
// Manages long-running operations with an API service.
//
// When an API method normally takes long time to complete, it can be designed
// to return [Operation][google.longrunning.Operation] to the client, and the client can use this
// interface to receive the real response asynchronously by polling the
// operation resource, or pass the operation resource to another API (such as
// Google Cloud Pub/Sub API) to receive the response. Any API service that
// returns long-running operations should implement the `Operations` interface
// so developers can have a consistent client experience.
service Operations {
option (google.api.default_host) = "longrunning.googleapis.com";
// Lists operations that match the specified filter in the request. If the
// server doesn't support this method, it returns `UNIMPLEMENTED`.
//
// NOTE: the `name` binding allows API services to override the binding
// to use different resource name schemes, such as `users/*/operations`. To
// override the binding, API services can add a binding such as
// `"/v1/{name=users/*}/operations"` to their service configuration.
// For backwards compatibility, the default name includes the operations
// collection id, however overriding users must ensure the name binding
// is the parent resource, without the operations collection id.
rpc ListOperations(ListOperationsRequest) returns (ListOperationsResponse) {
option (google.api.http) = {
get: "/v1/{name=operations}"
};
option (google.api.method_signature) = "name,filter";
}
// Gets the latest state of a long-running operation. Clients can use this
// method to poll the operation result at intervals as recommended by the API
// service.
rpc GetOperation(GetOperationRequest) returns (Operation) {
option (google.api.http) = {
get: "/v1/{name=operations/**}"
};
option (google.api.method_signature) = "name";
}
// Deletes a long-running operation. This method indicates that the client is
// no longer interested in the operation result. It does not cancel the
// operation. If the server doesn't support this method, it returns
// `google.rpc.Code.UNIMPLEMENTED`.
rpc DeleteOperation(DeleteOperationRequest) returns (google.protobuf.Empty) {
option (google.api.http) = {
delete: "/v1/{name=operations/**}"
};
option (google.api.method_signature) = "name";
}
// Starts asynchronous cancellation on a long-running operation. The server
// makes a best effort to cancel the operation, but success is not
// guaranteed. If the server doesn't support this method, it returns
// `google.rpc.Code.UNIMPLEMENTED`. Clients can use
// [Operations.GetOperation][google.longrunning.Operations.GetOperation] or
// other methods to check whether the cancellation succeeded or whether the
// operation completed despite cancellation. On successful cancellation,
// the operation is not deleted; instead, it becomes an operation with
// an [Operation.error][google.longrunning.Operation.error] value with a [google.rpc.Status.code][google.rpc.Status.code] of 1,
// corresponding to `Code.CANCELLED`.
rpc CancelOperation(CancelOperationRequest) returns (google.protobuf.Empty) {
option (google.api.http) = {
post: "/v1/{name=operations/**}:cancel"
body: "*"
};
option (google.api.method_signature) = "name";
}
// Waits until the specified long-running operation is done or reaches at most
// a specified timeout, returning the latest state. If the operation is
// already done, the latest state is immediately returned. If the timeout
// specified is greater than the default HTTP/RPC timeout, the HTTP/RPC
// timeout is used. If the server does not support this method, it returns
// `google.rpc.Code.UNIMPLEMENTED`.
// Note that this method is on a best-effort basis. It may return the latest
// state before the specified timeout (including immediately), meaning even an
// immediate response is no guarantee that the operation is done.
rpc WaitOperation(WaitOperationRequest) returns (Operation) {
}
}
// This resource represents a long-running operation that is the result of a
// network API call.
message Operation {
// The server-assigned name, which is only unique within the same service that
// originally returns it. If you use the default HTTP mapping, the
// `name` should be a resource name ending with `operations/{unique_id}`.
string name = 1;
// Service-specific metadata associated with the operation. It typically
// contains progress information and common metadata such as create time.
// Some services might not provide such metadata. Any method that returns a
// long-running operation should document the metadata type, if any.
google.protobuf.Any metadata = 2;
// If the value is `false`, it means the operation is still in progress.
// If `true`, the operation is completed, and either `error` or `response` is
// available.
bool done = 3;
// The operation result, which can be either an `error` or a valid `response`.
// If `done` == `false`, neither `error` nor `response` is set.
// If `done` == `true`, exactly one of `error` or `response` is set.
oneof result {
// The error result of the operation in case of failure or cancellation.
google.rpc.Status error = 4;
// The normal response of the operation in case of success. If the original
// method returns no data on success, such as `Delete`, the response is
// `google.protobuf.Empty`. If the original method is standard
// `Get`/`Create`/`Update`, the response should be the resource. For other
// methods, the response should have the type `XxxResponse`, where `Xxx`
// is the original method name. For example, if the original method name
// is `TakeSnapshot()`, the inferred response type is
// `TakeSnapshotResponse`.
google.protobuf.Any response = 5;
}
}
// The request message for [Operations.GetOperation][google.longrunning.Operations.GetOperation].
message GetOperationRequest {
// The name of the operation resource.
string name = 1;
}
// The request message for [Operations.ListOperations][google.longrunning.Operations.ListOperations].
message ListOperationsRequest {
// The name of the operation's parent resource.
string name = 4;
// The standard list filter.
string filter = 1;
// The standard list page size.
int32 page_size = 2;
// The standard list page token.
string page_token = 3;
}
// The response message for [Operations.ListOperations][google.longrunning.Operations.ListOperations].
message ListOperationsResponse {
// A list of operations that matches the specified filter in the request.
repeated Operation operations = 1;
// The standard List next-page token.
string next_page_token = 2;
}
// The request message for [Operations.CancelOperation][google.longrunning.Operations.CancelOperation].
message CancelOperationRequest {
// The name of the operation resource to be cancelled.
string name = 1;
}
// The request message for [Operations.DeleteOperation][google.longrunning.Operations.DeleteOperation].
message DeleteOperationRequest {
// The name of the operation resource to be deleted.
string name = 1;
}
// The request message for [Operations.WaitOperation][google.longrunning.Operations.WaitOperation].
message WaitOperationRequest {
// The name of the operation resource to wait on.
string name = 1;
// The maximum duration to wait before timing out. If left blank, the wait
// will be at most the time permitted by the underlying HTTP/RPC protocol.
// If RPC context deadline is also specified, the shorter one will be used.
google.protobuf.Duration timeout = 2;
}
// A message representing the message types used by a long-running operation.
//
// Example:
//
// rpc LongRunningRecognize(LongRunningRecognizeRequest)
// returns (google.longrunning.Operation) {
// option (google.longrunning.operation_info) = {
// response_type: "LongRunningRecognizeResponse"
// metadata_type: "LongRunningRecognizeMetadata"
// };
// }
message OperationInfo {
// Required. The message name of the primary return type for this
// long-running operation.
// This type will be used to deserialize the LRO's response.
//
// If the response is in a different package from the rpc, a fully-qualified
// message name must be used (e.g. `google.protobuf.Struct`).
//
// Note: Altering this value constitutes a breaking change.
string response_type = 1;
// Required. The message name of the metadata type for this long-running
// operation.
//
// If the response is in a different package from the rpc, a fully-qualified
// message name must be used (e.g. `google.protobuf.Struct`).
//
// Note: Altering this value constitutes a breaking change.
string metadata_type = 2;
}

View File

@ -21,10 +21,3 @@ enum ColumnType {
COLUMN_TYPE_STRING = 4;
COLUMN_TYPE_BOOL = 5;
}
message HostGroup {
string id = 1;
// connection strings for remote hosts.
repeated string hosts = 2;
}

View File

@ -0,0 +1,37 @@
syntax = "proto3";
package influxdata.iox.management.v1;
// Which storage system is a chunk located in?
enum ChunkStorage {
// Not currently returned
CHUNK_STORAGE_UNSPECIFIED = 0;
// The chunk is still open for new writes, in the Mutable Buffer
CHUNK_STORAGE_OPEN_MUTABLE_BUFFER = 1;
// The chunk is no longer open for writes, in the Mutable Buffer
CHUNK_STORAGE_CLOSED_MUTABLE_BUFFER = 2;
// The chunk is in the Read Buffer (where it can not be mutated)
CHUNK_STORAGE_READ_BUFFER = 3;
// The chunk is stored in Object Storage (where it can not be mutated)
CHUNK_STORAGE_OBJECT_STORE = 4;
}
// `Chunk` represents part of a partition of data in a database.
// A chunk can contain one or more tables.
message Chunk {
// The partition key of this chunk
string partition_key = 1;
// The id of this chunk
uint32 id = 2;
// Which storage system the chunk is located in
ChunkStorage storage = 3;
// The total estimated size of this chunk, in bytes
uint64 estimated_bytes = 4;
}
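As a hedged illustration of consuming this definition from Rust (assuming prost's default codegen, which strips the `CHUNK_STORAGE_` prefix from variant names, and the `generated_types::influxdata::iox::management::v1` module path used elsewhere in this commit; the helper function itself is hypothetical):
use generated_types::influxdata::iox::management::v1 as management;

// Hypothetical helper: render a chunk's storage location for display.
fn storage_label(storage: management::ChunkStorage) -> &'static str {
    match storage {
        management::ChunkStorage::Unspecified => "unspecified",
        management::ChunkStorage::OpenMutableBuffer => "open (mutable buffer)",
        management::ChunkStorage::ClosedMutableBuffer => "closed (mutable buffer)",
        management::ChunkStorage::ReadBuffer => "read buffer",
        management::ChunkStorage::ObjectStore => "object store",
    }
}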

View File

@ -31,90 +31,6 @@ message PartitionTemplate {
repeated Part parts = 1;
}
message Matcher {
// A query predicate to filter rows
string predicate = 1;
// Restrict selection to a specific table or tables specified by a regex
oneof table_matcher {
google.protobuf.Empty all = 2;
string table = 3;
string regex = 4;
}
}
message ReplicationConfig {
// The set of host groups that data should be replicated to. Which host a
// write goes to within a host group is determined by consistent hashing of
// the partition key. We'd use this to create a host group per
// availability zone, so you might have 5 availability zones with 2
// hosts in each. Replication will ensure that N of those zones get a
// write. For each zone, only a single host needs to get the write.
// Replication is for ensuring a write exists across multiple hosts
// before returning success. Its purpose is to ensure write durability,
// rather than write availability for query (this is covered by
// subscriptions).
repeated string replications = 1;
// The minimum number of host groups to replicate a write to before success
// is returned. This can be overridden on a per request basis.
// Replication will continue to write to the other host groups in the
// background.
uint32 replication_count = 2;
// How long the replication queue can get before either rejecting writes or
// dropping missed writes. The queue is kept in memory on a
// per-database basis. A queue size of zero means it will only try to
// replicate synchronously and drop any failures.
uint64 replication_queue_max_size = 3;
}
message SubscriptionConfig {
message Subscription {
string name = 1;
string host_group_id = 2;
Matcher matcher = 3;
}
// `subscriptions` are used for query servers to get data via either push
// or pull as it arrives. They are separate from replication as they
// have a different purpose. They're for query servers or other clients
// that want to subscribe to some subset of data being written in. This
// could either be specific partitions, ranges of partitions, tables, or
// rows matching some predicate.
repeated Subscription subscriptions = 1;
}
message QueryConfig {
// If set to `true`, this server should answer queries from one or more of
// its local write buffer and any read-only partitions that it knows
// about. In this case, results will be merged with any others from the
// remote groups or read-only partitions.
bool query_local = 1;
// Set `primary` to a host group if remote servers should be
// issued queries for this database. All hosts in the group should be
// queried with this server acting as the coordinator that merges
// results together.
string primary = 2;
// If a specific host in the primary group is unavailable,
// another host in the same position from a secondary group should be
// queried. For example, imagine we've partitioned the data in this DB into
// 4 partitions and we are replicating the data across 3 availability
// zones. We have 4 hosts in each of those AZs, thus they each have 1
// partition. We'd set the primary group to be the 4 hosts in the same
// AZ as this one, and the secondary groups as the hosts in the other 2 AZs.
repeated string secondaries = 3;
// Use `readOnlyPartitions` when a server should answer queries for
// partitions that come from object storage. This can be used to start
// up a new query server to handle queries by pointing it at a
// collection of partitions and then telling it to also pull
// data from the replication servers (writes that haven't been snapshotted
// into a partition).
repeated string read_only_partitions = 4;
}
message WalBufferConfig {
enum Rollover {
ROLLOVER_UNSPECIFIED = 0;
@ -231,15 +147,6 @@ message DatabaseRules {
// Template that generates a partition key for each row inserted into the database
PartitionTemplate partition_template = 2;
// Synchronous replication configuration for this database
ReplicationConfig replication_config = 3;
// Asynchronous pull-based subscription configuration for this database
SubscriptionConfig subscription_config = 4;
// Query configuration for this database
QueryConfig query_config = 5;
// WAL configuration for this database
WalBufferConfig wal_buffer_config = 6;

View File

@ -0,0 +1,47 @@
syntax = "proto3";
package influxdata.iox.management.v1;
message OperationMetadata {
// How many nanoseconds of CPU time have been spent on this job so far?
uint64 cpu_nanos = 1;
// How many nanoseconds has it been since the job was submitted
uint64 wall_nanos = 2;
// How many total tasks does this job have currently
uint64 task_count = 3;
// How many tasks for this job are still pending
uint64 pending_count = 4;
// What kind of job is it?
oneof job {
Dummy dummy = 5;
PersistSegment persist_segment = 6;
CloseChunk close_chunk = 7;
}
}
// A job that simply sleeps for a specified time and then returns success
message Dummy {
// How long the job should sleep for before returning
repeated uint64 nanos = 1;
}
// A job that persists a WAL segment to object store
message PersistSegment {
uint32 writer_id = 1;
uint64 segment_id = 2;
}
// Move a chunk from mutable buffer to read buffer
message CloseChunk {
// name of the database
string db_name = 1;
// partition key
string partition_key = 2;
// chunk_id
uint32 chunk_id = 3;
}

View File

@ -0,0 +1,12 @@
syntax = "proto3";
package influxdata.iox.management.v1;
// `Partition` is comprised of data in one or more chunks
//
// TODO: add additional information to this structure (e.g. partition
// names, stats, etc)
message Partition {
// The partition key of this partition
string key = 1;
}

View File

@ -1,8 +1,10 @@
syntax = "proto3";
package influxdata.iox.management.v1;
import "google/protobuf/empty.proto";
import "google/longrunning/operations.proto";
import "influxdata/iox/management/v1/database_rules.proto";
import "influxdata/iox/management/v1/chunk.proto";
import "influxdata/iox/management/v1/partition.proto";
service ManagementService {
rpc GetWriterId(GetWriterIdRequest) returns (GetWriterIdResponse);
@ -14,6 +16,43 @@ service ManagementService {
rpc GetDatabase(GetDatabaseRequest) returns (GetDatabaseResponse);
rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse);
// List chunks available on this database
rpc ListChunks(ListChunksRequest) returns (ListChunksResponse);
// List remote IOx servers we know about.
rpc ListRemotes(ListRemotesRequest) returns (ListRemotesResponse);
// Update information about a remote IOx server (upsert).
rpc UpdateRemote(UpdateRemoteRequest) returns (UpdateRemoteResponse);
// Delete a reference to remote IOx server.
rpc DeleteRemote(DeleteRemoteRequest) returns (DeleteRemoteResponse);
// Creates a dummy job that, for each value of the nanos field,
// spawns a task that sleeps for that number of nanoseconds before returning
rpc CreateDummyJob(CreateDummyJobRequest) returns (CreateDummyJobResponse) {
option (google.longrunning.operation_info) = {
response_type: "google.protobuf.Empty"
metadata_type: "OperationMetadata"
};
}
// List partitions in a database
rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse);
// Get detail information about a partition
rpc GetPartition(GetPartitionRequest) returns (GetPartitionResponse);
// List chunks in a partition
rpc ListPartitionChunks(ListPartitionChunksRequest) returns (ListPartitionChunksResponse);
// Create a new chunk in the mutable buffer
rpc NewPartitionChunk(NewPartitionChunkRequest) returns (NewPartitionChunkResponse);
// Close a chunk and move it to the read buffer
rpc ClosePartitionChunk(ClosePartitionChunkRequest) returns (ClosePartitionChunkResponse);
}
message GetWriterIdRequest {}
@ -47,3 +86,121 @@ message CreateDatabaseRequest {
}
message CreateDatabaseResponse {}
message ListChunksRequest {
// the name of the database
string db_name = 1;
}
message ListChunksResponse {
repeated Chunk chunks = 1;
}
message CreateDummyJobRequest {
repeated uint64 nanos = 1;
}
message CreateDummyJobResponse {
google.longrunning.Operation operation = 1;
}
message ListRemotesRequest {}
message ListRemotesResponse {
repeated Remote remotes = 1;
}
// This resource represents a remote IOx server.
message Remote {
// The writer ID associated with a remote IOx server.
uint32 id = 1;
// The address of the remote IOx server gRPC endpoint.
string connection_string = 2;
}
// Updates information about a remote IOx server.
//
// If a remote for a given `id` already exists, it is updated in place.
message UpdateRemoteRequest {
// If omitted, the remote associated with `id` will be removed.
Remote remote = 1;
// TODO(#917): add an optional flag to test the connection or not before adding it.
}
message UpdateRemoteResponse {}
message DeleteRemoteRequest{
uint32 id = 1;
}
message DeleteRemoteResponse {}
// Request to list all partitions from a named database
message ListPartitionsRequest {
// the name of the database
string db_name = 1;
}
message ListPartitionsResponse {
// All partitions in a database
repeated Partition partitions = 1;
}
// Request to list all chunks in a specific partition from a named database
message ListPartitionChunksRequest {
// the name of the database
string db_name = 1;
// the partition key
string partition_key = 2;
}
message GetPartitionResponse {
// Detailed information about a partition
Partition partition = 1;
}
message ListPartitionChunksResponse {
// All chunks in a partition
repeated Chunk chunks = 1;
}
// Request to get details of a specific partition from a named database
message GetPartitionRequest {
// the name of the database
string db_name = 1;
// the partition key
string partition_key = 2;
}
// Request that a new chunk for writing is created in the mutable buffer
message NewPartitionChunkRequest {
// the name of the database
string db_name = 1;
// the partition key
string partition_key = 2;
}
message NewPartitionChunkResponse {
}
// Request that a chunk be closed and moved to the read buffer
message ClosePartitionChunkRequest {
// the name of the database
string db_name = 1;
// the partition key
string partition_key = 2;
// the chunk id
uint32 chunk_id = 3;
}
message ClosePartitionChunkResponse {
// The operation that tracks the work for migrating the chunk
google.longrunning.Operation operation = 1;
}

View File

@ -0,0 +1,68 @@
syntax = "proto3";
package influxdata.iox.management.v1;
// NOTE: documentation is manually synced from data_types/src/database_rules.rs
// `ShardConfig` defines rules for assigning a line/row to an individual
// host or a group of hosts. A shard
// is a logical concept, but the usage is meant to split data into
// mutually exclusive areas. The rough order of organization is:
// database -> shard -> partition -> chunk. For example, you could shard
// based on table name and assign to 1 of 10 shards. Within each
// shard you would have partitions, which would likely be based off time.
// This makes it possible to horizontally scale out writes.
message ShardConfig {
/// An optional matcher. If there is a match, the route will be evaluated to
/// the given targets, otherwise the hash ring will be evaluated. This is
/// useful for overriding the hashring function on some hot spot. For
/// example, if you use the table name as the input to the hash function
/// and your ring has 4 slots. If two tables that are very hot get
/// assigned to the same slot you can override that by putting in a
/// specific matcher to pull that table over to a different node.
MatcherToTargets specific_targets = 1;
/// An optional default hasher which will route to one in a collection of
/// nodes.
HashRing hash_ring = 2;
/// If set to true the router will ignore any errors sent by the remote
/// targets in this route. That is, the write request will succeed
/// regardless of this route's success.
bool ignore_errors = 3;
}
// Maps a matcher to a specific target group. If the line/row matches,
// it is sent to that group.
message MatcherToTargets {
Matcher matcher = 1;
NodeGroup target = 2;
}
/// A matcher is used to match routing rules or subscriptions on a row-by-row
/// (or line) basis.
message Matcher {
// if provided, match if the table name matches against the regex
string table_name_regex = 1;
// paul: what should we use for predicate matching here against a single row/line?
string predicate = 2;
}
// A collection of IOx nodes
message NodeGroup {
message Node {
uint32 id = 1;
}
repeated Node nodes = 1;
}
// HashRing is a rule for creating a hash key for a row and mapping that to
// an individual node on a ring.
message HashRing {
// If true the table name will be included in the hash key
bool table_name = 1;
// include the values of these columns in the hash key
repeated string columns = 2;
// ring of node groups. Each group holds a shard
repeated NodeGroup node_groups = 3;
}
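As a rough illustration of the routing order described above (specific matcher first, falling back to the hash ring), the following builds such a config from Rust. It is only a sketch: it assumes the prost-generated types for this package follow the usual conventions (nested `Node` under a `node_group` module, message-typed fields wrapped in `Option`), and the `cpu` regex and node IDs are made-up examples.

```rust
use generated_types::influxdata::iox::management::v1::{
    node_group::Node, HashRing, Matcher, MatcherToTargets, NodeGroup, ShardConfig,
};

// Helper: build a node group from a list of writer IDs.
fn group(ids: &[u32]) -> NodeGroup {
    NodeGroup {
        nodes: ids.iter().map(|&id| Node { id }).collect(),
    }
}

fn main() {
    // Pin the hot `cpu` table to one group; hash every other table name
    // across two further groups on the ring.
    let shard_config = ShardConfig {
        specific_targets: Some(MatcherToTargets {
            matcher: Some(Matcher {
                table_name_regex: "^cpu$".to_string(),
                predicate: String::new(), // row/line predicate matching is still an open question
            }),
            target: Some(group(&[1])),
        }),
        hash_ring: Some(HashRing {
            table_name: true,
            columns: vec![],
            node_groups: vec![group(&[2, 3]), group(&[4, 5])],
        }),
        ignore_errors: false,
    };
    println!("{:?}", shard_config);
}
```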

View File

@ -0,0 +1,23 @@
syntax = "proto3";
package influxdata.iox.write.v1;
service WriteService {
// write data into a specific Database
rpc Write(WriteRequest) returns (WriteResponse);
}
message WriteRequest {
// name of database into which to write
string db_name = 1;
// data, in [LineProtocol] format
//
// [LineProtocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/#data-types-and-format)
string lp_data = 2;
}
message WriteResponse {
// how many lines were parsed and written into the database
uint64 lines_written = 1;
}

View File

@ -0,0 +1,49 @@
#!/bin/bash -e
# The commit where the Rust `flatbuffers` crate version was changed to the version in `Cargo.lock`
# Update this, rerun this script, and check in the changes in the generated code when the
# `flatbuffers` crate version is updated.
FB_COMMIT="86401e078d0746d2381735415f8c2dfe849f3f52"
# Change to the generated_types crate directory, where this script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
pushd $DIR
echo "Building flatc from source ..."
FB_URL="https://github.com/google/flatbuffers"
FB_DIR=".flatbuffers"
FLATC="$FB_DIR/bazel-bin/flatc"
if [ -z "$(which bazel)" ]; then
echo "bazel is required to build flatc"
exit 1
fi
echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"
if [ ! -e $FB_DIR ]; then
echo "git clone $FB_URL ..."
git clone -b master --no-tag $FB_URL $FB_DIR
else
echo "git pull $FB_URL ..."
git -C $FB_DIR pull --ff-only
fi
echo "hard reset to $FB_COMMIT"
git -C $FB_DIR reset --hard $FB_COMMIT
pushd $FB_DIR
echo "run: bazel build :flatc ..."
bazel build :flatc
popd
WAL_FBS="$DIR/protos/wal.fbs"
WAL_RS_DIR="$DIR/src"
$FLATC --rust -o $WAL_RS_DIR $WAL_FBS
cargo fmt
popd
echo "DONE! Please run 'cargo test' and check in any changes."

View File

@ -0,0 +1,276 @@
//! Protobuf types for errors from the google standards and
//! conversions to `tonic::Status`
pub use google_types::*;
pub mod rpc {
include!(concat!(env!("OUT_DIR"), "/google.rpc.rs"));
}
pub mod longrunning {
include!(concat!(env!("OUT_DIR"), "/google.longrunning.rs"));
}
use self::protobuf::Any;
use prost::{
bytes::{Bytes, BytesMut},
Message,
};
use std::convert::{TryFrom, TryInto};
use std::iter::FromIterator;
use tonic::Status;
use tracing::error;
// A newtype struct to provide conversion into tonic::Status
struct EncodeError(prost::EncodeError);
impl From<EncodeError> for tonic::Status {
fn from(error: EncodeError) -> Self {
error!(error=%error.0, "failed to serialise error response details");
tonic::Status::unknown(format!("failed to serialise server error: {}", error.0))
}
}
impl From<prost::EncodeError> for EncodeError {
fn from(e: prost::EncodeError) -> Self {
Self(e)
}
}
fn encode_status(code: tonic::Code, message: String, details: Any) -> tonic::Status {
let mut buffer = BytesMut::new();
let status = rpc::Status {
code: code as i32,
message: message.clone(),
details: vec![details],
};
match status.encode(&mut buffer) {
Ok(_) => tonic::Status::with_details(code, message, buffer.freeze()),
Err(e) => EncodeError(e).into(),
}
}
/// Error returned if a request field has an invalid value. Includes
/// machinery to add parent field names for context -- thus it will
/// report `rules.write_timeout` rather than simply `write_timeout`.
#[derive(Debug, Default, Clone)]
pub struct FieldViolation {
pub field: String,
pub description: String,
}
impl FieldViolation {
pub fn required(field: impl Into<String>) -> Self {
Self {
field: field.into(),
description: "Field is required".to_string(),
}
}
/// Re-scopes this error as the child of another field
pub fn scope(self, field: impl Into<String>) -> Self {
let field = if self.field.is_empty() {
field.into()
} else {
[field.into(), self.field].join(".")
};
Self {
field,
description: self.description,
}
}
}
impl std::error::Error for FieldViolation {}
impl std::fmt::Display for FieldViolation {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Violation for field \"{}\": {}",
self.field, self.description
)
}
}
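A small usage sketch of the scoping behaviour described in the doc comment, using only the API above (the field names are illustrative):

```rust
#[test]
fn scope_prefixes_parent_field() {
    let violation = FieldViolation::required("write_timeout").scope("rules");
    assert_eq!(violation.field, "rules.write_timeout");
    assert_eq!(
        violation.to_string(),
        "Violation for field \"rules.write_timeout\": Field is required"
    );

    // Converting into a tonic::Status yields an INVALID_ARGUMENT carrying an
    // encoded google.rpc.BadRequest detail (see encode_bad_request below).
    let status: tonic::Status = violation.into();
    assert_eq!(status.code(), tonic::Code::InvalidArgument);
}
```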
fn encode_bad_request(violation: Vec<FieldViolation>) -> Result<Any, EncodeError> {
let mut buffer = BytesMut::new();
rpc::BadRequest {
field_violations: violation
.into_iter()
.map(|f| rpc::bad_request::FieldViolation {
field: f.field,
description: f.description,
})
.collect(),
}
.encode(&mut buffer)?;
Ok(Any {
type_url: "type.googleapis.com/google.rpc.BadRequest".to_string(),
value: buffer.freeze(),
})
}
impl From<FieldViolation> for tonic::Status {
fn from(f: FieldViolation) -> Self {
let message = f.to_string();
match encode_bad_request(vec![f]) {
Ok(details) => encode_status(tonic::Code::InvalidArgument, message, details),
Err(e) => e.into(),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct InternalError {}
impl From<InternalError> for tonic::Status {
fn from(_: InternalError) -> Self {
tonic::Status::new(tonic::Code::Internal, "Internal Error")
}
}
#[derive(Debug, Default, Clone)]
pub struct AlreadyExists {
pub resource_type: String,
pub resource_name: String,
pub owner: String,
pub description: String,
}
fn encode_resource_info(
resource_type: String,
resource_name: String,
owner: String,
description: String,
) -> Result<Any, EncodeError> {
let mut buffer = BytesMut::new();
rpc::ResourceInfo {
resource_type,
resource_name,
owner,
description,
}
.encode(&mut buffer)?;
Ok(Any {
type_url: "type.googleapis.com/google.rpc.ResourceInfo".to_string(),
value: buffer.freeze(),
})
}
impl From<AlreadyExists> for tonic::Status {
fn from(exists: AlreadyExists) -> Self {
let message = format!(
"Resource {}/{} already exists",
exists.resource_type, exists.resource_name
);
match encode_resource_info(
exists.resource_type,
exists.resource_name,
exists.owner,
exists.description,
) {
Ok(details) => encode_status(tonic::Code::AlreadyExists, message, details),
Err(e) => e.into(),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct NotFound {
pub resource_type: String,
pub resource_name: String,
pub owner: String,
pub description: String,
}
impl From<NotFound> for tonic::Status {
fn from(not_found: NotFound) -> Self {
let message = format!(
"Resource {}/{} not found",
not_found.resource_type, not_found.resource_name
);
match encode_resource_info(
not_found.resource_type,
not_found.resource_name,
not_found.owner,
not_found.description,
) {
Ok(details) => encode_status(tonic::Code::NotFound, message, details),
Err(e) => e.into(),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct PreconditionViolation {
pub category: String,
pub subject: String,
pub description: String,
}
fn encode_precondition_failure(violations: Vec<PreconditionViolation>) -> Result<Any, EncodeError> {
use rpc::precondition_failure::Violation;
let mut buffer = BytesMut::new();
rpc::PreconditionFailure {
violations: violations
.into_iter()
.map(|x| Violation {
r#type: x.category,
subject: x.subject,
description: x.description,
})
.collect(),
}
.encode(&mut buffer)?;
Ok(Any {
type_url: "type.googleapis.com/google.rpc.PreconditionFailure".to_string(),
value: buffer.freeze(),
})
}
impl From<PreconditionViolation> for tonic::Status {
fn from(violation: PreconditionViolation) -> Self {
let message = format!(
"Precondition violation {} - {}: {}",
violation.subject, violation.category, violation.description
);
match encode_precondition_failure(vec![violation]) {
Ok(details) => encode_status(tonic::Code::FailedPrecondition, message, details),
Err(e) => e.into(),
}
}
}
/// An extension trait that adds the ability to convert an error
/// that can be converted to a String to a FieldViolation
pub trait FieldViolationExt {
type Output;
fn field(self, field: &'static str) -> Result<Self::Output, FieldViolation>;
}
impl<T, E> FieldViolationExt for Result<T, E>
where
E: ToString,
{
type Output = T;
fn field(self, field: &'static str) -> Result<T, FieldViolation> {
self.map_err(|e| FieldViolation {
field: field.to_string(),
description: e.to_string(),
})
}
}

View File

@ -9,61 +9,74 @@
clippy::clone_on_ref_ptr
)]
mod pb {
pub mod influxdata {
pub mod platform {
pub mod storage {
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
/// This module imports the generated protobuf code into a Rust module
/// hierarchy that matches the namespace hierarchy of the protobuf
/// definitions
pub mod influxdata {
pub mod platform {
pub mod storage {
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
pub fn max() -> Self {
TimestampRange {
start: std::i64::MIN,
end: std::i64::MAX,
}
}
}
}
}
pub mod iox {
pub mod management {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs"));
}
}
}
}
pub mod com {
pub mod github {
pub mod influxdata {
pub mod idpe {
pub mod storage {
pub mod read {
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
}
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
pub fn max() -> Self {
TimestampRange {
start: std::i64::MIN,
end: std::i64::MAX,
}
}
}
}
}
// Needed because of https://github.com/hyperium/tonic/issues/471
pub mod grpc {
pub mod health {
pub mod iox {
pub mod management {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/grpc.health.v1.rs"));
/// Operation metadata type
pub const OPERATION_METADATA: &str =
"influxdata.iox.management.v1.OperationMetadata";
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs"));
}
}
pub mod write {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.write.v1.rs"));
}
}
}
}
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
pub mod com {
pub mod github {
pub mod influxdata {
pub mod idpe {
pub mod storage {
pub mod read {
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
}
}
}
}
}
}
// Needed because of https://github.com/hyperium/tonic/issues/471
pub mod grpc {
pub mod health {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/grpc.health.v1.rs"));
}
}
}
/// Generated Flatbuffers code for working with the write-ahead log
pub mod wal_generated;
pub use wal_generated::wal;
/// gRPC Storage Service
pub const STORAGE_SERVICE: &str = "influxdata.platform.storage.Storage";
@ -71,9 +84,62 @@ pub const STORAGE_SERVICE: &str = "influxdata.platform.storage.Storage";
pub const IOX_TESTING_SERVICE: &str = "influxdata.platform.storage.IOxTesting";
/// gRPC Arrow Flight Service
pub const ARROW_SERVICE: &str = "arrow.flight.protocol.FlightService";
/// The type prefix for any types
pub const ANY_TYPE_PREFIX: &str = "type.googleapis.com";
pub use pb::com::github::influxdata::idpe::storage::read::*;
pub use pb::influxdata::platform::storage::*;
/// Returns the protobuf URL usable with a google.protobuf.Any message.
/// This is the full Protobuf package and message name prefixed by
/// "type.googleapis.com/"
pub fn protobuf_type_url(protobuf_type: &str) -> String {
format!("{}/{}", ANY_TYPE_PREFIX, protobuf_type)
}
pub use google_types as google;
pub use pb::{grpc, influxdata};
/// Compares the protobuf type URL found within a google.protobuf.Any
/// message to an expected Protobuf package and message name
///
/// i.e. strips off the "type.googleapis.com/" prefix from `url`
/// and compares the result with `protobuf_type`
///
/// ```
/// use generated_types::protobuf_type_url_eq;
/// assert!(protobuf_type_url_eq("type.googleapis.com/google.protobuf.Empty", "google.protobuf.Empty"));
/// assert!(!protobuf_type_url_eq("type.googleapis.com/google.protobuf.Empty", "something.else"));
/// ```
pub fn protobuf_type_url_eq(url: &str, protobuf_type: &str) -> bool {
let mut split = url.splitn(2, '/');
match (split.next(), split.next()) {
(Some(ANY_TYPE_PREFIX), Some(t)) => t == protobuf_type,
_ => false,
}
}
pub use com::github::influxdata::idpe::storage::read::*;
pub use influxdata::platform::storage::*;
pub mod google;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_protobuf_type_url() {
use influxdata::iox::management::v1::OPERATION_METADATA;
let t = protobuf_type_url(OPERATION_METADATA);
assert_eq!(
&t,
"type.googleapis.com/influxdata.iox.management.v1.OperationMetadata"
);
assert!(protobuf_type_url_eq(&t, OPERATION_METADATA));
assert!(!protobuf_type_url_eq(&t, "foo"));
// The URL must start with the type.googleapis.com prefix
assert!(!protobuf_type_url_eq(
OPERATION_METADATA,
OPERATION_METADATA
));
}
}

3220
generated_types/src/wal_generated.rs generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -7,9 +7,6 @@ edition = "2018"
[dependencies] # In alphabetical order
prost = "0.7"
prost-types = "0.7"
tonic = "0.4"
tracing = { version = "0.1" }
[build-dependencies] # In alphabetical order
prost-build = "0.7"

View File

@ -9,11 +9,7 @@ type Result<T, E = Error> = std::result::Result<T, E>;
fn main() -> Result<()> {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");
let proto_files = vec![
root.join("google/rpc/error_details.proto"),
root.join("google/rpc/status.proto"),
root.join("google/protobuf/types.proto"),
];
let proto_files = vec![root.join("google/protobuf/types.proto")];
// Tell cargo to recompile if any of these proto files are changed
for proto_file in &proto_files {

View File

@ -36,260 +36,7 @@ mod pb {
}
}
}
pub mod rpc {
include!(concat!(env!("OUT_DIR"), "/google.rpc.rs"));
}
}
}
pub use pb::google::*;
use pb::google::protobuf::Any;
use prost::{
bytes::{Bytes, BytesMut},
Message,
};
use std::convert::{TryFrom, TryInto};
use std::iter::FromIterator;
use tonic::Status;
use tracing::error;
// A newtype struct to provide conversion into tonic::Status
struct EncodeError(prost::EncodeError);
impl From<EncodeError> for tonic::Status {
fn from(error: EncodeError) -> Self {
error!(error=%error.0, "failed to serialise error response details");
tonic::Status::unknown(format!("failed to serialise server error: {}", error.0))
}
}
impl From<prost::EncodeError> for EncodeError {
fn from(e: prost::EncodeError) -> Self {
Self(e)
}
}
fn encode_status(code: tonic::Code, message: String, details: Any) -> tonic::Status {
let mut buffer = BytesMut::new();
let status = pb::google::rpc::Status {
code: code as i32,
message: message.clone(),
details: vec![details],
};
match status.encode(&mut buffer) {
Ok(_) => tonic::Status::with_details(code, message, buffer.freeze()),
Err(e) => EncodeError(e).into(),
}
}
#[derive(Debug, Default, Clone)]
pub struct FieldViolation {
pub field: String,
pub description: String,
}
impl FieldViolation {
pub fn required(field: impl Into<String>) -> Self {
Self {
field: field.into(),
description: "Field is required".to_string(),
}
}
/// Re-scopes this error as the child of another field
pub fn scope(self, field: impl Into<String>) -> Self {
let field = if self.field.is_empty() {
field.into()
} else {
[field.into(), self.field].join(".")
};
Self {
field,
description: self.description,
}
}
}
fn encode_bad_request(violation: Vec<FieldViolation>) -> Result<Any, EncodeError> {
let mut buffer = BytesMut::new();
pb::google::rpc::BadRequest {
field_violations: violation
.into_iter()
.map(|f| pb::google::rpc::bad_request::FieldViolation {
field: f.field,
description: f.description,
})
.collect(),
}
.encode(&mut buffer)?;
Ok(Any {
type_url: "type.googleapis.com/google.rpc.BadRequest".to_string(),
value: buffer.freeze(),
})
}
impl From<FieldViolation> for tonic::Status {
fn from(f: FieldViolation) -> Self {
let message = format!("Violation for field \"{}\": {}", f.field, f.description);
match encode_bad_request(vec![f]) {
Ok(details) => encode_status(tonic::Code::InvalidArgument, message, details),
Err(e) => e.into(),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct InternalError {}
impl From<InternalError> for tonic::Status {
fn from(_: InternalError) -> Self {
tonic::Status::new(tonic::Code::Internal, "Internal Error")
}
}
#[derive(Debug, Default, Clone)]
pub struct AlreadyExists {
pub resource_type: String,
pub resource_name: String,
pub owner: String,
pub description: String,
}
fn encode_resource_info(
resource_type: String,
resource_name: String,
owner: String,
description: String,
) -> Result<Any, EncodeError> {
let mut buffer = BytesMut::new();
pb::google::rpc::ResourceInfo {
resource_type,
resource_name,
owner,
description,
}
.encode(&mut buffer)?;
Ok(Any {
type_url: "type.googleapis.com/google.rpc.ResourceInfo".to_string(),
value: buffer.freeze(),
})
}
impl From<AlreadyExists> for tonic::Status {
fn from(exists: AlreadyExists) -> Self {
let message = format!(
"Resource {}/{} already exists",
exists.resource_type, exists.resource_name
);
match encode_resource_info(
exists.resource_type,
exists.resource_name,
exists.owner,
exists.description,
) {
Ok(details) => encode_status(tonic::Code::AlreadyExists, message, details),
Err(e) => e.into(),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct NotFound {
pub resource_type: String,
pub resource_name: String,
pub owner: String,
pub description: String,
}
impl From<NotFound> for tonic::Status {
fn from(not_found: NotFound) -> Self {
let message = format!(
"Resource {}/{} not found",
not_found.resource_type, not_found.resource_name
);
match encode_resource_info(
not_found.resource_type,
not_found.resource_name,
not_found.owner,
not_found.description,
) {
Ok(details) => encode_status(tonic::Code::NotFound, message, details),
Err(e) => e.into(),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct PreconditionViolation {
pub category: String,
pub subject: String,
pub description: String,
}
fn encode_precondition_failure(violations: Vec<PreconditionViolation>) -> Result<Any, EncodeError> {
use pb::google::rpc::precondition_failure::Violation;
let mut buffer = BytesMut::new();
pb::google::rpc::PreconditionFailure {
violations: violations
.into_iter()
.map(|x| Violation {
r#type: x.category,
subject: x.subject,
description: x.description,
})
.collect(),
}
.encode(&mut buffer)?;
Ok(Any {
type_url: "type.googleapis.com/google.rpc.PreconditionFailure".to_string(),
value: buffer.freeze(),
})
}
impl From<PreconditionViolation> for tonic::Status {
fn from(violation: PreconditionViolation) -> Self {
let message = format!(
"Precondition violation {} - {}: {}",
violation.subject, violation.category, violation.description
);
match encode_precondition_failure(vec![violation]) {
Ok(details) => encode_status(tonic::Code::FailedPrecondition, message, details),
Err(e) => e.into(),
}
}
}
/// An extension trait that adds the ability to convert an error
/// that can be converted to a String to a FieldViolation
pub trait FieldViolationExt {
type Output;
fn field(self, field: &'static str) -> Result<Self::Output, FieldViolation>;
}
impl<T, E> FieldViolationExt for Result<T, E>
where
E: ToString,
{
type Output = T;
fn field(self, field: &'static str) -> Result<T, FieldViolation> {
self.map_err(|e| FieldViolation {
field: field.to_string(),
description: e.to_string(),
})
}
}

View File

@ -0,0 +1,23 @@
# InfluxDB V2 Client API
This crate contains a work-in-progress implementation of a Rust client for the [InfluxDB 2.0 API](https://docs.influxdata.com/influxdb/v2.0/reference/api/).
This client is not the Rust client for IOx. You can find that [here](../influxdb_iox_client).
The InfluxDB IOx project plans to focus its efforts on the subset of the API most relevant to IOx, but we accept (welcome!) PRs that add the other pieces of functionality.
## Design Notes
When it makes sense, this client aims to mirror the [InfluxDB 2.x Go client API](https://github.com/influxdata/influxdb-client-go)
## Contributing
If you would like to contribute code, you can do so through GitHub by forking the repository and sending a pull request into the master branch.
## Future work
- [ ] Publish as a crate on [crates.io](http://crates.io)
If you would like to contribute code, you can do so through GitHub by forking the repository and sending a pull request into the main branch.

View File

@ -0,0 +1,11 @@
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let influx_url = "some-url";
let token = "some-token";
let client = influxdb2_client::Client::new(influx_url, token);
println!("{:?}", client.ready().await?);
Ok(())
}

View File

@ -302,3 +302,5 @@ cpu,host=server01,region=us-west usage=0.87
Ok(())
}
}
mod ready;

View File

@ -0,0 +1,51 @@
use reqwest::{Method, StatusCode};
use snafu::ResultExt;
use super::{Client, Http, RequestError, ReqwestProcessing};
impl Client {
/// Get the readiness of an instance at startup
pub async fn ready(&self) -> Result<bool, RequestError> {
let ready_url = format!("{}/ready", self.url);
let response = self
.request(Method::GET, &ready_url)
.send()
.await
.context(ReqwestProcessing)?;
match response.status() {
StatusCode::OK => Ok(true),
_ => {
let status = response.status();
let text = response.text().await.context(ReqwestProcessing)?;
Http { status, text }.fail()?
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use mockito::mock;
type Error = Box<dyn std::error::Error>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
#[tokio::test]
async fn ready() -> Result {
let token = "some-token";
let mock_server = mock("GET", "/ready")
.match_header("Authorization", format!("Token {}", token).as_str())
.create();
let client = Client::new(&mockito::server_url(), token);
let _result = client.ready().await;
mock_server.assert();
Ok(())
}
}

View File

@ -6,6 +6,7 @@ edition = "2018"
[features]
flight = ["arrow_deps", "serde/derive", "serde_json", "futures-util"]
format = ["arrow_deps"]
[dependencies]
# Workspace dependencies, in alphabetical order
@ -23,5 +24,5 @@ tokio = { version = "1.0", features = ["macros"] }
tonic = { version = "0.4.0" }
[dev-dependencies] # In alphabetical order
rand = "0.8.1"
rand = "0.8.3"
serde_json = "1.0"

View File

@ -4,6 +4,12 @@ pub mod health;
/// Client for the management API
pub mod management;
/// Client for the write API
pub mod write;
/// Client for the operations API
pub mod operations;
#[cfg(feature = "flight")]
/// Client for the flight API
pub mod flight;

View File

@ -5,6 +5,7 @@ use thiserror::Error;
use self::generated_types::{management_service_client::ManagementServiceClient, *};
use crate::connection::Connection;
use ::generated_types::google::longrunning::Operation;
use std::convert::TryInto;
/// Re-export generated_types
@ -80,8 +81,111 @@ pub enum GetDatabaseError {
ServerError(tonic::Status),
}
/// Errors returned by Client::list_chunks
#[derive(Debug, Error)]
pub enum ListChunksError {
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::list_remotes
#[derive(Debug, Error)]
pub enum ListRemotesError {
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::update_remote
#[derive(Debug, Error)]
pub enum UpdateRemoteError {
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::create_dummy_job
#[derive(Debug, Error)]
pub enum CreateDummyJobError {
/// Response contained no payload
#[error("Server returned an empty response")]
EmptyResponse,
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::list_partitions
#[derive(Debug, Error)]
pub enum ListPartitionsError {
/// Database not found
#[error("Database not found")]
DatabaseNotFound,
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::get_partition
#[derive(Debug, Error)]
pub enum GetPartitionError {
/// Database not found
#[error("Database not found")]
DatabaseNotFound,
/// Partition not found
#[error("Partition not found")]
PartitionNotFound,
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::list_partition_chunks
#[derive(Debug, Error)]
pub enum ListPartitionChunksError {
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::new_partition_chunk
#[derive(Debug, Error)]
pub enum NewPartitionChunkError {
/// Database not found
#[error("Database not found")]
DatabaseNotFound,
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Errors returned by Client::close_partition_chunk
#[derive(Debug, Error)]
pub enum ClosePartitionChunkError {
/// Database not found
#[error("Database not found")]
DatabaseNotFound,
/// Response contained no payload
#[error("Server returned an empty response")]
EmptyResponse,
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// An IOx Management API client.
///
/// This client wraps the underlying `tonic` generated client with a
/// more ergonomic interface.
///
/// ```no_run
/// #[tokio::main]
/// # async fn main() {
@ -198,4 +302,196 @@ impl Client {
.ok_or(GetDatabaseError::EmptyResponse)?;
Ok(rules)
}
/// List chunks in a database.
pub async fn list_chunks(
&mut self,
db_name: impl Into<String>,
) -> Result<Vec<Chunk>, ListChunksError> {
let db_name = db_name.into();
let response = self
.inner
.list_chunks(ListChunksRequest { db_name })
.await
.map_err(ListChunksError::ServerError)?;
Ok(response.into_inner().chunks)
}
/// List remotes.
pub async fn list_remotes(&mut self) -> Result<Vec<generated_types::Remote>, ListRemotesError> {
let response = self
.inner
.list_remotes(ListRemotesRequest {})
.await
.map_err(ListRemotesError::ServerError)?;
Ok(response.into_inner().remotes)
}
/// Update remote
pub async fn update_remote(
&mut self,
id: u32,
connection_string: impl Into<String>,
) -> Result<(), UpdateRemoteError> {
self.inner
.update_remote(UpdateRemoteRequest {
remote: Some(generated_types::Remote {
id,
connection_string: connection_string.into(),
}),
})
.await
.map_err(UpdateRemoteError::ServerError)?;
Ok(())
}
/// Delete remote
pub async fn delete_remote(&mut self, id: u32) -> Result<(), UpdateRemoteError> {
self.inner
.delete_remote(DeleteRemoteRequest { id })
.await
.map_err(UpdateRemoteError::ServerError)?;
Ok(())
}
/// List all partitions of the database
pub async fn list_partitions(
&mut self,
db_name: impl Into<String>,
) -> Result<Vec<Partition>, ListPartitionsError> {
let db_name = db_name.into();
let response = self
.inner
.list_partitions(ListPartitionsRequest { db_name })
.await
.map_err(|status| match status.code() {
tonic::Code::NotFound => ListPartitionsError::DatabaseNotFound,
_ => ListPartitionsError::ServerError(status),
})?;
let ListPartitionsResponse { partitions } = response.into_inner();
Ok(partitions)
}
/// Get details about a specific partition
pub async fn get_partition(
&mut self,
db_name: impl Into<String>,
partition_key: impl Into<String>,
) -> Result<Partition, GetPartitionError> {
let db_name = db_name.into();
let partition_key = partition_key.into();
let response = self
.inner
.get_partition(GetPartitionRequest {
db_name,
partition_key,
})
.await
.map_err(|status| match status.code() {
tonic::Code::NotFound => GetPartitionError::DatabaseNotFound,
_ => GetPartitionError::ServerError(status),
})?;
let GetPartitionResponse { partition } = response.into_inner();
partition.ok_or(GetPartitionError::PartitionNotFound)
}
/// List chunks in a partition
pub async fn list_partition_chunks(
&mut self,
db_name: impl Into<String>,
partition_key: impl Into<String>,
) -> Result<Vec<Chunk>, ListPartitionChunksError> {
let db_name = db_name.into();
let partition_key = partition_key.into();
let response = self
.inner
.list_partition_chunks(ListPartitionChunksRequest {
db_name,
partition_key,
})
.await
.map_err(ListPartitionChunksError::ServerError)?;
Ok(response.into_inner().chunks)
}
/// Create a new chunk in a partition
pub async fn new_partition_chunk(
&mut self,
db_name: impl Into<String>,
partition_key: impl Into<String>,
) -> Result<(), NewPartitionChunkError> {
let db_name = db_name.into();
let partition_key = partition_key.into();
self.inner
.new_partition_chunk(NewPartitionChunkRequest {
db_name,
partition_key,
})
.await
.map_err(|status| match status.code() {
tonic::Code::NotFound => NewPartitionChunkError::DatabaseNotFound,
_ => NewPartitionChunkError::ServerError(status),
})?;
Ok(())
}
/// Creates a dummy job that, for each value of the nanos field,
/// spawns a task that sleeps for that number of nanoseconds before
/// returning
pub async fn create_dummy_job(
&mut self,
nanos: Vec<u64>,
) -> Result<Operation, CreateDummyJobError> {
let response = self
.inner
.create_dummy_job(CreateDummyJobRequest { nanos })
.await
.map_err(CreateDummyJobError::ServerError)?;
Ok(response
.into_inner()
.operation
.ok_or(CreateDummyJobError::EmptyResponse)?)
}
/// Closes the specified chunk in the specified partition and
/// begins moving it to the read buffer.
///
/// Returns the job tracking the data's movement
pub async fn close_partition_chunk(
&mut self,
db_name: impl Into<String>,
partition_key: impl Into<String>,
chunk_id: u32,
) -> Result<Operation, ClosePartitionChunkError> {
let db_name = db_name.into();
let partition_key = partition_key.into();
let response = self
.inner
.close_partition_chunk(ClosePartitionChunkRequest {
db_name,
partition_key,
chunk_id,
})
.await
.map_err(|status| match status.code() {
tonic::Code::NotFound => ClosePartitionChunkError::DatabaseNotFound,
_ => ClosePartitionChunkError::ServerError(status),
})?;
Ok(response
.into_inner()
.operation
.ok_or(ClosePartitionChunkError::EmptyResponse)?)
}
}
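Taken together, the new partition/chunk calls support a simple rollover-then-migrate flow. A hedged sketch follows; the database and partition names are placeholders, and the connection setup mirrors the builder shown in the struct-level doc comment.

```rust
use influxdb_iox_client::{connection::Builder, management::Client};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let connection = Builder::default().build("http://127.0.0.1:8082").await?;
    let mut client = Client::new(connection);

    // Roll the partition over to a fresh mutable buffer chunk, then start
    // migrating the now-closed chunk 0 into the read buffer.
    client.new_partition_chunk("bananas", "2021-03-24").await?;
    let operation = client
        .close_partition_chunk("bananas", "2021-03-24", 0)
        .await?;
    println!("tracking operation: {:?}", operation);

    // The partition should now report more than one chunk.
    let chunks = client.list_partition_chunks("bananas", "2021-03-24").await?;
    println!("{} chunks", chunks.len());
    Ok(())
}
```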

View File

@ -0,0 +1,125 @@
use thiserror::Error;
use ::generated_types::google::FieldViolation;
use crate::connection::Connection;
use self::generated_types::{operations_client::OperationsClient, *};
/// Re-export generated_types
pub mod generated_types {
pub use generated_types::google::longrunning::*;
}
/// Error type for the operations Client
#[derive(Debug, Error)]
pub enum Error {
/// Client received an invalid response
#[error("Invalid server response: {}", .0)]
InvalidResponse(#[from] FieldViolation),
/// Operation was not found
#[error("Operation not found: {}", .0)]
NotFound(usize),
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// Result type for the operations Client
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// An IOx Long Running Operations API client.
///
/// ```no_run
/// #[tokio::main]
/// # async fn main() {
/// use influxdb_iox_client::{
/// operations::Client,
/// connection::Builder,
/// };
///
/// let mut connection = Builder::default()
/// .build("http://127.0.0.1:8082")
/// .await
/// .unwrap();
///
/// let mut client = Client::new(connection);
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct Client {
inner: OperationsClient<Connection>,
}
impl Client {
/// Creates a new client with the provided connection
pub fn new(channel: tonic::transport::Channel) -> Self {
Self {
inner: OperationsClient::new(channel),
}
}
/// Get information about all operations
pub async fn list_operations(&mut self) -> Result<Vec<Operation>> {
Ok(self
.inner
.list_operations(ListOperationsRequest::default())
.await
.map_err(Error::ServerError)?
.into_inner()
.operations)
}
/// Get information about a specific operation
pub async fn get_operation(&mut self, id: usize) -> Result<Operation> {
Ok(self
.inner
.get_operation(GetOperationRequest {
name: id.to_string(),
})
.await
.map_err(|e| match e.code() {
tonic::Code::NotFound => Error::NotFound(id),
_ => Error::ServerError(e),
})?
.into_inner())
}
/// Cancel a given operation
pub async fn cancel_operation(&mut self, id: usize) -> Result<()> {
self.inner
.cancel_operation(CancelOperationRequest {
name: id.to_string(),
})
.await
.map_err(|e| match e.code() {
tonic::Code::NotFound => Error::NotFound(id),
_ => Error::ServerError(e),
})?;
Ok(())
}
/// Waits until an operation completes, or the timeout expires, and
/// returns the latest operation metadata
pub async fn wait_operation(
&mut self,
id: usize,
timeout: Option<std::time::Duration>,
) -> Result<Operation> {
Ok(self
.inner
.wait_operation(WaitOperationRequest {
name: id.to_string(),
timeout: timeout.map(Into::into),
})
.await
.map_err(|e| match e.code() {
tonic::Code::NotFound => Error::NotFound(id),
_ => Error::ServerError(e),
})?
.into_inner())
}
}
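For instance, a caller that fired off a `close_partition_chunk` (or `create_dummy_job`) can block on the returned operation with a bounded timeout. A minimal sketch, with the helper name and the not-found handling chosen here purely for illustration:

```rust
use influxdb_iox_client::operations::{generated_types::Operation, Client, Error};
use std::time::Duration;

/// Wait up to a second for the given job to finish; treat a missing
/// operation as "nothing left to wait for".
async fn wait_briefly(client: &mut Client, id: usize) -> Result<Option<Operation>, Error> {
    match client.wait_operation(id, Some(Duration::from_secs(1))).await {
        Ok(operation) => Ok(Some(operation)),
        Err(Error::NotFound(_)) => Ok(None),
        Err(e) => Err(e),
    }
}
```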

View File

@ -0,0 +1,77 @@
use thiserror::Error;
use self::generated_types::{write_service_client::WriteServiceClient, *};
use crate::connection::Connection;
/// Re-export generated_types
pub mod generated_types {
pub use generated_types::influxdata::iox::write::v1::*;
}
/// Errors returned by Client::write_data
#[derive(Debug, Error)]
pub enum WriteError {
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
ServerError(tonic::Status),
}
/// An IOx Write API client.
///
/// ```no_run
/// #[tokio::main]
/// # async fn main() {
/// use influxdb_iox_client::{
/// write::Client,
/// connection::Builder,
/// };
///
/// let mut connection = Builder::default()
/// .build("http://127.0.0.1:8082")
/// .await
/// .unwrap();
///
/// let mut client = Client::new(connection);
///
/// // write a line of line protocol data
/// client
/// .write("bananas", "cpu,region=west user=23.2 100")
/// .await
/// .expect("failed to create database");
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct Client {
inner: WriteServiceClient<Connection>,
}
impl Client {
/// Creates a new client with the provided connection
pub fn new(channel: tonic::transport::Channel) -> Self {
Self {
inner: WriteServiceClient::new(channel),
}
}
/// Write the [LineProtocol] formatted data in `lp_data` to
/// database `db_name`. Returns the number of lines which were parsed
/// and written to the database
///
/// [LineProtocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/#data-types-and-format)
pub async fn write(
&mut self,
db_name: impl Into<String>,
lp_data: impl Into<String>,
) -> Result<usize, WriteError> {
let db_name = db_name.into();
let lp_data = lp_data.into();
let response = self
.inner
.write(WriteRequest { db_name, lp_data })
.await
.map_err(WriteError::ServerError)?;
Ok(response.into_inner().lines_written as usize)
}
}

View File

@ -0,0 +1,217 @@
//! Output formatting utilities for Arrow record batches
use std::{fmt::Display, str::FromStr};
use thiserror::Error;
use arrow_deps::arrow::{
self, csv::WriterBuilder, error::ArrowError, json::ArrayWriter, record_batch::RecordBatch,
};
/// Error type for results formatting
#[derive(Debug, Error)]
pub enum Error {
/// Unknown formatting type
#[error("Unknown format type: {}. Expected one of 'pretty', 'csv' or 'json'", .0)]
Invalid(String),
/// Error pretty printing
#[error("Arrow pretty printing error: {}", .0)]
PrettyArrow(ArrowError),
/// Error during CSV conversion
#[error("Arrow csv printing error: {}", .0)]
CsvArrow(ArrowError),
/// Error during JSON conversion
#[error("Arrow json printing error: {}", .0)]
JsonArrow(ArrowError),
/// Error converting CSV output to utf-8
#[error("Error converting CSV output to UTF-8: {}", .0)]
CsvUtf8(std::string::FromUtf8Error),
/// Error converting JSON output to utf-8
#[error("Error converting JSON output to UTF-8: {}", .0)]
JsonUtf8(std::string::FromUtf8Error),
}
type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, Copy, Clone, PartialEq)]
/// Requested output format for the query endpoint
pub enum QueryOutputFormat {
/// Arrow pretty printer format (default)
Pretty,
/// Comma separated values
CSV,
/// Arrow JSON format
JSON,
}
impl Display for QueryOutputFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
QueryOutputFormat::Pretty => write!(f, "pretty"),
QueryOutputFormat::CSV => write!(f, "csv"),
QueryOutputFormat::JSON => write!(f, "json"),
}
}
}
impl Default for QueryOutputFormat {
fn default() -> Self {
Self::Pretty
}
}
impl FromStr for QueryOutputFormat {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_lowercase().as_str() {
"pretty" => Ok(Self::Pretty),
"csv" => Ok(Self::CSV),
"json" => Ok(Self::JSON),
_ => Err(Error::Invalid(s.to_string())),
}
}
}
impl QueryOutputFormat {
/// Return the content type of this format
pub fn content_type(&self) -> &'static str {
match self {
Self::Pretty => "text/plain",
Self::CSV => "text/csv",
Self::JSON => "application/json",
}
}
}
impl QueryOutputFormat {
/// Format the [`RecordBatch`]es into a String in one of the
/// following formats:
///
/// Pretty:
/// ```text
/// +----------------+--------------+-------+-----------------+------------+
/// | bottom_degrees | location | state | surface_degrees | time |
/// +----------------+--------------+-------+-----------------+------------+
/// | 50.4 | santa_monica | CA | 65.2 | 1568756160 |
/// +----------------+--------------+-------+-----------------+------------+
/// ```
///
/// CSV:
/// ```text
/// bottom_degrees,location,state,surface_degrees,time
/// 50.4,santa_monica,CA,65.2,1568756160
/// ```
///
/// JSON:
///
/// Example (newline + whitespace added for clarity):
/// ```text
/// [
/// {"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},
/// {"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}
/// ]
/// ```
pub fn format(&self, batches: &[RecordBatch]) -> Result<String> {
match self {
Self::Pretty => batches_to_pretty(&batches),
Self::CSV => batches_to_csv(&batches),
Self::JSON => batches_to_json(&batches),
}
}
}
fn batches_to_pretty(batches: &[RecordBatch]) -> Result<String> {
arrow::util::pretty::pretty_format_batches(batches).map_err(Error::PrettyArrow)
}
fn batches_to_csv(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = WriterBuilder::new().has_headers(true).build(&mut bytes);
for batch in batches {
writer.write(batch).map_err(Error::CsvArrow)?;
}
}
let csv = String::from_utf8(bytes).map_err(Error::CsvUtf8)?;
Ok(csv)
}
fn batches_to_json(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = ArrayWriter::new(&mut bytes);
writer.write_batches(batches).map_err(Error::JsonArrow)?;
writer.finish().map_err(Error::JsonArrow)?;
}
let json = String::from_utf8(bytes).map_err(Error::JsonUtf8)?;
Ok(json)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_from_str() {
assert_eq!(
QueryOutputFormat::from_str("pretty").unwrap(),
QueryOutputFormat::Pretty
);
assert_eq!(
QueryOutputFormat::from_str("pRetty").unwrap(),
QueryOutputFormat::Pretty
);
assert_eq!(
QueryOutputFormat::from_str("csv").unwrap(),
QueryOutputFormat::CSV
);
assert_eq!(
QueryOutputFormat::from_str("CSV").unwrap(),
QueryOutputFormat::CSV
);
assert_eq!(
QueryOutputFormat::from_str("json").unwrap(),
QueryOutputFormat::JSON
);
assert_eq!(
QueryOutputFormat::from_str("JSON").unwrap(),
QueryOutputFormat::JSON
);
assert_eq!(
QueryOutputFormat::from_str("un").unwrap_err().to_string(),
"Unknown format type: un. Expected one of 'pretty', 'csv' or 'json'"
);
}
#[test]
fn test_from_roundtrip() {
assert_eq!(
QueryOutputFormat::from_str(&QueryOutputFormat::Pretty.to_string()).unwrap(),
QueryOutputFormat::Pretty
);
assert_eq!(
QueryOutputFormat::from_str(&QueryOutputFormat::CSV.to_string()).unwrap(),
QueryOutputFormat::CSV
);
assert_eq!(
QueryOutputFormat::from_str(&QueryOutputFormat::JSON.to_string()).unwrap(),
QueryOutputFormat::JSON
);
}
}

View File

@ -8,12 +8,15 @@
)]
#![allow(clippy::missing_docs_in_private_items)]
pub use client::{health, management};
pub use generated_types::{protobuf_type_url, protobuf_type_url_eq};
#[cfg(feature = "flight")]
pub use client::flight;
pub use client::*;
/// Builder for constructing connections for use with the various gRPC clients
pub mod connection;
#[cfg(feature = "format")]
/// Output formatting utilities
pub mod format;
mod client;

View File

@ -50,6 +50,12 @@ pub enum Error {
value: String,
},
#[snafu(display(r#"Unable to parse unsigned integer value '{}'"#, value))]
UIntegerValueInvalid {
source: std::num::ParseIntError,
value: String,
},
#[snafu(display(r#"Unable to parse floating-point value '{}'"#, value))]
FloatValueInvalid {
source: std::num::ParseFloatError,
@ -333,10 +339,11 @@ pub type FieldSet<'a> = SmallVec<[(EscapedStr<'a>, FieldValue<'a>); 4]>;
pub type TagSet<'a> = SmallVec<[(EscapedStr<'a>, EscapedStr<'a>); 8]>;
/// Allowed types of Fields in a `ParsedLine`. One of the types described in
/// https://docs.influxdata.com/influxdb/v1.8/write_protocols/line_protocol_tutorial/#data-types
/// https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/#data-types-and-format
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue<'a> {
I64(i64),
U64(u64),
F64(f64),
String(EscapedStr<'a>),
Boolean(bool),
@ -349,6 +356,7 @@ impl<'a> Display for FieldValue<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::I64(v) => write!(f, "{}i", v),
Self::U64(v) => write!(f, "{}u", v),
Self::F64(v) => write!(f, "{}", v),
Self::String(v) => escape_and_write_value(f, v, FIELD_VALUE_STRING_DELIMITERS),
Self::Boolean(v) => write!(f, "{}", v),
@ -644,47 +652,76 @@ fn field_key(i: &str) -> IResult<&str, EscapedStr<'_>> {
fn field_value(i: &str) -> IResult<&str, FieldValue<'_>> {
let int = map(field_integer_value, FieldValue::I64);
let uint = map(field_uinteger_value, FieldValue::U64);
let float = map(field_float_value, FieldValue::F64);
let string = map(field_string_value, FieldValue::String);
let boolv = map(field_bool_value, FieldValue::Boolean);
alt((int, float, string, boolv))(i)
alt((int, uint, float, string, boolv))(i)
}
fn field_integer_value(i: &str) -> IResult<&str, i64> {
let tagged_value = terminated(integral_value_common, tag("i"));
let tagged_value = terminated(integral_value_signed, tag("i"));
map_fail(tagged_value, |value| {
value.parse().context(IntegerValueInvalid { value })
})(i)
}
fn field_uinteger_value(i: &str) -> IResult<&str, u64> {
let tagged_value = terminated(digit1, tag("u"));
map_fail(tagged_value, |value| {
value.parse().context(UIntegerValueInvalid { value })
})(i)
}
fn field_float_value(i: &str) -> IResult<&str, f64> {
let value = alt((field_float_value_with_decimal, field_float_value_no_decimal));
let value = alt((
field_float_value_with_exponential_and_decimal,
field_float_value_with_exponential_no_decimal,
field_float_value_with_decimal,
field_float_value_no_decimal,
));
map_fail(value, |value| {
value.parse().context(FloatValueInvalid { value })
})(i)
}
fn field_float_value_with_decimal(i: &str) -> IResult<&str, &str> {
recognize(separated_pair(integral_value_common, tag("."), digit1))(i)
recognize(separated_pair(integral_value_signed, tag("."), digit1))(i)
}
fn field_float_value_with_exponential_and_decimal(i: &str) -> IResult<&str, &str> {
recognize(separated_pair(
integral_value_signed,
tag("."),
exponential_value,
))(i)
}
fn field_float_value_with_exponential_no_decimal(i: &str) -> IResult<&str, &str> {
exponential_value(i)
}
fn exponential_value(i: &str) -> IResult<&str, &str> {
recognize(separated_pair(digit1, tag("e+"), digit1))(i)
}
fn field_float_value_no_decimal(i: &str) -> IResult<&str, &str> {
integral_value_common(i)
integral_value_signed(i)
}
fn integral_value_common(i: &str) -> IResult<&str, &str> {
fn integral_value_signed(i: &str) -> IResult<&str, &str> {
recognize(preceded(opt(tag("-")), digit1))(i)
}
fn timestamp(i: &str) -> IResult<&str, i64> {
map_fail(integral_value_common, |value| {
map_fail(integral_value_signed, |value| {
value.parse().context(TimestampValueInvalid { value })
})(i)
}
fn field_string_value(i: &str) -> IResult<&str, EscapedStr<'_>> {
// https://docs.influxdata.com/influxdb/v1.8/write_protocols/line_protocol_tutorial/#data-types
// https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/#data-types-and-format
// For string field values, backslash is only used to escape itself(\) or double
// quotes.
let string_data = alt((
@ -707,7 +744,7 @@ fn field_string_value(i: &str) -> IResult<&str, EscapedStr<'_>> {
}
fn field_bool_value(i: &str) -> IResult<&str, bool> {
// https://docs.influxdata.com/influxdb/v1.8/write_protocols/line_protocol_tutorial/#data-types
// https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/#data-types-and-format
// "specify TRUE with t, T, true, True, or TRUE. Specify FALSE with f, F, false,
// False, or FALSE
alt((
@ -1037,6 +1074,13 @@ mod test {
}
}
fn unwrap_u64(&self) -> u64 {
match self {
Self::U64(v) => *v,
_ => panic!("field was not an u64"),
}
}
fn unwrap_f64(&self) -> f64 {
match self {
Self::F64(v) => *v,
@ -1202,6 +1246,19 @@ mod test {
Ok(())
}
#[test]
fn parse_single_field_uinteger() -> Result {
let input = "foo asdf=23u 1234";
let vals = parse(input)?;
assert_eq!(vals[0].series.measurement, "foo");
assert_eq!(vals[0].timestamp, Some(1234));
assert_eq!(vals[0].field_set[0].0, "asdf");
assert_eq!(vals[0].field_set[0].1.unwrap_u64(), 23);
Ok(())
}
#[test]
fn parse_single_field_float_no_decimal() -> Result {
let input = "foo asdf=44 546";
@ -1340,6 +1397,23 @@ mod test {
Ok(())
}
#[test]
fn parse_two_fields_uinteger() -> Result {
let input = "foo asdf=23u,bar=5u 1234";
let vals = parse(input)?;
assert_eq!(vals[0].series.measurement, "foo");
assert_eq!(vals[0].timestamp, Some(1234));
assert_eq!(vals[0].field_set[0].0, "asdf");
assert_eq!(vals[0].field_set[0].1.unwrap_u64(), 23);
assert_eq!(vals[0].field_set[1].0, "bar");
assert_eq!(vals[0].field_set[1].1.unwrap_u64(), 5);
Ok(())
}
#[test]
fn parse_two_fields_float() -> Result {
let input = "foo asdf=23.1,bar=5 1234";
@ -1365,7 +1439,7 @@ mod test {
#[test]
fn parse_mixed_field_types() -> Result {
let input = r#"foo asdf=23.1,bar=5i,baz="the string",frab=false 1234"#;
let input = r#"foo asdf=23.1,bar=-5i,qux=9u,baz="the string",frab=false 1234"#;
let vals = parse(input)?;
assert_eq!(vals[0].series.measurement, "foo");
@ -1378,13 +1452,16 @@ mod test {
));
assert_eq!(vals[0].field_set[1].0, "bar");
assert_eq!(vals[0].field_set[1].1.unwrap_i64(), 5);
assert_eq!(vals[0].field_set[1].1.unwrap_i64(), -5);
assert_eq!(vals[0].field_set[2].0, "baz");
assert_eq!(vals[0].field_set[2].1.unwrap_string(), "the string");
assert_eq!(vals[0].field_set[2].0, "qux");
assert_eq!(vals[0].field_set[2].1.unwrap_u64(), 9);
assert_eq!(vals[0].field_set[3].0, "frab");
assert_eq!(vals[0].field_set[3].1.unwrap_bool(), false);
assert_eq!(vals[0].field_set[3].0, "baz");
assert_eq!(vals[0].field_set[3].1.unwrap_string(), "the string");
assert_eq!(vals[0].field_set[4].0, "frab");
assert_eq!(vals[0].field_set[4].1.unwrap_bool(), false);
Ok(())
}
@ -1400,6 +1477,49 @@ mod test {
Ok(())
}
#[test]
fn parse_negative_uinteger() -> Result {
let input = "m0 field=-1u 99";
let parsed = parse(input);
assert!(
matches!(parsed, Err(super::Error::CannotParseEntireLine { .. })),
"Wrong error: {:?}",
parsed,
);
Ok(())
}
#[test]
fn parse_scientific_float() -> Result {
let input = "m0 field=-1.234456e+06 1615869152385000000";
let vals = parse(input)?;
assert_eq!(vals.len(), 1);
let input = "m0 field=1.234456e+06 1615869152385000000";
let vals = parse(input)?;
assert_eq!(vals.len(), 1);
let input = "m0 field=-1.234456e06 1615869152385000000";
let parsed = parse(input);
assert!(
matches!(parsed, Err(super::Error::CannotParseEntireLine { .. })),
"Wrong error: {:?}",
parsed,
);
let input = "m0 field=1.234456e06 1615869152385000000";
let parsed = parse(input);
assert!(
matches!(parsed, Err(super::Error::CannotParseEntireLine { .. })),
"Wrong error: {:?}",
parsed,
);
Ok(())
}
#[test]
fn parse_negative_float() -> Result {
let input = "m0 field2=-1 99";
@ -1428,6 +1548,20 @@ mod test {
Ok(())
}
#[test]
fn parse_out_of_range_uinteger() -> Result {
let input = "m0 field=99999999999999999999999999999999u 99";
let parsed = parse(input);
assert!(
matches!(parsed, Err(super::Error::UIntegerValueInvalid { .. })),
"Wrong error: {:?}",
parsed,
);
Ok(())
}
#[test]
fn parse_out_of_range_float() -> Result {
let input = format!("m0 field={val}.{val} 99", val = "9".repeat(200));
@ -1913,7 +2047,8 @@ her"#,
#[test]
fn field_value_display() -> Result {
assert_eq!(FieldValue::I64(42).to_string(), "42i");
assert_eq!(FieldValue::I64(-42).to_string(), "-42i");
assert_eq!(FieldValue::U64(42).to_string(), "42u");
assert_eq!(FieldValue::F64(42.11).to_string(), "42.11");
assert_eq!(
FieldValue::String(EscapedStr::from("foo")).to_string(),

View File

@ -13,5 +13,5 @@ tracing = "0.1"
[dev-dependencies] # In alphabetical order
flate2 = "1.0"
hex = "0.4.2"
rand = "0.7.2"
rand = "0.8.3"
test_helpers = { path = "../test_helpers" }

View File

@ -381,7 +381,7 @@ mod tests {
let mut a = Vec::with_capacity(n as usize);
for i in 0..n {
let top_bit = (i & 1) << (bits - 1);
let v = rng.gen_range(0, max) | top_bit;
let v = rng.gen_range(0..max) | top_bit;
assert!(v < max);
a.push(v);
}

View File

@ -6,9 +6,9 @@ edition = "2018"
[dependencies] # In alphabetical order
arrow_deps = { path = "../arrow_deps" }
data_types = { path = "../data_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
influxdb_tsm = { path = "../influxdb_tsm" }
internal_types = { path = "../internal_types" }
packers = { path = "../packers" }
snafu = "0.6.2"
tracing = "0.1"

View File

@ -11,16 +11,15 @@
clippy::clone_on_ref_ptr
)]
use data_types::{
schema::{builder::InfluxSchemaBuilder, InfluxFieldType, Schema},
TIME_COLUMN_NAME,
};
use influxdb_line_protocol::{FieldValue, ParsedLine};
use influxdb_tsm::{
mapper::{ColumnData, MeasurementTable, TSMMeasurementMapper},
reader::{BlockDecoder, TSMBlockReader, TSMIndexReader},
BlockType, TSMError,
};
use internal_types::schema::{
builder::InfluxSchemaBuilder, InfluxFieldType, Schema, TIME_COLUMN_NAME,
};
use packers::{
ByteArray, Error as TableError, IOxTableWriter, IOxTableWriterSource, Packer, Packers,
};
@ -75,7 +74,7 @@ pub enum Error {
#[snafu(display(r#"Error building schema: {}"#, source))]
BuildingSchema {
source: data_types::schema::builder::Error,
source: internal_types::schema::builder::Error,
},
#[snafu(display(r#"Error writing to TableWriter: {}"#, source))]
@ -96,8 +95,8 @@ pub enum Error {
CouldNotFindColumn,
}
impl From<data_types::schema::builder::Error> for Error {
fn from(source: data_types::schema::builder::Error) -> Self {
impl From<internal_types::schema::builder::Error> for Error {
fn from(source: internal_types::schema::builder::Error) -> Self {
Self::BuildingSchema { source }
}
}
@ -310,6 +309,7 @@ impl<'a> MeasurementSampler<'a> {
let field_type = match field_value {
FieldValue::F64(_) => InfluxFieldType::Float,
FieldValue::I64(_) => InfluxFieldType::Integer,
FieldValue::U64(_) => InfluxFieldType::UInteger,
FieldValue::String(_) => InfluxFieldType::String,
FieldValue::Boolean(_) => InfluxFieldType::Boolean,
};
@ -474,6 +474,9 @@ fn pack_lines<'a>(schema: &Schema, lines: &[ParsedLine<'a>]) -> Vec<Packers> {
FieldValue::I64(i) => {
packer.i64_packer_mut().push(i);
}
FieldValue::U64(i) => {
packer.u64_packer_mut().push(i);
}
FieldValue::String(ref s) => {
packer.bytes_packer_mut().push(ByteArray::from(s.as_str()));
}
@ -816,7 +819,8 @@ impl TSMFileConverter {
mut block_reader: impl BlockDecoder,
m: &mut MeasurementTable,
) -> Result<(Schema, Vec<Packers>), Error> {
let mut builder = data_types::schema::builder::SchemaBuilder::new().measurement(&m.name);
let mut builder =
internal_types::schema::builder::SchemaBuilder::new().measurement(&m.name);
let mut packed_columns: Vec<Packers> = Vec::new();
let mut tks = Vec::new();
@ -1095,11 +1099,11 @@ impl std::fmt::Debug for TSMFileConverter {
#[cfg(test)]
mod tests {
use super::*;
use data_types::{assert_column_eq, schema::InfluxColumnType};
use influxdb_tsm::{
reader::{BlockData, MockBlockDecoder},
Block,
};
use internal_types::{assert_column_eq, schema::InfluxColumnType};
use packers::{Error as TableError, IOxTableWriter, IOxTableWriterSource, Packers};
use test_helpers::approximately_equal;

View File

@ -1,7 +1,7 @@
//! This module contains the code to write table data to parquet
use arrow_deps::parquet::{
self,
basic::{Compression, Encoding, LogicalType, Repetition, Type as PhysicalType},
basic::{Compression, ConvertedType, Encoding, Repetition, Type as PhysicalType},
errors::ParquetError,
file::{
properties::{WriterProperties, WriterPropertiesBuilder},
@ -9,7 +9,7 @@ use arrow_deps::parquet::{
},
schema::types::{ColumnPath, Type},
};
use data_types::schema::{InfluxColumnType, InfluxFieldType, Schema};
use internal_types::schema::{InfluxColumnType, InfluxFieldType, Schema};
use parquet::file::writer::ParquetWriter;
use snafu::{OptionExt, ResultExt, Snafu};
use std::{
@ -97,7 +97,7 @@ where
///
/// ```
/// # use std::fs;
/// # use data_types::schema::{builder::SchemaBuilder, InfluxFieldType};
/// # use internal_types::schema::{builder::SchemaBuilder, InfluxFieldType};
/// # use packers::IOxTableWriter;
/// # use packers::{Packer, Packers};
/// # use ingest::parquet::writer::{IOxParquetTableWriter, CompressionLevel};
@ -297,19 +297,19 @@ fn convert_to_parquet_schema(schema: &Schema) -> Result<Arc<parquet::schema::typ
i, influxdb_column_type, field
);
let (physical_type, logical_type) = match influxdb_column_type {
Some(InfluxColumnType::Tag) => (PhysicalType::BYTE_ARRAY, Some(LogicalType::UTF8)),
Some(InfluxColumnType::Tag) => (PhysicalType::BYTE_ARRAY, Some(ConvertedType::UTF8)),
Some(InfluxColumnType::Field(InfluxFieldType::Boolean)) => {
(PhysicalType::BOOLEAN, None)
}
Some(InfluxColumnType::Field(InfluxFieldType::Float)) => (PhysicalType::DOUBLE, None),
Some(InfluxColumnType::Field(InfluxFieldType::Integer)) => {
(PhysicalType::INT64, Some(LogicalType::UINT_64))
(PhysicalType::INT64, Some(ConvertedType::UINT_64))
}
Some(InfluxColumnType::Field(InfluxFieldType::UInteger)) => {
(PhysicalType::INT64, Some(LogicalType::UINT_64))
(PhysicalType::INT64, Some(ConvertedType::UINT_64))
}
Some(InfluxColumnType::Field(InfluxFieldType::String)) => {
(PhysicalType::BYTE_ARRAY, Some(LogicalType::UTF8))
(PhysicalType::BYTE_ARRAY, Some(ConvertedType::UTF8))
}
Some(InfluxColumnType::Timestamp) => {
// At the time of writing, the underlying rust parquet
@ -325,7 +325,7 @@ fn convert_to_parquet_schema(schema: &Schema) -> Result<Arc<parquet::schema::typ
// https://github.com/apache/arrow/tree/master/rust/parquet#supported-parquet-version
//
// Thus store timestamps using microsecond precision instead of nanosecond
(PhysicalType::INT64, Some(LogicalType::TIMESTAMP_MICROS))
(PhysicalType::INT64, Some(ConvertedType::TIMESTAMP_MICROS))
}
None => {
return UnsupportedDataType {
@ -340,7 +340,7 @@ fn convert_to_parquet_schema(schema: &Schema) -> Result<Arc<parquet::schema::typ
.with_repetition(Repetition::OPTIONAL);
if let Some(t) = logical_type {
parquet_column_builder = parquet_column_builder.with_logical_type(t);
parquet_column_builder = parquet_column_builder.with_converted_type(t);
}
let parquet_column_type = parquet_column_builder
@ -505,7 +505,7 @@ fn create_writer_props(
#[cfg(test)]
mod tests {
use data_types::schema::builder::SchemaBuilder;
use internal_types::schema::builder::SchemaBuilder;
use super::*;

View File

@ -1,5 +1,5 @@
use data_types::schema::{builder::SchemaBuilder, InfluxFieldType};
use ingest::parquet::writer::{CompressionLevel, IOxParquetTableWriter};
use internal_types::schema::{builder::SchemaBuilder, InfluxFieldType};
use packers::{IOxTableWriter, Packer, Packers};
use arrow_deps::parquet::data_type::ByteArray;

28
internal_types/Cargo.toml Normal file
View File

@ -0,0 +1,28 @@
[package]
name = "internal_types"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
description = "InfluxDB IOx internal types, shared between IOx instances"
readme = "README.md"
[dependencies]
arrow_deps = { path = "../arrow_deps" }
crc32fast = "1.2.0"
chrono = { version = "0.4", features = ["serde"] }
data_types = { path = "../data_types" }
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
# version of the flatbuffers crate
flatbuffers = "0.8"
generated_types = { path = "../generated_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
ouroboros = "0.8.3"
snafu = "0.6"
tracing = "0.1"
[dev-dependencies] # In alphabetical order
criterion = "0.3"
[[bench]]
name = "benchmark"
harness = false

7
internal_types/README.md Normal file
View File

@ -0,0 +1,7 @@
# Internal Types
This crate contains InfluxDB IOx "internal" types which are shared
across crates and internally between IOx instances, but not exposed
externally to clients.
*Internal* in this case means that these structs can change without requiring additional coordination with clients.

View File

@ -1,12 +1,15 @@
use criterion::measurement::WallTime;
use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion, Throughput};
use data_types::data::{lines_to_replicated_write as lines_to_rw, ReplicatedWrite};
use data_types::database_rules::{DatabaseRules, PartitionTemplate, TemplatePart};
use generated_types::wal as wb;
use influxdb_line_protocol::{parse_lines, ParsedLine};
use std::collections::{BTreeMap, BTreeSet};
use std::fmt;
use std::time::Duration;
use internal_types::data::{lines_to_replicated_write as lines_to_rw, ReplicatedWrite};
use std::{
collections::{BTreeMap, BTreeSet},
convert::TryFrom,
fmt,
time::Duration,
};
const NEXT_ENTRY_NS: i64 = 1_000_000_000;
const STARTING_TIMESTAMP_NS: i64 = 0;
@ -61,7 +64,7 @@ fn replicated_write_into_bytes(c: &mut Criterion) {
assert_eq!(write.entry_count(), config.partition_count);
b.iter(|| {
let _ = write.bytes().len();
let _ = write.data().len();
});
},
);
@ -73,7 +76,7 @@ fn bytes_into_struct(c: &mut Criterion) {
run_group("bytes_into_struct", c, |lines, rules, config, b| {
let write = lines_to_rw(0, 0, &lines, rules);
assert_eq!(write.entry_count(), config.partition_count);
let data = write.bytes();
let data = write.data();
b.iter(|| {
let mut db = Db::default();
@ -160,7 +163,7 @@ struct Db {
impl Db {
fn deserialize_write(&mut self, data: &[u8]) {
let write = ReplicatedWrite::from(data);
let write = ReplicatedWrite::try_from(data.to_vec()).unwrap();
if let Some(batch) = write.write_buffer_batch() {
if let Some(entries) = batch.entries() {

View File

@ -1,92 +1,103 @@
//! This module contains helper methods for constructing replicated writes
//! based on `DatabaseRules`.
use crate::database_rules::Partitioner;
use crate::TIME_COLUMN_NAME;
use crate::schema::TIME_COLUMN_NAME;
use data_types::database_rules::Partitioner;
use generated_types::wal as wb;
use influxdb_line_protocol::{FieldValue, ParsedLine};
use std::{collections::BTreeMap, fmt};
use std::{collections::BTreeMap, convert::TryFrom, fmt};
use chrono::Utc;
use crc32fast::Hasher;
use flatbuffers::FlatBufferBuilder;
use ouroboros::self_referencing;
pub fn type_description(value: wb::ColumnValue) -> &'static str {
use wb::ColumnValue::*;
match value {
NONE => "none",
TagValue => "tag",
I64Value => "i64",
U64Value => "u64",
F64Value => "f64",
BoolValue => "bool",
StringValue => "String",
wb::ColumnValue::TagValue => "tag",
wb::ColumnValue::I64Value => "i64",
wb::ColumnValue::U64Value => "u64",
wb::ColumnValue::F64Value => "f64",
wb::ColumnValue::BoolValue => "bool",
wb::ColumnValue::StringValue => "String",
wb::ColumnValue::NONE => "none",
_ => "none",
}
}
/// A friendlier wrapper to help deal with the Flatbuffers write data
#[derive(Debug, Default, Clone, PartialEq)]
#[self_referencing]
#[derive(Debug, Clone, PartialEq)]
pub struct ReplicatedWrite {
pub data: Vec<u8>,
data: Vec<u8>,
#[borrows(data)]
#[covariant]
fb: wb::ReplicatedWrite<'this>,
#[borrows(data)]
#[covariant]
write_buffer_batch: Option<wb::WriteBufferBatch<'this>>,
}
impl ReplicatedWrite {
/// Returns the Flatbuffers struct represented by the raw bytes.
pub fn to_fb(&self) -> wb::ReplicatedWrite<'_> {
flatbuffers::get_root::<wb::ReplicatedWrite<'_>>(&self.data)
}
/// Returns the Flatbuffers struct for the WriteBufferBatch in the raw bytes
/// of the payload of the ReplicatedWrite.
pub fn write_buffer_batch(&self) -> Option<wb::WriteBufferBatch<'_>> {
match self.to_fb().payload() {
Some(d) => Some(flatbuffers::get_root::<wb::WriteBufferBatch<'_>>(&d)),
None => None,
}
pub fn write_buffer_batch(&self) -> Option<&wb::WriteBufferBatch<'_>> {
self.borrow_write_buffer_batch().as_ref()
}
/// Returns the Flatbuffers struct for the ReplicatedWrite
pub fn fb(&self) -> &wb::ReplicatedWrite<'_> {
self.borrow_fb()
}
/// Returns true if this replicated write matches the writer and sequence.
pub fn equal_to_writer_and_sequence(&self, writer_id: u32, sequence_number: u64) -> bool {
let fb = self.to_fb();
fb.writer() == writer_id && fb.sequence() == sequence_number
self.fb().writer() == writer_id && self.fb().sequence() == sequence_number
}
/// Returns the writer id and sequence number
pub fn writer_and_sequence(&self) -> (u32, u64) {
let fb = self.to_fb();
(fb.writer(), fb.sequence())
(self.fb().writer(), self.fb().sequence())
}
/// Returns the serialized bytes for the write. (used for benchmarking)
pub fn bytes(&self) -> &Vec<u8> {
&self.data
/// Returns the serialized bytes for the write
pub fn data(&self) -> &[u8] {
self.borrow_data()
}
/// Returns the number of write buffer entries in this replicated write
pub fn entry_count(&self) -> usize {
if let Some(batch) = self.write_buffer_batch() {
if let Some(entries) = batch.entries() {
return entries.len();
}
}
0
self.write_buffer_batch()
.map_or(0, |wbb| wbb.entries().map_or(0, |entries| entries.len()))
}
}
impl From<&[u8]> for ReplicatedWrite {
fn from(data: &[u8]) -> Self {
Self {
data: Vec::from(data),
impl TryFrom<Vec<u8>> for ReplicatedWrite {
type Error = flatbuffers::InvalidFlatbuffer;
fn try_from(data: Vec<u8>) -> Result<Self, Self::Error> {
ReplicatedWriteTryBuilder {
data,
fb_builder: |data| flatbuffers::root::<wb::ReplicatedWrite<'_>>(data),
write_buffer_batch_builder: |data| match flatbuffers::root::<wb::ReplicatedWrite<'_>>(
data,
)?
.payload()
{
Some(payload) => Ok(Some(flatbuffers::root::<wb::WriteBufferBatch<'_>>(
&payload,
)?)),
None => Ok(None),
},
}
.try_build()
}
}
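// Sketch: how the new API shape is meant to be used. A `ReplicatedWrite` is
// validated once, when it is built from its flatbuffer bytes, and the
// accessors above borrow the already-parsed structures. The helper below is
// purely illustrative.
#[allow(dead_code)]
fn describe_write(write: &ReplicatedWrite) -> Result<String, flatbuffers::InvalidFlatbuffer> {
    // round-trip the serialized bytes through `TryFrom` to re-validate them
    let reread = ReplicatedWrite::try_from(write.data().to_vec())?;
    let (writer, sequence) = reread.writer_and_sequence();
    Ok(format!(
        "writer:{} sequence:{} entries:{} bytes:{}",
        writer,
        sequence,
        reread.entry_count(),
        reread.data().len()
    ))
}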
impl fmt::Display for ReplicatedWrite {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let fb = self.to_fb();
let fb = self.fb();
write!(
f,
"\nwriter:{}, sequence:{}, checksum:{}\n",
@ -143,6 +154,7 @@ impl fmt::Display for ReplicatedWrite {
.unwrap_or("")
.to_string(),
wb::ColumnValue::NONE => "".to_string(),
_ => "".to_string(),
};
write!(f, " {}:{}", value.column().unwrap_or(""), val)?;
}
@ -192,9 +204,8 @@ pub fn lines_to_replicated_write(
fbb.finish(write, None);
let (mut data, idx) = fbb.collapse();
ReplicatedWrite {
data: data.split_off(idx),
}
ReplicatedWrite::try_from(data.split_off(idx))
.expect("Flatbuffer data just constructed should be valid")
}
pub fn split_lines_into_write_entry_partitions(
@ -317,6 +328,7 @@ fn add_line<'a>(
for (column, value) in &line.field_set {
let val = match value {
FieldValue::I64(v) => add_i64_value(fbb, column.as_str(), *v),
FieldValue::U64(v) => add_u64_value(fbb, column.as_str(), *v),
FieldValue::F64(v) => add_f64_value(fbb, column.as_str(), *v),
FieldValue::Boolean(v) => add_bool_value(fbb, column.as_str(), *v),
FieldValue::String(v) => add_string_value(fbb, column.as_str(), v.as_str()),
@ -393,6 +405,16 @@ fn add_i64_value<'a>(
add_value(fbb, column, wb::ColumnValue::I64Value, iv.as_union_value())
}
fn add_u64_value<'a>(
fbb: &mut FlatBufferBuilder<'a>,
column: &str,
value: u64,
) -> flatbuffers::WIPOffset<wb::Value<'a>> {
let iv = wb::U64Value::create(fbb, &wb::U64ValueArgs { value });
add_value(fbb, column, wb::ColumnValue::U64Value, iv.as_union_value())
}
fn add_bool_value<'a>(
fbb: &mut FlatBufferBuilder<'a>,
column: &str,

11
internal_types/src/lib.rs Normal file
View File

@ -0,0 +1,11 @@
#![deny(rust_2018_idioms)]
#![warn(
missing_debug_implementations,
clippy::explicit_iter_loop,
clippy::use_self,
clippy::clone_on_ref_ptr
)]
pub mod data;
pub mod schema;
pub mod selection;

View File

@ -140,7 +140,7 @@ impl SchemaBuilder {
/// schema validation happens at this time.
/// ```
/// use data_types::schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType};
/// use internal_types::schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType};
///
/// let schema = SchemaBuilder::new()
/// .tag("region")

View File

@ -8,8 +8,8 @@ edition = "2018"
arrow_deps = { path = "../arrow_deps" }
chrono = "0.4"
croaring = "0.4.5"
crossbeam = "0.7.3"
env_logger = "0.7.1"
crossbeam = "0.8"
env_logger = "0.8.3"
human_format = "1.0.3"
packers = { path = "../packers" }
snafu = "0.6.8"

View File

@ -18,8 +18,11 @@ arrow_deps = { path = "../arrow_deps" }
async-trait = "0.1"
chrono = "0.4"
data_types = { path = "../data_types" }
flatbuffers = "0.6.1"
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
# version of the flatbuffers crate
flatbuffers = "0.8"
generated_types = { path = "../generated_types" }
internal_types = { path = "../internal_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
snafu = "0.6.2"
string-interner = "0.12.2"

View File

@ -9,7 +9,8 @@ use chrono::{DateTime, Utc};
use generated_types::wal as wb;
use std::collections::{BTreeSet, HashMap};
use data_types::{partition_metadata::TableSummary, schema::Schema, selection::Selection};
use data_types::partition_metadata::TableSummary;
use internal_types::{schema::Schema, selection::Selection};
use crate::{
column::Column,

View File

@ -2,9 +2,9 @@ use generated_types::wal as wb;
use snafu::Snafu;
use crate::dictionary::Dictionary;
use data_types::{data::type_description, partition_metadata::StatValues};
use arrow_deps::arrow::datatypes::DataType as ArrowDataType;
use data_types::partition_metadata::StatValues;
use internal_types::data::type_description;
use std::mem;
@ -34,6 +34,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
pub enum Column {
F64(Vec<Option<f64>>, StatValues<f64>),
I64(Vec<Option<i64>>, StatValues<i64>),
U64(Vec<Option<u64>>, StatValues<u64>),
String(Vec<Option<String>>, StatValues<String>),
Bool(Vec<Option<bool>>, StatValues<bool>),
Tag(Vec<Option<u32>>, StatValues<String>),
@ -45,10 +46,8 @@ impl Column {
capacity: usize,
value: wb::Value<'_>,
) -> Result<Self> {
use wb::ColumnValue::*;
Ok(match value.value_type() {
F64Value => {
wb::ColumnValue::F64Value => {
let val = value
.value_as_f64value()
.expect("f64 value should be present")
@ -57,7 +56,7 @@ impl Column {
vals.push(Some(val));
Self::F64(vals, StatValues::new(val))
}
I64Value => {
wb::ColumnValue::I64Value => {
let val = value
.value_as_i64value()
.expect("i64 value should be present")
@ -66,7 +65,16 @@ impl Column {
vals.push(Some(val));
Self::I64(vals, StatValues::new(val))
}
StringValue => {
wb::ColumnValue::U64Value => {
let val = value
.value_as_u64value()
.expect("u64 value should be present")
.value();
let mut vals = vec![None; capacity];
vals.push(Some(val));
Self::U64(vals, StatValues::new(val))
}
wb::ColumnValue::StringValue => {
let val = value
.value_as_string_value()
.expect("string value should be present")
@ -76,7 +84,7 @@ impl Column {
vals.push(Some(val.to_string()));
Self::String(vals, StatValues::new(val.to_string()))
}
BoolValue => {
wb::ColumnValue::BoolValue => {
let val = value
.value_as_bool_value()
.expect("bool value should be present")
@ -85,7 +93,7 @@ impl Column {
vals.push(Some(val));
Self::Bool(vals, StatValues::new(val))
}
TagValue => {
wb::ColumnValue::TagValue => {
let val = value
.value_as_tag_value()
.expect("tag value should be present")
@ -109,6 +117,7 @@ impl Column {
match self {
Self::F64(v, _) => v.len(),
Self::I64(v, _) => v.len(),
Self::U64(v, _) => v.len(),
Self::String(v, _) => v.len(),
Self::Bool(v, _) => v.len(),
Self::Tag(v, _) => v.len(),
@ -123,6 +132,7 @@ impl Column {
match self {
Self::F64(_, _) => "f64",
Self::I64(_, _) => "i64",
Self::U64(_, _) => "u64",
Self::String(_, _) => "String",
Self::Bool(_, _) => "bool",
Self::Tag(_, _) => "tag",
@ -134,6 +144,7 @@ impl Column {
match self {
Self::F64(..) => ArrowDataType::Float64,
Self::I64(..) => ArrowDataType::Int64,
Self::U64(..) => ArrowDataType::UInt64,
Self::String(..) => ArrowDataType::Utf8,
Self::Bool(..) => ArrowDataType::Boolean,
Self::Tag(..) => ArrowDataType::Utf8,
@ -179,6 +190,15 @@ impl Column {
}
None => false,
},
Self::U64(vals, stats) => match value.value_as_u64value() {
Some(u64_val) => {
let u64_val = u64_val.value();
vals.push(Some(u64_val));
stats.update(u64_val);
true
}
None => false,
},
Self::F64(vals, stats) => match value.value_as_f64value() {
Some(f64_val) => {
let f64_val = f64_val.value();
@ -216,6 +236,11 @@ impl Column {
v.push(None);
}
}
Self::U64(v, _) => {
if v.len() == len {
v.push(None);
}
}
Self::String(v, _) => {
if v.len() == len {
v.push(None);
@ -290,6 +315,9 @@ impl Column {
Self::I64(v, stats) => {
mem::size_of::<Option<i64>>() * v.len() + mem::size_of_val(&stats)
}
Self::U64(v, stats) => {
mem::size_of::<Option<u64>>() * v.len() + mem::size_of_val(&stats)
}
Self::Bool(v, stats) => {
mem::size_of::<Option<bool>>() * v.len() + mem::size_of_val(&stats)
}

View File

@ -1,8 +1,6 @@
use data_types::{
data::ReplicatedWrite,
database_rules::{PartitionSort, PartitionSortRules},
};
use data_types::database_rules::{PartitionSort, PartitionSortRules};
use generated_types::wal;
use internal_types::data::ReplicatedWrite;
use crate::{chunk::Chunk, partition::Partition};
@ -79,6 +77,14 @@ impl MutableBufferDb {
}
}
/// returns the id of the current open chunk in the specified partition
pub fn open_chunk_id(&self, partition_key: &str) -> u32 {
let partition = self.get_partition(partition_key);
let partition = partition.read().expect("mutex poisoned");
partition.open_chunk_id()
}
/// Directs the writes from batch into the appropriate partitions
fn write_entries_to_partitions(&self, batch: &wal::WriteBufferBatch<'_>) -> Result<()> {
if let Some(entries) = batch.entries() {
@ -96,7 +102,8 @@ impl MutableBufferDb {
Ok(())
}
/// Rolls over the active chunk in this partition
/// Rolls over the active chunk in this partition. Returns the
/// previously open (now closed) Chunk
pub fn rollover_partition(&self, partition_key: &str) -> Result<Arc<Chunk>> {
let partition = self.get_partition(partition_key);
let mut partition = partition.write().expect("mutex poisoned");
@ -181,7 +188,7 @@ impl MutableBufferDb {
Some(b) => self.write_entries_to_partitions(&b)?,
None => {
return MissingPayload {
writer: write.to_fb().writer(),
writer: write.fb().writer(),
}
.fail()
}
@ -240,12 +247,10 @@ impl MutableBufferDb {
mod tests {
use super::*;
use chrono::{DateTime, Utc};
use data_types::{
data::lines_to_replicated_write, database_rules::Partitioner, selection::Selection,
};
use data_types::database_rules::{Order, Partitioner};
use internal_types::{data::lines_to_replicated_write, selection::Selection};
use arrow_deps::arrow::array::{Array, StringArray};
use data_types::database_rules::Order;
use influxdb_line_protocol::{parse_lines, ParsedLine};
type TestError = Box<dyn std::error::Error + Send + Sync + 'static>;

View File

@ -107,6 +107,11 @@ impl Partition {
}
}
/// returns the id of the current open chunk in this partition
pub(crate) fn open_chunk_id(&self) -> u32 {
self.open_chunk.id()
}
/// write data to the open chunk
pub fn write_entry(&mut self, entry: &wb::WriteBufferEntry<'_>) -> Result<()> {
assert_eq!(
@ -173,6 +178,8 @@ impl Partition {
///
/// Queries will continue to see data in the specified chunk until
/// it is dropped.
///
/// Returns the previously open (now closed) Chunk
pub fn rollover_chunk(&mut self) -> Arc<Chunk> {
let chunk_id = self.id_generator;
self.id_generator += 1;
@ -295,10 +302,8 @@ impl<'a> Iterator for ChunkIter<'a> {
mod tests {
use super::*;
use chrono::Utc;
use data_types::{
data::split_lines_into_write_entry_partitions, partition_metadata::PartitionSummary,
selection::Selection,
};
use data_types::partition_metadata::PartitionSummary;
use internal_types::{data::split_lines_into_write_entry_partitions, selection::Selection};
use arrow_deps::{
arrow::record_batch::RecordBatch, assert_table_eq, test_util::sort_record_batch,
@ -924,7 +929,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp_string).map(|l| l.unwrap()).collect();
let data = split_lines_into_write_entry_partitions(|_| partition.key().into(), &lines);
let batch = flatbuffers::get_root::<wb::WriteBufferBatch<'_>>(&data);
let batch = flatbuffers::root::<wb::WriteBufferBatch<'_>>(&data).unwrap();
let entries = batch.entries().unwrap();
for entry in entries {

View File

@ -10,7 +10,8 @@ use arrow_deps::{
},
util::{make_range_expr, AndExprBuilder},
};
use data_types::{timestamp::TimestampRange, TIME_COLUMN_NAME};
use data_types::timestamp::TimestampRange;
use internal_types::schema::TIME_COLUMN_NAME;
//use snafu::{OptionExt, ResultExt, Snafu};
use snafu::{ensure, ResultExt, Snafu};

View File

@ -12,18 +12,20 @@ use crate::{
dictionary::{Dictionary, Error as DictionaryError},
pred::{ChunkIdSet, ChunkPredicate},
};
use data_types::{
partition_metadata::{ColumnSummary, Statistics},
schema::{builder::SchemaBuilder, Schema},
use data_types::partition_metadata::{ColumnSummary, Statistics};
use internal_types::{
schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME},
selection::Selection,
TIME_COLUMN_NAME,
};
use snafu::{OptionExt, ResultExt, Snafu};
use arrow_deps::{
arrow,
arrow::{
array::{ArrayRef, BooleanBuilder, Float64Builder, Int64Builder, StringBuilder},
array::{
ArrayRef, BooleanBuilder, Float64Builder, Int64Builder, StringBuilder, UInt64Builder,
},
datatypes::DataType as ArrowDataType,
record_batch::RecordBatch,
},
@ -82,7 +84,7 @@ pub enum Error {
#[snafu(display("Internal error converting schema: {}", source))]
InternalSchema {
source: data_types::schema::builder::Error,
source: internal_types::schema::builder::Error,
},
#[snafu(display(
@ -326,6 +328,7 @@ impl Table {
schema_builder.field(column_name, ArrowDataType::Int64)
}
}
Column::U64(_, _) => schema_builder.field(column_name, ArrowDataType::UInt64),
Column::Bool(_, _) => schema_builder.field(column_name, ArrowDataType::Boolean),
};
}
@ -399,6 +402,15 @@ impl Table {
Arc::new(builder.finish()) as ArrayRef
}
Column::U64(vals, _) => {
let mut builder = UInt64Builder::new(vals.len());
for v in vals {
builder.append_option(*v).context(ArrowError {})?;
}
Arc::new(builder.finish()) as ArrayRef
}
Column::Bool(vals, _) => {
let mut builder = BooleanBuilder::new(vals.len());
@ -504,6 +516,7 @@ impl Table {
match column {
Column::F64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
Column::I64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
Column::U64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
Column::String(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
Column::Bool(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
Column::Tag(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
@ -545,6 +558,7 @@ impl Table {
let stats = match c {
Column::F64(_, stats) => Statistics::F64(stats.clone()),
Column::I64(_, stats) => Statistics::I64(stats.clone()),
Column::U64(_, stats) => Statistics::U64(stats.clone()),
Column::Bool(_, stats) => Statistics::Bool(stats.clone()),
Column::String(_, stats) | Column::Tag(_, stats) => {
Statistics::String(stats.clone())
@ -583,8 +597,8 @@ impl<'a> TableColSelection<'a> {
#[cfg(test)]
mod tests {
use data_types::data::split_lines_into_write_entry_partitions;
use influxdb_line_protocol::{parse_lines, ParsedLine};
use internal_types::data::split_lines_into_write_entry_partitions;
use super::*;
@ -736,7 +750,7 @@ mod tests {
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
let lp_lines = vec![
"h2o,state=MA,city=Boston float_field=70.4,int_field=8i,bool_field=t,string_field=\"foo\" 100",
"h2o,state=MA,city=Boston float_field=70.4,int_field=8i,uint_field=42u,bool_field=t,string_field=\"foo\" 100",
];
write_lines_to_table(&mut table, dictionary, lp_lines);
@ -751,6 +765,7 @@ mod tests {
.tag("state")
.field("string_field", ArrowDataType::Utf8)
.timestamp()
.field("uint_field", ArrowDataType::UInt64)
.build()
.unwrap();
@ -793,7 +808,7 @@ mod tests {
let data = split_lines_into_write_entry_partitions(chunk_key_func, &lines);
let batch = flatbuffers::get_root::<wb::WriteBufferBatch<'_>>(&data);
let batch = flatbuffers::root::<wb::WriteBufferBatch<'_>>(&data).unwrap();
let entries = batch.entries().expect("at least one entry");
for entry in entries {

View File

@ -13,7 +13,7 @@ use futures::{
Stream, StreamExt, TryStreamExt,
};
use rusoto_core::ByteStream;
use rusoto_credential::StaticProvider;
use rusoto_credential::{InstanceMetadataProvider, StaticProvider};
use rusoto_s3::S3;
use snafu::{futures::TryStreamExt as _, OptionExt, ResultExt, Snafu};
use std::convert::TryFrom;
@ -108,6 +108,12 @@ pub enum Error {
region: String,
source: rusoto_core::region::ParseRegionError,
},
#[snafu(display("Missing aws-access-key"))]
MissingAccessKey,
#[snafu(display("Missing aws-secret-access-key"))]
MissingSecretAccessKey,
}
/// Configuration for connecting to [Amazon S3](https://aws.amazon.com/s3/).
@ -285,8 +291,8 @@ impl AmazonS3 {
/// Configure a connection to Amazon S3 using the specified credentials in
/// the specified Amazon region and bucket
pub fn new(
access_key_id: impl Into<String>,
secret_access_key: impl Into<String>,
access_key_id: Option<impl Into<String>>,
secret_access_key: Option<impl Into<String>>,
region: impl Into<String>,
bucket_name: impl Into<String>,
) -> Result<Self> {
@ -296,11 +302,22 @@ impl AmazonS3 {
let http_client = rusoto_core::request::HttpClient::new()
.expect("Current implementation of rusoto_core has no way for this to fail");
let credentials_provider =
StaticProvider::new_minimal(access_key_id.into(), secret_access_key.into());
let client = match (access_key_id, secret_access_key) {
(Some(access_key_id), Some(secret_access_key)) => {
let credentials_provider =
StaticProvider::new_minimal(access_key_id.into(), secret_access_key.into());
rusoto_s3::S3Client::new_with(http_client, credentials_provider, region)
}
(None, Some(_)) => return Err(Error::MissingAccessKey),
(Some(_), None) => return Err(Error::MissingSecretAccessKey),
_ => {
let credentials_provider = InstanceMetadataProvider::new();
rusoto_s3::S3Client::new_with(http_client, credentials_provider, region)
}
};
Ok(Self {
client: rusoto_s3::S3Client::new_with(http_client, credentials_provider, region),
client,
bucket_name: bucket_name.into(),
})
}
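    // Sketch of the two ways the constructor above can now be called: static
    // credentials behave as before, while passing `None` for both falls back
    // to the EC2 instance metadata provider. Region, bucket and credential
    // values below are placeholders.
    #[allow(dead_code)]
    fn new_examples() -> Result<(Self, Self)> {
        let static_creds = Self::new(
            Some("my-access-key-id"),
            Some("my-secret-access-key"),
            "us-east-1",
            "example-bucket",
        )?;
        let instance_metadata = Self::new(
            None::<String>,
            None::<String>,
            "us-east-1",
            "example-bucket",
        )?;
        Ok((static_creds, instance_metadata))
    }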
@ -502,8 +519,8 @@ mod tests {
let config = maybe_skip_integration!();
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
config.bucket,
)
@ -524,8 +541,8 @@ mod tests {
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)
@ -556,8 +573,8 @@ mod tests {
let config = maybe_skip_integration!();
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)
@ -599,8 +616,8 @@ mod tests {
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)
@ -637,8 +654,8 @@ mod tests {
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)
@ -685,8 +702,8 @@ mod tests {
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)
@ -731,8 +748,8 @@ mod tests {
let config = maybe_skip_integration!();
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
config.bucket,
)
@ -757,8 +774,8 @@ mod tests {
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)
@ -795,8 +812,8 @@ mod tests {
let integration = ObjectStore::new_amazon_s3(
AmazonS3::new(
config.access_key_id,
config.secret_access_key,
Some(config.access_key_id),
Some(config.secret_access_key),
config.region,
&config.bucket,
)

View File

@ -6,12 +6,12 @@ edition = "2018"
[dependencies] # In alphabetical order
arrow_deps = { path = "../arrow_deps" }
data_types = { path = "../data_types" }
human_format = "1.0.3"
influxdb_tsm = { path = "../influxdb_tsm" }
internal_types = { path = "../internal_types" }
snafu = "0.6.2"
tracing = "0.1"
[dev-dependencies] # In alphabetical order
rand = "0.7.3"
rand = "0.8.3"
test_helpers = { path = "../test_helpers" }

View File

@ -15,7 +15,7 @@ use snafu::Snafu;
pub use crate::packers::{Packer, Packers};
pub use arrow_deps::parquet::data_type::ByteArray;
use data_types::schema::Schema;
use internal_types::schema::Schema;
use std::borrow::Cow;

View File

@ -10,7 +10,7 @@ use std::iter;
use std::slice::Chunks;
use arrow_deps::parquet::data_type::ByteArray;
use data_types::schema::{InfluxColumnType, InfluxFieldType};
use internal_types::schema::{InfluxColumnType, InfluxFieldType};
use std::default::Default;
// NOTE: See https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet.html
@ -20,6 +20,7 @@ use std::default::Default;
pub enum Packers {
Float(Packer<f64>),
Integer(Packer<i64>),
UInteger(Packer<u64>),
Bytes(Packer<ByteArray>),
String(Packer<String>),
Boolean(Packer<bool>),
@ -52,6 +53,7 @@ impl<'a> Packers {
match self {
Self::Float(p) => PackerChunker::Float(p.values.chunks(chunk_size)),
Self::Integer(p) => PackerChunker::Integer(p.values.chunks(chunk_size)),
Self::UInteger(p) => PackerChunker::UInteger(p.values.chunks(chunk_size)),
Self::Bytes(p) => PackerChunker::Bytes(p.values.chunks(chunk_size)),
Self::String(p) => PackerChunker::String(p.values.chunks(chunk_size)),
Self::Boolean(p) => PackerChunker::Boolean(p.values.chunks(chunk_size)),
@ -69,6 +71,7 @@ impl<'a> Packers {
match self {
Self::Float(p) => p.reserve_exact(additional),
Self::Integer(p) => p.reserve_exact(additional),
Self::UInteger(p) => p.reserve_exact(additional),
Self::Bytes(p) => p.reserve_exact(additional),
Self::String(p) => p.reserve_exact(additional),
Self::Boolean(p) => p.reserve_exact(additional),
@ -79,6 +82,7 @@ impl<'a> Packers {
match self {
Self::Float(p) => p.push_option(None),
Self::Integer(p) => p.push_option(None),
Self::UInteger(p) => p.push_option(None),
Self::Bytes(p) => p.push_option(None),
Self::String(p) => p.push_option(None),
Self::Boolean(p) => p.push_option(None),
@ -90,6 +94,7 @@ impl<'a> Packers {
match self {
Self::Float(p) => p.swap(a, b),
Self::Integer(p) => p.swap(a, b),
Self::UInteger(p) => p.swap(a, b),
Self::Bytes(p) => p.swap(a, b),
Self::String(p) => p.swap(a, b),
Self::Boolean(p) => p.swap(a, b),
@ -101,6 +106,7 @@ impl<'a> Packers {
match self {
Self::Float(p) => p.num_rows(),
Self::Integer(p) => p.num_rows(),
Self::UInteger(p) => p.num_rows(),
Self::Bytes(p) => p.num_rows(),
Self::String(p) => p.num_rows(),
Self::Boolean(p) => p.num_rows(),
@ -114,6 +120,7 @@ impl<'a> Packers {
match self {
Self::Float(p) => p.is_null(row),
Self::Integer(p) => p.is_null(row),
Self::UInteger(p) => p.is_null(row),
Self::Bytes(p) => p.is_null(row),
Self::String(p) => p.is_null(row),
Self::Boolean(p) => p.is_null(row),
@ -124,6 +131,7 @@ impl<'a> Packers {
typed_packer_accessors! {
(f64_packer, f64_packer_mut, f64, Float),
(i64_packer, i64_packer_mut, i64, Integer),
(u64_packer, u64_packer_mut, u64, UInteger),
(bytes_packer, bytes_packer_mut, ByteArray, Bytes),
(str_packer, str_packer_mut, String, String),
(bool_packer, bool_packer_mut, bool, Boolean),
@ -245,6 +253,7 @@ impl std::convert::From<Vec<Option<Vec<u8>>>> for Packers {
pub enum PackerChunker<'a> {
Float(Chunks<'a, Option<f64>>),
Integer(Chunks<'a, Option<i64>>),
UInteger(Chunks<'a, Option<u64>>),
Bytes(Chunks<'a, Option<ByteArray>>),
String(Chunks<'a, Option<String>>),
Boolean(Chunks<'a, Option<bool>>),
@ -523,6 +532,7 @@ mod test {
let mut packers: Vec<Packers> = Vec::new();
packers.push(Packers::Float(Packer::new()));
packers.push(Packers::Integer(Packer::new()));
packers.push(Packers::UInteger(Packer::new()));
packers.push(Packers::Boolean(Packer::new()));
packers.get_mut(0).unwrap().f64_packer_mut().push(22.033);

View File

@ -387,7 +387,7 @@ mod test {
for _ in 0..250 {
let packer: Packer<i64> = Packer::from(
(0..1000)
.map(|_| rng.gen_range(0, 20))
.map(|_| rng.gen_range(0..20))
.collect::<Vec<i64>>(),
);
let mut packers = vec![Packers::Integer(packer)];
@ -410,7 +410,7 @@ mod test {
for _ in 0..250 {
let packer: Packer<String> = Packer::from(
(0..1000)
.map(|_| format!("{:?}", rng.gen_range(0, 20)))
.map(|_| format!("{:?}", rng.gen_range(0..20)))
.collect::<Vec<String>>(),
);
let mut packers = vec![Packers::String(packer)];

View File

@ -21,9 +21,10 @@ croaring = "0.4.5"
data_types = { path = "../data_types" }
futures = "0.3.7"
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
internal_types = { path = "../internal_types" }
parking_lot = "0.11.1"
snafu = "0.6.2"
sqlparser = "0.6.1"
sqlparser = "0.8.0"
tokio = { version = "1.0", features = ["macros"] }
tokio-stream = "0.1.2"
tracing = "0.1"

View File

@ -1,7 +1,7 @@
use std::sync::Arc;
use arrow_deps::arrow::{self, datatypes::SchemaRef};
use data_types::TIME_COLUMN_NAME;
use internal_types::schema::TIME_COLUMN_NAME;
use snafu::{ResultExt, Snafu};
#[derive(Debug, Snafu)]

View File

@ -9,7 +9,7 @@ use arrow_deps::arrow::{
datatypes::{DataType, SchemaRef},
record_batch::RecordBatch,
};
use data_types::TIME_COLUMN_NAME;
use internal_types::schema::TIME_COLUMN_NAME;
use snafu::{ensure, ResultExt, Snafu};

View File

@ -14,10 +14,9 @@ use arrow_deps::{
},
util::IntoExpr,
};
use data_types::{
schema::{InfluxColumnType, Schema},
use internal_types::{
schema::{InfluxColumnType, Schema, TIME_COLUMN_NAME},
selection::Selection,
TIME_COLUMN_NAME,
};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use tracing::debug;

View File

@ -4,7 +4,7 @@ use snafu::{ResultExt, Snafu};
use crate::{exec::Executor, provider::ProviderBuilder, Database, PartitionChunk};
use arrow_deps::datafusion::{error::DataFusionError, physical_plan::ExecutionPlan};
use data_types::selection::Selection;
use internal_types::selection::Selection;
#[derive(Debug, Snafu)]
pub enum Error {

View File

@ -8,10 +8,9 @@
use arrow_deps::datafusion::physical_plan::SendableRecordBatchStream;
use async_trait::async_trait;
use data_types::{
data::ReplicatedWrite, partition_metadata::TableSummary, schema::Schema, selection::Selection,
};
use data_types::{chunk::ChunkSummary, partition_metadata::TableSummary};
use exec::{stringset::StringSet, Executor};
use internal_types::{data::ReplicatedWrite, schema::Schema, selection::Selection};
use std::{fmt::Debug, sync::Arc};
@ -49,6 +48,9 @@ pub trait Database: Debug + Send + Sync {
/// covering set means that together the chunks make up a single
/// complete copy of the data being queried.
fn chunks(&self, partition_key: &str) -> Vec<Arc<Self::Chunk>>;
/// Return a summary of all chunks in this database, in all partitions
fn chunk_summaries(&self) -> Result<Vec<ChunkSummary>, Self::Error>;
}
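// Sketch: generic code can now inspect chunks across all partitions through
// `chunk_summaries` alone; a minimal, purely illustrative helper over any
// `Database` implementation.
#[allow(dead_code)]
fn total_chunk_count<D: Database>(db: &D) -> Result<usize, D::Error> {
    Ok(db.chunk_summaries()?.len())
}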
/// Collection of data that shares the same partition key
@ -60,7 +62,7 @@ pub trait PartitionChunk: Debug + Send + Sync {
/// particular partition.
fn id(&self) -> u32;
/// returns the partition metadata stats for every table in the partition
/// returns the partition metadata stats for every table in the chunk
fn table_stats(&self) -> Result<Vec<TableSummary>, Self::Error>;
/// Returns true if this chunk *might* have data that passes the
@ -155,11 +157,11 @@ pub trait DatabaseStore: Debug + Send + Sync {
type Error: std::error::Error + Send + Sync + 'static;
/// List the database names.
async fn db_names_sorted(&self) -> Vec<String>;
fn db_names_sorted(&self) -> Vec<String>;
/// Retrieve the database specified by `name` returning None if no
/// such database exists
async fn db(&self, name: &str) -> Option<Arc<Self::Database>>;
fn db(&self, name: &str) -> Option<Arc<Self::Database>>;
/// Retrieve the database specified by `name`, creating it if it
/// doesn't exist.

Some files were not shown because too many files have changed in this diff.