2022-11-24 14:26:34 +00:00
|
|
|
# Manifest for the ingester2 crate. Every package field other than the name
# is inherited from the workspace-level package settings.
[package]
|
|
|
|
name = "ingester2"
|
|
|
|
version.workspace = true
|
|
|
|
authors.workspace = true
|
|
|
|
edition.workspace = true
|
|
|
|
license.workspace = true
|
|
|
|
|
|
|
|
[dependencies]
|
|
|
|
arrow = { workspace = true, features = ["prettyprint"] }
|
2023-01-19 15:27:23 +00:00
|
|
|
arrow-flight = { workspace = true }
|
2022-11-24 14:26:34 +00:00
|
|
|
arrow_util = { version = "0.1.0", path = "../arrow_util" }
|
feat(ingester2): optimal persist parallelism
This commit changes the behaviour of the persist system to enable
optimal parallelism of persist operations, and improve the accuracy of
the outstanding job bound / back-pressure.
Previously all persist operations for a given partition were
consistently hashed to a single worker task. This serialised persistence
per partition, ensuring all updates to the partition sort key were
serialised. However, this also unnecessarily serialises persist
operations that do not need to update the sort key, reducing the
potential throughput of the system; in the worst case of a single
partition receiving all the writes, only one worker would be persisting,
and the other N-1 workers would be idle.
After this change, the sort key is inspected when enqueuing the persist
operation and if it can be determined that no sort key update is
necessary (the typical case), then the persist task is placed into a
global work queue from which all workers consume. This allows for
maximal parallelisation of these jobs, and removes the per-worker
head-of-line blocking.
In the case that the sort key does need updating, these jobs continue to
be consistently hashed to a single worker, ensuring serialised sort key
updates only where necessary.
To support these changes, the back-pressure system has been changed to
account for all outstanding persist jobs in the system, regardless of
type or assigned worker - a logical, bounded queue is composed of a
semaphore limiting the overall number of persist tasks, and a
series of physical, unbounded queues - one for each worker, plus the global
queue. The overall system remains bounded by the
INFLUXDB_IOX_PERSIST_QUEUE_DEPTH value, and is now simpler to reason
about (it is independent of the number of workers, etc).
2022-12-15 14:14:23 +00:00
|
|
|
async-channel = "1.8.0"
|
2023-01-23 08:41:27 +00:00
|
|
|
async-trait = "0.1.63"
|
2022-11-24 14:26:34 +00:00
|
|
|
backoff = { version = "0.1.0", path = "../backoff" }
|
feat(ingester2): gRPC methods & type-erased init
This commit implements the gRPC direct-write RPC interface (largely
copied from the ingester crate), and adds a much improved RPC query
handler.
Compared to the ingester crate, the query API is now split into two
defined halves - the API handler side, and types necessary to support it
(server/grpc/query.rs) and the Ingester query execution side (a stub in
query/exec.rs). These two halves maintain a separation of concerns, and
are interfaced by an abstract QueryExec trait (in query/trait.rs).
I also added the catalog RPC interface as it is currently exposed on the
ingester, though I am unsure if it is used by anything.
This commit also introduces the "init" module, and the
IngesterRpcInterface trait within it. This trait forms the public
ingester2 crate API, defining the complete set of methods external
crates can expect to utilise in a stable, unchanging and decoupled way.
The IngesterRpcInterface trait also serves as a method of type-erasure
on the underlying handler implementations, avoiding the need to
expose/pub the types, abstractions, and internal implementation details
of the ingester to external crates.
2022-11-25 10:58:32 +00:00
|
|
|
bytes = "1.3.0"
|
2022-11-28 12:23:16 +00:00
|
|
|
crossbeam-utils = "0.8.14"
|
2022-11-24 14:26:34 +00:00
|
|
|
data_types = { version = "0.1.0", path = "../data_types" }
|
|
|
|
datafusion.workspace = true
|
2022-11-25 14:35:39 +00:00
|
|
|
datafusion_util = { path = "../datafusion_util" }
|
2022-11-24 14:26:34 +00:00
|
|
|
dml = { version = "0.1.0", path = "../dml" }
|
feat(ingester2): gRPC methods & type-erased init
This commit implements the gRPC direct-write RPC interface (largely
copied from the ingester crate), and adds a much improved RPC query
handler.
Compared to the ingester crate, the query API is now split into two
defined halves - the API handler side, and types necessary to support it
(server/grpc/query.rs) and the Ingester query execution side (a stub in
query/exec.rs). These two halves maintain a separation of concerns, and
are interfaced by an abstract QueryExec trait (in query/trait.rs).
I also added the catalog RPC interface as it is currently exposed on the
ingester, though I am unsure if it is used by anything.
This commit also introduces the "init" module, and the
IngesterRpcInterface trait within it. This trait forms the public
ingester2 crate API, defining the complete set of methods external
crates can expect to utilise in a stable, unchanging and decoupled way.
The IngesterRpcInterface trait also serves as a method of type-erasure
on the underlying handler implementations, avoiding the need to
expose/pub the types, abstractions, and internal implementation details
of the ingester to external crates.
2022-11-25 10:58:32 +00:00
|
|
|
flatbuffers = "22"
|
2022-11-24 14:26:34 +00:00
|
|
|
futures = "0.3.25"
|
|
|
|
generated_types = { version = "0.1.0", path = "../generated_types" }
|
|
|
|
hashbrown.workspace = true
|
|
|
|
iox_catalog = { version = "0.1.0", path = "../iox_catalog" }
|
|
|
|
iox_query = { version = "0.1.0", path = "../iox_query" }
|
|
|
|
iox_time = { path = "../iox_time" }
|
|
|
|
metric = { version = "0.1.0", path = "../metric" }
|
|
|
|
mutable_batch = { version = "0.1.0", path = "../mutable_batch" }
|
|
|
|
mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" }
|
2022-12-06 07:53:54 +00:00
|
|
|
object_store = "0.5.2"
|
2022-11-24 14:26:34 +00:00
|
|
|
observability_deps = { version = "0.1.0", path = "../observability_deps" }
|
2023-01-02 17:07:15 +00:00
|
|
|
once_cell = "1.17"
|
2022-11-24 14:26:34 +00:00
|
|
|
parking_lot = "0.12.1"
|
2022-12-01 09:37:25 +00:00
|
|
|
parquet_file = { version = "0.1.0", path = "../parquet_file" }
|
feat(ingester2): gRPC methods & type-erased init
This commit implements the gRPC direct-write RPC interface (largely
copied from the ingester crate), and adds a much improved RPC query
handler.
Compared to the ingester crate, the query API is now split into two
defined halves - the API handler side, and types necessary to support it
(server/grpc/query.rs) and the Ingester query execution side (a stub in
query/exec.rs). These two halves maintain a separation of concerns, and
are interfaced by an abstract QueryExec trait (in query/trait.rs).
I also added the catalog RPC interface as it is currently exposed on the
ingester, though I am unsure if it is used by anything.
This commit also introduces the "init" module, and the
IngesterRpcInterface trait within it. This trait forms the public
ingester2 crate API, defining the complete set of methods external
crates can expect to utilise in a stable, unchanging and decoupled way.
The IngesterRpcInterface trait also serves as a method of type-erasure
on the underlying handler implementations, avoiding the need to
expose/pub the types, abstractions, and internal implementation details
of the ingester to external crates.
2022-11-25 10:58:32 +00:00
|
|
|
pin-project = "1.0.12"
|
2022-11-24 14:26:34 +00:00
|
|
|
predicate = { version = "0.1.0", path = "../predicate" }
|
2023-01-11 02:53:33 +00:00
|
|
|
prost = { version = "0.11.6", default-features = false, features = ["std"] }
|
2022-11-24 14:26:34 +00:00
|
|
|
rand = "0.8.5"
|
|
|
|
schema = { version = "0.1.0", path = "../schema" }
|
|
|
|
service_grpc_catalog = { version = "0.1.0", path = "../service_grpc_catalog" }
|
2022-12-01 09:37:25 +00:00
|
|
|
sharder = { version = "0.1.0", path = "../sharder" }
|
2022-12-19 10:33:32 +00:00
|
|
|
thiserror = "1.0.38"
|
2022-12-22 15:53:48 +00:00
|
|
|
test_helpers = { path = "../test_helpers", features = ["future_timeout"], optional = true }
|
2023-01-10 09:48:44 +00:00
|
|
|
tokio = { version = "1.24", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
2022-11-29 09:53:30 +00:00
|
|
|
tonic = "0.8.3"
|
2022-11-24 14:26:34 +00:00
|
|
|
trace = { version = "0.1.0", path = "../trace" }
|
|
|
|
uuid = "1.2.2"
|
2022-11-28 22:09:03 +00:00
|
|
|
wal = { version = "0.1.0", path = "../wal" }
|
2022-11-24 14:51:21 +00:00
|
|
|
workspace-hack = { path = "../workspace-hack" }
|
2023-01-12 13:59:50 +00:00
|
|
|
tokio-util = "0.7.4"
|
2022-11-24 14:26:34 +00:00
|
|
|
|
|
|
|
[dev-dependencies]
|
|
|
|
assert_matches = "1.5.0"
|
2022-12-12 13:24:11 +00:00
|
|
|
criterion = { version = "0.4", default-features = false, features = ["async_tokio"] }
|
2022-11-24 14:26:34 +00:00
|
|
|
# NOTE(review): datafusion_util is also declared as a regular (non-optional)
# entry under [dependencies], so this dev-dependency looks redundant - confirm
# before removing.
datafusion_util = { path = "../datafusion_util" }
|
|
|
|
lazy_static = "1.4.0"
|
|
|
|
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
2022-12-19 10:31:05 +00:00
|
|
|
paste = "1.0.11"
|
2022-11-28 22:09:03 +00:00
|
|
|
tempfile = "3.3.0"
|
2022-11-24 14:26:34 +00:00
|
|
|
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
2022-12-12 13:24:11 +00:00
|
|
|
|
|
|
|
[features]
|
2022-12-22 15:53:48 +00:00
|
|
|
# Export some internal types for benchmark purposes only. Enabling this
# feature also activates the optional `test_helpers` dependency.
benches = ["test_helpers"]
|
2022-12-12 13:24:11 +00:00
|
|
|
|
|
|
|
[lib]
|
|
|
|
# Exclude the library target itself from `cargo bench` runs.
bench = false
|
|
|
|
|
|
|
|
[[bench]]
|
|
|
|
name = "wal"
|
|
|
|
# Opt out of the built-in libtest harness, so the benchmark must supply its
# own `main` (presumably via the criterion dev-dependency - confirm).
harness = false
|
|
|
|
# Require some internal types be made visible for benchmark code.
|
|
|
|
required-features = ["benches"]
|