influxdb/iox_catalog/Cargo.toml

38 lines
1.3 KiB
TOML
Raw Normal View History

2022-01-11 17:51:56 +00:00
# Package metadata for the `iox_catalog` crate. Everything except `name` is
# inherited from the workspace root manifest via `<key>.workspace = true`.
[package]
name = "iox_catalog"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true

# Regular (build + runtime) dependencies. Keep this list in alphabetical order.
# Entries with a `path` are sibling crates in the same repository checkout;
# all others are resolved from the registry.
[dependencies]
async-trait = "0.1.71"
data_types = { path = "../data_types" }
futures = "0.3"
iox_time = { version = "0.1.0", path = "../iox_time" }
log = "0.4"
metric = { version = "0.1.0", path = "../metric" }
mutable_batch = { path = "../mutable_batch" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
serde = { version = "1.0", features = ["derive"] }
siphasher = "0.3"
snafu = "0.7"
# Both the `postgres` and `sqlite` database drivers are enabled.
sqlx = { version = "0.6", features = ["runtime-tokio-rustls", "postgres", "uuid", "sqlite"] }
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
thiserror = "1.0.43"
tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

# Dependencies compiled only for tests, examples and benchmarks; they are not
# part of the crate's normal build. Entries with a `path` are sibling crates
# in the same repository checkout.
[dev-dependencies] # In alphabetical order
assert_matches = "1.5.0"
dotenvy = "0.15.7"
generated_types = { path = "../generated_types" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
paste = "1.0.13"
pretty_assertions = "1.4.0"
rand = "0.8"
tempfile = "3"
test_helpers = { path = "../test_helpers" }