docs(compactor2): Update compactor2 config parameter documentation (#7022)

* chore: Update compactor2 config parameter documentation

* fix: clarify ording
pull/24376/head
Andrew Lamb 2023-02-17 22:09:17 +01:00 committed by GitHub
parent b785f751b3
commit d82d00b847
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 58 additions and 16 deletions

View File

@ -27,7 +27,9 @@ impl Display for CompactorAlgoVersion {
pub struct Compactor2Config {
/// Number of partitions that should be compacted in parallel.
///
/// This should usually be larger than the compaction job concurrency since one partition can spawn multiple compaction jobs.
/// This should usually be larger than the compaction job
/// concurrency since one partition can spawn multiple compaction
/// jobs.
#[clap(
long = "compaction-partition-concurrency",
env = "INFLUXDB_IOX_COMPACTION_PARTITION_CONCURRENCY",
@ -38,7 +40,8 @@ pub struct Compactor2Config {
/// Number of concurrent compaction jobs.
///
/// This should usually be smaller than the partition concurrency since one partition can spawn multiple compaction jobs.
/// This should usually be smaller than the partition concurrency
/// since one partition can spawn multiple compaction jobs.
#[clap(
long = "compaction-job-concurrency",
env = "INFLUXDB_IOX_COMPACTION_JOB_CONCURRENCY",
@ -47,7 +50,8 @@ pub struct Compactor2Config {
)]
pub compaction_job_concurrency: NonZeroUsize,
/// Number of jobs PER PARTITION that move files in and out of the scratchpad.
/// Number of jobs PER PARTITION that move files in and out of the
/// scratchpad.
#[clap(
long = "compaction-partition-scratchpad-concurrency",
env = "INFLUXDB_IOX_COMPACTION_PARTITION_SCRATCHPAD_CONCURRENCY",
@ -56,7 +60,8 @@ pub struct Compactor2Config {
)]
pub compaction_partition_scratchpad_concurrency: NonZeroUsize,
/// Partitions with recent created files these last minutes are selected for compaction.
/// The compactor will only consider compacting partitions that
/// have new parquet files created within this many minutes.
#[clap(
long = "compaction_partition_minute_threshold",
env = "INFLUXDB_IOX_COMPACTION_PARTITION_MINUTE_THRESHOLD",
@ -65,7 +70,8 @@ pub struct Compactor2Config {
)]
pub compaction_partition_minute_threshold: u64,
/// Number of threads to use for the compactor query execution, compaction and persistence.
/// Number of threads to use for the compactor query execution,
/// compaction and persistence.
#[clap(
long = "query-exec-thread-count",
env = "INFLUXDB_IOX_QUERY_EXEC_THREAD_COUNT",
@ -74,10 +80,12 @@ pub struct Compactor2Config {
)]
pub query_exec_thread_count: usize,
/// Size of memory pool used during query exec, in bytes.
/// Size of memory pool used during compaction plan execution, in
/// bytes.
///
/// If queries attempt to allocate more than this many bytes
/// during execution, they will error with "ResourcesExhausted".
/// If compaction plans attempt to allocate more than this many
/// bytes during execution, they will error with
/// "ResourcesExhausted".
#[clap(
long = "exec-mem-pool-bytes",
env = "INFLUXDB_IOX_EXEC_MEM_POOL_BYTES",
@ -87,7 +95,8 @@ pub struct Compactor2Config {
pub exec_mem_pool_bytes: usize,
/// Desired max size of compacted parquet files.
/// It is a target desired value, rather than a guarantee.
///
/// Note this is a target desired value, rather than a guarantee.
/// 1024 * 1024 * 100 = 104,857,600
#[clap(
long = "compaction-max-desired-size-bytes",
@ -98,6 +107,7 @@ pub struct Compactor2Config {
pub max_desired_file_size_bytes: u64,
/// Percentage of desired max file size.
///
/// If the estimated compacted result is too small, no need to split it.
/// This percentage is to determine how small it is:
/// < percentage_max_file_size * max_desired_file_size_bytes:
@ -169,7 +179,14 @@ pub struct Compactor2Config {
)]
pub ignore_partition_skip_marker: bool,
/// Maximum number of files in a compaction plan
/// Maximum number of files that the compactor will try to
/// compact in a single plan.
///
/// The higher this setting is the fewer compactor plans are run
/// and thus fewer resources over time are consumed by the
/// compactor. Increasing this setting also increases the peak
/// memory used for each compaction plan, and thus if it is set
/// too high, the compactor plans may exceed available memory.
#[clap(
long = "compaction-max-num-files-per-plan",
env = "INFLUXDB_IOX_COMPACTION_MAX_NUM_FILES_PER_PLAN",
@ -178,8 +195,17 @@ pub struct Compactor2Config {
)]
pub max_num_files_per_plan: usize,
/// Maximum input bytes (in parquet) per partition. If there is more data, we ignore the partition (for now) as a
/// self-protection mechanism.
/// Maximum input bytes (in parquet) per partition that the
/// compactor will attempt to compact in any one plan.
///
/// In the worst case, if the sum of the sizes of all parquet
/// files in a partition is greater than this value, the compactor
/// may not try to compact this partition. Under normal operation,
/// the compactor compacts a subset of files in a partition but in
/// some cases it may need to compact them all.
///
/// This setting is a self-protection mechanism, and it is
/// expected to be removed in future versions.
#[clap(
long = "compaction-max-input-parquet-bytes-per-partition",
env = "INFLUXDB_IOX_COMPACTION_MAX_INPUT_PARQUET_BYTES_PER_PARTITION",
@ -219,7 +245,15 @@ pub struct Compactor2Config {
)]
pub compact_version: CompactorAlgoVersion,
/// Minimum number of L1 files to comapct to L2
/// Minimum number of L1 files to compact to L2.
///
/// If there are more than this many L1 (by definition
/// non-overlapping) files in a partition, the compactor will compact
/// them together into one or more larger L2 files.
///
/// Setting this value higher in general results in fewer overall
/// resources spent on compaction but more files per partition (and
/// thus less optimal compression and query performance).
#[clap(
long = "compaction-min-num-l1-files-to-compact",
env = "INFLUXDB_IOX_COMPACTION_MIN_NUM_L1_FILES_TO_COMPACT",
@ -229,6 +263,10 @@ pub struct Compactor2Config {
pub min_num_l1_files_to_compact: usize,
/// Only process all discovered partitions once.
///
/// By default the compactor will continuously loop over all
/// partitions looking for work. Setting this option results in
/// exiting the loop after one iteration.
#[clap(
long = "compaction-process-once",
env = "INFLUXDB_IOX_COMPACTION_PROCESS_ONCE",
@ -236,7 +274,8 @@ pub struct Compactor2Config {
)]
pub process_once: bool,
/// Compact all partitions found in the catalog, no matter if/when the received writes.
/// Compact all partitions found in the catalog, no matter if/when
/// they received writes.
#[clap(
long = "compaction-process-all-partitions",
env = "INFLUXDB_IOX_COMPACTION_PROCESS_ALL_PARTITIONS",
@ -244,8 +283,11 @@ pub struct Compactor2Config {
)]
pub process_all_partitions: bool,
/// Maximum number of columns in the table of a partition that will be able to considered
/// to get compacted
/// Maximum number of columns in a table of a partition that
/// can be considered for compaction.
///
/// If a table has more than this many columns, the compactor will
/// not compact it, to avoid large memory use.
#[clap(
long = "compaction-max-num-columns-per-table",
env = "INFLUXDB_IOX_COMPACTION_MAX_NUM_COLUMNS_PER_TABLE",