docs(compactor2): Update compactor2 config parameter documentation (#7022)

* chore: Update compactor2 config parameter documentation

* fix: clarify wording
pull/24376/head
Andrew Lamb 2023-02-17 22:09:17 +01:00 committed by GitHub
parent b785f751b3
commit d82d00b847
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 58 additions and 16 deletions

View File

@ -27,7 +27,9 @@ impl Display for CompactorAlgoVersion {
pub struct Compactor2Config { pub struct Compactor2Config {
/// Number of partitions that should be compacted in parallel. /// Number of partitions that should be compacted in parallel.
/// ///
/// This should usually be larger than the compaction job concurrency since one partition can spawn multiple compaction jobs. /// This should usually be larger than the compaction job
/// concurrency since one partition can spawn multiple compaction
/// jobs.
#[clap( #[clap(
long = "compaction-partition-concurrency", long = "compaction-partition-concurrency",
env = "INFLUXDB_IOX_COMPACTION_PARTITION_CONCURRENCY", env = "INFLUXDB_IOX_COMPACTION_PARTITION_CONCURRENCY",
@ -38,7 +40,8 @@ pub struct Compactor2Config {
/// Number of concurrent compaction jobs. /// Number of concurrent compaction jobs.
/// ///
/// This should usually be smaller than the partition concurrency since one partition can spawn multiple compaction jobs. /// This should usually be smaller than the partition concurrency
/// since one partition can spawn multiple compaction jobs.
#[clap( #[clap(
long = "compaction-job-concurrency", long = "compaction-job-concurrency",
env = "INFLUXDB_IOX_COMPACTION_JOB_CONCURRENCY", env = "INFLUXDB_IOX_COMPACTION_JOB_CONCURRENCY",
@ -47,7 +50,8 @@ pub struct Compactor2Config {
)] )]
pub compaction_job_concurrency: NonZeroUsize, pub compaction_job_concurrency: NonZeroUsize,
/// Number of jobs PER PARTITION that move files in and out of the scratchpad. /// Number of jobs PER PARTITION that move files in and out of the
/// scratchpad.
#[clap( #[clap(
long = "compaction-partition-scratchpad-concurrency", long = "compaction-partition-scratchpad-concurrency",
env = "INFLUXDB_IOX_COMPACTION_PARTITION_SCRATCHPAD_CONCURRENCY", env = "INFLUXDB_IOX_COMPACTION_PARTITION_SCRATCHPAD_CONCURRENCY",
@ -56,7 +60,8 @@ pub struct Compactor2Config {
)] )]
pub compaction_partition_scratchpad_concurrency: NonZeroUsize, pub compaction_partition_scratchpad_concurrency: NonZeroUsize,
/// Partitions with recent created files these last minutes are selected for compaction. /// The compactor will only consider compacting partitions that
/// have new parquet files created within this many minutes.
#[clap( #[clap(
long = "compaction_partition_minute_threshold", long = "compaction_partition_minute_threshold",
env = "INFLUXDB_IOX_COMPACTION_PARTITION_MINUTE_THRESHOLD", env = "INFLUXDB_IOX_COMPACTION_PARTITION_MINUTE_THRESHOLD",
@ -65,7 +70,8 @@ pub struct Compactor2Config {
)] )]
pub compaction_partition_minute_threshold: u64, pub compaction_partition_minute_threshold: u64,
/// Number of threads to use for the compactor query execution, compaction and persistence. /// Number of threads to use for the compactor query execution,
/// compaction and persistence.
#[clap( #[clap(
long = "query-exec-thread-count", long = "query-exec-thread-count",
env = "INFLUXDB_IOX_QUERY_EXEC_THREAD_COUNT", env = "INFLUXDB_IOX_QUERY_EXEC_THREAD_COUNT",
@ -74,10 +80,12 @@ pub struct Compactor2Config {
)] )]
pub query_exec_thread_count: usize, pub query_exec_thread_count: usize,
/// Size of memory pool used during query exec, in bytes. /// Size of memory pool used during compaction plan execution, in
/// bytes.
/// ///
/// If queries attempt to allocate more than this many bytes /// If compaction plans attempt to allocate more than this many
/// during execution, they will error with "ResourcesExhausted". /// bytes during execution, they will error with
/// "ResourcesExhausted".
#[clap( #[clap(
long = "exec-mem-pool-bytes", long = "exec-mem-pool-bytes",
env = "INFLUXDB_IOX_EXEC_MEM_POOL_BYTES", env = "INFLUXDB_IOX_EXEC_MEM_POOL_BYTES",
@ -87,7 +95,8 @@ pub struct Compactor2Config {
pub exec_mem_pool_bytes: usize, pub exec_mem_pool_bytes: usize,
/// Desired max size of compacted parquet files. /// Desired max size of compacted parquet files.
/// It is a target desired value, rather than a guarantee. ///
/// Note this is a target desired value, rather than a guarantee.
/// 1024 * 1024 * 100 = 104,857,600 /// 1024 * 1024 * 100 = 104,857,600
#[clap( #[clap(
long = "compaction-max-desired-size-bytes", long = "compaction-max-desired-size-bytes",
@ -98,6 +107,7 @@ pub struct Compactor2Config {
pub max_desired_file_size_bytes: u64, pub max_desired_file_size_bytes: u64,
/// Percentage of desired max file size. /// Percentage of desired max file size.
///
/// If the estimated compacted result is too small, no need to split it. /// If the estimated compacted result is too small, no need to split it.
/// This percentage is to determine how small it is: /// This percentage is to determine how small it is:
/// < percentage_max_file_size * max_desired_file_size_bytes: /// < percentage_max_file_size * max_desired_file_size_bytes:
@ -169,7 +179,14 @@ pub struct Compactor2Config {
)] )]
pub ignore_partition_skip_marker: bool, pub ignore_partition_skip_marker: bool,
/// Maximum number of files in a compaction plan /// Maximum number of files that the compactor will try and
/// compact in a single plan.
///
/// The higher this setting is the fewer compactor plans are run
/// and thus fewer resources over time are consumed by the
/// compactor. Increasing this setting also increases the peak
/// memory used for each compaction plan, and thus if it is set
/// too high, the compactor plans may exceed available memory.
#[clap( #[clap(
long = "compaction-max-num-files-per-plan", long = "compaction-max-num-files-per-plan",
env = "INFLUXDB_IOX_COMPACTION_MAX_NUM_FILES_PER_PLAN", env = "INFLUXDB_IOX_COMPACTION_MAX_NUM_FILES_PER_PLAN",
@ -178,8 +195,17 @@ pub struct Compactor2Config {
)] )]
pub max_num_files_per_plan: usize, pub max_num_files_per_plan: usize,
/// Maximum input bytes (in parquet) per partition. If there is more data, we ignore the partition (for now) as a /// Maximum input bytes (in parquet) per partition that the
/// self-protection mechanism. /// compactor will attempt to compact in any one plan.
///
/// In the worst case, if the sum of the sizes of all parquet
/// files in a partition is greater than this value, the compactor
/// may not try to compact this partition. Under normal operation,
/// the compactor compacts a subset of files in a partition but in
/// some cases it may need to compact them all.
///
/// This setting is a self-protection mechanism, and it is
/// expected to be removed in future versions.
#[clap( #[clap(
long = "compaction-max-input-parquet-bytes-per-partition", long = "compaction-max-input-parquet-bytes-per-partition",
env = "INFLUXDB_IOX_COMPACTION_MAX_INPUT_PARQUET_BYTES_PER_PARTITION", env = "INFLUXDB_IOX_COMPACTION_MAX_INPUT_PARQUET_BYTES_PER_PARTITION",
@ -219,7 +245,15 @@ pub struct Compactor2Config {
)] )]
pub compact_version: CompactorAlgoVersion, pub compact_version: CompactorAlgoVersion,
/// Minimum number of L1 files to comapct to L2 /// Minimum number of L1 files to compact to L2.
///
/// If there are more than this many L1 (by definition
/// non-overlapping) files in a partition, the compactor will compact
/// them together into one or more larger L2 files.
///
/// Setting this value higher in general results in fewer overall
/// resources spent on compaction but more files per partition (and
/// thus less optimal compression and query performance).
#[clap( #[clap(
long = "compaction-min-num-l1-files-to-compact", long = "compaction-min-num-l1-files-to-compact",
env = "INFLUXDB_IOX_COMPACTION_MIN_NUM_L1_FILES_TO_COMPACT", env = "INFLUXDB_IOX_COMPACTION_MIN_NUM_L1_FILES_TO_COMPACT",
@ -229,6 +263,10 @@ pub struct Compactor2Config {
pub min_num_l1_files_to_compact: usize, pub min_num_l1_files_to_compact: usize,
/// Only process all discovered partitions once. /// Only process all discovered partitions once.
///
/// By default the compactor will continuously loop over all
/// partitions looking for work. Setting this option results in
/// exiting the loop after one iteration.
#[clap( #[clap(
long = "compaction-process-once", long = "compaction-process-once",
env = "INFLUXDB_IOX_COMPACTION_PROCESS_ONCE", env = "INFLUXDB_IOX_COMPACTION_PROCESS_ONCE",
@ -236,7 +274,8 @@ pub struct Compactor2Config {
)] )]
pub process_once: bool, pub process_once: bool,
/// Compact all partitions found in the catalog, no matter if/when the received writes. /// Compact all partitions found in the catalog, no matter if/when
/// they received writes.
#[clap( #[clap(
long = "compaction-process-all-partitions", long = "compaction-process-all-partitions",
env = "INFLUXDB_IOX_COMPACTION_PROCESS_ALL_PARTITIONS", env = "INFLUXDB_IOX_COMPACTION_PROCESS_ALL_PARTITIONS",
@ -244,8 +283,11 @@ pub struct Compactor2Config {
)] )]
pub process_all_partitions: bool, pub process_all_partitions: bool,
/// Maximum number of columns in the table of a partition that will be able to considered /// Maximum number of columns in a table of a partition that
/// to get compacted /// will be able to be considered to get compacted.
///
/// If a table has more than this many columns, the compactor will
/// not compact it, to avoid large memory use.
#[clap( #[clap(
long = "compaction-max-num-columns-per-table", long = "compaction-max-num-columns-per-table",
env = "INFLUXDB_IOX_COMPACTION_MAX_NUM_COLUMNS_PER_TABLE", env = "INFLUXDB_IOX_COMPACTION_MAX_NUM_COLUMNS_PER_TABLE",