From f4b0589e3b39933bfc3370bdd5d7c3035c43d799 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 10 Apr 2024 14:27:04 -0700 Subject: [PATCH] osd/osd_types: add PCT_UPDATE_DELAY pool option Signed-off-by: Samuel Just --- src/mon/MonCommands.h | 4 ++-- src/osd/osd_types.cc | 4 +++- src/osd/osd_types.h | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index b5de8837cb7b..01bf152a2bdb 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1161,11 +1161,11 @@ COMMAND("osd pool rename " "rename to ", "osd", "rw") COMMAND("osd pool get " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio", + 
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay", "get pool parameter ", "osd", "r") COMMAND("osd pool set " "name=pool,type=CephPoolname " - 
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio " + 
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay " "name=val,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", "set pool parameter to ", "osd", "rw") diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index b2e8d8f297aa..5c2cf8b16b05 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1378,7 +1378,9 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of ("pg_num_max", pool_opts_t::opt_desc_t( pool_opts_t::PG_NUM_MAX, pool_opts_t::INT)) ("read_ratio", pool_opts_t::opt_desc_t( - pool_opts_t::READ_RATIO, pool_opts_t::INT)); + pool_opts_t::READ_RATIO, pool_opts_t::INT)) + ("pct_update_delay", pool_opts_t::opt_desc_t( + pool_opts_t::PCT_UPDATE_DELAY, pool_opts_t::INT)); bool pool_opts_t::is_opt_name(const std::string& name) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index f80adbbfd111..b6f5335a0f51 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1107,6 +1107,21 @@ public: DEDUP_CDC_CHUNK_SIZE, PG_NUM_MAX, // max pg_num READ_RATIO, // read ration 
for the read balancer work [0-100] + /** + * PCT_UPDATE_DELAY + * + * Time to wait (seconds) after there are no in-progress writes before + * updating pg_committed_to on replicas. If the period between writes on + * a PG is usually longer than this value, most writes will trigger an + * extra message. + * + * The primary reason to enable this feature would be to limit the time + * between a write and when that write is available to be read on replicas. + * + * A value <= 0 will cause the update to be sent immediately upon write + * completion if there are no other in-progress writes. + */ + PCT_UPDATE_DELAY, }; enum type_t { -- 2.47.3