From: Alex Ainscow Date: Tue, 11 Mar 2025 10:59:13 +0000 (+0000) Subject: osd: Add EC_OPTIMIZATIONS flag and add ability to switch it on. X-Git-Tag: v20.3.0~179^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6eac1550b4fde988c2ed19ad850e5c1a43f59278;p=ceph.git osd: Add EC_OPTIMIZATIONS flag and add ability to switch it on. This is the basic command. On its own, this does nothing functional, except set a flag which can be queried. Signed-off-by: Alex Ainscow --- diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index fc32051badcec..91dd806c6ec17 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1170,11 +1170,11 @@ COMMAND("osd pool rename " "rename to ", "osd", "rw") COMMAND("osd pool get " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay", + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay|allow_ec_optimizations", "get pool parameter ", "osd", "r") COMMAND("osd pool set " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay " + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay|allow_ec_optimizations " "name=val,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", "set pool parameter to ", "osd", "rw") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 928492fb2d092..495a78e4293b2 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -5410,7 +5410,8 @@ namespace { CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM, PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO, PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, - DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX, READ_RATIO }; + DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX, READ_RATIO, + EC_OPTIMIZATIONS }; std::set subtract_second_from_first(const std::set& first, @@ -6215,7 +6216,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) {"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM}, {"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE}, {"bulk", BULK}, - {"read_ratio", READ_RATIO} + {"read_ratio", READ_RATIO}, + {"allow_ec_optimizations", EC_OPTIMIZATIONS} }; typedef std::set choices_set_t; @@ -6230,7 +6232,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) HIT_SET_GRADE_DECAY_RATE, HIT_SET_SEARCH_LAST_N }; const choices_set_t ONLY_ERASURE_CHOICES = { - EC_OVERWRITES, ERASURE_CODE_PROFILE + EC_OVERWRITES, ERASURE_CODE_PROFILE, EC_OPTIMIZATIONS }; const choices_set_t ONLY_REPLICA_CHOICES = { READ_RATIO @@ -6462,17 +6464,23 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case DEDUP_CHUNK_ALGORITHM: case DEDUP_CDC_CHUNK_SIZE: case READ_RATIO: - pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key; - if (p->opts.is_set(key)) { - if(*it == CSUM_TYPE) { - int64_t val; - p->opts.get(pool_opts_t::CSUM_TYPE, &val); - f->dump_string(i->first.c_str(), Checksummer::get_csum_type_string(val)); - } else { - p->opts.dump(i->first, f.get()); - } + { + pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key; + if (p->opts.is_set(key)) { + if(*it == CSUM_TYPE) { + int64_t val; + p->opts.get(pool_opts_t::CSUM_TYPE, &val); + f->dump_string(i->first.c_str(), Checksummer::get_csum_type_string(val)); + } else { + p->opts.dump(i->first, f.get()); + } + } } break; + case EC_OPTIMIZATIONS: + f->dump_bool("allow_ec_optimizations", + p->has_flag(pg_pool_t::FLAG_EC_OPTIMIZATIONS)); + break; } } f->close_section(); @@ -6644,6 +6652,11 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) } } break; + case EC_OPTIMIZATIONS: + ss << "allow_ec_optimizations: " << + (p->has_flag(pg_pool_t::FLAG_EC_OPTIMIZATIONS) ? "true" : "false") << + "\n"; + break; } rdata.append(ss.str()); ss.str(""); @@ -8796,8 +8809,31 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, if (val == "true" || (interr.empty() && n == 1)) { p.flags |= pg_pool_t::FLAG_EC_OVERWRITES; } else if (val == "false" || (interr.empty() && n == 0)) { - ss << "ec overwrites cannot be disabled once enabled"; + if ((p.flags & pg_pool_t::FLAG_EC_OVERWRITES) != 0) { + ss << "ec overwrites cannot be disabled once enabled"; + return -EINVAL; + } + } else { + ss << "expecting value 'true', 'false', '0', or '1'"; return -EINVAL; + } + } else if (var == "allow_ec_optimizations") { + if (!p.is_erasure()) { + ss << "allow_ec_optimizations can only be enabled for an erasure coded pool"; + return -EINVAL; + } + if (osdmap.require_osd_release < ceph_release_t::tentacle) { + ss << "All OSDs must be upgraded to tentacle or " + << "later before setting allow_ec_optimizations"; + return -EINVAL; + } + if (val == "true" || (interr.empty() && n == 1)) { + p.flags |= pg_pool_t::FLAG_EC_OPTIMIZATIONS; + } else if (val == "false" || (interr.empty() && n == 0)) { + if ((p.flags & pg_pool_t::FLAG_EC_OPTIMIZATIONS) != 0) { + ss << "allow_ec_optimizations cannot be disabled once enabled"; + return -EINVAL; + } } else { ss << "expecting value 'true', 'false', '0', or '1'"; return -EINVAL; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c654cb71cd685..e30f160b7cb53 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1297,6 +1297,7 @@ struct pg_pool_t { // Pool features are restricted to those supported by crimson-osd. // Note, does not prohibit being created on classic osd. FLAG_CRIMSON = 1<<18, + FLAG_EC_OPTIMIZATIONS = 1<<19, // enable optimizations, once enabled, cannot be disabled }; static const char *get_flag_name(uint64_t f) { @@ -1320,6 +1321,7 @@ struct pg_pool_t { case FLAG_EIO: return "eio"; case FLAG_BULK: return "bulk"; case FLAG_CRIMSON: return "crimson"; + case FLAG_EC_OPTIMIZATIONS: return "ec_optimizations"; default: return "???"; } } @@ -1376,6 +1378,8 @@ struct pg_pool_t { return FLAG_BULK; if (name == "crimson") return FLAG_CRIMSON; + if (name == "ec_optimizations") + return FLAG_EC_OPTIMIZATIONS; return 0; } @@ -1790,6 +1794,10 @@ public: return has_flag(FLAG_EC_OVERWRITES); } + bool allows_ecoptimizations() const { + return has_flag(FLAG_EC_OPTIMIZATIONS); + } + bool is_crimson() const { return has_flag(FLAG_CRIMSON); }