From 578e82c877e1ea698a6e2986447f8455fd72cb72 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Wed, 11 Feb 2026 11:37:22 +0000 Subject: [PATCH] mon/osd: Deny EC optimizations for non-aligned chunk-sizes and allow switch off. There are some bugs in the way Fast EC handles non-4k-aligned chunk sizes. We are unlikely to ever support these and will require any user with these to use the Umbrella pool migration to switch them off. If a user of early versions of Tentacle managed to switch this feature on, we allow the optimizations to be switched off, which should allow the pool to come back online. It is generally a bad idea to turn off optimizations, especially if there have been many writes. However, this should all be detected by scrubbing and the user is more likely to be able to recover data with the old EC code. Turning off EC optimizations, or turning them on with non-aligned chunk sizes, requires the yes-i-really-mean-it flag. Signed-off-by: Alex Ainscow --- src/mon/OSDMonitor.cc | 20 ++++++++++++++------ src/mon/OSDMonitor.h | 5 +++-- src/osd/ECSwitch.h | 5 +---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index b7698ef2540..fb54f7e7fc3 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -8341,7 +8341,7 @@ int OSDMonitor::prepare_new_pool(string& name, if (cct->_conf.get_val("osd_pool_default_flag_ec_optimizations")) { // This will fail if the pool cannot support ec optimizations. 
- enable_pool_ec_optimizations(*pi, nullptr, true); + enable_pool_ec_optimizations(*pi, nullptr, true, false); } pending_inc.new_pool_names[pool] = name; @@ -8375,7 +8375,7 @@ bool OSDMonitor::prepare_unset_flag(MonOpRequestRef op, int flag) } int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p, - stringstream *ss, bool enable) { + stringstream *ss, bool enable, bool force) { if (!p.is_erasure()) { if (ss) { *ss << "allow_ec_optimizations can only be enabled for an erasure coded pool"; @@ -8391,12 +8391,13 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p, } if (enable) { ErasureCodeInterfaceRef erasure_code; - unsigned int k, m; + unsigned int k, m, chunk_size; stringstream tmp; int err = get_erasure_code(p.erasure_code_profile, &erasure_code, &tmp); if (err == 0) { k = erasure_code->get_data_chunk_count(); m = erasure_code->get_coding_chunk_count(); + chunk_size = erasure_code->get_chunk_size(p.get_stripe_width()); } else { if (ss) { *ss << "get_erasure_code failed: " << tmp.str(); @@ -8404,7 +8405,8 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p, return -EINVAL; } if ((erasure_code->get_supported_optimizations() & - ErasureCodeInterface::FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED) == 0) { + ErasureCodeInterface::FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED) == 0 || + ((chunk_size % 4096) != 0 && !force)) { if (ss) { *ss << "ec optimizations not currently supported for pool profile."; } @@ -8427,13 +8429,17 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p, } } p.flags |= pg_pool_t::FLAG_EC_OPTIMIZATIONS; - } else { + } else if (!force) { if ((p.flags & pg_pool_t::FLAG_EC_OPTIMIZATIONS) != 0) { if (ss) { *ss << "allow_ec_optimizations cannot be disabled once enabled"; } return -EINVAL; } + } else { + // Do not undo the non-primary shards change. User should be instructed + // to delete the pool (and migrate data off). 
+ p.flags &= ~pg_pool_t::FLAG_EC_OPTIMIZATIONS; } return 0; } @@ -8915,7 +8921,9 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, return -EINVAL; } bool was_enabled = p.allows_ecoptimizations(); - int r = enable_pool_ec_optimizations(p, nullptr, enable); + bool force = false; + cmd_getval(cmdmap, "yes_i_really_mean_it", force); + int r = enable_pool_ec_optimizations(p, nullptr, enable, force); if (r != 0) { return r; } diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 83dffb35ba7..f0672347b46 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -1,4 +1,4 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /* * Ceph - scalable distributed file system @@ -743,7 +743,8 @@ public: int enable_pool_ec_optimizations(pg_pool_t &pool, std::stringstream *ss, - bool enable); + bool enable, + bool force); int prepare_command_pool_set(const cmdmap_t& cmdmap, std::stringstream& ss); diff --git a/src/osd/ECSwitch.h b/src/osd/ECSwitch.h index 42f405021a8..1fe9220b3f5 100644 --- a/src/osd/ECSwitch.h +++ b/src/osd/ECSwitch.h @@ -184,10 +184,7 @@ public: legacy.on_change(); } - if (!is_optimized_actual) - is_optimized_actual = get_parent()->get_pool().allows_ecoptimizations(); - else - ceph_assert(get_parent()->get_pool().allows_ecoptimizations()); + is_optimized_actual = get_parent()->get_pool().allows_ecoptimizations(); } void clear_recovery_state() override -- 2.47.3