]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon/osd: Deny EC optimizations for non-aligned chunk-sizes and allow switch off. block_non_4k_chunks_tentacle
author: Alex Ainscow <aainscow@uk.ibm.com>
Wed, 11 Feb 2026 11:37:22 +0000 (11:37 +0000)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Wed, 11 Feb 2026 11:39:09 +0000 (11:39 +0000)
There are some bugs in the way Fast EC handles non-4k-aligned chunk sizes.

We are unlikely to ever support these and will require any user with these to
use the Umbrella pool migration to switch them off.

If a user of early versions of Tentacle managed to switch this feature on,
we allow the optimizations to be switched off, which should allow the pool
to come back online.

It is generally a bad idea to turn off optimizations, especially if there have
been many writes. However, this should all be detected by scrubbing and the user
is more likely to be able to recover data with the old EC code.

Turning off EC optimizations, or turning them on with non-4k-aligned chunk
sizes, requires the yes-i-really-mean-it flag.

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/osd/ECSwitch.h

index b7698ef254021fd6bdb5ec68139d79a1fb9c62e0..fb54f7e7fc3342d131e94b0318e5360e79e663a5 100644 (file)
@@ -8341,7 +8341,7 @@ int OSDMonitor::prepare_new_pool(string& name,
 
   if (cct->_conf.get_val<bool>("osd_pool_default_flag_ec_optimizations")) {
     // This will fail if the pool cannot support ec optimizations.
-    enable_pool_ec_optimizations(*pi, nullptr, true);
+    enable_pool_ec_optimizations(*pi, nullptr, true, false);
   }
 
   pending_inc.new_pool_names[pool] = name;
@@ -8375,7 +8375,7 @@ bool OSDMonitor::prepare_unset_flag(MonOpRequestRef op, int flag)
 }
 
 int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p,
-    stringstream *ss, bool enable) {
+    stringstream *ss, bool enable, bool force) {
   if (!p.is_erasure()) {
     if (ss) {
       *ss << "allow_ec_optimizations can only be enabled for an erasure coded pool";
@@ -8391,12 +8391,13 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p,
   }
   if (enable) {
     ErasureCodeInterfaceRef erasure_code;
-    unsigned int k, m;
+    unsigned int k, m, chunk_size;
     stringstream tmp;
     int err = get_erasure_code(p.erasure_code_profile, &erasure_code, &tmp);
     if (err == 0) {
       k = erasure_code->get_data_chunk_count();
       m = erasure_code->get_coding_chunk_count();
+      chunk_size = erasure_code->get_chunk_size(p.get_stripe_width());
     } else {
       if (ss) {
         *ss << "get_erasure_code failed: " << tmp.str();
@@ -8404,7 +8405,8 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p,
       return -EINVAL;
     }
     if ((erasure_code->get_supported_optimizations() &
-        ErasureCodeInterface::FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED) == 0) {
+        ErasureCodeInterface::FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED) == 0 ||
+        ((chunk_size % 4096) != 0 && !force)) {
       if (ss) {
         *ss << "ec optimizations not currently supported for pool profile.";
       }
@@ -8427,13 +8429,17 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p,
       }
     }
     p.flags |= pg_pool_t::FLAG_EC_OPTIMIZATIONS;
-  } else {
+  } else if (!force) {
     if ((p.flags & pg_pool_t::FLAG_EC_OPTIMIZATIONS) != 0) {
       if (ss) {
         *ss << "allow_ec_optimizations cannot be disabled once enabled";
       }
       return -EINVAL;
     }
+  } else {
+    // Do not undo the non-primary shards change. User should be instructed
+    // to delete the pool (and migrate data off).
+    p.flags &= ~pg_pool_t::FLAG_EC_OPTIMIZATIONS;
   }
   return 0;
 }
@@ -8915,7 +8921,9 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
       return -EINVAL;
     }
     bool was_enabled = p.allows_ecoptimizations();
-    int r = enable_pool_ec_optimizations(p, nullptr, enable);
+    bool force = false;
+    cmd_getval(cmdmap, "yes_i_really_mean_it", force);
+    int r = enable_pool_ec_optimizations(p, nullptr, enable, force);
     if (r != 0) {
       return r;
     }
index 83dffb35ba729938dd3e6c811ebdd187c630e403..f0672347b4641637beb02daeccff93fd1ac58efe 100644 (file)
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
@@ -743,7 +743,8 @@ public:
 
   int enable_pool_ec_optimizations(pg_pool_t &pool,
                                    std::stringstream *ss,
-                                   bool enable);
+                                   bool enable,
+                                   bool force);
   int prepare_command_pool_set(const cmdmap_t& cmdmap,
                                std::stringstream& ss);
 
index 42f405021a818651f807581d95028c5c184093ce..1fe9220b3f561506e2ded16b4db52207ee752c9f 100644 (file)
@@ -184,10 +184,7 @@ public:
       legacy.on_change();
     }
 
-    if (!is_optimized_actual)
-      is_optimized_actual = get_parent()->get_pool().allows_ecoptimizations();
-    else
-      ceph_assert(get_parent()->get_pool().allows_ecoptimizations());
+    is_optimized_actual = get_parent()->get_pool().allows_ecoptimizations();
   }
 
   void clear_recovery_state() override