git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Introduce pool flag for "split IO" and Plugin flag for "direct read"
author: Alex Ainscow <aainscow@uk.ibm.com>
Fri, 3 Oct 2025 12:49:58 +0000 (13:49 +0100)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Tue, 14 Oct 2025 10:42:13 +0000 (11:42 +0100)
These flags will currently behave as follows:

1. The pool flag is never set automatically; it is only set if a user enables
   it through the osd_pool_default_flags config option.
2. The pool flag will be removed for EC pools where the plugin does not support
   direct reads.
3. Replica pools will never remove the flag.

The intention is to eventually invert this logic and allow split IOs upon
upgrade to Umbrella in this same function.

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/erasure-code/ErasureCodeInterface.h
src/erasure-code/isa/ErasureCodeIsa.h
src/erasure-code/jerasure/ErasureCodeJerasure.h
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/osd/ECUtil.h
src/osd/osd_types.h

index 4f6c73405270220731b31bda23fa97972e6db6b3..0825f24a3b9935454c5369333c6bba21b2a58b9d 100644 (file)
@@ -685,6 +685,11 @@ namespace ceph {
        * to decode a parity CRC to get the CRC of a data shard.
        */
       FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT = 1<<7,
+      /* This plugin supports the ability for the client to read directly from
+       * the OSD containing a shard. This currently requires that raw shard ==
+       * shard and that the data shards are simply striped.
+       */
+      FLAG_EC_PLUGIN_DIRECT_READS = 1<<8,
     };
     static const char *get_optimization_flag_name(const plugin_flags flag) {
       switch (flag) {
@@ -697,6 +702,8 @@ namespace ceph {
       case FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED: return "optimizedsupport";
       case FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT:
         return "crcencodedecode";
+      case FLAG_EC_PLUGIN_DIRECT_READS:
+        return "directreads";
       default: return "???";
       }
     }
index ae14b1d6aace9737f8233faf5b7ff31ad9700047..532d7130bdb1219ed5a0da75ad81d7d94dc9f013 100644 (file)
@@ -69,7 +69,8 @@ public:
             FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION |
             FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION |
             FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION |
-            FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION;
+            FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION |
+            FLAG_EC_PLUGIN_DIRECT_READS;
 
     if (technique == "reed_sol_van"sv) {
        flags |= FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT;
index 14ea1d99d5805adb1b4399bd7b4345fbf1a1c9fe..9404f48fc7905208b85ef52b2e2a028389460fcb 100644 (file)
@@ -51,7 +51,8 @@ public:
     flags = FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION |
       FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION |
       FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION |
-      FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION;
+      FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION |
+      FLAG_EC_PLUGIN_DIRECT_READS;
 
     if (technique == "reed_sol_van"sv) {
       flags |= FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED;
index 29ceebe21061cdf3d446dac4927e7a8004c3791d..a0011b0d1703b75fee58332401eeb8f928c8ecc6 100644 (file)
@@ -8357,6 +8357,8 @@ int OSDMonitor::prepare_new_pool(string& name,
     enable_pool_ec_optimizations(*pi, nullptr, true);
   }
 
+  enable_pool_ec_direct_reads(*pi);
+
   pending_inc.new_pool_names[pool] = name;
   return 0;
 }
@@ -8451,6 +8453,29 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p,
   return 0;
 }
 
+void OSDMonitor::enable_pool_ec_direct_reads(pg_pool_t &p) {
+  if (p.is_erasure()) {
+    ErasureCodeInterfaceRef erasure_code;
+    stringstream tmp;
+    int err = get_erasure_code(p.erasure_code_profile, &erasure_code, &tmp);
+
+    // Once this feature is finished, we will replace this with upgrade code.
+    // The upgrade code will enable the split read flag once all OSDs are at
+    // Umbrella. For now, if the plugin does not support direct reads, we just
+    // disable it.  All plugins and techniques should be capable of supporting
+    // direct reads, but we put in place this capability to reduce the test
+    // matrix for less important plugins/techniques.
+    //
+    // To enable direct reads in development, set the osd_pool_default_flags to
+    // 1<<20 = 0x100000 = 1048576
+    if (err != 0 || !p.allows_ecoptimizations() ||
+          (erasure_code->get_supported_optimizations() &
+            ErasureCodeInterface::FLAG_EC_PLUGIN_DIRECT_READS) == 0) {
+      p.flags &= ~pg_pool_t::FLAG_CLIENT_SPLIT_READS;
+    }
+  }
+}
+
 int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
                                          stringstream& ss)
 {
index 6896e5beb0804ec691c5b6234196290899e030f9..f2c27f3110c769036059590ac40c88a9d492e19f 100644 (file)
@@ -745,6 +745,7 @@ public:
   int enable_pool_ec_optimizations(pg_pool_t &pool,
                                    std::stringstream *ss,
                                    bool enable);
+  void enable_pool_ec_direct_reads(pg_pool_t &p);
   int prepare_command_pool_set(const cmdmap_t& cmdmap,
                                std::stringstream& ss);
 
index 0bb905fd016d44c5340a79603bdd9640b18c7de1..deaf39cd53fa90221bc0354d49f3afb83663e4da 100644 (file)
@@ -683,6 +683,11 @@ public:
             ErasureCodeInterface::FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT) != 0;
   }
 
+  bool supports_direct_reads() const {
+    return (plugin_flags &
+            ErasureCodeInterface::FLAG_EC_PLUGIN_DIRECT_READS) != 0;
+  }
+
   uint64_t get_stripe_width() const {
     return stripe_width;
   }
index bfd9bb1f725944b1a1dad28b116cf818e7ecee52..f0495c6e2771ecd4a7233581991633c703e289bc 100644 (file)
@@ -1318,6 +1318,7 @@ struct pg_pool_t {
     // Note, does not prohibit being created on classic osd.
     FLAG_CRIMSON = 1<<18,
     FLAG_EC_OPTIMIZATIONS = 1<<19, // enable optimizations, once enabled, cannot be disabled
+    FLAG_CLIENT_SPLIT_READS = 1<<20, // Optimized EC is permitted to do direct reads.
   };
 
   static const char *get_flag_name(uint64_t f) {