From b4d946681d94f1d662b7c7a4fae0550cca8322c1 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Fri, 3 Oct 2025 13:49:58 +0100 Subject: [PATCH] osd: Introduce pool flag for "split IO" and Plugin flag for "direct read" These flags will currently behave as follows: 1. The pool flag is never set, unless by a user with the osd_pool_default_flags config option. 2. The pool flag will be removed for EC pools where the plugin does not support direct reads. 3. Replica pools will never remove the flag. The intention is to eventually invert this logic and allow split IOs upon upgrade to Umbrella in this same function. Signed-off-by: Alex Ainscow --- src/erasure-code/ErasureCodeInterface.h | 7 ++++++ src/erasure-code/isa/ErasureCodeIsa.h | 3 ++- .../jerasure/ErasureCodeJerasure.h | 3 ++- src/mon/OSDMonitor.cc | 25 +++++++++++++++++++ src/mon/OSDMonitor.h | 1 + src/osd/ECUtil.h | 5 ++++ src/osd/osd_types.h | 1 + 7 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/erasure-code/ErasureCodeInterface.h b/src/erasure-code/ErasureCodeInterface.h index 4f6c7340527..0825f24a3b9 100644 --- a/src/erasure-code/ErasureCodeInterface.h +++ b/src/erasure-code/ErasureCodeInterface.h @@ -685,6 +685,11 @@ namespace ceph { * to decode a parity CRC to get the CRC of a data shard. */ FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT = 1<<7, + /* This plugin supports the ability for the client to read directly from + * the OSD containing a shard. This currently requires that raw shard == + * shard and that the data shards are simply striped. 
+ */ + FLAG_EC_PLUGIN_DIRECT_READS = 1<<8, }; static const char *get_optimization_flag_name(const plugin_flags flag) { switch (flag) { @@ -697,6 +702,8 @@ namespace ceph { case FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED: return "optimizedsupport"; case FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT: return "crcencodedecode"; + case FLAG_EC_PLUGIN_DIRECT_READS: + return "directreads"; default: return "???"; } } diff --git a/src/erasure-code/isa/ErasureCodeIsa.h b/src/erasure-code/isa/ErasureCodeIsa.h index ae14b1d6aac..532d7130bdb 100644 --- a/src/erasure-code/isa/ErasureCodeIsa.h +++ b/src/erasure-code/isa/ErasureCodeIsa.h @@ -69,7 +69,8 @@ public: FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION | FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION | FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION | - FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION; + FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION | + FLAG_EC_PLUGIN_DIRECT_READS; if (technique == "reed_sol_van"sv) { flags |= FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT; diff --git a/src/erasure-code/jerasure/ErasureCodeJerasure.h b/src/erasure-code/jerasure/ErasureCodeJerasure.h index 14ea1d99d58..9404f48fc79 100644 --- a/src/erasure-code/jerasure/ErasureCodeJerasure.h +++ b/src/erasure-code/jerasure/ErasureCodeJerasure.h @@ -51,7 +51,8 @@ public: flags = FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION | FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION | FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION | - FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION; + FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION | + FLAG_EC_PLUGIN_DIRECT_READS; if (technique == "reed_sol_van"sv) { flags |= FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 29ceebe2106..a0011b0d170 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -8357,6 +8357,8 @@ int OSDMonitor::prepare_new_pool(string& name, enable_pool_ec_optimizations(*pi, nullptr, true); } + enable_pool_ec_direct_reads(*pi); + pending_inc.new_pool_names[pool] = name; return 0; } @@ -8451,6 
+8453,29 @@ int OSDMonitor::enable_pool_ec_optimizations(pg_pool_t &p, return 0; } +void OSDMonitor::enable_pool_ec_direct_reads(pg_pool_t &p) { + if (p.is_erasure()) { + ErasureCodeInterfaceRef erasure_code; + stringstream tmp; + int err = get_erasure_code(p.erasure_code_profile, &erasure_code, &tmp); + + // Once this feature is finished, we will replace this with upgrade code. + // The upgrade code will enable the split read flag once all OSDs are at + // Umbrella. For now, if the plugin does not support direct reads, we just + // disable it. All plugins and techniques should be capable of supporting + // direct reads, but we put in place this capability to reduce the test + // matrix for less important plugins/techniques. + // + // To enable direct reads in development, set the osd_pool_default_flags to + // 1<<20 = 0x100000 = 1048576 + if (err != 0 || !p.allows_ecoptimizations() || + (erasure_code->get_supported_optimizations() & + ErasureCodeInterface::FLAG_EC_PLUGIN_DIRECT_READS) == 0) { + p.flags &= ~pg_pool_t::FLAG_CLIENT_SPLIT_READS; + } + } +} + int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, stringstream& ss) { diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 6896e5beb08..f2c27f3110c 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -745,6 +745,7 @@ public: int enable_pool_ec_optimizations(pg_pool_t &pool, std::stringstream *ss, bool enable); + void enable_pool_ec_direct_reads(pg_pool_t &p); int prepare_command_pool_set(const cmdmap_t& cmdmap, std::stringstream& ss); diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h index 0bb905fd016..deaf39cd53f 100644 --- a/src/osd/ECUtil.h +++ b/src/osd/ECUtil.h @@ -683,6 +683,11 @@ public: ErasureCodeInterface::FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT) != 0; } + bool supports_direct_reads() const { + return (plugin_flags & + ErasureCodeInterface::FLAG_EC_PLUGIN_DIRECT_READS) != 0; + } + uint64_t get_stripe_width() const { return stripe_width; } diff --git 
a/src/osd/osd_types.h b/src/osd/osd_types.h index bfd9bb1f725..f0495c6e277 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1318,6 +1318,7 @@ struct pg_pool_t { // Note, does not prohibit being created on classic osd. FLAG_CRIMSON = 1<<18, FLAG_EC_OPTIMIZATIONS = 1<<19, // enable optimizations, once enabled, cannot be disabled + FLAG_CLIENT_SPLIT_READS = 1<<20, // Optimized EC is permitted to do direct reads. }; static const char *get_flag_name(uint64_t f) { -- 2.39.5