From: Alex Ainscow Date: Thu, 5 Feb 2026 15:00:03 +0000 (+0000) Subject: osd: SplitOp preparatory work in osd_types X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cebf73e91b2985e284a66cacf0eb24c9af9255dd;p=ceph.git osd: SplitOp preparatory work in osd_types - Add ec_data_shard_count interface - Prevent sending of the split reads flag to tentacle OSDs - Add ec data shard count and coding shard count into the pool - Encode shard mappings into the pool, for use by Direct reads Signed-off-by: Alex Ainscow --- diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 63385f742fb..59daa0a156f 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -3241,6 +3241,9 @@ bool OSDMap::primary_changed_broken( uint64_t OSDMap::get_encoding_features() const { uint64_t f = SIGNIFICANT_FEATURES; + if (require_osd_release < ceph_release_t::umbrella) { + f &= ~CEPH_FEATURE_SERVER_UMBRELLA; + } if (require_osd_release < ceph_release_t::tentacle) { f &= ~CEPH_FEATURE_SERVER_TENTACLE; } diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index bbed6cf70ab..202475ab16e 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -579,7 +579,8 @@ private: CEPH_FEATUREMASK_SERVER_NAUTILUS | CEPH_FEATUREMASK_SERVER_OCTOPUS | CEPH_FEATUREMASK_SERVER_REEF | - CEPH_FEATUREMASK_SERVER_TENTACLE; + CEPH_FEATUREMASK_SERVER_TENTACLE | + CEPH_FEATUREMASK_SERVER_UMBRELLA; struct addrs_s { mempool::osdmap::vector > client_addrs; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 21baa09aaef..873163fd9a2 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1646,6 +1646,8 @@ void pg_pool_t::dump(Formatter *f) const f->dump_unsigned("cache_min_flush_age", cache_min_flush_age); f->dump_unsigned("cache_min_evict_age", cache_min_evict_age); f->dump_string("erasure_code_profile", erasure_code_profile); + f->dump_unsigned("ec_data_shard_count", ec_data_shard_count.value_or(0)); + f->dump_unsigned("ec_coding_shard_count", ec_coding_shard_count.value_or(0)); 
f->open_object_section("hit_set_params"); hit_set_params.dump(f); f->close_section(); // hit_set_params @@ -1983,9 +1985,12 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const return; } - uint8_t v = 32; + uint8_t v = 33; // NOTE: any new encoding dependencies must be reflected by // SIGNIFICANT_FEATURES + if (!HAVE_SIGNIFICANT_FEATURE(features, SERVER_UMBRELLA)) { + v = 32; + } if (!HAVE_SIGNIFICANT_FEATURE(features, SERVER_TENTACLE)) { if (!HAVE_SIGNIFICANT_FEATURE(features, NEW_OSDOP_ENCODING)) { // this was the first post-hammer thing we added; if it's missing, encode @@ -2020,11 +2025,16 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const encode(snaps, bl, features); encode(removed_snaps, bl); encode(auid, bl); - if (v >= 27) { + if (v >= 33) { encode(flags, bl); + } else if (v >= 27) { + auto tmp = flags; + tmp &= ~FLAG_CLIENT_SPLIT_READS; + encode(tmp, bl); } else { auto tmp = flags; - tmp &= ~(FLAG_SELFMANAGED_SNAPS | FLAG_POOL_SNAPS | FLAG_CREATING); + tmp &= ~(FLAG_SELFMANAGED_SNAPS | FLAG_POOL_SNAPS | FLAG_CREATING | + FLAG_CLIENT_SPLIT_READS); encode(tmp, bl); } encode((uint32_t)0, bl); // crash_replay_interval @@ -2105,12 +2115,17 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const if (v >= 32) { encode(nonprimary_shards, bl); } + if (v >= 33) { + encode(shard_mapping, bl); + encode(ec_data_shard_count, bl); + encode(ec_coding_shard_count, bl); + } ENCODE_FINISH(bl); } void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(32, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(33, 5, 5, bl); decode(type, bl); decode(size, bl); decode(crush_rule, bl); @@ -2306,6 +2321,16 @@ void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl) } else { nonprimary_shards.clear(); } + + if (struct_v >= 33) { + decode(shard_mapping, bl); + decode(ec_data_shard_count, bl); + decode(ec_coding_shard_count, bl); + } else { + shard_mapping.clear(); + 
ec_data_shard_count.reset(); + ec_coding_shard_count.reset(); + } DECODE_FINISH(bl); calc_pg_masks(); calc_grade_table(); @@ -2428,6 +2453,8 @@ ostream& operator<<(ostream& out, const pg_pool_t& p) out << p.get_type_name(); if (p.get_type_name() == "erasure") { out << " profile " << p.erasure_code_profile; + out << " ec_data_shard_count " << p.ec_data_shard_count.value_or(0); + out << " ec_coding_shard_count " << p.ec_coding_shard_count.value_or(0); } out << " size " << p.get_size() << " min_size " << p.get_min_size() diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 29d5f439dc6..846ffc6edf4 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1351,6 +1351,7 @@ struct pg_pool_t { case FLAG_BULK: return "bulk"; case FLAG_CRIMSON: return "crimson"; case FLAG_EC_OPTIMIZATIONS: return "ec_optimizations"; + case FLAG_CLIENT_SPLIT_READS: return "split_reads"; default: return "???"; } } @@ -1409,6 +1410,8 @@ struct pg_pool_t { return FLAG_CRIMSON; if (name == "ec_optimizations") return FLAG_EC_OPTIMIZATIONS; + if (name == "split_reads") + return FLAG_CLIENT_SPLIT_READS; return 0; } @@ -1516,6 +1519,9 @@ private: public: std::map properties; ///< OBSOLETE std::string erasure_code_profile; ///< name of the erasure code profile in OSDMap + // Profile values stored in integer format to allow reading efficiently + // without parsing the erasure_code_profile string + std::optional ec_data_shard_count, ec_coding_shard_count; ///< ec profile values epoch_t last_change = 0; ///< most recent epoch changed, exclusing snapshot changes // If non-zero, require OSDs in at least this many different instances... uint32_t peering_crush_bucket_count = 0; @@ -1746,6 +1752,7 @@ public: private: std::vector grade_table; + std::vector shard_mapping; // Used by EC direct reads. 
public: uint32_t get_grade(unsigned i) const { @@ -1801,6 +1808,10 @@ public: snapid_t get_snap_seq() const { return snap_seq; } uint64_t get_auid() const { return auid; } + uint8_t get_ec_data_shard_count() const { /* returns ec_data_shard_count, or nonprimary_shards.size() + 1 when it is unset */ + return ec_data_shard_count.value_or(nonprimary_shards.size() + 1); + } + void set_snap_seq(snapid_t s) { snap_seq = s; } void set_snap_epoch(epoch_t e) { snap_epoch = e; } @@ -1963,6 +1974,21 @@ public: return !nonprimary_shards.empty() && nonprimary_shards.contains(shard); } + void set_shard_mapping(std::vector<shard_id_t> && mapping) { /* replaces the stored shard_mapping with the caller's vector */ + shard_mapping = std::move(mapping); /* a named rvalue reference is an lvalue; without std::move the vector is copied */ + } + + shard_id_t get_shard(raw_shard_id_t raw) const { /* translates raw through shard_mapping; identity when no mapping is set, NO_SHARD when raw is outside the mapping */ + if (shard_mapping.empty()) { + return shard_id_t((int)raw); + } + if ((int)raw >= 0 && std::cmp_less((int)raw, shard_mapping.size())) { /* cmp_less compares mathematically, so a negative raw would pass it alone; reject it before indexing */ + return shard_mapping[(int)raw]; + } else { + return shard_id_t::NO_SHARD; + } + } + void encode(ceph::buffer::list& bl, uint64_t features) const; void decode(ceph::buffer::list::const_iterator& bl);