From: Nitzan Mordechai Date: Tue, 13 Jun 2023 04:52:12 +0000 (+0000) Subject: osd_type: encode new version for stretch CRUSH buckets X-Git-Tag: v20.3.0~326^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a23c2ac9840f71e7b1ea6c8bdf4d039e3c0ebdea;p=ceph.git osd_type: encode new version for stretch CRUSH buckets We are implementing a new version of the encoding scheme for stretch CRUSH buckets in our OSD system. However, we have encountered limitations when it comes to extending the encoding version for non-stretch pools. Currently, we are restricted to using version 29 due to backward compatibility concerns. To address this, we have devised a solution that skips over version 30 while maintaining backward compatibility. This means that version 30 is now matched exactly (== 30 instead of >= 30), so it keeps its existing layout and older clients see the same behavior as before. For new clients (those advertising the SERVER_SQUID feature), we can proceed with encoding and decoding using version 31. To accommodate stretch pools, we will utilize std::optional. The first byte of the optional's encoding will serve as a boolean indicator, determining if the optional data is present. If the first byte is set to true, we will encode and decode four uint32_t members: peering_crush_*. By implementing these changes, we can ensure compatibility with older clients while enabling the use of the enhanced encoding scheme for new clients, specifically for stretch pools. 
Fixes: https://tracker.ceph.com/issues/59291 Signed-off-by: Nitzan Mordechai --- diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 05d50def0a7..fbacb4e43ed 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1952,21 +1952,25 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const return; } - uint8_t v = 30; + uint8_t v = 31; // NOTE: any new encoding dependencies must be reflected by // SIGNIFICANT_FEATURES - if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) { - // this was the first post-hammer thing we added; if it's missing, encode - // like hammer. - v = 21; - } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { - v = 24; - } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { - v = 26; - } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { - v = 27; - } else if (!is_stretch_pool()) { - v = 29; + if (!HAVE_FEATURE(features, SERVER_SQUID)) { + if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) { + // this was the first post-hammer thing we added; if it's missing, encode + // like hammer. 
+ v = 21; + } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { + v = 24; + } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { + v = 26; + } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { + v = 27; + } else if (!is_stretch_pool()) { + v = 29; + } else { + v = 30; + } } ENCODE_START(v, 5, bl); @@ -2057,18 +2061,22 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const if (v >= 29) { encode(last_pg_merge_meta, bl); } - if (v >= 30) { + if (v == 30) { encode(peering_crush_bucket_count, bl); encode(peering_crush_bucket_target, bl); encode(peering_crush_bucket_barrier, bl); encode(peering_crush_mandatory_member, bl); } + if (v >= 31) { + auto maybe_peering_crush_data1 = maybe_peering_crush_data(); + encode(maybe_peering_crush_data1, bl); + } ENCODE_FINISH(bl); } void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(30, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(31, 5, 5, bl); decode(type, bl); decode(size, bl); decode(crush_rule, bl); @@ -2243,12 +2251,22 @@ void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl) last_force_op_resend = last_force_op_resend_prenautilus; pg_autoscale_mode = pg_autoscale_mode_t::WARN; // default to warn on upgrade } - if (struct_v >= 30) { + if (struct_v == 30) { decode(peering_crush_bucket_count, bl); decode(peering_crush_bucket_target, bl); decode(peering_crush_bucket_barrier, bl); decode(peering_crush_mandatory_member, bl); } + if (struct_v >= 31) { + std::optional> peering_crush_data; + decode(peering_crush_data, bl); + if (peering_crush_data) { + std::tie(peering_crush_bucket_count, + peering_crush_bucket_target, + peering_crush_bucket_barrier, + peering_crush_mandatory_member) = *peering_crush_data; + } + } DECODE_FINISH(bl); calc_pg_masks(); calc_grade_table(); @@ -2352,6 +2370,13 @@ void pg_pool_t::generate_test_instances(list& o) a.fast_read = false; a.application_metadata = {{"rbd", {{"key", "value"}}}}; o.push_back(new pg_pool_t(a)); + + // test 
stretch CRUSH buckets + a.peering_crush_bucket_count = 10; + a.peering_crush_bucket_barrier = 11; + a.peering_crush_mandatory_member = 12; + a.peering_crush_bucket_target = 13; + o.push_back(new pg_pool_t(a)); } ostream& operator<<(ostream& out, const pg_pool_t& p) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index fe62fad2805..5b2935c277d 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1552,6 +1552,18 @@ public: bool is_stretch_pool() const { return peering_crush_bucket_count != 0; } + + std::optional> maybe_peering_crush_data() const { + if (!is_stretch_pool()) { + return std::nullopt; + } else { + return std::make_optional(std::tuple(peering_crush_bucket_count, + peering_crush_bucket_target , + peering_crush_bucket_barrier, + peering_crush_mandatory_member + )); + } + } bool stretch_set_can_peer(const std::set& want, const OSDMap& osdmap, std::ostream *out) const;