git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd_type: encode new version for stretch CRUSH buckets
author: Nitzan Mordechai <nmordech@redhat.com>
Tue, 13 Jun 2023 04:52:12 +0000 (04:52 +0000)
committer: Nitzan Mordechai <nmordech@redhat.com>
Tue, 6 Aug 2024 16:44:52 +0000 (16:44 +0000)
This commit adds a new pg_pool_t encoding version for the stretch CRUSH
bucket fields (peering_crush_*).
Until now the encoding version could not be raised for non-stretch pools:
for backward compatibility with older peers they were always encoded as
version 29, and only stretch pools were encoded as version 30.

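To address this, version 30 is kept only as a legacy special case rather than
as a lower bound, which maintains backward compatibility. When the peer lacks
the SERVER_SQUID feature, the encoder picks the same version as before (30 for
stretch pools, 29 or lower otherwise), and both encode and decode handle the
four raw peering_crush_* fields only when the version is exactly 30, so older
clients see unchanged behavior.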

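For new clients (peers that advertise SERVER_SQUID), pg_pool_t is encoded and decoded using version 31.
To accommodate stretch pools, version 31 carries the peering_crush_* values in a std::optional,
which is empty for non-stretch pools. The first byte of that optional's encoding serves as a boolean
indicator, determining if the optional data is present.
If it is set to true, we encode and decode four uint32_t members: peering_crush_bucket_count,
peering_crush_bucket_target, peering_crush_bucket_barrier and peering_crush_mandatory_member.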

By implementing these changes, we can ensure compatibility with older clients while enabling the use
of the enhanced encoding scheme for new clients, specifically for stretch pools.

Fixes: https://tracker.ceph.com/issues/59291
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
src/osd/osd_types.cc
src/osd/osd_types.h

index 05d50def0a77ae8716f51c82b9157a74620e769b..fbacb4e43eda5c4fb2a6dfc379a16ed51cb02c9a 100644 (file)
@@ -1952,21 +1952,25 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const
     return;
   }
 
-  uint8_t v = 30;
+  uint8_t v = 31;
   // NOTE: any new encoding dependencies must be reflected by
   // SIGNIFICANT_FEATURES
-  if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
-    // this was the first post-hammer thing we added; if it's missing, encode
-    // like hammer.
-    v = 21;
-  } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
-    v = 24;
-  } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
-    v = 26;
-  } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
-    v = 27;
-  } else if (!is_stretch_pool()) {
-    v = 29;
+  if (!HAVE_FEATURE(features, SERVER_SQUID)) {
+    if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
+      // this was the first post-hammer thing we added; if it's missing, encode
+      // like hammer.
+      v = 21;
+    } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
+      v = 24;
+    } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
+      v = 26;
+    } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
+      v = 27;
+    } else if (!is_stretch_pool()) {
+      v = 29;
+    } else {
+      v = 30;
+    }
   }
 
   ENCODE_START(v, 5, bl);
@@ -2057,18 +2061,22 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const
   if (v >= 29) {
     encode(last_pg_merge_meta, bl);
   }
-  if (v >= 30) {
+  if (v == 30) {
     encode(peering_crush_bucket_count, bl);
     encode(peering_crush_bucket_target, bl);
     encode(peering_crush_bucket_barrier, bl);
     encode(peering_crush_mandatory_member, bl);
   }
+  if (v >= 31) {
+    auto maybe_peering_crush_data1 = maybe_peering_crush_data();
+    encode(maybe_peering_crush_data1, bl);
+  }
   ENCODE_FINISH(bl);
 }
 
 void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(30, 5, 5, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(31, 5, 5, bl);
   decode(type, bl);
   decode(size, bl);
   decode(crush_rule, bl);
@@ -2243,12 +2251,22 @@ void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl)
     last_force_op_resend = last_force_op_resend_prenautilus;
     pg_autoscale_mode = pg_autoscale_mode_t::WARN;    // default to warn on upgrade
   }
-  if (struct_v >= 30) {
+  if (struct_v == 30) {
     decode(peering_crush_bucket_count, bl);
     decode(peering_crush_bucket_target, bl);
     decode(peering_crush_bucket_barrier, bl);
     decode(peering_crush_mandatory_member, bl);
   }
+  if (struct_v >= 31) {
+    std::optional<std::tuple<uint32_t,uint32_t,uint32_t,uint32_t>> peering_crush_data;
+    decode(peering_crush_data, bl);
+    if (peering_crush_data) {
+        std::tie(peering_crush_bucket_count,
+                 peering_crush_bucket_target,
+                 peering_crush_bucket_barrier,
+                 peering_crush_mandatory_member) = *peering_crush_data;
+    }
+  }
   DECODE_FINISH(bl);
   calc_pg_masks();
   calc_grade_table();
@@ -2352,6 +2370,13 @@ void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
   a.fast_read = false;
   a.application_metadata = {{"rbd", {{"key", "value"}}}};
   o.push_back(new pg_pool_t(a));
+
+  // test stretch CRUSH buckets
+  a.peering_crush_bucket_count = 10;
+  a.peering_crush_bucket_barrier = 11;
+  a.peering_crush_mandatory_member = 12;
+  a.peering_crush_bucket_target = 13;
+  o.push_back(new pg_pool_t(a));
 }
 
 ostream& operator<<(ostream& out, const pg_pool_t& p)
index fe62fad2805d3d31feb22a7e7221bf76a6f90217..5b2935c277d0f4f3faddafb78c84883720152a7e 100644 (file)
@@ -1552,6 +1552,18 @@ public:
   bool is_stretch_pool() const {
     return peering_crush_bucket_count != 0;
   }
+  
+  std::optional<std::tuple<uint32_t,uint32_t,uint32_t,uint32_t>> maybe_peering_crush_data() const {
+    if (!is_stretch_pool()) {
+        return std::nullopt;
+    } else {
+        return std::make_optional(std::tuple<uint32_t,uint32_t,uint32_t,uint32_t>(peering_crush_bucket_count,
+                                                                    peering_crush_bucket_target ,
+                                                                    peering_crush_bucket_barrier,
+                                                                    peering_crush_mandatory_member
+                                                                    ));
+    }
+  }
 
   bool stretch_set_can_peer(const std::set<int>& want, const OSDMap& osdmap,
                            std::ostream *out) const;