git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: Sanely set the default CRUSH rule when creating pools in stretch mode 42909/head
author: Greg Farnum <gfarnum@redhat.com>
Thu, 17 Jun 2021 19:56:20 +0000 (19:56 +0000)
committer: Greg Farnum <gfarnum@redhat.com>
Tue, 24 Aug 2021 17:54:00 +0000 (17:54 +0000)
If we get a pool create request while in stretch mode that does not explicitly
specify a crush rule, look at the stretch-mode pools and their rules, and
select the most common one.

Also update set_up_stretch_mode.sh to add a few more rules that let me test
this locally.

Fixes: https://tracker.ceph.com/issues/51270
Signed-off-by: Greg Farnum <gfarnum@redhat.com>
(cherry picked from commit 4f9fdca136bde10479fc173c9bcf4288ea960ace)

src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/script/set_up_stretch_mode.sh

index 63370e31f2a7289dcec285a3670212e2b2c25de4..927b8b8af92c2832e0aca57c674d2a31bc5584aa 100644 (file)
@@ -7730,6 +7730,43 @@ int OSDMonitor::prepare_pool_stripe_width(const unsigned pool_type,
   return err;
 }
 
+int OSDMonitor::get_replicated_stretch_crush_rule()
+{
+  /* we don't write down the stretch rule anywhere, so
+   * we have to guess it. How? Look at all the pools
+   * and count up how many times a given rule is used
+   * on stretch pools and then return the one with
+   * the most users!
+   */
+  map<int,int> rule_counts;
+  for (const auto& pooli : osdmap.pools) {
+    const pg_pool_t& p = pooli.second;
+    if (p.is_replicated() && p.is_stretch_pool()) {
+      if (!rule_counts.count(p.crush_rule)) {
+       rule_counts[p.crush_rule] = 1;
+      } else {
+       ++rule_counts[p.crush_rule];
+      }
+    }
+  }
+
+  if (rule_counts.empty()) {
+    return -ENOENT;
+  }
+
+  int most_used_count = 0;
+  int most_used_rule = -1;
+  for (auto i : rule_counts) {
+    if (i.second > most_used_count) {
+      most_used_rule = i.first;
+      most_used_count = i.second;
+    }
+  }
+  ceph_assert(most_used_count > 0);
+  ceph_assert(most_used_rule >= 0);
+  return most_used_rule;
+}
+
 int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
                                        const string &erasure_code_profile,
                                        const string &rule_name,
@@ -7742,8 +7779,12 @@ int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
     case pg_pool_t::TYPE_REPLICATED:
       {
        if (rule_name == "") {
-         // Use default rule
-         *crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(cct);
+         if (osdmap.stretch_mode_enabled) {
+           *crush_rule = get_replicated_stretch_crush_rule();
+         } else {
+           // Use default rule
+           *crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(cct);
+         }
          if (*crush_rule < 0) {
            // Errors may happen e.g. if no valid rule is available
            *ss << "No suitable CRUSH rule exists, check "
index ffe3490b4be7fdcfcb9a493cd0751a65b5df66b7..f90cd4d72fbcb59e781df2cae6d3c87fd8217dc7 100644 (file)
@@ -858,6 +858,13 @@ public:
    * Sets the osdmap and pg_pool_t values back to healthy stretch mode status.
    */
   void trigger_healthy_stretch_mode();
+  /**
+   * Obtain the crush rule being used for stretch pools.
+   * Note that right now this is heuristic and simply selects the
+   * most-used rule on replicated stretch pools. If several rules
+   * are used by the same number of pools, the one with the lowest
+   * rule ID is selected.
+   * @return the crush rule ID, or a negative errno (-ENOENT when
+   * there are no replicated stretch pools to infer the rule from)
+   */
+  int get_replicated_stretch_crush_rule();
 private:
   utime_t stretch_recovery_triggered; // what time we committed a switch to recovery mode
 };
index a9cfd7daa3b34c273d6e1122d3e54c2281f5963a..a28f181e6c6fd7f0ddc7e7568c2882b767fe45f8 100755 (executable)
@@ -24,6 +24,30 @@ rule stretch_rule {
         step chooseleaf firstn 2 type host
         step emit
 }
+rule stretch_rule2 {
+        id 2
+        type replicated
+        min_size 1
+        max_size 10
+        step take site1
+        step chooseleaf firstn 2 type host
+        step emit
+        step take site2
+        step chooseleaf firstn 2 type host
+        step emit
+}
+rule stretch_rule3 {
+        id 3
+        type replicated
+        min_size 1
+        max_size 10
+        step take site1
+        step chooseleaf firstn 2 type host
+        step emit
+        step take site2
+        step chooseleaf firstn 2 type host
+        step emit
+}
 EOF
 ./bin/crushtool -c crush.map.txt -o crush2.map.bin
 ./bin/ceph osd setcrushmap -i crush2.map.bin