]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crush: add root_bucket to identify underfull buckets 27257/head
authorhuangjun <huangjun@xsky.com>
Wed, 20 Mar 2019 08:44:02 +0000 (16:44 +0800)
committerPrashant D <pdhange@redhat.com>
Fri, 29 Mar 2019 03:08:17 +0000 (23:08 -0400)
All underfull buckets under root_buckets will be taken as target

For the crule rule:
    step take datacenter0
    step chooseleaf firstn 2 type host
    step emit
    step take datacenter1
    step chooseleaf firstn 2 type host
    step emit

If one host contains overfull osd but no underfull osd,
it will use other underfull buckets as target, which
maybe not in the same datacenter, that will
broke the rule.

Fixes: http://tracker.ceph.com/issues/38826
Signed-off-by: huangjun <huangjun@xsky.com>
(cherry picked from commit 3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe)

src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h

index f09aaa52bfe509f7c9a05f2574d9b7b9c1792138..a40027a5807d8bcdebfe2cc706c6c9e34b5d5af3 100644 (file)
@@ -3642,7 +3642,8 @@ int CrushWrapper::_choose_type_stack(
   const vector<int>& orig,
   vector<int>::const_iterator& i,
   set<int>& used,
-  vector<int> *pw) const
+  vector<int> *pw,
+  int root_bucket) const
 {
   vector<int> w = *pw;
   vector<int> o;
@@ -3652,7 +3653,7 @@ int CrushWrapper::_choose_type_stack(
                 << " at " << *i
                 << " pw " << *pw
                 << dendl;
-
+  ceph_assert(root_bucket < 0);
   vector<int> cumulative_fanout(stack.size());
   int f = 1;
   for (int j = (int)stack.size() - 1; j >= 0; --j) {
@@ -3680,6 +3681,10 @@ int CrushWrapper::_choose_type_stack(
       item = get_parent_of_type(item, type);
       ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
                     << " is " << item << dendl;
+      if (!subtree_contains(root_bucket, item)) {
+        ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl;
+        continue;
+      }
       underfull_buckets[j].insert(item);
     }
   }
@@ -3840,7 +3845,7 @@ int CrushWrapper::try_remap_rule(
   set<int> used;
 
   vector<pair<int,int>> type_stack;  // (type, fan-out)
-
+  int root_bucket = 0;
   for (unsigned step = 0; step < rule->len; ++step) {
     const crush_rule_step *curstep = &rule->steps[step];
     ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
@@ -3851,6 +3856,7 @@ int CrushWrapper::try_remap_rule(
           map->buckets[-1-curstep->arg1])) {
        w.clear();
        w.push_back(curstep->arg1);
+       root_bucket = curstep->arg1;
        ldout(cct, 10) << __func__ << " take " << w << dendl;
       } else {
        ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
@@ -3868,7 +3874,7 @@ int CrushWrapper::try_remap_rule(
         if (type > 0)
          type_stack.push_back(make_pair(0, 1));
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
@@ -3890,7 +3896,7 @@ int CrushWrapper::try_remap_rule(
       ldout(cct, 10) << " emit " << w << dendl;
       if (!type_stack.empty()) {
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
index 9a2b4c5a14ca7ad929b51642585ab353603bd873..710d9008b3c2b20f8a1601925e0fff5479733a62 100644 (file)
@@ -1543,7 +1543,8 @@ public:
     const vector<int>& orig,
     vector<int>::const_iterator& i,
     set<int>& used,
-    vector<int> *pw) const;
+    vector<int> *pw,
+    int root_bucket) const;
 
   int try_remap_rule(
     CephContext *cct,