]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crush: add root_bucket to identify underfull buckets 27068/head
authorhuangjun <huangjun@xsky.com>
Wed, 20 Mar 2019 08:44:02 +0000 (16:44 +0800)
committerhuangjun <huangjun@xsky.com>
Wed, 20 Mar 2019 08:44:02 +0000 (16:44 +0800)
All underfull buckets under root_buckets will be taken as target

For the crule rule:
    step take datacenter0
    step chooseleaf firstn 2 type host
    step emit
    step take datacenter1
    step chooseleaf firstn 2 type host
    step emit

If one host contains overfull osd but no underfull osd,
it will use other underfull buckets as target, which
maybe not in the same datacenter, that will
broke the rule.

Fixes: http://tracker.ceph.com/issues/38826
Signed-off-by: huangjun <huangjun@xsky.com>
src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h

index fa6a1ff4a803b1c62ce145eda234dfab4ea49b00..dca7f5a8f5bdf88b37ebe288be094866465bf271 100644 (file)
@@ -3747,7 +3747,8 @@ int CrushWrapper::_choose_type_stack(
   const vector<int>& orig,
   vector<int>::const_iterator& i,
   set<int>& used,
-  vector<int> *pw) const
+  vector<int> *pw,
+  int root_bucket) const
 {
   vector<int> w = *pw;
   vector<int> o;
@@ -3757,7 +3758,7 @@ int CrushWrapper::_choose_type_stack(
                 << " at " << *i
                 << " pw " << *pw
                 << dendl;
-
+  ceph_assert(root_bucket < 0);
   vector<int> cumulative_fanout(stack.size());
   int f = 1;
   for (int j = (int)stack.size() - 1; j >= 0; --j) {
@@ -3785,6 +3786,10 @@ int CrushWrapper::_choose_type_stack(
       item = get_parent_of_type(item, type);
       ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
                     << " is " << item << dendl;
+      if (!subtree_contains(root_bucket, item)) {
+        ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl;
+        continue;
+      }
       underfull_buckets[j].insert(item);
     }
   }
@@ -3945,7 +3950,7 @@ int CrushWrapper::try_remap_rule(
   set<int> used;
 
   vector<pair<int,int>> type_stack;  // (type, fan-out)
-
+  int root_bucket = 0;
   for (unsigned step = 0; step < rule->len; ++step) {
     const crush_rule_step *curstep = &rule->steps[step];
     ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
@@ -3956,6 +3961,7 @@ int CrushWrapper::try_remap_rule(
           map->buckets[-1-curstep->arg1])) {
        w.clear();
        w.push_back(curstep->arg1);
+       root_bucket = curstep->arg1;
        ldout(cct, 10) << __func__ << " take " << w << dendl;
       } else {
        ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
@@ -3973,7 +3979,7 @@ int CrushWrapper::try_remap_rule(
         if (type > 0)
          type_stack.push_back(make_pair(0, 1));
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
@@ -3995,7 +4001,7 @@ int CrushWrapper::try_remap_rule(
       ldout(cct, 10) << " emit " << w << dendl;
       if (!type_stack.empty()) {
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
index 9e070419b970a4514ccd44d351e008103456c7bf..36ca2b566b9308a6ca89222519dc44735858258a 100644 (file)
@@ -1556,7 +1556,8 @@ public:
     const vector<int>& orig,
     vector<int>::const_iterator& i,
     set<int>& used,
-    vector<int> *pw) const;
+    vector<int> *pw,
+    int root_bucket) const;
 
   int try_remap_rule(
     CephContext *cct,