git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crush: add root_bucket to identify underfull buckets
author huangjun <huangjun@xsky.com>
Wed, 20 Mar 2019 08:44:02 +0000 (16:44 +0800)
committer xie xingguo <xie.xingguo@zte.com.cn>
Thu, 28 Mar 2019 02:09:14 +0000 (10:09 +0800)
All underfull buckets under root_bucket will be taken as targets

For the crush rule:
    step take datacenter0
    step chooseleaf firstn 2 type host
    step emit
    step take datacenter1
    step chooseleaf firstn 2 type host
    step emit

If one host contains an overfull osd but no underfull osd,
it will use other underfull buckets as targets, which
may not be in the same datacenter; that will
break the rule.

Fixes: http://tracker.ceph.com/issues/38826
Signed-off-by: huangjun <huangjun@xsky.com>
(cherry picked from commit 3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe)

src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h

index 5ac5849574e39c68daa03390e7c02b956a005fdf..ec1716decd83cdf201a04f0de1fc364d26441dc9 100644 (file)
@@ -3788,7 +3788,8 @@ int CrushWrapper::_choose_type_stack(
   const vector<int>& orig,
   vector<int>::const_iterator& i,
   set<int>& used,
-  vector<int> *pw) const
+  vector<int> *pw,
+  int root_bucket) const
 {
   vector<int> w = *pw;
   vector<int> o;
@@ -3798,7 +3799,7 @@ int CrushWrapper::_choose_type_stack(
                 << " at " << *i
                 << " pw " << *pw
                 << dendl;
-
+  ceph_assert(root_bucket < 0);
   vector<int> cumulative_fanout(stack.size());
   int f = 1;
   for (int j = (int)stack.size() - 1; j >= 0; --j) {
@@ -3826,6 +3827,10 @@ int CrushWrapper::_choose_type_stack(
       item = get_parent_of_type(item, type);
       ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
                     << " is " << item << dendl;
+      if (!subtree_contains(root_bucket, item)) {
+        ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl;
+        continue;
+      }
       underfull_buckets[j].insert(item);
     }
   }
@@ -3986,7 +3991,7 @@ int CrushWrapper::try_remap_rule(
   set<int> used;
 
   vector<pair<int,int>> type_stack;  // (type, fan-out)
-
+  int root_bucket = 0;
   for (unsigned step = 0; step < rule->len; ++step) {
     const crush_rule_step *curstep = &rule->steps[step];
     ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
@@ -3997,6 +4002,7 @@ int CrushWrapper::try_remap_rule(
           map->buckets[-1-curstep->arg1])) {
        w.clear();
        w.push_back(curstep->arg1);
+       root_bucket = curstep->arg1;
        ldout(cct, 10) << __func__ << " take " << w << dendl;
       } else {
        ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
@@ -4014,7 +4020,7 @@ int CrushWrapper::try_remap_rule(
         if (type > 0)
          type_stack.push_back(make_pair(0, 1));
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
@@ -4036,7 +4042,7 @@ int CrushWrapper::try_remap_rule(
       ldout(cct, 10) << " emit " << w << dendl;
       if (!type_stack.empty()) {
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
index 9a3c12a842208c4b73e275c981a6a2bcc9b27a20..5abd0f42d6311139ef17f6bb57114ef03812eaea 100644 (file)
@@ -1577,7 +1577,8 @@ public:
     const vector<int>& orig,
     vector<int>::const_iterator& i,
     set<int>& used,
-    vector<int> *pw) const;
+    vector<int> *pw,
+    int root_bucket) const;
 
   int try_remap_rule(
     CephContext *cct,