git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crush: add root_bucket to identify underfull buckets
author huangjun <huangjun@xsky.com>
Wed, 20 Mar 2019 08:44:02 +0000 (16:44 +0800)
committer xie xingguo <xie.xingguo@zte.com.cn>
Thu, 28 Mar 2019 01:48:04 +0000 (09:48 +0800)
All underfull buckets under root_bucket will be taken as targets.

For the crush rule:
    step take datacenter0
    step chooseleaf firstn 2 type host
    step emit
    step take datacenter1
    step chooseleaf firstn 2 type host
    step emit

If a host contains an overfull OSD but no underfull OSD,
it will use other underfull buckets as targets, which
may not be in the same datacenter; that would
break the rule.

Fixes: http://tracker.ceph.com/issues/38826
Signed-off-by: huangjun <huangjun@xsky.com>
(cherry picked from commit 3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe)

src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h

index d3c2a7af48ca338a1920166ce778c949a8f22422..7173697d4159cc2f311df836b24357174b785d8d 100644 (file)
@@ -3675,7 +3675,8 @@ int CrushWrapper::_choose_type_stack(
   const vector<int>& orig,
   vector<int>::const_iterator& i,
   set<int>& used,
-  vector<int> *pw) const
+  vector<int> *pw,
+  int root_bucket) const
 {
   vector<int> w = *pw;
   vector<int> o;
@@ -3685,7 +3686,7 @@ int CrushWrapper::_choose_type_stack(
                 << " at " << *i
                 << " pw " << *pw
                 << dendl;
-
+  ceph_assert(root_bucket < 0);
   vector<int> cumulative_fanout(stack.size());
   int f = 1;
   for (int j = (int)stack.size() - 1; j >= 0; --j) {
@@ -3713,6 +3714,10 @@ int CrushWrapper::_choose_type_stack(
       item = get_parent_of_type(item, type);
       ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
                     << " is " << item << dendl;
+      if (!subtree_contains(root_bucket, item)) {
+        ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl;
+        continue;
+      }
       underfull_buckets[j].insert(item);
     }
   }
@@ -3872,7 +3877,7 @@ int CrushWrapper::try_remap_rule(
   set<int> used;
 
   vector<pair<int,int>> type_stack;  // (type, fan-out)
-
+  int root_bucket = 0;
   for (unsigned step = 0; step < rule->len; ++step) {
     const crush_rule_step *curstep = &rule->steps[step];
     ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
@@ -3883,6 +3888,7 @@ int CrushWrapper::try_remap_rule(
           map->buckets[-1-curstep->arg1])) {
        w.clear();
        w.push_back(curstep->arg1);
+       root_bucket = curstep->arg1;
        ldout(cct, 10) << __func__ << " take " << w << dendl;
       } else {
        ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
@@ -3900,7 +3906,7 @@ int CrushWrapper::try_remap_rule(
         if (type > 0)
          type_stack.push_back(make_pair(0, 1));
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
@@ -3922,7 +3928,7 @@ int CrushWrapper::try_remap_rule(
       ldout(cct, 10) << " emit " << w << dendl;
       if (!type_stack.empty()) {
        int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
-                                  i, used, &w);
+                                  i, used, &w, root_bucket);
        if (r < 0)
          return r;
        type_stack.clear();
index 39d74fad0f77fa26e7f0a9f009f670ef3b11b694..d9f3ef31d524c127d49f7f599b7efe97e12716e5 100644 (file)
@@ -1532,7 +1532,8 @@ public:
     const vector<int>& orig,
     vector<int>::const_iterator& i,
     set<int>& used,
-    vector<int> *pw) const;
+    vector<int> *pw,
+    int root_bucket) const;
 
   int try_remap_rule(
     CephContext *cct,