From: huangjun Date: Wed, 20 Mar 2019 08:44:02 +0000 (+0800) Subject: crush: add root_bucket to identify underfull buckets X-Git-Tag: v12.2.12~24^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5c173a0e65dc9eaca6e95b600270eaf2394b48eb;p=ceph.git crush: add root_bucket to identify underfull buckets All underfull buckets under root_buckets will be taken as target For the crule rule: step take datacenter0 step chooseleaf firstn 2 type host step emit step take datacenter1 step chooseleaf firstn 2 type host step emit If one host contains overfull osd but no underfull osd, it will use other underfull buckets as target, which maybe not in the same datacenter, that will broke the rule. Fixes: http://tracker.ceph.com/issues/38826 Signed-off-by: huangjun (cherry picked from commit 3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe) --- diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index d3c2a7af48ca..7173697d4159 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -3675,7 +3675,8 @@ int CrushWrapper::_choose_type_stack( const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const + vector *pw, + int root_bucket) const { vector w = *pw; vector o; @@ -3685,7 +3686,7 @@ int CrushWrapper::_choose_type_stack( << " at " << *i << " pw " << *pw << dendl; - + ceph_assert(root_bucket < 0); vector cumulative_fanout(stack.size()); int f = 1; for (int j = (int)stack.size() - 1; j >= 0; --j) { @@ -3713,6 +3714,10 @@ int CrushWrapper::_choose_type_stack( item = get_parent_of_type(item, type); ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type << " is " << item << dendl; + if (!subtree_contains(root_bucket, item)) { + ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl; + continue; + } underfull_buckets[j].insert(item); } } @@ -3872,7 +3877,7 @@ int CrushWrapper::try_remap_rule( set used; vector> type_stack; // (type, fan-out) - + int root_bucket = 0; for (unsigned step = 0; step < rule->len; ++step) { const crush_rule_step *curstep = &rule->steps[step]; ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl; @@ -3883,6 +3888,7 @@ int CrushWrapper::try_remap_rule( map->buckets[-1-curstep->arg1])) { w.clear(); w.push_back(curstep->arg1); + root_bucket = curstep->arg1; ldout(cct, 10) << __func__ << " take " << w << dendl; } else { ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl; @@ -3900,7 +3906,7 @@ int CrushWrapper::try_remap_rule( if (type > 0) type_stack.push_back(make_pair(0, 1)); int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); @@ -3922,7 +3928,7 @@ int CrushWrapper::try_remap_rule( ldout(cct, 10) << " emit " << w << dendl; if (!type_stack.empty()) { int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 39d74fad0f77..d9f3ef31d524 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1532,7 +1532,8 @@ public: const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const; + vector *pw, + int root_bucket) const; int try_remap_rule( CephContext *cct,