From: huangjun Date: Wed, 20 Mar 2019 08:44:02 +0000 (+0800) Subject: crush: add root_bucket to identify underfull buckets X-Git-Tag: v15.0.0~146^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe;p=ceph.git crush: add root_bucket to identify underfull buckets All underfull buckets under root_bucket will be taken as targets. For the crush rule: step take datacenter0 step chooseleaf firstn 2 type host step emit step take datacenter1 step chooseleaf firstn 2 type host step emit If one host contains an overfull osd but no underfull osd, it will use other underfull buckets as targets, which may not be in the same datacenter; that would break the rule. Fixes: http://tracker.ceph.com/issues/38826 Signed-off-by: huangjun --- diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index fa6a1ff4a803b..dca7f5a8f5bdf 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -3747,7 +3747,8 @@ int CrushWrapper::_choose_type_stack( const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const + vector *pw, + int root_bucket) const { vector w = *pw; vector o; @@ -3757,7 +3758,7 @@ int CrushWrapper::_choose_type_stack( << " at " << *i << " pw " << *pw << dendl; - + ceph_assert(root_bucket < 0); vector cumulative_fanout(stack.size()); int f = 1; for (int j = (int)stack.size() - 1; j >= 0; --j) { @@ -3785,6 +3786,10 @@ int CrushWrapper::_choose_type_stack( item = get_parent_of_type(item, type); ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type << " is " << item << dendl; + if (!subtree_contains(root_bucket, item)) { + ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl; + continue; + } underfull_buckets[j].insert(item); } } @@ -3945,7 +3950,7 @@ int CrushWrapper::try_remap_rule( set used; vector> type_stack; // (type, fan-out) - + int root_bucket = 0; for (unsigned step = 0; step < rule->len; ++step) { const crush_rule_step 
*curstep = &rule->steps[step]; ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl; @@ -3956,6 +3961,7 @@ int CrushWrapper::try_remap_rule( map->buckets[-1-curstep->arg1])) { w.clear(); w.push_back(curstep->arg1); + root_bucket = curstep->arg1; ldout(cct, 10) << __func__ << " take " << w << dendl; } else { ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl; @@ -3973,7 +3979,7 @@ int CrushWrapper::try_remap_rule( if (type > 0) type_stack.push_back(make_pair(0, 1)); int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); @@ -3995,7 +4001,7 @@ int CrushWrapper::try_remap_rule( ldout(cct, 10) << " emit " << w << dendl; if (!type_stack.empty()) { int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 9e070419b970a..36ca2b566b930 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1556,7 +1556,8 @@ public: const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const; + vector *pw, + int root_bucket) const; int try_remap_rule( CephContext *cct,