From 3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe Mon Sep 17 00:00:00 2001 From: huangjun Date: Wed, 20 Mar 2019 16:44:02 +0800 Subject: [PATCH] crush: add root_bucket to identify underfull buckets All underfull buckets under root_buckets will be taken as target For the crule rule: step take datacenter0 step chooseleaf firstn 2 type host step emit step take datacenter1 step chooseleaf firstn 2 type host step emit If one host contains overfull osd but no underfull osd, it will use other underfull buckets as target, which maybe not in the same datacenter, that will broke the rule. Fixes: http://tracker.ceph.com/issues/38826 Signed-off-by: huangjun --- src/crush/CrushWrapper.cc | 16 +++++++++++----- src/crush/CrushWrapper.h | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index fa6a1ff4a80..dca7f5a8f5b 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -3747,7 +3747,8 @@ int CrushWrapper::_choose_type_stack( const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const + vector *pw, + int root_bucket) const { vector w = *pw; vector o; @@ -3757,7 +3758,7 @@ int CrushWrapper::_choose_type_stack( << " at " << *i << " pw " << *pw << dendl; - + ceph_assert(root_bucket < 0); vector cumulative_fanout(stack.size()); int f = 1; for (int j = (int)stack.size() - 1; j >= 0; --j) { @@ -3785,6 +3786,10 @@ int CrushWrapper::_choose_type_stack( item = get_parent_of_type(item, type); ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type << " is " << item << dendl; + if (!subtree_contains(root_bucket, item)) { + ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl; + continue; + } underfull_buckets[j].insert(item); } } @@ -3945,7 +3950,7 @@ int CrushWrapper::try_remap_rule( set used; vector> type_stack; // (type, fan-out) - + int root_bucket = 0; for (unsigned step = 0; step < rule->len; ++step) { const crush_rule_step *curstep = &rule->steps[step]; ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl; @@ -3956,6 +3961,7 @@ int CrushWrapper::try_remap_rule( map->buckets[-1-curstep->arg1])) { w.clear(); w.push_back(curstep->arg1); + root_bucket = curstep->arg1; ldout(cct, 10) << __func__ << " take " << w << dendl; } else { ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl; @@ -3973,7 +3979,7 @@ int CrushWrapper::try_remap_rule( if (type > 0) type_stack.push_back(make_pair(0, 1)); int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); @@ -3995,7 +4001,7 @@ int CrushWrapper::try_remap_rule( ldout(cct, 10) << " emit " << w << dendl; if (!type_stack.empty()) { int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 9e070419b97..36ca2b566b9 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1556,7 +1556,8 @@ public: const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const; + vector *pw, + int root_bucket) const; int try_remap_rule( CephContext *cct, -- 2.39.5