From 94168dcfa32f9c1d95e25c0cf228781b69c900a0 Mon Sep 17 00:00:00 2001 From: huangjun Date: Wed, 20 Mar 2019 16:44:02 +0800 Subject: [PATCH] crush: add root_bucket to identify underfull buckets All underfull buckets under root_buckets will be taken as target For the crule rule: step take datacenter0 step chooseleaf firstn 2 type host step emit step take datacenter1 step chooseleaf firstn 2 type host step emit If one host contains overfull osd but no underfull osd, it will use other underfull buckets as target, which maybe not in the same datacenter, that will broke the rule. Fixes: http://tracker.ceph.com/issues/38826 Signed-off-by: huangjun (cherry picked from commit 3d5678d3561d90a10d9de3cb6e7e0405dbe8fdfe) --- src/crush/CrushWrapper.cc | 16 +++++++++++----- src/crush/CrushWrapper.h | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index f09aaa52bfe50..a40027a5807d8 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -3642,7 +3642,8 @@ int CrushWrapper::_choose_type_stack( const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const + vector *pw, + int root_bucket) const { vector w = *pw; vector o; @@ -3652,7 +3653,7 @@ int CrushWrapper::_choose_type_stack( << " at " << *i << " pw " << *pw << dendl; - + ceph_assert(root_bucket < 0); vector cumulative_fanout(stack.size()); int f = 1; for (int j = (int)stack.size() - 1; j >= 0; --j) { @@ -3680,6 +3681,10 @@ int CrushWrapper::_choose_type_stack( item = get_parent_of_type(item, type); ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type << " is " << item << dendl; + if (!subtree_contains(root_bucket, item)) { + ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl; + continue; + } underfull_buckets[j].insert(item); } } @@ -3840,7 +3845,7 @@ int CrushWrapper::try_remap_rule( set used; vector> type_stack; // (type, fan-out) - + int root_bucket = 0; for (unsigned step = 0; step < rule->len; ++step) { const crush_rule_step *curstep = &rule->steps[step]; ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl; @@ -3851,6 +3856,7 @@ int CrushWrapper::try_remap_rule( map->buckets[-1-curstep->arg1])) { w.clear(); w.push_back(curstep->arg1); + root_bucket = curstep->arg1; ldout(cct, 10) << __func__ << " take " << w << dendl; } else { ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl; @@ -3868,7 +3874,7 @@ int CrushWrapper::try_remap_rule( if (type > 0) type_stack.push_back(make_pair(0, 1)); int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); @@ -3890,7 +3896,7 @@ int CrushWrapper::try_remap_rule( ldout(cct, 10) << " emit " << w << dendl; if (!type_stack.empty()) { int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); + i, used, &w, root_bucket); if (r < 0) return r; type_stack.clear(); diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 9a2b4c5a14ca7..710d9008b3c2b 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1543,7 +1543,8 @@ public: const vector& orig, vector::const_iterator& i, set& used, - vector *pw) const; + vector *pw, + int root_bucket) const; int try_remap_rule( CephContext *cct, -- 2.39.5