From 09af9b8afb40cc8aa629501582a75e03edf0bf2e Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Wed, 12 Jul 2017 11:15:13 +0800 Subject: [PATCH] osd/OSDMap: allow bidirectional swap of pg-upmap-items This is useful when we also want an even distribution of pg primaries across osds. For example: Was: [0 1 2] By applying bidirectional swap of pg-upmap-items mapping [[0,1],[1,0]], now: [1 0 2] Thus we successfully decrease the number of primaries of osd.0 by 1 without affecting the current (even) distribution of global pgs. Real example: ./bin/ceph pg ls-by-pool rbd PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP 3.0 0 0 0 0 0 0 0 0 active+clean 2017-07-12 15:14:45.083441 0'0 29:13 [0,1,3] 0 [0,1,3] 0 0'0 2017-07-12 15:14:14.515989 0'0 2017-07-12 15:14:14.515989 ./bin/ceph osd pg-upmap-items 3.0 0 1 1 0 3 5 set 3.0 pg_upmap_items mapping to [0->1,1->0,3->5] ./bin/ceph pg ls-by-pool rbd PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP 3.0 0 0 0 0 0 0 0 0 active+clean 2017-07-12 15:16:22.648424 0'0 33:13 [1,0,5] 1 [1,0,5] 1 0'0 2017-07-12 15:14:14.515989 0'0 2017-07-12 15:14:14.515989 Signed-off-by: xie xingguo --- src/osd/OSDMap.cc | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index ea43ae77322..42d896b68ae 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1936,28 +1936,17 @@ void OSDMap::_apply_upmap(const pg_pool_t& pi, pg_t raw_pg, vector *raw) co auto q = pg_upmap_items.find(pg); if (q != pg_upmap_items.end()) { - // NOTE: this approach does not allow a bidirectional swap, - // e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. 
- for (auto& r : q->second) { - // make sure the replacement value doesn't already appear - bool exists = false; - ssize_t pos = -1; - for (unsigned i = 0; i < raw->size(); ++i) { - int osd = (*raw)[i]; - if (osd == r.second) { - exists = true; - break; - } - // ignore mapping if target is marked out (or invalid osd id) - if (osd == r.first && - pos < 0 && - !(r.second != CRUSH_ITEM_NONE && r.second < max_osd && - osd_weight[r.second] == 0)) { - pos = i; - } - } - if (!exists && pos >= 0) { - (*raw)[pos] = r.second; + for (auto& i : *raw) { + for (auto& r : q->second) { + if (r.first != i) { + continue; + } + if (!(r.second != CRUSH_ITEM_NONE && + r.second < max_osd && + osd_weight[r.second] == 0)) { + i = r.second; + } + break; } } } -- 2.39.5