From: Bill Scales Date: Fri, 23 May 2025 09:45:46 +0000 (+0100) Subject: osd: EC Optimizations OSDMap::clean_temps preventing change of primary X-Git-Tag: v20.1.0~68^2~36 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5cf52ec6cd3249e14d60fc7e4b9f9c036bf492c6;p=ceph-ci.git osd: EC Optimizations OSDMap::clean_temps preventing change of primary clean_temps clears pg_temp if the acting set will be the same as the up set. For optimized EC pools this is over-aggressive: there are scenarios where the acting set is deliberately set to be the same as the up set in order to force an alternative shard to be chosen as primary. This happens because the acting set is transformed to place non-primary shards at the end of the pg_temp vector. Detect this scenario and stop clean_temps from undoing the acting set that was set by PeeringState::choose_acting. Signed-off-by: Bill Scales (cherry picked from commit 9d9265337a43b3edab8a3c41752baaca835be92a) --- diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index e59b356cf98..ee95ee22fcb 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -2029,9 +2029,25 @@ void OSDMap::clean_temps(CephContext *cct, const pg_pool_t *pool = nextmap.get_pg_pool(pg.first.pool()); auto acting_set = nextmap.pgtemp_undo_primaryfirst(*pool, pg.first, pg.second); if (raw_up == acting_set) { - ldout(cct, 10) << __func__ << " removing pg_temp " << pg.first << " " - << pg.second << " that matches raw_up mapping" << dendl; - remove = true; + bool keep = false; + // Optimized EC pools may set acting to be the same as up to + // force a change of primary shard - do not remove pg_temp + // if it is being used for this purpose + if (pool->allows_ecoptimizations()) { + for (uint8_t i = 0; i < acting_set.size(); ++i) { + if (acting_set[i] == primary) { + if (pool->is_nonprimary_shard(shard_id_t(i))) { + // pg_temp still required + keep = true; + } + } + } + } + if (!keep) { + ldout(cct, 10) << __func__ << " removing pg_temp " << 
pg.first << " " + << pg.second << " that matches raw_up mapping" << dendl; + remove = true; + } } // oversized pg_temp? if (pg.second.size() > nextmap.get_pg_pool(pg.first.pool())->get_size()) {