From 00043d24199b06aeb6135736defd0891ca67071a Mon Sep 17 00:00:00 2001 From: Laura Flores Date: Fri, 7 Mar 2025 06:22:00 +0000 Subject: [PATCH] mon, osd: add command to remove invalid pg-upmap-primary entries The current rm-pg-upmap-primary command checks that the pgid exists in the pgmap before continuing to remove it. Due to https://tracker.ceph.com/issues/66867, some invalid pg-upmap-primary entries may exist for pools that have been removed. Currently, these mappings are impossible to remove since the pgids no longer exist in the pgmap. This new command, rm-pg-upmap-primary-all, gives users the ability to remove any and all pg-upmap-primary mappings in the osdmap at once, which includes valid and invalid entries. This command may also be helpful when upgrading from versions where users are plagued by https://tracker.ceph.com/issues/61948. Users may use an upgraded mon to remove all pg-upmap-primary entries (valid and invalid) so they can continue to upgrade to a safe version. See manual testing for this patch here: https://tracker.ceph.com/issues/67179#note-12 Fixes: https://tracker.ceph.com/issues/67179 Fixes: https://tracker.ceph.com/issues/69760 Signed-off-by: Laura Flores (cherry picked from commit 6e9e2033bf0f4779bdfac9a3a4f29115459c8c0e) Conflicts: src/osd/OSDMap.cc src/osd/OSDMap.h The `rm_all_upmap_prims` per pool function is part of https://github.com/ceph/ceph/commit/2953db8b58535605882dff2e1d4ff36e6075e122, which is related to the "size optimized" read balancer feature that is only included in releases >= Squid.
--- src/mon/MonCommands.h | 3 +++ src/mon/OSDMonitor.cc | 52 ++++++++++++++++++++++++++++++++++--------- src/osd/OSDMap.cc | 14 ++++++++++++ src/osd/OSDMap.h | 4 ++++ 4 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 461bd85623c87..662527de10440 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1017,6 +1017,9 @@ COMMAND("osd rm-pg-upmap-primary " "name=pgid,type=CephPgid ", "clear pg primary setting for ", "osd", "rw") +COMMAND("osd rm-pg-upmap-primary-all ", + "clear all pg primary entries (developers only)", + "osd", "rw") COMMAND("osd primary-temp " "name=pgid,type=CephPgid " "name=id,type=CephOsdName", diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index f0bb6ba1a78bb..6153fb3200cc9 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -12235,7 +12235,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, prefix == "osd pg-upmap-items" || prefix == "osd rm-pg-upmap-items" || prefix == "osd pg-upmap-primary" || - prefix == "osd rm-pg-upmap-primary") { + prefix == "osd rm-pg-upmap-primary" || + prefix == "osd rm-pg-upmap-primary-all") { enum { OP_PG_UPMAP, OP_RM_PG_UPMAP, @@ -12243,6 +12244,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, OP_RM_PG_UPMAP_ITEMS, OP_PG_UPMAP_PRIMARY, OP_RM_PG_UPMAP_PRIMARY, + OP_RM_PG_UPMAP_PRIMARY_ALL, } upmap_option; if (prefix == "osd pg-upmap") { @@ -12257,6 +12259,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, upmap_option = OP_PG_UPMAP_PRIMARY; } else if (prefix == "osd rm-pg-upmap-primary") { upmap_option = OP_RM_PG_UPMAP_PRIMARY; + } else if (prefix == "osd rm-pg-upmap-primary-all") { + upmap_option = OP_RM_PG_UPMAP_PRIMARY_ALL; } else { ceph_abort_msg("invalid upmap option"); } @@ -12276,6 +12280,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, case OP_PG_UPMAP_PRIMARY: // fall through case OP_RM_PG_UPMAP_PRIMARY: + case OP_RM_PG_UPMAP_PRIMARY_ALL: min_release = 
ceph_release_t::reef; min_feature = CEPH_FEATUREMASK_SERVER_REEF; feature_name = "pg-upmap-primary"; @@ -12301,17 +12306,33 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto wait; if (err < 0) goto reply_no_propose; + pg_t pgid; - err = parse_pgid(cmdmap, ss, pgid); - if (err < 0) - goto reply_no_propose; - if (pending_inc.old_pools.count(pgid.pool())) { - ss << "pool of " << pgid << " is pending removal"; - err = -ENOENT; - getline(ss, rs); - wait_for_commit(op, - new Monitor::C_Command(mon, op, err, rs, get_last_committed() + 1)); - return true; + switch (upmap_option) { + case OP_RM_PG_UPMAP_PRIMARY_ALL: // no pgid to check + break; + + case OP_PG_UPMAP: + case OP_RM_PG_UPMAP: + case OP_PG_UPMAP_ITEMS: + case OP_RM_PG_UPMAP_ITEMS: + case OP_PG_UPMAP_PRIMARY: + case OP_RM_PG_UPMAP_PRIMARY: + err = parse_pgid(cmdmap, ss, pgid); + if (err < 0) + goto reply_no_propose; + if (pending_inc.old_pools.count(pgid.pool())) { + ss << "pool of " << pgid << " is pending removal"; + err = -ENOENT; + getline(ss, rs); + wait_for_commit(op, + new Monitor::C_Command(mon, op, err, rs, get_last_committed() + 1)); + return true; + } + break; + + default: + ceph_abort_msg("invalid upmap option"); } // check pending upmap changes @@ -12346,6 +12367,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto wait; } break; + case OP_RM_PG_UPMAP_PRIMARY_ALL: // nothing to check + break; default: ceph_abort_msg("invalid upmap option"); @@ -12551,6 +12574,13 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } break; + case OP_RM_PG_UPMAP_PRIMARY_ALL: + { + osdmap.rm_all_upmap_prims(cct, &pending_inc); + ss << "cleared all pg_upmap_primary mappings"; + } + break; + default: ceph_abort_msg("invalid upmap option"); } diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 0ef8ad1fe5724..6a8c316558198 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -5158,6 +5158,20 @@ int OSDMap::balance_primaries( return num_changes; } +void
OSDMap::rm_all_upmap_prims( + CephContext *cct, + OSDMap::Incremental *pending_inc) +{ + for (const auto& [pg, _] : pg_upmap_primaries) { + if (pending_inc->new_pg_upmap_primary.contains(pg)) { + ldout(cct, 30) << __func__ << " Removing pending pg_upmap_prim for pg " << pg << dendl; + pending_inc->new_pg_upmap_primary.erase(pg); + } + ldout(cct, 30) << __func__ << " Removing pg_upmap_prim for pg " << pg << dendl; + pending_inc->old_pg_upmap_primary.insert(pg); + } +} + int OSDMap::calc_desired_primary_distribution( CephContext *cct, int64_t pid, diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 065ae60b099b6..1651b4de78ca2 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -1480,6 +1480,10 @@ public: Incremental *pending_inc, OSDMap& tmp_osd_map) const; + void rm_all_upmap_prims( + CephContext *cct, + OSDMap::Incremental *pending_inc); // queues removal of every pg_upmap_primary entry in this map (all pools) + int calc_desired_primary_distribution( CephContext *cct, int64_t pid, // pool id -- 2.39.5