From 95b2281f9d7bb76ac6342998ff494d04421a7f95 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 8 Jul 2018 11:00:12 -0500 Subject: [PATCH] mon/OSDMonitor: add 'pg repeer <pgid>' command Selectively force peering on a single PG. In reality this probably induces *2* interval changes. Note that in the case of a single-OSD cluster we can't actually force a repeer on a single PG because the pg_temp code is pretty robust about filtering out redundant or meaningless changes, so we can't pg_temp our way into a new interval if there are no other OSDs to switch to and the code also prevents an empty pg_temp. Signed-off-by: Sage Weil --- qa/workunits/cephtool/test.sh | 3 +++ src/mon/MonCommands.h | 2 ++ src/mon/Monitor.cc | 4 +++- src/mon/OSDMonitor.cc | 45 +++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index ac181ad3c3eb..b7209de0ed93 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -1950,6 +1950,9 @@ function test_mon_pg() expect_false ceph osd pg-temp 1.0 asdf ceph osd pg-temp 1.0 # cleanup pg-temp + ceph pg repeer 1.0 + expect_false ceph pg repeer 0.0 # pool 0 shouldn't exist anymore + # don't test ceph osd primary-temp for now } diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 0f9d1c2cafe1..eb9e55cba8e5 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -122,6 +122,8 @@ COMMAND("pg map name=pgid,type=CephPgid", "show mapping of pg to osds", \ "pg", "r", "cli,rest") +COMMAND("pg repeer name=pgid,type=CephPgid", "force a PG to repeer", + "osd", "rw", "cli,rest") COMMAND("osd last-stat-seq name=id,type=CephOsdName", \ "get the last pg stats sequence number reported for this osd", \ "osd", "r", "cli,rest") diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 12518f34c892..2522e0eb3846 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3126,7 +3126,9 @@ void 
Monitor::handle_command(MonOpRequestRef op) mdsmon()->dispatch(op); return; } - if ((module == "osd" || prefix == "pg map") && + if ((module == "osd" || + prefix == "pg map" || + prefix == "pg repeer") && prefix != "osd last-stat-seq") { osdmon()->dispatch(op); return; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 5ef6d7e0bb6d..dfc91dc69204 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -10158,6 +10158,51 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, pending_inc.new_primary_temp[pgid] = osd; ss << "set " << pgid << " primary_temp mapping to " << osd; goto update; + } else if (prefix == "pg repeer") { + pg_t pgid; + string pgidstr; + cmd_getval(cct, cmdmap, "pgid", pgidstr); + if (!pgid.parse(pgidstr.c_str())) { + ss << "invalid pgid '" << pgidstr << "'"; + err = -EINVAL; + goto reply; + } + if (!osdmap.pg_exists(pgid)) { + ss << "pg '" << pgidstr << "' does not exist"; + err = -ENOENT; + goto reply; + } + vector acting; + int primary; + osdmap.pg_to_acting_osds(pgid, &acting, &primary); + if (primary < 0) { + err = -EAGAIN; + ss << "pg currently has no primary"; + goto reply; + } + if (acting.size() > 1) { + // map to just primary; it will map back to what it wants + pending_inc.new_pg_temp[pgid] = { primary }; + } else { + // hmm, pick another arbitrary osd to induce a change. Note + // that this won't work if there is only one suitable OSD in the cluster. + int i; + bool done = false; + for (i = 0; i < osdmap.get_max_osd(); ++i) { + if (i == primary || !osdmap.is_up(i) || !osdmap.exists(i)) { + continue; + } + pending_inc.new_pg_temp[pgid] = { primary, i }; + done = true; + break; + } + if (!done) { + err = -EAGAIN; + ss << "not enough up OSDs in the cluster to force repeer"; + goto reply; + } + } + goto update; } else if (prefix == "osd pg-upmap" || prefix == "osd rm-pg-upmap" || prefix == "osd pg-upmap-items" || -- 2.47.3