From a6aa42f793e679272d4f4f2ed53c2b64366f5af6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Mar 2017 11:59:07 -0400 Subject: [PATCH] mon/OSDMonitor: add 'osd [rm-]pg-remap[-items] ...' commands Add commands to add and remove pg_remap mappings. Require that a mon config option is set before this is allowed so that users don't inadvertently prevent older clients from interacting with the cluster. Signed-off-by: Sage Weil --- src/common/config_opts.h | 1 + src/mon/MonCommands.h | 19 ++++ src/mon/OSDMonitor.cc | 200 +++++++++++++++++++++++++++++++++++++++ src/vstart.sh | 1 + 4 files changed, 221 insertions(+) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 5dd068089009a..29c44aca40bd0 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -282,6 +282,7 @@ OPTION(mon_osd_max_op_age, OPT_DOUBLE, 32) // max op age before we get conce OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap +OPTION(mon_osd_allow_pg_remap, OPT_BOOL, false) // allow pg remap to be set in the osdmap OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 7589054bfa996..dbe80a46753d0 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -676,6 +676,25 @@ COMMAND("osd pg-temp " \ "name=id,type=CephOsdName,n=N,req=false", \ "set pg_temp mapping pgid:[ [...]] (developers only)", \ "osd", "rw", "cli,rest") +COMMAND("osd pg-remap " \ + "name=pgid,type=CephPgid " \ + 
"name=id,type=CephOsdName,n=N", \ + "set pg_remap mapping :[ [...]] primary (developers only)", \ + "osd", "rw", "cli,rest") +COMMAND("osd rm-pg-remap " \ + "name=pgid,type=CephPgid", \ + "clear pg_remap mapping for (developers only)", \ + "osd", "rw", "cli,rest") + +COMMAND("osd pg-remap-items " \ + "name=pgid,type=CephPgid " \ + "name=id,type=CephOsdName,n=N", \ + "set pg_remap_items mapping :{ to , [...]} (developers only)", \ + "osd", "rw", "cli,rest") +COMMAND("osd rm-pg-remap-items " \ + "name=pgid,type=CephPgid", \ + "clear pg_remap_items mapping for (developers only)", \ + "osd", "rw", "cli,rest") COMMAND("osd primary-temp " \ "name=pgid,type=CephPgid " \ "name=id,type=CephOsdName", \ diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 1f6024f7d9081..d0f4cac5081f4 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -7050,6 +7050,206 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, pending_inc.new_primary_temp[pgid] = osd; ss << "set " << pgid << " primary_temp mapping to " << osd; goto update; + } else if (prefix == "osd pg-remap") { + if (!g_conf->mon_osd_allow_pg_remap) { + ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. 
note that pre-luminous clients will no longer be able to communicate with the cluster."; + err = -EPERM; + goto reply; + } + err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss); + if (err == -EAGAIN) + goto wait; + if (err < 0) + goto reply; + string pgidstr; + if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) { + ss << "unable to parse 'pgid' value '" + << cmd_vartype_stringify(cmdmap["pgid"]) << "'"; + err = -EINVAL; + goto reply; + } + pg_t pgid; + if (!pgid.parse(pgidstr.c_str())) { + ss << "invalid pgid '" << pgidstr << "'"; + err = -EINVAL; + goto reply; + } + if (!osdmap.pg_exists(pgid)) { + ss << "pg " << pgid << " does not exist"; + err = -ENOENT; + goto reply; + } + if (pending_inc.new_pg_remap.count(pgid) || + pending_inc.old_pg_remap.count(pgid)) { + dout(10) << __func__ << " waiting for pending update on " << pgid << dendl; + wait_for_finished_proposal(op, new C_RetryMessage(this, op)); + return true; + } + vector id_vec; + if (!cmd_getval(g_ceph_context, cmdmap, "id", id_vec)) { + ss << "unable to parse 'id' value(s) '" + << cmd_vartype_stringify(cmdmap["id"]) << "'"; + err = -EINVAL; + goto reply; + } + vector new_pg_remap; + for (auto osd : id_vec) { + if (osd != CRUSH_ITEM_NONE && !osdmap.exists(osd)) { + ss << "osd." << osd << " does not exist"; + err = -ENOENT; + goto reply; + } + new_pg_remap.push_back(osd); + } + + pending_inc.new_pg_remap[pgid] = new_pg_remap; + ss << "set " << pgid << " pg_remap mapping to " << new_pg_remap; + goto update; + } else if (prefix == "osd rm-pg-remap") { + if (!g_conf->mon_osd_allow_pg_remap) { + ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. 
note that pre-luminous clients will no longer be able to communicate with the cluster."; + err = -EPERM; + goto reply; + } + err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss); + if (err == -EAGAIN) + goto wait; + if (err < 0) + goto reply; + string pgidstr; + if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) { + ss << "unable to parse 'pgid' value '" + << cmd_vartype_stringify(cmdmap["pgid"]) << "'"; + err = -EINVAL; + goto reply; + } + pg_t pgid; + if (!pgid.parse(pgidstr.c_str())) { + ss << "invalid pgid '" << pgidstr << "'"; + err = -EINVAL; + goto reply; + } + if (!osdmap.pg_exists(pgid)) { + ss << "pg " << pgid << " does not exist"; + err = -ENOENT; + goto reply; + } + if (pending_inc.new_pg_remap.count(pgid) || + pending_inc.old_pg_remap.count(pgid)) { + dout(10) << __func__ << " waiting for pending update on " << pgid << dendl; + wait_for_finished_proposal(op, new C_RetryMessage(this, op)); + return true; + } + + pending_inc.old_pg_remap.insert(pgid); + ss << "clear " << pgid << " pg_remap mapping"; + goto update; + } else if (prefix == "osd pg-remap-items") { + if (!g_conf->mon_osd_allow_pg_remap) { + ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. 
note that pre-luminous clients will no longer be able to communicate with the cluster."; + err = -EPERM; + goto reply; + } + err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss); + if (err == -EAGAIN) + goto wait; + if (err < 0) + goto reply; + string pgidstr; + if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) { + ss << "unable to parse 'pgid' value '" + << cmd_vartype_stringify(cmdmap["pgid"]) << "'"; + err = -EINVAL; + goto reply; + } + pg_t pgid; + if (!pgid.parse(pgidstr.c_str())) { + ss << "invalid pgid '" << pgidstr << "'"; + err = -EINVAL; + goto reply; + } + if (!osdmap.pg_exists(pgid)) { + ss << "pg " << pgid << " does not exist"; + err = -ENOENT; + goto reply; + } + if (pending_inc.new_pg_remap_items.count(pgid) || + pending_inc.old_pg_remap_items.count(pgid)) { + dout(10) << __func__ << " waiting for pending update on " << pgid << dendl; + wait_for_finished_proposal(op, new C_RetryMessage(this, op)); + return true; + } + vector id_vec; + if (!cmd_getval(g_ceph_context, cmdmap, "id", id_vec)) { + ss << "unable to parse 'id' value(s) '" + << cmd_vartype_stringify(cmdmap["id"]) << "'"; + err = -EINVAL; + goto reply; + } + if (id_vec.size() % 2) { + ss << "you must specify pairs of osd ids to be remapped"; + err = -EINVAL; + goto reply; + } + vector> new_pg_remap_items; + for (auto p = id_vec.begin(); p != id_vec.end(); ++p) { + int from = *p++; + int to = *p; + if (!osdmap.exists(from)) { + ss << "osd." << from << " does not exist"; + err = -ENOENT; + goto reply; + } + if (to != CRUSH_ITEM_NONE && !osdmap.exists(to)) { + ss << "osd." 
<< to << " does not exist"; + err = -ENOENT; + goto reply; + } + new_pg_remap_items.push_back(make_pair(from, to)); + } + + pending_inc.new_pg_remap_items[pgid] = new_pg_remap_items; + ss << "set " << pgid << " pg_remap_items mapping to " << new_pg_remap_items; + goto update; + } else if (prefix == "osd rm-pg-remap-items") { + if (!g_conf->mon_osd_allow_pg_remap) { + ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. note that pre-luminous clients will no longer be able to communicate with the cluster."; + err = -EPERM; + goto reply; + } + err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss); + if (err == -EAGAIN) + goto wait; + if (err < 0) + goto reply; + string pgidstr; + if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) { + ss << "unable to parse 'pgid' value '" + << cmd_vartype_stringify(cmdmap["pgid"]) << "'"; + err = -EINVAL; + goto reply; + } + pg_t pgid; + if (!pgid.parse(pgidstr.c_str())) { + ss << "invalid pgid '" << pgidstr << "'"; + err = -EINVAL; + goto reply; + } + if (!osdmap.pg_exists(pgid)) { + ss << "pg " << pgid << " does not exist"; + err = -ENOENT; + goto reply; + } + if (pending_inc.new_pg_remap_items.count(pgid) || + pending_inc.old_pg_remap_items.count(pgid)) { + dout(10) << __func__ << " waiting for pending update on " << pgid << dendl; + wait_for_finished_proposal(op, new C_RetryMessage(this, op)); + return true; + } + + pending_inc.old_pg_remap_items.insert(pgid); + ss << "clear " << pgid << " pg_remap_items mapping"; + goto update; } else if (prefix == "osd primary-affinity") { int64_t id; if (!cmd_getval(g_ceph_context, cmdmap, "id", id)) { diff --git a/src/vstart.sh b/src/vstart.sh index 015db3ad04fd2..06a804f16b07a 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -472,6 +472,7 @@ $extra_conf [mon] mon pg warn min per osd = 3 mon osd allow primary affinity = true + mon osd allow pg remap = true mon reweight min pgs per osd = 4 mon osd prime pg temp = true crushtool = 
$CEPH_BIN/crushtool -- 2.39.5