From e9921ee76c060122d64e61a2fe483318a49bc0a9 Mon Sep 17 00:00:00 2001 From: Josh Salomon Date: Wed, 2 Nov 2022 16:23:28 +0200 Subject: [PATCH] osd, mon: Add 'osd pg-upmap-primary' and 'osd rm-pg-upmap-primary' commands. Changed OSDMap to keep upmap-primary records The upmap-primary records are visible in json/xml and in dump commands. Signed-off-by: Josh Salomon --- src/mon/MonCommands.h | 11 +- src/mon/OSDMonitor.cc | 276 +++++++++++++++++++++++++----------------- src/mon/OSDMonitor.h | 3 + src/osd/OSDMap.cc | 136 +++++++++++++++++---- src/osd/OSDMap.h | 4 +- src/vstart.sh | 1 + 6 files changed, 296 insertions(+), 135 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 8ae8dc599f15b..29e7b21275a20 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -961,7 +961,7 @@ COMMAND("osd force-create-pg " COMMAND("osd pg-temp " "name=pgid,type=CephPgid " "name=id,type=CephOsdName,n=N,req=false", - "set pg_temp mapping pgid:[ [...]] (developers only)", + "set pg_temp mapping :[ [...]] (developers only)", "osd", "rw") COMMAND("osd pg-upmap " "name=pgid,type=CephPgid " @@ -981,6 +981,15 @@ COMMAND("osd rm-pg-upmap-items " "name=pgid,type=CephPgid", "clear pg_upmap_items mapping for (developers only)", "osd", "rw") +COMMAND("osd pg-upmap-primary " + "name=pgid,type=CephPgid " + "name=id,type=CephOsdName ", + "set pg primary osd : (id (osd) must be part of pgid)", + "osd", "rw") +COMMAND("osd rm-pg-upmap-primary " + "name=pgid,type=CephPgid ", + "clear pg primary setting for ", + "osd", "rw") COMMAND("osd primary-temp " "name=pgid,type=CephPgid " "name=id,type=CephOsdName", diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index e9e7ce72a8b1d..721fd94b723fd 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -5940,19 +5940,10 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) } else if (prefix == "pg map") { pg_t pgid; - string pgidstr; - cmd_getval(cmdmap, "pgid", pgidstr); - if 
(!pgid.parse(pgidstr.c_str())) { - ss << "invalid pgid '" << pgidstr << "'"; - r = -EINVAL; - goto reply; - } vector up, acting; - if (!osdmap.have_pg_pool(pgid.pool())) { - ss << "pg '" << pgidstr << "' does not exist"; - r = -ENOENT; + r = parse_pgid(cmdmap, ss, pgid); + if (r < 0) goto reply; - } pg_t mpgid = osdmap.raw_pg_to_pg(pgid); osdmap.pg_to_up_acting_osds(pgid, up, acting); if (f) { @@ -9892,6 +9883,27 @@ int OSDMonitor::prepare_command_osd_purge( return 0; } +int OSDMonitor::parse_pgid(const cmdmap_t& cmdmap, stringstream &ss, + /* out */ pg_t &pgid, std::optional pgids) { /* Reads the "pgid" argument from cmdmap, parses it into pgid and checks osdmap.pg_exists(); returns 0 on success, -EINVAL when the argument cannot be read or parsed, -ENOENT when the pg does not exist, with the reason written to ss. NOTE(review): pgids appears to be received by value, so the pgids.value() assignment below updates only this local copy and the caller's string stays unchanged -- confirm whether an out-parameter (pointer/reference) was intended. */ + string pgidstr; + if (!cmd_getval(cmdmap, "pgid", pgidstr)) { + ss << "unable to parse 'pgid' value '" + << cmd_vartype_stringify(cmdmap.at("pgid")) << "'"; + return -EINVAL; + } + if (!pgid.parse(pgidstr.c_str())) { + ss << "invalid pgid '" << pgidstr << "'"; + return -EINVAL; + } + if (!osdmap.pg_exists(pgid)) { + ss << "pgid '" << pgid << "' does not exist"; + return -ENOENT; + } + if (pgids.has_value()) + pgids.value() = pgidstr; + return 0; +} + bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, const cmdmap_t& cmdmap) { @@ -11966,24 +11978,10 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, return true; } } else if (prefix == "osd pg-temp") { - string pgidstr; - if (!cmd_getval(cmdmap, "pgid", pgidstr)) { - ss << "unable to parse 'pgid' value '" - << cmd_vartype_stringify(cmdmap.at("pgid")) << "'"; - err = -EINVAL; - goto reply; - } pg_t pgid; - if (!pgid.parse(pgidstr.c_str())) { - ss << "invalid pgid '" << pgidstr << "'"; - err = -EINVAL; - goto reply; - } - if (!osdmap.pg_exists(pgid)) { - ss << "pg " << pgid << " does not exist"; - err = -ENOENT; + err = parse_pgid(cmdmap, ss, pgid); + if (err < 0) goto reply; - } if (pending_inc.new_pg_temp.count(pgid)) { dout(10) << __func__ << " waiting for pending update on " << pgid << dendl; wait_for_finished_proposal(op, new C_RetryMessage(this, op)); @@ -12028,24 +12026,10 @@ bool 
OSDMonitor::prepare_command_impl(MonOpRequestRef op, ss << "set " << pgid << " pg_temp mapping to " << new_pg_temp; goto update; } else if (prefix == "osd primary-temp") { - string pgidstr; - if (!cmd_getval(cmdmap, "pgid", pgidstr)) { - ss << "unable to parse 'pgid' value '" - << cmd_vartype_stringify(cmdmap.at("pgid")) << "'"; - err = -EINVAL; - goto reply; - } pg_t pgid; - if (!pgid.parse(pgidstr.c_str())) { - ss << "invalid pgid '" << pgidstr << "'"; - err = -EINVAL; - goto reply; - } - if (!osdmap.pg_exists(pgid)) { - ss << "pg " << pgid << " does not exist"; - err = -ENOENT; + err = parse_pgid(cmdmap, ss, pgid); + if (err < 0) goto reply; - } int64_t osd; if (!cmd_getval(cmdmap, "id", osd)) { @@ -12074,18 +12058,9 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto update; } else if (prefix == "pg repeer") { pg_t pgid; - string pgidstr; - cmd_getval(cmdmap, "pgid", pgidstr); - if (!pgid.parse(pgidstr.c_str())) { - ss << "invalid pgid '" << pgidstr << "'"; - err = -EINVAL; - goto reply; - } - if (!osdmap.pg_exists(pgid)) { - ss << "pg '" << pgidstr << "' does not exist"; - err = -ENOENT; + err = parse_pgid(cmdmap, ss, pgid); + if (err < 0) goto reply; - } vector acting; int primary; osdmap.pg_to_acting_osds(pgid, &acting, &primary); @@ -12120,39 +12095,77 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } else if (prefix == "osd pg-upmap" || prefix == "osd rm-pg-upmap" || prefix == "osd pg-upmap-items" || - prefix == "osd rm-pg-upmap-items") { - if (osdmap.require_min_compat_client < ceph_release_t::luminous) { + prefix == "osd rm-pg-upmap-items" || + prefix == "osd pg-upmap-primary" || + prefix == "osd rm-pg-upmap-primary") { + enum { + OP_PG_UPMAP, + OP_RM_PG_UPMAP, + OP_PG_UPMAP_ITEMS, + OP_RM_PG_UPMAP_ITEMS, + OP_PG_UPMAP_PRIMARY, + OP_RM_PG_UPMAP_PRIMARY, + } upmap_option; + + if (prefix == "osd pg-upmap") { + upmap_option = OP_PG_UPMAP; + } else if (prefix == "osd rm-pg-upmap") { + upmap_option = OP_RM_PG_UPMAP; + } else if 
(prefix == "osd pg-upmap-items") { + upmap_option = OP_PG_UPMAP_ITEMS; + } else if (prefix == "osd rm-pg-upmap-items") { + upmap_option = OP_RM_PG_UPMAP_ITEMS; + } else if (prefix == "osd pg-upmap-primary") { + upmap_option = OP_PG_UPMAP_PRIMARY; + } else if (prefix == "osd rm-pg-upmap-primary") { + upmap_option = OP_RM_PG_UPMAP_PRIMARY; + } else { + ceph_abort_msg("invalid upmap option"); + } + + ceph_release_t min_release = ceph_release_t::unknown; + string feature_name = "unknown"; + switch (upmap_option) { + case OP_PG_UPMAP: // fall through + case OP_RM_PG_UPMAP: // fall through + case OP_PG_UPMAP_ITEMS: // fall through + case OP_RM_PG_UPMAP_ITEMS: + min_release = ceph_release_t::luminous; + feature_name = "pg-upmap"; + break; + + case OP_PG_UPMAP_PRIMARY: // fall through + case OP_RM_PG_UPMAP_PRIMARY: + min_release = ceph_release_t::reef; + feature_name = "pg-upmap-primary"; + break; + + default: + ceph_abort_msg("invalid upmap option"); + } + uint64_t min_feature = CEPH_FEATUREMASK_OSDMAP_PG_UPMAP; + string min_release_name = ceph_release_name(static_cast(min_release)); + + if (osdmap.require_min_compat_client < min_release) { ss << "min_compat_client " << osdmap.require_min_compat_client - << " < luminous, which is required for pg-upmap. " - << "Try 'ceph osd set-require-min-compat-client luminous' " + << " < " << min_release_name << ", which is required for " << feature_name << ". " + << "Try 'ceph osd set-require-min-compat-client " << min_release_name << "' " << "before using the new interface"; err = -EPERM; goto reply; } - err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_PG_UPMAP, ss); + + //TODO: Should I add feature and test for upmap-primary? 
+ err = check_cluster_features(min_feature, ss); if (err == -EAGAIN) goto wait; if (err < 0) goto reply; - string pgidstr; - if (!cmd_getval(cmdmap, "pgid", pgidstr)) { - ss << "unable to parse 'pgid' value '" - << cmd_vartype_stringify(cmdmap.at("pgid")) << "'"; - err = -EINVAL; - goto reply; - } pg_t pgid; - if (!pgid.parse(pgidstr.c_str())) { - ss << "invalid pgid '" << pgidstr << "'"; - err = -EINVAL; - goto reply; - } - if (!osdmap.pg_exists(pgid)) { - ss << "pg " << pgid << " does not exist"; - err = -ENOENT; + err = parse_pgid(cmdmap, ss, pgid); + if (err < 0) goto reply; - } if (pending_inc.old_pools.count(pgid.pool())) { ss << "pool of " << pgid << " is pending removal"; err = -ENOENT; @@ -12162,25 +12175,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, return true; } - enum { - OP_PG_UPMAP, - OP_RM_PG_UPMAP, - OP_PG_UPMAP_ITEMS, - OP_RM_PG_UPMAP_ITEMS, - } option; - - if (prefix == "osd pg-upmap") { - option = OP_PG_UPMAP; - } else if (prefix == "osd rm-pg-upmap") { - option = OP_RM_PG_UPMAP; - } else if (prefix == "osd pg-upmap-items") { - option = OP_PG_UPMAP_ITEMS; - } else { - option = OP_RM_PG_UPMAP_ITEMS; - } - // check pending upmap changes - switch (option) { + switch (upmap_option) { case OP_PG_UPMAP: // fall through case OP_RM_PG_UPMAP: if (pending_inc.new_pg_upmap.count(pgid) || @@ -12192,8 +12188,19 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } break; - case OP_PG_UPMAP_ITEMS: // fall through - case OP_RM_PG_UPMAP_ITEMS: + case OP_PG_UPMAP_PRIMARY: // fall through + case OP_RM_PG_UPMAP_PRIMARY: + { + const pg_pool_t *pt = osdmap.get_pg_pool(pgid.pool()); + if (! 
pt->is_replicated()) { + ss << "pg-upmap-primary is only supported for replicated pools"; + err = -EINVAL; + goto reply; + } + } + // fall through + case OP_PG_UPMAP_ITEMS: // fall through + case OP_RM_PG_UPMAP_ITEMS: // fall through if (pending_inc.new_pg_upmap_items.count(pgid) || pending_inc.old_pg_upmap_items.count(pgid)) { dout(10) << __func__ << " waiting for pending update on " @@ -12204,10 +12211,10 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, break; default: - ceph_abort_msg("invalid option"); + ceph_abort_msg("invalid upmap option"); } - switch (option) { + switch (upmap_option) { case OP_PG_UPMAP: { vector id_vec; @@ -12346,8 +12353,69 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } break; + case OP_PG_UPMAP_PRIMARY: + { + int64_t id; + if (!cmd_getval(cmdmap, "id", id)) { + ss << "invalid osd id value '" + << cmd_vartype_stringify(cmdmap.at("id")) << "'"; + err = -EINVAL; + goto reply; + } + if (id != CRUSH_ITEM_NONE && !osdmap.exists(id)) { + ss << "osd." << id << " does not exist"; + err = -ENOENT; + goto reply; + } + vector acting; + int primary; + osdmap.pg_to_acting_osds(pgid, &acting, &primary); + if (id == primary) { + ss << "osd." << id << " is already primary for pg " << pgid; + err = -EINVAL; + goto reply; + } + int found_idx = 0; + for (int i = 1 ; i < (int)acting.size(); i++) { // skip 0 on purpose + if (acting[i] == id) { + found_idx = i; + break; + } + } + if (found_idx == 0) { + ss << "osd." << id << " is not in acting set for pg " << pgid; + err = -EINVAL; + goto reply; + } + vector new_acting(acting); + new_acting[found_idx] = new_acting[0]; + new_acting[0] = id; + int pool_size = osdmap.get_pg_pool_size(pgid); + if (osdmap.crush->verify_upmap(cct, osdmap.get_pg_pool_crush_rule(pgid), + pool_size, new_acting) >= 0) { + ss << "change primary for pg " << pgid << " to osd." << id; + } + else { + ss << "can't change primary for pg " << pgid << " to osd." 
<< id + << " - illegal pg after the change"; + err = -EINVAL; + goto reply; + } + pending_inc.new_pg_upmap_primary[pgid] = id; + //TO-REMOVE: + ldout(cct, 20) << "pg " << pgid << ": set pg_upmap_primary to " << id << dendl; + } + break; + + case OP_RM_PG_UPMAP_PRIMARY: + { + pending_inc.old_pg_upmap_primary.insert(pgid); + ss << "clear " << pgid << " pg_upmap_primary mapping"; + } + break; + default: - ceph_abort_msg("invalid option"); + ceph_abort_msg("invalid upmap option"); } goto update; @@ -13748,17 +13816,9 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } else if (prefix == "osd force-create-pg") { pg_t pgid; string pgidstr; - cmd_getval(cmdmap, "pgid", pgidstr); - if (!pgid.parse(pgidstr.c_str())) { - ss << "invalid pgid '" << pgidstr << "'"; - err = -EINVAL; - goto reply; - } - if (!osdmap.pg_exists(pgid)) { - ss << "pg " << pgid << " should not exist"; - err = -ENOENT; + err = parse_pgid(cmdmap, ss, pgid, pgidstr); + if (err < 0) goto reply; - } bool sure = false; cmd_getval(cmdmap, "yes_i_really_mean_it", sure); if (!sure) { diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 55d4e8c10f3aa..3ced8be401b4c 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "include/types.h" #include "include/encoding.h" @@ -674,6 +675,8 @@ protected: bool grace_interval_threshold_exceeded(int last_failed); void set_default_laggy_params(int target_osd); + int parse_pgid(const cmdmap_t& cmdmap, std::stringstream &ss, + pg_t &pgid, std::optional pgidstr = std::nullopt); public: OSDMonitor(CephContext *cct, Monitor &mn, Paxos &p, const std::string& service_name); diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 0cbe7ca5934c7..699ccb2166557 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -579,14 +579,16 @@ void OSDMap::Incremental::encode(ceph::buffer::list& bl, uint64_t features) cons ENCODE_START(8, 7, bl); { - uint8_t v = 8; + uint8_t v = 9; if 
(!HAVE_FEATURE(features, SERVER_LUMINOUS)) { v = 3; } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { v = 5; } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { v = 6; - } + } /* else if (!HAVE_FEATURE(features, SERVER_REEF)) { + v = 8; + } */ ENCODE_START(v, 1, bl); // client-usable data encode(fsid, bl); encode(epoch, bl); @@ -645,6 +647,10 @@ void OSDMap::Incremental::encode(ceph::buffer::list& bl, uint64_t features) cons encode(new_last_up_change, bl); encode(new_last_in_change, bl); } + if (v >= 9) { + encode(new_pg_upmap_primary, bl); + encode(old_pg_upmap_primary, bl); + } ENCODE_FINISH(bl); // client-usable data } @@ -1196,6 +1202,23 @@ void OSDMap::Incremental::dump(Formatter *f) const } f->close_section(); + // dump upmap_primaries + f->open_array_section("new_pg_upmap_primaries"); + for (auto& [pg, osd] : new_pg_upmap_primary) { + f->open_object_section("primary_mapping"); + f->dump_stream("pgid") << pg; + f->dump_int("primary_osd", osd); + f->close_section(); + } + f->close_section(); // new_pg_upmap_primaries + + // dump old_pg_upmap_primaries (removed primary mappings) + f->open_array_section("old_pg_upmap_primaries"); + for (auto& pg : old_pg_upmap_primary) { + f->dump_stream("pgid") << pg; + } + f->close_section(); // old_pg_upmap_primaries + f->open_array_section("new_up_thru"); for (const auto &up_thru : new_up_thru) { @@ -1709,7 +1732,7 @@ uint64_t OSDMap::get_features(int entity_type, uint64_t *pmask) const } mask |= CEPH_FEATURES_CRUSH; - if (!pg_upmap.empty() || !pg_upmap_items.empty()) + if (!pg_upmap.empty() || !pg_upmap_items.empty() || !pg_upmap_primaries.empty()) features |= CEPH_FEATUREMASK_OSDMAP_PG_UPMAP; mask |= CEPH_FEATUREMASK_OSDMAP_PG_UPMAP; @@ -2074,13 +2097,13 @@ bool OSDMap::check_pg_upmaps( auto i = pg_upmap.find(pg); if (i != pg_upmap.end()) { if (i->second == raw) { - ldout(cct, 10) << "removing redundant pg_upmap " << i->first << " " + ldout(cct, 10) << __func__ << "removing redundant pg_upmap " << i->first << " " << 
i->second << dendl; to_cancel->push_back(pg); continue; } if ((int)i->second.size() != get_pg_pool_size(pg)) { - ldout(cct, 10) << "removing pg_upmap " << i->first << " " + ldout(cct, 10) << __func__ << "removing pg_upmap " << i->first << " " << i->second << " != pool size " << get_pg_pool_size(pg) << dendl; to_cancel->push_back(pg); @@ -2091,24 +2114,29 @@ bool OSDMap::check_pg_upmaps( if (j != pg_upmap_items.end()) { mempool::osdmap::vector> newmap; for (auto& p : j->second) { - if (std::find(raw.begin(), raw.end(), p.first) == raw.end()) { + auto osd_from = p.first; + auto osd_to = p.second; + if (std::find(raw.begin(), raw.end(), osd_from) == raw.end()) { // cancel mapping if source osd does not exist anymore + ldout(cct, 20) << __func__ << " pg_upmap_items (source osd does not exist) " << pg_upmap_items << dendl; continue; } - if (p.second != CRUSH_ITEM_NONE && p.second < max_osd && - p.second >= 0 && osd_weight[p.second] == 0) { + if (osd_to != CRUSH_ITEM_NONE && osd_to < max_osd && + osd_to >= 0 && osd_weight[osd_to] == 0) { // cancel mapping if target osd is out + ldout(cct, 20) << __func__ << " pg_upmap_items (target osd is out) " << pg_upmap_items << dendl; continue; } newmap.push_back(p); } if (newmap.empty()) { - ldout(cct, 10) << " removing no-op pg_upmap_items " + ldout(cct, 10) << __func__ << " removing no-op pg_upmap_items " << j->first << " " << j->second << dendl; to_cancel->push_back(pg); - } else if (newmap != j->second) { - ldout(cct, 10) << " simplifying partially no-op pg_upmap_items " + } else { + //Josh--check partial no-op here. 
+ ldout(cct, 10) << __func__ << " simplifying partially no-op pg_upmap_items " << j->first << " " << j->second << " -> " << newmap << dendl; @@ -2176,6 +2204,9 @@ bool OSDMap::clean_pg_upmaps( get_upmap_pgs(&to_check); auto any_change = check_pg_upmaps(cct, to_check, &to_cancel, &to_remap); clean_pg_upmaps(cct, pending_inc, to_cancel, to_remap); + //TODO: Create these 3 functions for pg_upmap_primaries and so they can be checked + // and cleaned in the same way as pg_upmap. This is not critical since invalid + // pg_upmap_primaries are never applied, (the final check is in _apply_upmap). return any_change; } @@ -2388,6 +2419,13 @@ int OSDMap::apply_incremental(const Incremental &inc) pg_upmap_items.erase(pg); } + for (auto& [pg, prim] : inc.new_pg_upmap_primary) { + pg_upmap_primaries[pg] = prim; + } + for (auto& pg : inc.old_pg_upmap_primary) { + pg_upmap_primaries.erase(pg); + } + // blocklist if (!inc.new_blocklist.empty()) { blocklist.insert(inc.new_blocklist.begin(),inc.new_blocklist.end()); @@ -2614,26 +2652,47 @@ void OSDMap::_apply_upmap(const pg_pool_t& pi, pg_t raw_pg, vector *raw) co if (q != pg_upmap_items.end()) { // NOTE: this approach does not allow a bidirectional swap, // e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. 
- for (auto& r : q->second) { + for (auto& [osd_from, osd_to] : q->second) { + // A capacity change upmap (replace osd in the pg with osd not in the pg) // make sure the replacement value doesn't already appear bool exists = false; ssize_t pos = -1; for (unsigned i = 0; i < raw->size(); ++i) { int osd = (*raw)[i]; - if (osd == r.second) { + if (osd == osd_to) { exists = true; break; } // ignore mapping if target is marked out (or invalid osd id) - if (osd == r.first && + if (osd == osd_from && pos < 0 && - !(r.second != CRUSH_ITEM_NONE && r.second < max_osd && - r.second >= 0 && osd_weight[r.second] == 0)) { + !(osd_to != CRUSH_ITEM_NONE && osd_to < max_osd && + osd_to >= 0 && osd_weight[osd_to] == 0)) { pos = i; - } + } } if (!exists && pos >= 0) { - (*raw)[pos] = r.second; + (*raw)[pos] = osd_to; + } + } + } + auto r = pg_upmap_primaries.find(pg); + if (r != pg_upmap_primaries.end()) { + auto new_prim = r->second; + // Apply mapping only if new primary is not marked out and valid osd id + if (new_prim != CRUSH_ITEM_NONE && new_prim < max_osd && new_prim >= 0 && + osd_weight[new_prim] != 0) { + int new_prim_idx = 0; + for (int i = 1 ; i < (int)raw->size(); i++) { // start from 1 on purpose + if ((*raw)[i] == new_prim) { + new_prim_idx = i; + break; + } + } + if (new_prim_idx > 0) { + // swap primary + (*raw)[new_prim_idx] = (*raw)[0]; + (*raw)[0] = new_prim; } } } @@ -3068,14 +3127,16 @@ void OSDMap::encode(ceph::buffer::list& bl, uint64_t features) const { // NOTE: any new encoding dependencies must be reflected by // SIGNIFICANT_FEATURES - uint8_t v = 9; + uint8_t v = 10; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { v = 3; } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { v = 6; } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { v = 7; - } + } /* else if (!HAVE_FEATURE(features, SERVER_REEF)) { + v = 9; + } */ ENCODE_START(v, 1, bl); // client-usable data // base encode(fsid, bl); @@ -3150,6 +3211,11 @@ void OSDMap::encode(ceph::buffer::list& bl, 
uint64_t features) const encode(last_up_change, bl); encode(last_in_change, bl); } + if (v >= 10) { + encode(pg_upmap_primaries, bl); + } else { + ceph_assert(pg_upmap_primaries.empty()); + } ENCODE_FINISH(bl); // client-usable data } @@ -3484,6 +3550,11 @@ void OSDMap::decode(ceph::buffer::list::const_iterator& bl) decode(last_up_change, bl); decode(last_in_change, bl); } + if (struct_v >= 10) { + decode(pg_upmap_primaries, bl); + } else { + pg_upmap_primaries.clear(); + } DECODE_FINISH(bl); // client-usable data } @@ -3753,21 +3824,32 @@ void OSDMap::dump(Formatter *f) const f->close_section(); } f->close_section(); + f->open_array_section("pg_upmap_items"); - for (auto& p : pg_upmap_items) { + for (auto& [pgid, mappings] : pg_upmap_items) { f->open_object_section("mapping"); - f->dump_stream("pgid") << p.first; + f->dump_stream("pgid") << pgid; f->open_array_section("mappings"); - for (auto& q : p.second) { + for (auto& [from, to] : mappings) { f->open_object_section("mapping"); - f->dump_int("from", q.first); - f->dump_int("to", q.second); + f->dump_int("from", from); + f->dump_int("to", to); f->close_section(); } f->close_section(); f->close_section(); } f->close_section(); + + f->open_array_section("pg_upmap_primaries"); + for (const auto& [pg, osd] : pg_upmap_primaries) { + f->open_object_section("primary_mapping"); + f->dump_stream("pgid") << pg; + f->dump_int("primary_osd", osd); + f->close_section(); + } + f->close_section(); // pg_upmap_primaries + f->open_array_section("pg_temp"); pg_temp->dump(f); f->close_section(); @@ -4049,6 +4131,10 @@ void OSDMap::print(ostream& out) const out << "pg_upmap_items " << p.first << " " << p.second << "\n"; } + for (auto& [pg, osd] : pg_upmap_primaries) { + out << "pg_upmap_primary " << pg << " " << osd << "\n"; + } + for (const auto& pg : *pg_temp) out << "pg_temp " << pg.first << " " << pg.second << "\n"; diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 534879f1e773e..54474c2893e11 100644 --- a/src/osd/OSDMap.h +++ 
b/src/osd/OSDMap.h @@ -404,7 +404,8 @@ public: mempool::osdmap::map> new_pg_upmap; mempool::osdmap::map>> new_pg_upmap_items; - mempool::osdmap::set old_pg_upmap, old_pg_upmap_items; + mempool::osdmap::map new_pg_upmap_primary; + mempool::osdmap::set old_pg_upmap, old_pg_upmap_items, old_pg_upmap_primary; mempool::osdmap::map new_removed_snaps; mempool::osdmap::map new_purged_snaps; @@ -575,6 +576,7 @@ private: // remap (post-CRUSH, pre-up) mempool::osdmap::map> pg_upmap; ///< remap pg mempool::osdmap::map>> pg_upmap_items; ///< remap osds in up set + mempool::osdmap::map pg_upmap_primaries; ///< remap primary of a pg mempool::osdmap::map pools; mempool::osdmap::map pool_name; diff --git a/src/vstart.sh b/src/vstart.sh index bc4e0afe5ef59..e18184ed1e898 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -1333,6 +1333,7 @@ if [ "$debug" -eq 0 ]; then else debug echo "** going verbose **" CMONDEBUG=' + debug osd = 20 debug mon = 20 debug paxos = 20 debug auth = 20 -- 2.39.5