From cb2bbc3737ef959e11351e4bbafc276b66728a48 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 5 Aug 2017 15:30:15 -0400 Subject: [PATCH] mon: include PGMonitor commands with mixed-version mons While we have a mixed version cluster, we have to advertise our PGMonitor commands to our peons or else commands like 'pg dump' won't work. Once the mon feature flag is set, we can drop that because each mon will include the mgr commands (either those stored in paxos or the statically compiled ones until that point). Signed-off-by: Sage Weil (cherry picked from commit 2d9045a9a57a3b08d525f4f1ecb3b255add26b1f) --- src/mon/Elector.cc | 10 ++- src/mon/Monitor.cc | 27 ++++++-- src/mon/Monitor.h | 18 ++++-- src/mon/PGMonitorCommands.h | 120 ++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 11 deletions(-) create mode 100644 src/mon/PGMonitorCommands.h diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc index a0cc7272574c6..001fea95f3d56 100644 --- a/src/mon/Elector.cc +++ b/src/mon/Elector.cc @@ -117,8 +117,14 @@ void Elector::defer(int who) ack_stamp = ceph_clock_now(); MMonElection *m = new MMonElection(MMonElection::OP_ACK, epoch, mon->monmap); m->mon_features = ceph::features::mon::get_supported(); - m->sharing_bl = mon->get_local_commands_bl(); mon->collect_metadata(&m->metadata); + + // This field is unused completely in luminous, but jewel uses it to + // determine whether we are a dumpling mon due to some crufty old + // code. It only needs to see this buffer non-empty, so put + // something useless there. 
+ m->sharing_bl = mon->get_local_commands_bl(mon->get_required_mon_features()); + mon->messenger->send_message(m, mon->monmap->get_inst(who)); // set a timer @@ -211,7 +217,7 @@ void Elector::victory() m->quorum = quorum; m->quorum_features = cluster_features; m->mon_features = mon_features; - m->sharing_bl = mon->get_local_commands_bl(); + m->sharing_bl = mon->get_local_commands_bl(mon_features); mon->messenger->send_message(m, mon->monmap->get_inst(*p)); } diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 41adf5c960300..dabfc998ba72b 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -103,16 +103,19 @@ const string Monitor::MONITOR_STORE_PREFIX = "monitor_store"; #undef FLAG #undef COMMAND #undef COMMAND_WITH_FLAG -MonCommand mon_commands[] = { #define FLAG(f) (MonCommand::FLAG_##f) #define COMMAND(parsesig, helptext, modulename, req_perms, avail) \ {parsesig, helptext, modulename, req_perms, avail, FLAG(NONE)}, #define COMMAND_WITH_FLAG(parsesig, helptext, modulename, req_perms, avail, flags) \ {parsesig, helptext, modulename, req_perms, avail, flags}, +MonCommand mon_commands[] = { #include <mon/MonCommands.h> +}; +MonCommand pgmonitor_commands[] = { +#include <mon/PGMonitorCommands.h> +}; #undef COMMAND #undef COMMAND_WITH_FLAG -}; void C_MonContext::finish(int r) { @@ -210,6 +213,13 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, } MonCommand::encode_vector(local_mon_commands, local_mon_commands_bl); + local_upgrading_mon_commands = local_mon_commands; + for (unsigned i = 0; i < ARRAY_SIZE(pgmonitor_commands); ++i) { + local_upgrading_mon_commands.push_back(pgmonitor_commands[i]); + } + MonCommand::encode_vector(local_upgrading_mon_commands, + local_upgrading_mon_commands_bl); + // assume our commands until we have an election. 
this only means // we won't reply with EINVAL before the election; any command that // actually matters will wait until we have quorum etc and then @@ -1904,7 +1914,7 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features, clog->info() << "mon." << name << "@" << rank << " won leader election with quorum " << quorum; - set_leader_commands(get_local_commands()); + set_leader_commands(get_local_commands(mon_features)); paxos->leader_init(); // NOTE: tell monmap monitor first. This is important for the @@ -2945,8 +2955,13 @@ void Monitor::handle_command(MonOpRequestRef op) osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS; std::vector<MonCommand> commands; - commands = static_cast<MgrMonitor*>( + + // only include mgr commands once all mons are upgraded (and we've dropped + // the hard-coded PGMonitor commands) + if (quorum_mon_features.contains_all(ceph::features::mon::FEATURE_LUMINOUS)) { + commands = static_cast<MgrMonitor*>( paxos_service[PAXOS_MGR])->get_command_descs(); + } for (auto& c : leader_mon_commands) { commands.push_back(c); @@ -2996,7 +3011,9 @@ void Monitor::handle_command(MonOpRequestRef op) } } // validate command is in our map & matches, or forward if it is allowed - const MonCommand *mon_cmd = _get_moncommand(prefix, get_local_commands()); + const MonCommand *mon_cmd = _get_moncommand( + prefix, + get_local_commands(quorum_mon_features)); if (!mon_cmd) { mon_cmd = mgr_cmd; } diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 283f560a2f693..fa4ba9fa8f56f 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -169,6 +169,10 @@ public: vector<MonCommand> local_mon_commands; // commands i support bufferlist local_mon_commands_bl; // encoded version of above + // for upgrading mon cluster that still uses PGMonitor + vector<MonCommand> local_upgrading_mon_commands; // mixed mon cluster commands + bufferlist local_upgrading_mon_commands_bl; // encoded version of above + Messenger *mgr_messenger; MgrClient mgr_client; uint64_t mgr_proxy_bytes = 0; // in-flight proxied mgr 
command message bytes @@ -962,11 +966,17 @@ public: bufferlist *rdata, bool hide_mgr_flag=false); - const std::vector<MonCommand> &get_local_commands() { - return local_mon_commands; + const std::vector<MonCommand> &get_local_commands(mon_feature_t f) { + if (f.contains_all(ceph::features::mon::FEATURE_LUMINOUS)) + return local_mon_commands; + else + return local_upgrading_mon_commands; } - const bufferlist& get_local_commands_bl() { - return local_mon_commands_bl; + const bufferlist& get_local_commands_bl(mon_feature_t f) { + if (f.contains_all(ceph::features::mon::FEATURE_LUMINOUS)) + return local_mon_commands_bl; + else + return local_upgrading_mon_commands_bl; } void set_leader_commands(const std::vector<MonCommand>& cmds) { leader_mon_commands = cmds; diff --git a/src/mon/PGMonitorCommands.h b/src/mon/PGMonitorCommands.h new file mode 100644 index 0000000000000..12f6d831c3d32 --- /dev/null +++ b/src/mon/PGMonitorCommands.h @@ -0,0 +1,120 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* no guard; may be included multiple times */ + +COMMAND("pg stat", "show placement group status.", + "pg", "r", "cli,rest") +COMMAND("pg getmap", "get binary pg map to -o/stdout", "pg", "r", "cli,rest") + +COMMAND("pg dump " \ + "name=dumpcontents,type=CephChoices,strings=all|summary|sum|delta|pools|osds|pgs|pgs_brief,n=N,req=false", \ + "show human-readable versions of pg map (only 'all' valid with plain)", "pg", "r", "cli,rest") +COMMAND("pg dump_json " \ + "name=dumpcontents,type=CephChoices,strings=all|summary|sum|pools|osds|pgs,n=N,req=false", \ + "show human-readable version of pg map in json only",\ + "pg", "r", "cli,rest") +COMMAND("pg dump_pools_json", "show pg pools info in json only",\ + "pg", "r", "cli,rest") + +COMMAND("pg ls-by-pool " \ + "name=poolstr,type=CephString " \ + "name=states,type=CephString,n=N,req=false", \ + "list pg with pool = [poolname]", "pg", "r", "cli,rest") +COMMAND("pg ls-by-primary " \ + "name=osd,type=CephOsdName " \ + 
"name=pool,type=CephInt,req=false " \ + "name=states,type=CephString,n=N,req=false", \ + "list pg with primary = [osd]", "pg", "r", "cli,rest") +COMMAND("pg ls-by-osd " \ + "name=osd,type=CephOsdName " \ + "name=pool,type=CephInt,req=false " \ + "name=states,type=CephString,n=N,req=false", \ + "list pg on osd [osd]", "pg", "r", "cli,rest") +COMMAND("pg ls " \ + "name=pool,type=CephInt,req=false " \ + "name=states,type=CephString,n=N,req=false", \ + "list pg with specific pool, osd, state", "pg", "r", "cli,rest") +COMMAND("pg dump_stuck " \ + "name=stuckops,type=CephChoices,strings=inactive|unclean|stale|undersized|degraded,n=N,req=false " \ + "name=threshold,type=CephInt,req=false", + "show information about stuck pgs",\ + "pg", "r", "cli,rest") +COMMAND("pg debug " \ + "name=debugop,type=CephChoices,strings=unfound_objects_exist|degraded_pgs_exist", \ + "show debug info about pgs", "pg", "r", "cli,rest") + +COMMAND("pg scrub name=pgid,type=CephPgid", "start scrub on ", \ + "pg", "rw", "cli,rest") +COMMAND("pg deep-scrub name=pgid,type=CephPgid", "start deep-scrub on ", \ + "pg", "rw", "cli,rest") +COMMAND("pg repair name=pgid,type=CephPgid", "start repair on ", \ + "pg", "rw", "cli,rest") + +// stuff in osd namespace +COMMAND("osd perf", \ + "print dump of OSD perf summary stats", \ + "osd", \ + "r", \ + "cli,rest") +COMMAND("osd df " \ + "name=output_method,type=CephChoices,strings=plain|tree,req=false", \ + "show OSD utilization", "osd", "r", "cli,rest") +COMMAND("osd blocked-by", \ + "print histogram of which OSDs are blocking their peers", \ + "osd", "r", "cli,rest") +COMMAND("osd pool stats " \ + "name=name,type=CephString,req=false", + "obtain stats from all pools, or from specified pool", + "osd", "r", "cli,rest") +COMMAND("osd reweight-by-utilization " \ + "name=oload,type=CephInt,req=false " \ + "name=max_change,type=CephFloat,req=false " \ + "name=max_osds,type=CephInt,req=false " \ + 
"name=no_increasing,type=CephChoices,strings=--no-increasing,req=false",\ + "reweight OSDs by utilization [overload-percentage-for-consideration, default 120]", \ + "osd", "rw", "cli,rest") +COMMAND("osd test-reweight-by-utilization " \ + "name=oload,type=CephInt,req=false " \ + "name=max_change,type=CephFloat,req=false " \ + "name=max_osds,type=CephInt,req=false " \ + "name=no_increasing,type=CephChoices,strings=--no-increasing,req=false",\ + "dry run of reweight OSDs by utilization [overload-percentage-for-consideration, default 120]", \ + "osd", "r", "cli,rest") +COMMAND("osd reweight-by-pg " \ + "name=oload,type=CephInt,req=false " \ + "name=max_change,type=CephFloat,req=false " \ + "name=max_osds,type=CephInt,req=false " \ + "name=pools,type=CephPoolname,n=N,req=false", \ + "reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \ + "osd", "rw", "cli,rest") +COMMAND("osd test-reweight-by-pg " \ + "name=oload,type=CephInt,req=false " \ + "name=max_change,type=CephFloat,req=false " \ + "name=max_osds,type=CephInt,req=false " \ + "name=pools,type=CephPoolname,n=N,req=false", \ + "dry run of reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \ + "osd", "r", "cli,rest") + +COMMAND("osd scrub " \ + "name=who,type=CephString", \ + "initiate scrub on osd , or use to scrub all", \ + "osd", "rw", "cli,rest") +COMMAND("osd deep-scrub " \ + "name=who,type=CephString", \ + "initiate deep scrub on osd , or use to deep scrub all", \ + "osd", "rw", "cli,rest") +COMMAND("osd repair " \ + "name=who,type=CephString", \ + "initiate repair on osd , or use to repair all", \ + "osd", "rw", "cli,rest") + +COMMAND("pg force_create_pg name=pgid,type=CephPgid", \ + "force creation of pg ", "pg", "rw", "cli,rest") +COMMAND_WITH_FLAG("pg set_full_ratio name=ratio,type=CephFloat,range=0.0|1.0", \ + "set ratio at which pgs are considered full", \ + "pg", "rw", "cli,rest", FLAG(DEPRECATED)) +COMMAND_WITH_FLAG("pg 
set_nearfull_ratio " \ + "name=ratio,type=CephFloat,range=0.0|1.0", \ + "set ratio at which pgs are considered nearly full", \ + "pg", "rw", "cli,rest", FLAG(DEPRECATED)) -- 2.39.5