From: Sage Weil Date: Thu, 17 Sep 2015 01:44:04 +0000 (-0400) Subject: mon: let peon mons send the osdmap replies X-Git-Tag: v10.0.1~26^2~25 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=39e06ef8f070e136e54452bdea3f6105cd79bb73;p=ceph.git mon: let peon mons send the osdmap replies Currently the leader mon often replies to OSDs by sending a set of incremental OSDmaps (e.g., in response to an osd boot or failure). Instead, send a small message to the proxying peon mon (if any) with the epoch to start from and let *them* generate a suitable reply. Signed-off-by: Sage Weil --- diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 791008a3821d..4e15563912e0 100755 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -71,6 +71,7 @@ #define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55) #define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */ #define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */ +#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */ #define CEPH_FEATURE_RESERVED2 (1ULL<<61) /* slow down, we are almost out... */ #define CEPH_FEATURE_RESERVED (1ULL<<62) /* DO NOT USE THIS ... last bit! 
*/ @@ -164,6 +165,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) { CEPH_FEATURE_OSD_HITSET_GMT | \ CEPH_FEATURE_HAMMER_0_94_4 | \ CEPH_FEATURE_MON_STATEFUL_SUB | \ + CEPH_FEATURE_MON_ROUTE_OSDMAP | \ 0ULL) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL diff --git a/src/messages/MRoute.h b/src/messages/MRoute.h index 5282d39ae8de..109574e87110 100644 --- a/src/messages/MRoute.h +++ b/src/messages/MRoute.h @@ -22,24 +22,35 @@ struct MRoute : public Message { - static const int HEAD_VERSION = 2; + static const int HEAD_VERSION = 3; static const int COMPAT_VERSION = 2; uint64_t session_mon_tid; Message *msg; entity_inst_t dest; + epoch_t send_osdmap_first; - MRoute() : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), msg(NULL) {} + MRoute() : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), + session_mon_tid(0), + msg(NULL), + send_osdmap_first(0) {} MRoute(uint64_t t, Message *m) - : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), session_mon_tid(t), msg(m) {} + : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), + session_mon_tid(t), + msg(m), + send_osdmap_first(0) {} MRoute(bufferlist bl, const entity_inst_t& i) - : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), session_mon_tid(0), dest(i) { + : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), + session_mon_tid(0), + dest(i), + send_osdmap_first(0) { bufferlist::iterator p = bl.begin(); msg = decode_message(NULL, 0, p); } private: ~MRoute() { - if (msg) msg->put(); + if (msg) + msg->put(); } public: @@ -55,23 +66,25 @@ public: } else { msg = decode_message(NULL, 0, p); } + if (header.version >= 3) { + ::decode(send_osdmap_first, p); + } } void encode_payload(uint64_t features) { ::encode(session_mon_tid, payload); ::encode(dest, payload); - if (features & CEPH_FEATURE_MON_NULLROUTE) { - header.version = HEAD_VERSION; - header.compat_version = COMPAT_VERSION; - bool m = msg ? 
true : false; - ::encode(m, payload); - if (msg) - encode_message(msg, features, payload); - } else { + if ((features & CEPH_FEATURE_MON_NULLROUTE) == 0) { header.version = 1; header.compat_version = 1; assert(msg); encode_message(msg, features, payload); + return; } + bool m = msg ? true : false; + ::encode(m, payload); + if (msg) + encode_message(msg, features, payload); + ::encode(send_osdmap_first, payload); } const char *get_type_name() const { return "route"; } @@ -80,6 +93,8 @@ public: o << "route(" << *msg; else o << "route(no-reply"; + if (send_osdmap_first) + o << " send_osdmap_first " << send_osdmap_first; if (session_mon_tid) o << " tid " << session_mon_tid << ")"; else diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 941625cfe682..0a260d0404ee 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3272,6 +3272,11 @@ void Monitor::handle_route(MonOpRequestRef op) rr->con->send_message(m->msg); m->msg = NULL; } + if (m->send_osdmap_first) { + dout(10) << " sending osdmaps from " << m->send_osdmap_first << dendl; + osdmon()->send_incremental(m->send_osdmap_first, rr->session, + true, MonOpRequestRef()); + } routed_requests.erase(m->session_mon_tid); rr->session->routed_request_tids.insert(rr->tid); delete rr; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index a22ab953ade6..5e8e3016ac51 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -43,6 +43,7 @@ #include "messages/MMonCommand.h" #include "messages/MRemoveSnaps.h" #include "messages/MOSDScrub.h" +#include "messages/MRoute.h" #include "common/TextTable.h" #include "common/Timer.h" @@ -2400,7 +2401,20 @@ void OSDMonitor::send_incremental(MonOpRequestRef op, epoch_t first) MonSession *s = op->get_session(); assert(s); - send_incremental(first, s, false, op); + + if (s->proxy_con && + s->proxy_con->has_feature(CEPH_FEATURE_MON_ROUTE_OSDMAP)) { + // oh, we can tell the other mon to do it + dout(10) << __func__ << " asking proxying mon to send_incremental from " 
+ << first << dendl; + MRoute *r = new MRoute(s->proxy_tid, NULL); + r->send_osdmap_first = first; + s->proxy_con->send_message(r); + op->mark_event("reply: send routed send_osdmap_first reply"); + } else { + // do it ourselves + send_incremental(first, s, false, op); + } } void OSDMonitor::send_incremental(epoch_t first, diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 78e00f90e5f9..517c34fd81b4 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -227,11 +227,12 @@ private: MOSDMap *build_incremental(epoch_t first, epoch_t last); void send_full(MonOpRequestRef op); void send_incremental(MonOpRequestRef op, epoch_t first); +public: // @param req an optional op request, if the osdmaps are replies to it. so // @c Monitor::send_reply() can mark_event with it. void send_incremental(epoch_t first, MonSession *session, bool onetime, MonOpRequestRef req = MonOpRequestRef()); - +private: int reweight_by_utilization(int oload, std::string& out_str, bool by_pg, const set<int64_t> *pools);