git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: let peon mons send the osdmap replies
author Sage Weil <sage@redhat.com>
Thu, 17 Sep 2015 01:44:04 +0000 (21:44 -0400)
committer Sage Weil <sage@redhat.com>
Mon, 23 Nov 2015 13:38:48 +0000 (08:38 -0500)
Currently the leader mon often replies to OSDs by sending a set of
incremental OSDmaps (e.g., in response to an osd boot or failure).

Instead, send a small message to the proxying peon mon (if any)
with the epoch to start from and let *them* generate a suitable
reply.

Signed-off-by: Sage Weil <sage@redhat.com>
src/include/ceph_features.h
src/messages/MRoute.h
src/mon/Monitor.cc
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index 791008a3821d8a94d22cc93245b36693a2306a30..4e15563912e095f8f64aebfcc35153e9ee573ac3 100755 (executable)
@@ -71,6 +71,7 @@
 #define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
 #define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
 #define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
 
 #define CEPH_FEATURE_RESERVED2 (1ULL<<61)  /* slow down, we are almost out... */
 #define CEPH_FEATURE_RESERVED  (1ULL<<62)  /* DO NOT USE THIS ... last bit! */
@@ -164,6 +165,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
         CEPH_FEATURE_OSD_HITSET_GMT |                   \
         CEPH_FEATURE_HAMMER_0_94_4 |            \
         CEPH_FEATURE_MON_STATEFUL_SUB |         \
+        CEPH_FEATURE_MON_ROUTE_OSDMAP |         \
         0ULL)
 
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  CEPH_FEATURES_ALL
index 5282d39ae8def7640ad9492e46d71d40686fbd6f..109574e87110f2b9d306a2c72f14924b0aa5ea46 100644 (file)
 
 struct MRoute : public Message {
 
-  static const int HEAD_VERSION = 2;
+  static const int HEAD_VERSION = 3;
   static const int COMPAT_VERSION = 2;
 
   uint64_t session_mon_tid;
   Message *msg;
   entity_inst_t dest;
+  epoch_t send_osdmap_first;
   
-  MRoute() : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), msg(NULL) {}
+  MRoute() : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION),
+            session_mon_tid(0),
+            msg(NULL),
+            send_osdmap_first(0) {}
   MRoute(uint64_t t, Message *m)
-    : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), session_mon_tid(t), msg(m) {}
+    : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION),
+      session_mon_tid(t),
+      msg(m),
+      send_osdmap_first(0) {}
   MRoute(bufferlist bl, const entity_inst_t& i)
-    : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), session_mon_tid(0), dest(i) {
+    : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION),
+      session_mon_tid(0),
+      dest(i),
+      send_osdmap_first(0) {
     bufferlist::iterator p = bl.begin();
     msg = decode_message(NULL, 0, p);
   }
 private:
   ~MRoute() {
-    if (msg) msg->put();
+    if (msg)
+      msg->put();
   }
 
 public:
@@ -55,23 +66,25 @@ public:
     } else {
       msg = decode_message(NULL, 0, p);
     }
+    if (header.version >= 3) {
+      ::decode(send_osdmap_first, p);
+    }
   }
   void encode_payload(uint64_t features) {
     ::encode(session_mon_tid, payload);
     ::encode(dest, payload);
-    if (features & CEPH_FEATURE_MON_NULLROUTE) {
-      header.version = HEAD_VERSION;
-      header.compat_version = COMPAT_VERSION;
-      bool m = msg ? true : false;
-      ::encode(m, payload);
-      if (msg)
-       encode_message(msg, features, payload);
-    } else {
+    if ((features & CEPH_FEATURE_MON_NULLROUTE) == 0) {
       header.version = 1;
       header.compat_version = 1;
       assert(msg);
       encode_message(msg, features, payload);
+      return;
     }
+    bool m = msg ? true : false;
+    ::encode(m, payload);
+    if (msg)
+      encode_message(msg, features, payload);
+    ::encode(send_osdmap_first, payload);
   }
 
   const char *get_type_name() const { return "route"; }
@@ -80,6 +93,8 @@ public:
       o << "route(" << *msg;
     else
       o << "route(no-reply";
+    if (send_osdmap_first)
+      o << " send_osdmap_first " << send_osdmap_first;
     if (session_mon_tid)
       o << " tid " << session_mon_tid << ")";
     else
index 941625cfe68269a7c271bbe5f0bf4f367b6a2e58..0a260d0404ee2528ca04b579b9d36b0172cf79e6 100644 (file)
@@ -3272,6 +3272,11 @@ void Monitor::handle_route(MonOpRequestRef op)
        rr->con->send_message(m->msg);
        m->msg = NULL;
       }
+      if (m->send_osdmap_first) {
+       dout(10) << " sending osdmaps from " << m->send_osdmap_first << dendl;
+       osdmon()->send_incremental(m->send_osdmap_first, rr->session,
+                                  true, MonOpRequestRef());
+      }
       routed_requests.erase(m->session_mon_tid);
       rr->session->routed_request_tids.insert(rr->tid);
       delete rr;
index a22ab953ade63ff0d0f1e60a653b118679ba84c6..5e8e3016ac51aa446976762f9e1f33e92b5d01b0 100644 (file)
@@ -43,6 +43,7 @@
 #include "messages/MMonCommand.h"
 #include "messages/MRemoveSnaps.h"
 #include "messages/MOSDScrub.h"
+#include "messages/MRoute.h"
 
 #include "common/TextTable.h"
 #include "common/Timer.h"
@@ -2400,7 +2401,20 @@ void OSDMonitor::send_incremental(MonOpRequestRef op, epoch_t first)
 
   MonSession *s = op->get_session();
   assert(s);
-  send_incremental(first, s, false, op);
+
+  if (s->proxy_con &&
+      s->proxy_con->has_feature(CEPH_FEATURE_MON_ROUTE_OSDMAP)) {
+    // oh, we can tell the other mon to do it
+    dout(10) << __func__ << " asking proxying mon to send_incremental from "
+            << first << dendl;
+    MRoute *r = new MRoute(s->proxy_tid, NULL);
+    r->send_osdmap_first = first;
+    s->proxy_con->send_message(r);
+    op->mark_event("reply: send routed send_osdmap_first reply");
+  } else {
+    // do it ourselves
+    send_incremental(first, s, false, op);
+  }
 }
 
 void OSDMonitor::send_incremental(epoch_t first,
index 78e00f90e5f9c1a24f6e22483eed96312f80377c..517c34fd81b495543765d0ce4b893f39ece5199b 100644 (file)
@@ -227,11 +227,12 @@ private:
   MOSDMap *build_incremental(epoch_t first, epoch_t last);
   void send_full(MonOpRequestRef op);
   void send_incremental(MonOpRequestRef op, epoch_t first);
+public:
   // @param req an optional op request, if the osdmaps are replies to it. so
   //            @c Monitor::send_reply() can mark_event with it.
   void send_incremental(epoch_t first, MonSession *session, bool onetime,
                        MonOpRequestRef req = MonOpRequestRef());
-
+private:
   int reweight_by_utilization(int oload, std::string& out_str, bool by_pg,
                              const set<int64_t> *pools);