]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/OSDMonitor: handle MOSDFull messages from OSDs
authorSage Weil <sage@redhat.com>
Thu, 23 Feb 2017 20:51:37 +0000 (15:51 -0500)
committerSage Weil <sage@redhat.com>
Mon, 6 Mar 2017 21:42:33 +0000 (16:42 -0500)
Signed-off-by: Sage Weil <sage@redhat.com>
src/messages/MOSDFull.h [new file with mode: 0644]
src/mon/Monitor.cc
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/msg/Message.cc
src/msg/Message.h

diff --git a/src/messages/MOSDFull.h b/src/messages/MOSDFull.h
new file mode 100644 (file)
index 0000000..5fcd3c8
--- /dev/null
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_MOSDFULL_H
+#define CEPH_MOSDFULL_H
+
+#include "messages/PaxosServiceMessage.h"
+#include "osd/OSDMap.h"
+
+// tell the mon to update the full/nearfull bits.  note that in the
+// future this message could be generalized to other state bits, but
+// for now name it for its sole application.
+
+class MOSDFull : public PaxosServiceMessage {
+ public:
+  epoch_t map_epoch = 0;
+  uint32_t state = 0;
+
+private:
+  ~MOSDFull() {}
+
+public:
+  MOSDFull(epoch_t e, unsigned s)
+    : PaxosServiceMessage(MSG_OSD_FULL, e), map_epoch(e), state(s) { }
+  MOSDFull()
+    : PaxosServiceMessage(MSG_OSD_FULL, 0) {}
+
+public:
+  void encode_payload(uint64_t features) {
+    paxos_encode();
+    ::encode(map_epoch, payload);
+    ::encode(state, payload);
+  }
+  void decode_payload() {
+    bufferlist::iterator p = payload.begin();
+    paxos_decode(p);
+    ::decode(map_epoch, p);
+    ::decode(state, p);
+  }
+
+  const char *get_type_name() const { return "osd_full"; }
+  void print(ostream &out) const {
+    set<string> states;
+    OSDMap::calc_state_set(state, states);
+    out << "osd_full(e" << map_epoch << " " << states << " v" << version << ")";
+  }
+
+};
+
+#endif
index 7205fdc638c25ba38cb26fac4c5f1940fd70a594..f46ad3e4c20596d0bdd14c3858038653eed84f22 100644 (file)
@@ -3736,6 +3736,7 @@ void Monitor::dispatch_op(MonOpRequestRef op)
     case CEPH_MSG_MON_GET_OSDMAP:
     case CEPH_MSG_POOLOP:
     case MSG_OSD_MARK_ME_DOWN:
+    case MSG_OSD_FULL:
     case MSG_OSD_FAILURE:
     case MSG_OSD_BOOT:
     case MSG_OSD_ALIVE:
index 52c3cf49953a8cb82b00f3e2bb1dc3f112dc6b2d..3cec6ef6ee2e05fbbefd6c0e1870c86c9d8d43ad 100644 (file)
@@ -33,6 +33,7 @@
 
 #include "messages/MOSDFailure.h"
 #include "messages/MOSDMarkMeDown.h"
+#include "messages/MOSDFull.h"
 #include "messages/MOSDMap.h"
 #include "messages/MMonGetOSDMap.h"
 #include "messages/MOSDBoot.h"
@@ -1363,6 +1364,8 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op)
     // damp updates
   case MSG_OSD_MARK_ME_DOWN:
     return preprocess_mark_me_down(op);
+  case MSG_OSD_FULL:
+    return preprocess_full(op);
   case MSG_OSD_FAILURE:
     return preprocess_failure(op);
   case MSG_OSD_BOOT:
@@ -1394,6 +1397,8 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op)
     // damp updates
   case MSG_OSD_MARK_ME_DOWN:
     return prepare_mark_me_down(op);
+  case MSG_OSD_FULL:
+    return prepare_full(op);
   case MSG_OSD_FAILURE:
     return prepare_failure(op);
   case MSG_OSD_BOOT:
@@ -2296,6 +2301,97 @@ void OSDMonitor::_booted(MonOpRequestRef op, bool logit)
 }
 
 
+// -------------
+// full
+
+bool OSDMonitor::preprocess_full(MonOpRequestRef op)
+{
+  op->mark_osdmon_event(__func__);
+  MOSDFull *m = static_cast<MOSDFull*>(op->get_req());
+  int from = m->get_orig_source().num();
+  set<string> state;
+  unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_FULL;
+
+  // check permissions, ignore if failed
+  MonSession *session = m->get_session();
+  if (!session)
+    goto ignore;
+  if (!session->is_capable("osd", MON_CAP_X)) {
+    dout(0) << "MOSDFull from entity with insufficient privileges:"
+           << session->caps << dendl;
+    goto ignore;
+  }
+
+  // ignore a full message from the osd instance that already went down
+  if (!osdmap.exists(from)) {
+    dout(7) << __func__ << " ignoring full message from nonexistent "
+           << m->get_orig_source_inst() << dendl;
+    goto ignore;
+  }
+  if ((!osdmap.is_up(from) &&
+       osdmap.get_most_recent_inst(from) == m->get_orig_source_inst()) ||
+      (osdmap.is_up(from) &&
+       osdmap.get_inst(from) != m->get_orig_source_inst())) {
+    dout(7) << __func__ << " ignoring full message from down "
+           << m->get_orig_source_inst() << dendl;
+    goto ignore;
+  }
+
+  OSDMap::calc_state_set(osdmap.get_state(from), state);
+
+  if ((osdmap.get_state(from) & mask) == m->state) {
+    dout(7) << __func__ << " state already " << state << " for osd." << from
+           << " " << m->get_orig_source_inst() << dendl;
+    _reply_map(op, m->version);
+    goto ignore;
+  }
+
+  dout(10) << __func__ << " want state " << state << " for osd." << from
+          << " " << m->get_orig_source_inst() << dendl;
+  return false;
+
+ ignore:
+  return true;
+}
+
+bool OSDMonitor::prepare_full(MonOpRequestRef op)
+{
+  op->mark_osdmon_event(__func__);
+  const MOSDFull *m = static_cast<MOSDFull*>(op->get_req());
+  const int from = m->get_orig_source().num();
+
+  const unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_FULL;
+  const unsigned want_state = m->state & mask;  // safety first
+
+  unsigned cur_state = osdmap.get_state(from);
+  auto p = pending_inc.new_state.find(from);
+  if (p != pending_inc.new_state.end()) {
+    cur_state ^= p->second;
+  }
+  cur_state &= mask;
+
+  set<string> want_state_set, cur_state_set;
+  OSDMap::calc_state_set(want_state, want_state_set);
+  OSDMap::calc_state_set(cur_state, cur_state_set);
+
+  if (cur_state != want_state) {
+    if (p != pending_inc.new_state.end()) {
+      p->second &= ~mask;
+    } else {
+      pending_inc.new_state[from] = 0;
+    }
+    pending_inc.new_state[from] |= (osdmap.get_state(from) & mask) ^ want_state;
+    dout(7) << __func__ << " osd." << from << " " << cur_state_set
+           << " -> " << want_state_set << dendl;
+  } else {
+    dout(7) << __func__ << " osd." << from << " " << cur_state_set
+           << " = wanted " << want_state_set << ", just waiting" << dendl;
+  }
+
+  wait_for_finished_proposal(op, new C_ReplyMap(this, op, m->version));
+  return true;
+}
+
 // -------------
 // alive
 
index bc7d44fd40fd54b3a6c8844c7382123fa5a1dbe6..70dd67735a64b27e26e6139bda8e3a1a7d3591c8 100644 (file)
@@ -261,6 +261,9 @@ private:
   void process_failures();
   void take_all_failures(list<MonOpRequestRef>& ls);
 
+  bool preprocess_full(MonOpRequestRef op);
+  bool prepare_full(MonOpRequestRef op);
+
   bool preprocess_boot(MonOpRequestRef op);
   bool prepare_boot(MonOpRequestRef op);
   void _booted(MonOpRequestRef op, bool logit);
index 26c91eef09346cbe32ad20c133db5b706890880d..632bc92606a2fa752fdb014ec4e5da077f9aa57c 100644 (file)
@@ -60,6 +60,7 @@ using namespace std;
 #include "messages/MOSDPGTemp.h"
 #include "messages/MOSDFailure.h"
 #include "messages/MOSDMarkMeDown.h"
+#include "messages/MOSDFull.h"
 #include "messages/MOSDPing.h"
 #include "messages/MOSDOp.h"
 #include "messages/MOSDOpReply.h"
@@ -433,6 +434,9 @@ Message *decode_message(CephContext *cct, int crcflags,
   case MSG_OSD_MARK_ME_DOWN:
     m = new MOSDMarkMeDown();
     break;
+  case MSG_OSD_FULL:
+    m = new MOSDFull();
+    break;
   case MSG_OSD_PING:
     m = new MOSDPing();
     break;
index 63e6d11503678093d607cdb6b754ea22c09df7fc..5120f7edf85a149bc6465f0887784c9e0d05f8d4 100644 (file)
@@ -65,6 +65,7 @@
 #define MSG_OSD_FAILURE      72
 #define MSG_OSD_ALIVE        73
 #define MSG_OSD_MARK_ME_DOWN 74
+#define MSG_OSD_FULL         75
 
 #define MSG_OSD_SUBOP        76
 #define MSG_OSD_SUBOPREPLY   77