git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: do not join cluster if not healthy
author Sage Weil <sage@inktank.com>
Wed, 23 Jan 2013 02:03:10 +0000 (18:03 -0800)
committer Samuel Just <sam.just@inktank.com>
Sat, 26 Jan 2013 01:22:38 +0000 (17:22 -0800)
If our internal heartbeats are failing, do not send a boot message and try
to join the cluster.

Signed-off-by: Sage Weil <sage@inktank.com>
(cherry picked from commit a4e78652cdd1698e8dd72dda51599348d013e5e0)

src/osd/OSD.cc
src/osd/OSD.h

index f1b90902e2d9754d2d2f0e02b05bb60d54f607dc..f9613469d790d08c2d46e8790b1074d7ff0fde67 100644 (file)
@@ -2240,6 +2240,14 @@ void OSD::tick()
 
   logger->set(l_osd_buf, buffer::get_total_alloc());
 
+  if (is_waiting_for_healthy()) {
+    if (g_ceph_context->get_heartbeat_map()->is_healthy()) {
+      dout(1) << "healthy again, booting" << dendl;
+      state = STATE_BOOTING;
+      start_boot();
+    }
+  }
+
   if (is_active()) {
     // periodically kick recovery work queue
     recovery_tp.wake();
@@ -2543,6 +2551,13 @@ void OSD::_maybe_boot(epoch_t oldest, epoch_t newest)
     return;
   }
 
+  // if we are not healthy, do not mark ourselves up (yet)
+  if (!g_ceph_context->get_heartbeat_map()->is_healthy()) {
+    dout(5) << "not healthy, deferring boot" << dendl;
+    state = STATE_WAITING_FOR_HEALTHY;
+    return;
+  }
+
   // if our map within recent history, try to add ourselves to the osdmap.
   if (osdmap->test_flag(CEPH_OSDMAP_NOUP)) {
     dout(5) << "osdmap NOUP flag is set, waiting for it to clear" << dendl;
index 766686825a90ef93e5cf383c45a5c657b3fafc11..567d84c09cd5580948e022368f49a51963d0505e 100644 (file)
@@ -493,6 +493,7 @@ public:
   static const int STATE_BOOTING = 2;
   static const int STATE_ACTIVE = 3;
   static const int STATE_STOPPING = 4;
+  static const int STATE_WAITING_FOR_HEALTHY = 5;
 
 private:
   int state;
@@ -505,6 +506,7 @@ public:
   bool is_booting() { return state == STATE_BOOTING; }
   bool is_active() { return state == STATE_ACTIVE; }
   bool is_stopping() { return state == STATE_STOPPING; }
+  bool is_waiting_for_healthy() { return state == STATE_WAITING_FOR_HEALTHY; }
 
 private: