From: Sage Weil Date: Wed, 23 Jan 2013 02:03:10 +0000 (-0800) Subject: osd: do not join cluster if not healthy X-Git-Tag: v0.57~143^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a4e78652cdd1698e8dd72dda51599348d013e5e0;p=ceph.git osd: do not join cluster if not healthy If our internal heartbeats are failing, do not send a boot message or try to join the cluster. Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 0e3bade863ad..82a9a54e281e 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2277,6 +2277,14 @@ void OSD::tick() logger->set(l_osd_buf, buffer::get_total_alloc()); + if (is_waiting_for_healthy()) { + if (g_ceph_context->get_heartbeat_map()->is_healthy()) { + dout(1) << "healthy again, booting" << dendl; + state = STATE_BOOTING; + start_boot(); + } + } + if (is_active()) { // periodically kick recovery work queue recovery_tp.wake(); @@ -2694,6 +2702,13 @@ void OSD::_maybe_boot(epoch_t oldest, epoch_t newest) return; } + // if we are not healthy, do not mark ourselves up (yet) + if (!g_ceph_context->get_heartbeat_map()->is_healthy()) { + dout(5) << "not healthy, deferring boot" << dendl; + state = STATE_WAITING_FOR_HEALTHY; + return; + } + // if our map within recent history, try to add ourselves to the osdmap. 
if (osdmap->test_flag(CEPH_OSDMAP_NOUP)) { dout(5) << "osdmap NOUP flag is set, waiting for it to clear" << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 359d75396fea..1fb34aac6108 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -494,6 +494,7 @@ public: static const int STATE_BOOTING = 2; static const int STATE_ACTIVE = 3; static const int STATE_STOPPING = 4; + static const int STATE_WAITING_FOR_HEALTHY = 5; private: int state; @@ -506,6 +507,7 @@ public: bool is_booting() { return state == STATE_BOOTING; } bool is_active() { return state == STATE_ACTIVE; } bool is_stopping() { return state == STATE_STOPPING; } + bool is_waiting_for_healthy() { return state == STATE_WAITING_FOR_HEALTHY; } private: