From: Sage Weil
Date: Tue, 24 Apr 2012 21:28:18 +0000 (-0700)
Subject: mon: prevent osd mark-down with NODOWN flag
X-Git-Tag: v0.47~89^2~9
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=addfb2c6700cc368667ed015838966dde524ccf8;p=ceph.git

mon: prevent osd mark-down with NODOWN flag

If the NODOWN osdmap flag is set,
 - ignore osd failure reports
 - do not mark osds down due to lack of osd/pg stats

We *do* still allow explicit admin 'ceph osd down N' commands, and a booting
OSD to mark the previous instance of itself down.

Signed-off-by: Sage Weil
---

diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index dbc4e92fd98c..381c93df8b3d 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -468,6 +468,7 @@ bool OSDMonitor::preprocess_failure(MOSDFailure *m)
       send_incremental(m, m->get_epoch()+1);
     goto didit;
   }
+
   // already reported?
   if (osdmap.is_down(badboy)) {
     dout(5) << "preprocess_failure dup: " << m->get_target() << ", from " << m->get_orig_source_inst() << dendl;
@@ -476,6 +477,12 @@ bool OSDMonitor::preprocess_failure(MOSDFailure *m)
     goto didit;
   }
 
+  // NODOWN?
+  if (osdmap.test_flag(CEPH_OSDMAP_NODOWN)) {
+    dout(5) << "preprocess_failure NODOWN flag set, ignoring report of " << m->get_target() << " from " << m->get_orig_source_inst() << dendl;
+    goto didit;
+  }
+
   dout(10) << "preprocess_failure new: " << m->get_target() << ", from " << m->get_orig_source_inst() << dendl;
   return false;
 
@@ -1203,7 +1210,7 @@ void OSDMonitor::handle_osd_timeouts(const utime_t &now,
     if (t == last_osd_report.end()) {
       // it wasn't in the map; start the timer.
       last_osd_report[i] = now;
-    } else {
+    } else if (!osdmap.test_flag(CEPH_OSDMAP_NODOWN)) {
       utime_t diff = now - t->second;
       if (diff > timeo) {
         derr << "no osd or pg stats from osd." << i << " since " << t->second << ", " << diff