From: xie xingguo Date: Fri, 31 May 2019 06:18:40 +0000 (+0800) Subject: osd/OSD: keep synchronizing with mon if stuck at booting X-Git-Tag: v15.1.0~2466^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=afc09727d2e053c9db777b4c16366a03099e0bb7;p=ceph.git osd/OSD: keep synchronizing with mon if stuck at booting During the flapping no{up,down,in,out} flags test I noticed that some osds can get stuck in the down & booting state because they are unaware that the noup flag has changed. Fix by subscribing (continuously) to the next osd map, just as we do when we are stuck waiting for healthy. Signed-off-by: xie xingguo --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 744dda22caa2..2257c2061ac4 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4886,15 +4886,15 @@ void OSD::tick() if (is_waiting_for_healthy()) { start_boot(); - if (is_waiting_for_healthy()) { - // failed to boot - std::lock_guard l(heartbeat_lock); - utime_t now = ceph_clock_now(); - if (now - last_mon_heartbeat > cct->_conf->osd_mon_heartbeat_interval) { - last_mon_heartbeat = now; - dout(1) << __func__ << " checking mon for new map" << dendl; - osdmap_subscribe(osdmap->get_epoch() + 1, false); - } + } + + if (is_waiting_for_healthy() || is_booting()) { + std::lock_guard l(heartbeat_lock); + utime_t now = ceph_clock_now(); + if (now - last_mon_heartbeat > cct->_conf->osd_mon_heartbeat_interval) { + last_mon_heartbeat = now; + dout(1) << __func__ << " checking mon for new map" << dendl; + osdmap_subscribe(osdmap->get_epoch() + 1, false); } }