From 05e9037245752538bfc721b9fceb4ade747d3270 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Thu, 20 Dec 2018 11:42:19 -0800
Subject: [PATCH] MDSMonitor: allow beacons from stopping MDS that was laggy

Otherwise these get continually dropped.

Reproducing this manually:

o only have 2 MDS running (to prevent failover)
o max_mds=2
o create a lot of dirs with pins on rank 1 to make stopping take a while
  (as of this commit)
o max_mds=1
o immediately start dropping beacon packets to the mons from rank 1 using
  iptables
o wait ~30 seconds until the rank shows up as laggy
o remove the iptables rule

Now debug output shows after this commit:

    2018-12-20 14:58:07.190 7fbe19f5d700 5 mon.a@0(leader).mds e148 preprocess_beacon mdsbeacon(34119/b up:stopping seq 155 v148) v7 from mds.1 127.0.0.1:6839/1223470631 compat={},rocompat={},incompat={1=base v0.20,2=client writeable ranges,3=default file layouts on dirs,4=dir inode in separate object,5=mds uses versioned encoding,6=dirfrag is stored in omap,8=no anchor table,9=file layout v2,10=snaprealm v2}
    2018-12-20 14:58:07.190 7fbe19f5d700 10 mon.a@0(leader).mds e148 preprocess_beacon: GID exists in map: 34119
    2018-12-20 14:58:07.190 7fbe19f5d700 5 mon.a@0(leader).mds e148 _note_beacon mdsbeacon(34119/b up:stopping seq 155 v148) v7 noting time
    2018-12-20 14:58:07.190 7fbe19f5d700 7 mon.a@0(leader).mds e148 prepare_update mdsbeacon(34119/b up:stopping seq 155 v148) v7
    2018-12-20 14:58:07.190 7fbe19f5d700 12 mon.a@0(leader).mds e148 prepare_beacon mdsbeacon(34119/b up:stopping seq 155 v148) v7 from mds.1 127.0.0.1:6839/1223470631
    2018-12-20 14:58:07.190 7fbe19f5d700 15 mon.a@0(leader).mds e148 prepare_beacon got health from gid 34119 with 0 metrics.
    2018-12-20 14:58:07.190 7fbe19f5d700 0 log_channel(cluster) log [INF] : MDS health message cleared (mds.1): 1 slow metadata IOs are blocked > 30 secs, oldest blocked for 30 secs
    2018-12-20 14:58:07.190 7fbe19f5d700 1 -- 127.0.0.1:40495/0 --> 127.0.0.1:40495/0 -- log(1 entries from seq 129 at 2018-12-20 14:58:07.192368) v1 -- 0x5de9f11a80 con 0
    2018-12-20 14:58:07.190 7fbe19f5d700 1 mon.a@0(leader).mds e148 prepare_beacon clearing laggy flag on 127.0.0.1:6839/1223470631
    2018-12-20 14:58:07.190 7fbe19f5d700 5 mon.a@0(leader).mds e148 prepare_beacon mds.1 up:stopping -> up:stopping standby_for_rank=-1

Fixes: https://tracker.ceph.com/issues/37724
Signed-off-by: Patrick Donnelly
---
 src/mon/MDSMonitor.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 0dd60336a8c1e..eb59d415e37d4 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -680,7 +680,9 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
       });
     }
 
-    if (info.state == MDSMap::STATE_STOPPING && state != MDSMap::STATE_STOPPED ) {
+    if (info.state == MDSMap::STATE_STOPPING &&
+        state != MDSMap::STATE_STOPPING &&
+        state != MDSMap::STATE_STOPPED) {
       // we can't transition to any other states from STOPPING
       dout(0) << "got beacon for MDS in STATE_STOPPING, ignoring requested state change"
               << dendl;
-- 
2.47.3