From 439902311910f9c239f48c0958fd95fa5839e309 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Sat, 1 Sep 2018 18:47:18 -0700 Subject: [PATCH] mds: print is_laggy message once Beacon::is_laggy gets called frequently, which causes the debug log to get spammed with messages. Steps to reproduce: - set up a vstart cluster - start IO from client (e.g. `cp -av /usr /mnt`) - find the MDS's connection to the mon $ ss -tnp | grep ceph-mds ... ESTAB 0 0 127.0.0.1:58988 127.0.0.1:40928 users:(("ceph-mds",pid=13095,fd=37)) - block the connection using iptables on the MDS node: $ sudo iptables -I INPUT -p tcp --sport 58988 --dport 40928 -j DROP - verify MDS log using: $ tail -f mds.a.log | grep beacon ... 2018-09-05 19:21:05.672 7f2908a9d700 1 mds.beacon.a is_laggy 19.8876 > 15 since last acked beacon 2018-09-05 19:21:05.672 7f2908a9d700 1 mds.beacon.a is_laggy 19.8876 > 15 since last acked beacon 2018-09-05 19:21:05.672 7f2908a9d700 1 mds.beacon.a is_laggy 19.8876 > 15 since last acked beacon 2018-09-05 19:21:05.672 7f2908a9d700 1 mds.beacon.a is_laggy 19.8876 > 15 since last acked beacon 2018-09-05 19:21:05.784 7f2905a97700 5 mds.beacon.a Sending beacon up:active seq 114 2018-09-05 19:21:05.784 7f2905a97700 1 -- 127.0.0.1:6813/2277776624 --> 127.0.0.1:40929/0 -- mdsbeacon(14162/a up:active seq 114 v7) v7 -- 0x7d1e7e2a80 con 0 2018-09-05 19:21:05.784 7f2905a97700 20 mds.beacon.a sender thread waiting interval 4s Fixes: http://tracker.ceph.com/issues/35250 Signed-off-by: Patrick Donnelly --- src/mds/Beacon.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 94c5befba02..8d54a8218ec 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -252,8 +252,10 @@ bool Beacon::is_laggy() auto now = clock::now(); auto since = std::chrono::duration(now-last_acked_stamp).count(); if (since > g_conf()->mds_beacon_grace) { - dout(1) << "is_laggy " << since << " > " << g_conf()->mds_beacon_grace - << " since last acked beacon"
<< dendl; + if (!laggy) { + dout(1) << "MDS connection to Monitors appears to be laggy; " << since + << "s since last acked beacon" << dendl; + } laggy = true; auto last_reconnect = std::chrono::duration(now-last_mon_reconnect).count(); if (since > (g_conf()->mds_beacon_grace*2) && last_reconnect > g_conf()->mds_beacon_interval) { -- 2.39.5