From 80e0a0a8fee2f6f903f612734b2cc72eae703eae Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 27 Mar 2014 21:33:21 -0700 Subject: [PATCH] mon/MonClient: use keepalive2 to verify the mon session is live Verify that the mon is responding by checking the keepalive2 reply timestamp. We cannot rely solely on TCP timing out and returning an error. Fixes: #7888 Signed-off-by: Sage Weil (cherry picked from commit 056151a6334c054505c54e59af40f203a0721f28) --- src/common/config_opts.h | 1 + src/mon/MonClient.cc | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index b6d92af0914f..b41db5a17838 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -226,6 +226,7 @@ OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60) OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds +OPTION(mon_client_ping_timeout, OPT_DOUBLE, 30.0) // fail if we don't hear back OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10*default (30 seconds) OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000) diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc index c5903685c7f5..d726f88dc6fd 100644 --- a/src/mon/MonClient.cc +++ b/src/mon/MonClient.cc @@ -556,6 +556,11 @@ void MonClient::_reopen_session(int rank, string name) state = MC_STATE_NEGOTIATING; hunting = true; + // send an initial keepalive to ensure our timestamp is valid by the + // time we are in an OPENED state (by sequencing this before + // authentication). + messenger->send_keepalive(cur_con.get()); + MAuth *m = new MAuth; m->protocol = 0; m->monmap_epoch = monmap.get_epoch(); @@ -624,9 +629,20 @@ void MonClient::tick() _renew_subs(); messenger->send_keepalive(cur_con.get()); - + if (state == MC_STATE_HAVE_SESSION) { send_log(); + + if (cct->_conf->mon_client_ping_timeout > 0 && + cur_con->has_feature(CEPH_FEATURE_MSGR_KEEPALIVE2)) { + utime_t lk = cur_con->get_last_keepalive_ack(); + utime_t interval = ceph_clock_now(cct) - lk; + if (interval > cct->_conf->mon_client_ping_timeout) { + ldout(cct, 1) << "no keepalive since " << lk << " (" << interval + << " seconds), reconnecting" << dendl; + _reopen_session(); + } + } } } -- 2.47.3