From: Greg Farnum Date: Wed, 12 Feb 2014 21:51:48 +0000 (-0800) Subject: monc: backoff the timeout period when reconnecting X-Git-Tag: v0.78~162^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=794c86fd289bd62a35ed14368fa096c46736e9a2;p=ceph.git monc: backoff the timeout period when reconnecting If the monitors are systematically slowing down, we don't want to spam them with reconnect attempts every three seconds. Instead, every time we issue a reconnect, multiply our timeout period by a configurable factor; when we complete the connection, reduce that multiplier by 50%. This should let us respond to monitor load. Of course, we don't want to do that for initial startup in the case of a couple down monitors, so don't apply the backoff until we've successfully connected to a monitor at least once. Signed-off-by: Greg Farnum --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 7b99c0c3ae1..db71fb23514 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -239,6 +239,8 @@ OPTION(auth_service_ticket_ttl, OPT_DOUBLE, 60*60) OPTION(auth_debug, OPT_BOOL, false) // if true, assert when weird things happen OPTION(mon_client_hunt_interval, OPT_DOUBLE, 3.0) // try new mon every N seconds until we connect OPTION(mon_client_ping_interval, OPT_DOUBLE, 10.0) // ping every N seconds +OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE, 2.0) // each time we reconnect to a monitor, double our timeout +OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE, 10.0) // up to a max of 10*default (30 seconds) OPTION(mon_client_max_log_entries_per_message, OPT_INT, 1000) OPTION(mon_max_pool_pg_num, OPT_INT, 65536) OPTION(mon_pool_quota_warn_threshold, OPT_INT, 0) // percent of quota at which to issue warnings diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc index 3149290ede1..3ddd21fcecb 100644 --- a/src/mon/MonClient.cc +++ b/src/mon/MonClient.cc @@ -67,6 +67,8 @@ MonClient::MonClient(CephContext *cct_) : 
want_keys(0), global_id(0), authenticate_err(0), session_established_context(NULL), + had_a_connection(false), + reopen_interval_multiplier(1.0), auth(NULL), keyring(NULL), rotating_secrets(NULL), @@ -613,6 +615,15 @@ void MonClient::_reopen_session(int rank, string name) version_requests.erase(version_requests.begin()); } + // adjust timeouts if necessary + if (had_a_connection) { + reopen_interval_multiplier *= cct->_conf->mon_client_hunt_interval_backoff; + if (reopen_interval_multiplier > + cct->_conf->mon_client_hunt_interval_max_multiple) + reopen_interval_multiplier = + cct->_conf->mon_client_hunt_interval_max_multiple; + } + // restart authentication handshake state = MC_STATE_NEGOTIATING; hunting = true; @@ -658,6 +669,10 @@ void MonClient::_finish_hunting() if (hunting) { ldout(cct, 1) << "found mon." << cur_mon << dendl; hunting = false; + had_a_connection = true; + reopen_interval_multiplier /= 2.0; + if (reopen_interval_multiplier < 1.0) + reopen_interval_multiplier = 1.0; } } @@ -696,7 +711,8 @@ void MonClient::tick() void MonClient::schedule_tick() { if (hunting) - timer.add_event_after(cct->_conf->mon_client_hunt_interval, new C_Tick(this)); + timer.add_event_after(cct->_conf->mon_client_hunt_interval + * reopen_interval_multiplier, new C_Tick(this)); else timer.add_event_after(cct->_conf->mon_client_ping_interval, new C_Tick(this)); } diff --git a/src/mon/MonClient.h b/src/mon/MonClient.h index 4abefe6ce76..1fcdf9826b4 100644 --- a/src/mon/MonClient.h +++ b/src/mon/MonClient.h @@ -180,6 +180,8 @@ private: list waiting_for_session; Context *session_established_context; + bool had_a_connection; + double reopen_interval_multiplier; string _pick_random_mon(); void _finish_hunting();