From 75074524fe15afff1374a6006628adab4f7abf7b Mon Sep 17 00:00:00 2001
From: =?utf8?q?Piotr=20Da=C5=82ek?=
Date: Sun, 22 May 2016 13:08:48 +0200
Subject: [PATCH] OSD: Implement ms_handle_refused
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Added implementation of ms_handle_refused in OSD code, so it sends
MOSDFailure message in case the peer connection fails with ECONNREFUSED
*and* it is known to be up and new option "osd fast fail on connection
refused" which enables or disables new behavior.

Signed-off-by: Piotr Dałek
---
 src/common/config_opts.h |  1 +
 src/osd/OSD.cc           | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index b601f300be7a..6cb987eaa416 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -815,6 +815,7 @@ OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
 OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
 OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
+OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
 
 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
 OPTION(osd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 61039678ac85..121e5abd0969 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -4778,7 +4778,36 @@ bool OSD::ms_handle_reset(Connection *con)
 
 bool OSD::ms_handle_refused(Connection *con)
 {
-  return false;
+  if (!cct->_conf->osd_fast_fail_on_connection_refused)
+    return false;
+
+  OSD::Session *session = (OSD::Session *)con->get_priv();
+  dout(1) << "ms_handle_refused con " << con << " session " << session << dendl;
+  if (!session)
+    return false;
+  int type = con->get_peer_type();
+  // handle only OSD failures here
+  if (monc && (type == CEPH_ENTITY_TYPE_OSD)) {
+    OSDMapRef osdmap = get_osdmap();
+    if (osdmap) {
+      int id = osdmap->identify_osd(con->get_peer_addr());
+      // Report only when the address resolved to an OSD id (a negative id is
+      // assumed to mean "not found" -- confirm) and the map marks it up.
+      if (id >= 0 && osdmap->is_up(id)) {
+        // I'm cheating mon heartbeat grace logic, because we know it's not going
+        // to respawn alone. +1 so we won't hit any boundary case.
+        monc->send_mon_message(new MOSDFailure(monc->get_fsid(),
+                                               osdmap->get_inst(id),
+                                               cct->_conf->osd_heartbeat_grace + 1,
+                                               osdmap->get_epoch(),
+                                               MOSDFailure::FLAG_IMMEDIATE | MOSDFailure::FLAG_FAILED
+                                               ));
+      }
+    }
+  }
+  // release the session obtained from con->get_priv() above
+  session->put();
+  return true;
 }
 
 struct C_OSD_GetVersion : public Context {
-- 
2.47.3