From: Samuel Just Date: Fri, 13 Jul 2012 20:45:24 +0000 (-0700) Subject: OSD: add config options to fake missed pings X-Git-Tag: v0.50~67 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=117b28680e56e10aac6c6489ba77963e7e8abd18;p=ceph.git OSD: add config options to fake missed pings In order to test monitor and osd failure detection and false positive correction, this patch adds the following options: 1. osd_debug_drop_ping_probability: probability of dropping a string of pings from a client upon ping receipt. 2. osd_debug_drop_ping_duration: number of pings to drop in a row. This should help with replicating some wrongly-marked-down thrashing cases. Signed-off-by: Samuel Just --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index c5f182e0dbc..a7e2a11e0e3 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -336,6 +336,8 @@ OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op OPTION(osd_command_max_records, OPT_INT, 256) OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros +OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0) +OPTION(osd_debug_drop_ping_duration, OPT_INT, 0) OPTION(filestore, OPT_BOOL, false) OPTION(filestore_debug_omap_check, OPT_BOOL, 0) // Expensive debugging check on sync // Use omap for xattrs for attrs over diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 28efcc4d771..44b155f2fdb 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1671,6 +1671,27 @@ void OSD::handle_osd_ping(MOSDPing *m) case MOSDPing::PING: { + if (g_conf->osd_debug_drop_ping_probability > 0) { + if (debug_heartbeat_drops_remaining.count(from)) { + if (debug_heartbeat_drops_remaining[from] == 0) { + debug_heartbeat_drops_remaining.erase(from); + } else { + debug_heartbeat_drops_remaining[from]--; + dout(5) << "Dropping heartbeat 
from " << from + << ", " << debug_heartbeat_drops_remaining[from] + << " remaining to drop" << dendl; + break; + } + } else if (g_conf->osd_debug_drop_ping_probability > + ((((double)(rand()%100))/100.0))) { + debug_heartbeat_drops_remaining[from] = + g_conf->osd_debug_drop_ping_duration; + dout(5) << "Dropping heartbeat from " << from + << ", " << debug_heartbeat_drops_remaining[from] + << " remaining to drop" << dendl; + break; + } + } Message *r = new MOSDPing(monc->get_fsid(), locked ? osdmap->get_epoch():0, MOSDPing::PING_REPLY, diff --git a/src/osd/OSD.h b/src/osd/OSD.h index bfbecca3cf1..86a02cd61b8 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -453,6 +453,7 @@ private: epoch_t epoch; ///< most recent epoch we wanted this peer }; Mutex heartbeat_lock; + map debug_heartbeat_drops_remaining; Cond heartbeat_cond; bool heartbeat_stop; bool heartbeat_need_update; ///< true if we need to refresh our heartbeat peers