]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
OSD: add config options to fake missed pings
author Samuel Just <sam.just@inktank.com>
Fri, 13 Jul 2012 20:45:24 +0000 (13:45 -0700)
committer Samuel Just <sam.just@inktank.com>
Fri, 13 Jul 2012 23:09:53 +0000 (16:09 -0700)
In order to test monitor and osd failure detection and false
positive correction, this patch adds the following options:

 1. osd_debug_drop_ping_probability: probability of dropping
    a string of pings from a client upon ping receipt.
 2. osd_debug_drop_ping_duration: number of pings to drop in
    a row.

This should help with replicating some wrongly-marked-down
thrashing cases.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/common/config_opts.h
src/osd/OSD.cc
src/osd/OSD.h

index c5f182e0dbc60deee55d1fe49ac93b85fd6d4033..a7e2a11e0e31a77c36357825c8cc0b36af7fbdbe 100644 (file)
@@ -336,6 +336,8 @@ OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op
 OPTION(osd_command_max_records, OPT_INT, 256)
 OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
 OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false)  // read fiemap-reported holes and verify they are zeros
+OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
+OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
 OPTION(filestore, OPT_BOOL, false)
 OPTION(filestore_debug_omap_check, OPT_BOOL, 0) // Expensive debugging check on sync
 // Use omap for xattrs for attrs over
index 28efcc4d771e302b54813a4366ab590ddfb2a7bb..44b155f2fdbc1115010ebd510e4ea9cf9dbebc11 100644 (file)
@@ -1671,6 +1671,27 @@ void OSD::handle_osd_ping(MOSDPing *m)
 
   case MOSDPing::PING:
     {
+      if (g_conf->osd_debug_drop_ping_probability > 0) {
+       if (debug_heartbeat_drops_remaining.count(from)) {
+         if (debug_heartbeat_drops_remaining[from] == 0) {
+           debug_heartbeat_drops_remaining.erase(from);
+         } else {
+           debug_heartbeat_drops_remaining[from]--;
+           dout(5) << "Dropping heartbeat from " << from
+                   << ", " << debug_heartbeat_drops_remaining[from]
+                   << " remaining to drop" << dendl;
+           break;
+         }
+       } else if (g_conf->osd_debug_drop_ping_probability >
+                  ((((double)(rand()%100))/100.0))) {
+         debug_heartbeat_drops_remaining[from] =
+           g_conf->osd_debug_drop_ping_duration;
+         dout(5) << "Dropping heartbeat from " << from
+                 << ", " << debug_heartbeat_drops_remaining[from]
+                 << " remaining to drop" << dendl;
+         break;
+       }
+      }
       Message *r = new MOSDPing(monc->get_fsid(),
                                locked ? osdmap->get_epoch():0, 
                                MOSDPing::PING_REPLY,
index bfbecca3cf1b2f9207930fd8670b82194d5b389b..86a02cd61b89a5002f42c93ae0964ba0ba058b48 100644 (file)
@@ -453,6 +453,7 @@ private:
     epoch_t epoch;      ///< most recent epoch we wanted this peer
   };
   Mutex heartbeat_lock;
+  map<int, int> debug_heartbeat_drops_remaining;
   Cond heartbeat_cond;
   bool heartbeat_stop;
   bool heartbeat_need_update;   ///< true if we need to refresh our heartbeat peers