git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: Cache notify fault injection 23099/head
author: Adam C. Emerson <aemerson@redhat.com>
Wed, 14 Mar 2018 20:29:02 +0000 (16:29 -0400)
committer: Adam C. Emerson <aemerson@redhat.com>
Wed, 18 Jul 2018 16:20:18 +0000 (12:20 -0400)
Allow a configured probability of dropping a notify message on the
floor without processing or acking it.

Fixes: http://tracker.ceph.com/issues/24962
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
src/common/options.cc
src/rgw/rgw_rados.cc
src/rgw/rgw_rados.h

index b2f43f0d1fb76625ca3aa61279fec255f31fd147..998d21b1b439343481a6f468c699887a8d69af18 100644 (file)
@@ -6248,6 +6248,22 @@ std::vector<Option> get_rgw_options() {
                          "of RGW instances under heavy use. If you would like "
                          "to turn off cache expiry, set this value to zero."),
 
+
+    Option("rgw_inject_notify_timeout_probability", Option::TYPE_FLOAT,
+          Option::LEVEL_DEV)
+    .set_default(0)
+    .add_tag("fault injection")
+    .add_tag("testing")
+    .add_service("rgw")
+    .set_min_max(0.0, 1.0)
+    .set_description("Likelihood of ignoring a notify")
+    .set_long_description("This is the probability that the RGW cache will "
+                         "ignore a cache notify message. It exists to help "
+                         "with the development and testing of cache "
+                         "consistency and recovery improvements. Please "
+                         "do not set it in a production cluster, as it "
+                         "actively causes failures. Set this to a floating "
+                         "point value between 0 and 1."),
   });
 }
 
index 717fbfe4c301eb4e3f37288cf0474ed2923c04a1..aea7448c6ec31f09521afd591e9009b33f923b17 100644 (file)
@@ -2945,6 +2945,20 @@ public:
                            << " cookie " << cookie
                            << " notifier " << notifier_id
                            << " bl.length()=" << bl.length() << dendl;
+
+    if (unlikely(rados->inject_notify_timeout_probability == 1) ||
+       (rados->inject_notify_timeout_probability > 0 &&
+        (rados->inject_notify_timeout_probability >
+         ceph::util::generate_random_number(0.0, 1.0)))) {
+      ldout(rados->ctx(), 0)
+       << "RGWWatcher::handle_notify() dropping notification! "
+       << "If this isn't what you want, set "
+       << "rgw_inject_notify_timeout_probability to zero!" << dendl;
+      return;
+    }
+
+
+
     rados->watch_cb(notify_id, cookie, notifier_id, bl);
 
     bufferlist reply_bl; // empty reply payload
@@ -4739,6 +4753,9 @@ int RGWRados::initialize()
 {
   int ret;
 
+  inject_notify_timeout_probability =
+    cct->_conf.get_val<double>("rgw_inject_notify_timeout_probability");
+
   ret = init_rados();
   if (ret < 0)
     return ret;
index 4fc4504dd6010a0804306276514d5c49d82f49ec..b5c01025b7bf6165b284e82fc5862e68ab5cc98c 100644 (file)
@@ -2320,6 +2320,8 @@ class RGWRados : public AdminSocketHook
   librados::IoCtx control_pool_ctx;   // .rgw.control
   bool watch_initialized;
 
+  double inject_notify_timeout_probability = 0;
+
   friend class RGWWatcher;
 
   Mutex bucket_id_lock;