]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: implement timeout for notify propagation.
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Fri, 5 Mar 2021 18:49:05 +0000 (18:49 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Sat, 6 Mar 2021 16:27:14 +0000 (16:27 +0000)
This missed feature was the root cause of the following
failure at Sepia:

```
2021-03-04T15:40:03.013 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify: [ RUN      ] LibRadosWatchNotify.WatchNotify2Timeout
2021-03-04T15:40:03.013 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify: watch_notify2_test_cb from 4394 notify_id 120259084288 cookie 94023196911472
2021-03-04T15:40:03.013 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-1471-g8bd81c29/rpm/el8/BUILD/ceph-17.0.0-1471-g8bd81c29/src/test/librados/watch_notify.cc:425: Failure
2021-03-04T15:40:03.014 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify: Expected equality of these values:
2021-03-04T15:40:03.015 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify:   -110
2021-03-04T15:40:03.015 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify:   rados_notify2(ioctx, notify_oid, "notify", 6, 1000, &reply_buf, &reply_buf_len)
2021-03-04T15:40:03.015 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify:     Which is: 0
2021-03-04T15:40:03.016 INFO:tasks.workunit.client.0.smithi058.stdout:         api_watch_notify: [  FAILED  ] LibRadosWatchNotify.WatchNotify2Timeout (3020 ms)
```

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/crimson/osd/watch.cc
src/crimson/osd/watch.h

index 1cb85e98019ef5aa8267b00d0325be6b0717cdfe..a69ea8d728af614d1062a8e57fba13a7d693b6de 100644 (file)
@@ -1,6 +1,11 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 
+#include <algorithm>
+
+#include <boost/range/adaptor/transformed.hpp>
+#include <boost/range/algorithm_ext/insert.hpp>
+
 #include "crimson/osd/watch.h"
 #include "messages/MWatchNotify.h"
 
@@ -102,6 +107,18 @@ seastar::future<> Watch::remove(const bool send_disconnect)
     });
 }
 
+void Watch::cancel_notify(const uint64_t notify_id)
+{
+  logger().info("{} notify_id={}", __func__, notify_id);
+  const auto it = std::find_if(
+    std::begin(in_progress_notifies), std::end(in_progress_notifies),
+    [notify_id] (const auto& notify) {
+      return notify->ninfo.notify_id == notify_id;
+    });
+  assert(it != std::end(in_progress_notifies));
+  in_progress_notifies.erase(it);
+}
+
 bool notify_reply_t::operator<(const notify_reply_t& rhs) const
 {
   // comparing std::pairs to emphasize our legacy. ceph-osd stores
@@ -147,19 +164,15 @@ seastar::future<> Notify::complete_watcher(
   return remove_watcher(std::move(watch));
 }
 
-seastar::future<> Notify::maybe_send_completion()
+seastar::future<> Notify::maybe_send_completion(
+  std::set<WatchRef> timedout_watchers)
 {
-  logger().info("{} -- {} in progress watchers", __func__, watchers.size());
-  if (watchers.empty()) {
+  logger().info("{} -- {} in progress watchers, {} timedout watchers {}",
+                __func__, watchers.size(), timedout_watchers.size());
+  if (watchers.empty() || !timedout_watchers.empty()) {
     logger().debug("{} sending notify replies: {}", __func__, notify_replies);
-    // prepare reply
-    ceph::bufferlist bl;
-    encode(notify_replies, bl);
-    // FIXME: this is just a stub
-    std::list<std::pair<uint64_t,uint64_t>> missed;
-    encode(missed, bl);
-
     complete = true;
+    timeout_timer.cancel();
 
     ceph::bufferlist empty;
     auto reply = make_message<MWatchNotify>(
@@ -169,10 +182,39 @@ seastar::future<> Notify::maybe_send_completion()
       CEPH_WATCH_EVENT_NOTIFY_COMPLETE,
       empty,
       client_gid);
-    reply->set_data(bl);
+    ceph::bufferlist reply_bl;
+    {
+      std::vector<std::pair<uint64_t,uint64_t>> missed;
+      missed.reserve(std::size(timedout_watchers));
+      boost::insert(
+       missed, std::begin(missed),
+       timedout_watchers | boost::adaptors::transformed([] (auto w) {
+          return std::make_pair(w->get_watcher_gid(), w->get_cookie());
+        }));
+      ceph::encode(notify_replies, reply_bl);
+      ceph::encode(missed, reply_bl);
+    }
+    reply->set_data(std::move(reply_bl));
+    if (!timedout_watchers.empty()) {
+      reply->return_code = -ETIMEDOUT;
+    }
     return conn->send(std::move(reply));
   }
   return seastar::now();
 }
 
+void Notify::do_timeout()
+{
+  logger().debug("{} complete={}", __func__, complete);
+  if (complete) {
+    return;
+  }
+  decltype(watchers) timedout_watchers;
+  std::swap(watchers, timedout_watchers);
+  for (auto& watcher : timedout_watchers) {
+    watcher->cancel_notify(ninfo.notify_id);
+  }
+  std::ignore = maybe_send_completion(std::move(timedout_watchers));
+}
+
 } // namespace crimson::osd
index cdfb5b951443d58d22920b7972fb220ac257aaee..0f2083651cf84f4809210124639a2a1e694b4d3c 100644 (file)
@@ -89,6 +89,7 @@ public:
   uint64_t get_cookie() const {
     return winfo.cookie;
   }
+  void cancel_notify(const uint64_t notify_id);
 };
 
 using WatchRef = seastar::shared_ptr<Watch>;
@@ -116,12 +117,21 @@ class Notify {
   uint64_t user_version;
   bool complete = false;
   bool discarded = false;
+  seastar::timer<seastar::lowres_clock> timeout_timer{
+    [this] { do_timeout(); }
+  };
 
   /// (gid,cookie) -> reply_bl for everyone who acked the notify
   std::multiset<notify_reply_t> notify_replies;
 
   uint64_t get_id() const { return ninfo.notify_id; }
-  seastar::future<> maybe_send_completion();
+
+  /// Sends notify completion if watchers.empty() or timeout
+  seastar::future<> maybe_send_completion(
+    std::set<WatchRef> timedout_watchers = {});
+
+  /// Called on Notify timeout
+  void do_timeout();
 
   template <class WatchIteratorT>
   Notify(WatchIteratorT begin,
@@ -166,6 +176,9 @@ Notify::Notify(WatchIteratorT begin,
     conn(std::move(conn)),
     client_gid(client_gid),
     user_version(user_version) {
+  if (ninfo.timeout) {
+    timeout_timer.arm(std::chrono::seconds{ninfo.timeout});
+  }
 }
 
 template <class WatchIteratorT, class... Args>