From: Radoslaw Zarzynski Date: Fri, 5 Mar 2021 18:49:05 +0000 (+0000) Subject: crimson/osd: implement timeout for notify propagation. X-Git-Tag: v17.1.0~2714^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=5db263feff32ba16a243b246dd19e4042ef2a69d;p=ceph-ci.git crimson/osd: implement timeout for notify propagation. This missed feature was the root cause of the following failure at Sepia: ``` 2021-03-04T15:40:03.013 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: [ RUN ] LibRadosWatchNotify.WatchNotify2Timeout 2021-03-04T15:40:03.013 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: watch_notify2_test_cb from 4394 notify_id 120259084288 cookie 94023196911472 2021-03-04T15:40:03.013 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-1471-g8bd81c29/rpm/el8/BUILD/ceph-17.0.0-1471-g8bd81c29/src/test/librados/watch_notify.cc:425: Failure 2021-03-04T15:40:03.014 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: Expected equality of these values: 2021-03-04T15:40:03.015 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: -110 2021-03-04T15:40:03.015 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: rados_notify2(ioctx, notify_oid, "notify", 6, 1000, &reply_buf, &reply_buf_len) 2021-03-04T15:40:03.015 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: Which is: 0 2021-03-04T15:40:03.016 INFO:tasks.workunit.client.0.smithi058.stdout: api_watch_notify: [ FAILED ] LibRadosWatchNotify.WatchNotify2Timeout (3020 ms) ``` Signed-off-by: Radoslaw Zarzynski --- diff --git a/src/crimson/osd/watch.cc b/src/crimson/osd/watch.cc index 1cb85e98019..a69ea8d728a 100644 --- a/src/crimson/osd/watch.cc +++ b/src/crimson/osd/watch.cc @@ -1,6 +1,11 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include + +#include +#include + #include "crimson/osd/watch.h" #include "messages/MWatchNotify.h" @@ -102,6 +107,18 @@ seastar::future<> Watch::remove(const bool send_disconnect) }); } +void Watch::cancel_notify(const uint64_t notify_id) +{ + logger().info("{} notify_id={}", __func__, notify_id); + const auto it = std::find_if( + std::begin(in_progress_notifies), std::end(in_progress_notifies), + [notify_id] (const auto& notify) { + return notify->ninfo.notify_id == notify_id; + }); + assert(it != std::end(in_progress_notifies)); + in_progress_notifies.erase(it); +} + bool notify_reply_t::operator<(const notify_reply_t& rhs) const { // comparing std::pairs to emphasize our legacy. ceph-osd stores @@ -147,19 +164,15 @@ seastar::future<> Notify::complete_watcher( return remove_watcher(std::move(watch)); } -seastar::future<> Notify::maybe_send_completion() +seastar::future<> Notify::maybe_send_completion( + std::set timedout_watchers) { - logger().info("{} -- {} in progress watchers", __func__, watchers.size()); - if (watchers.empty()) { + logger().info("{} -- {} in progress watchers, {} timedout watchers {}", + __func__, watchers.size(), timedout_watchers.size()); + if (watchers.empty() || !timedout_watchers.empty()) { logger().debug("{} sending notify replies: {}", __func__, notify_replies); - // prepare reply - ceph::bufferlist bl; - encode(notify_replies, bl); - // FIXME: this is just a stub - std::list> missed; - encode(missed, bl); - complete = true; + timeout_timer.cancel(); ceph::bufferlist empty; auto reply = make_message( @@ -169,10 +182,39 @@ seastar::future<> Notify::maybe_send_completion() CEPH_WATCH_EVENT_NOTIFY_COMPLETE, empty, client_gid); - reply->set_data(bl); + ceph::bufferlist reply_bl; + { + std::vector> missed; + missed.reserve(std::size(timedout_watchers)); + boost::insert( + missed, std::begin(missed), + timedout_watchers | boost::adaptors::transformed([] (auto w) { + return std::make_pair(w->get_watcher_gid(), w->get_cookie()); + })); + ceph::encode(notify_replies, reply_bl); + ceph::encode(missed, reply_bl); + } + reply->set_data(std::move(reply_bl)); + if (!timedout_watchers.empty()) { + reply->return_code = -ETIMEDOUT; + } return conn->send(std::move(reply)); } return seastar::now(); } +void Notify::do_timeout() +{ + logger().debug("{} complete={}", __func__, complete); + if (complete) { + return; + } + decltype(watchers) timedout_watchers; + std::swap(watchers, timedout_watchers); + for (auto& watcher : timedout_watchers) { + watcher->cancel_notify(ninfo.notify_id); + } + std::ignore = maybe_send_completion(std::move(timedout_watchers)); +} + } // namespace crimson::osd diff --git a/src/crimson/osd/watch.h b/src/crimson/osd/watch.h index cdfb5b95144..0f2083651cf 100644 --- a/src/crimson/osd/watch.h +++ b/src/crimson/osd/watch.h @@ -89,6 +89,7 @@ public: uint64_t get_cookie() const { return winfo.cookie; } + void cancel_notify(const uint64_t notify_id); }; using WatchRef = seastar::shared_ptr; @@ -116,12 +117,21 @@ class Notify { uint64_t user_version; bool complete = false; bool discarded = false; + seastar::timer timeout_timer{ + [this] { do_timeout(); } + }; /// (gid,cookie) -> reply_bl for everyone who acked the notify std::multiset notify_replies; uint64_t get_id() const { return ninfo.notify_id; } - seastar::future<> maybe_send_completion(); + + /// Sends notify completion if watchers.empty() or timeout + seastar::future<> maybe_send_completion( + std::set timedout_watchers = {}); + + /// Called on Notify timeout + void do_timeout(); template Notify(WatchIteratorT begin, @@ -166,6 +176,9 @@ Notify::Notify(WatchIteratorT begin, conn(std::move(conn)), client_gid(client_gid), user_version(user_version) { + if (ninfo.timeout) { + timeout_timer.arm(std::chrono::seconds{ninfo.timeout}); + } } template