From 97ff102cc6e8311c534f758ad688544a5ae8b82b Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Thu, 2 Sep 2021 14:41:38 +0000 Subject: [PATCH] crimson/osd: fix Watch::connect() behaviour on reconnect. It's perfectly legal for a client to reconnect to a particular `Watch` using a different socket / `Connection` than the original one. This shall include proper handling of the watch timer which is currently broken as, when reconnecting, we don't cancel the timer. This led to the following crash at Sepia: ``` rzarzynski@teuthology:/home/teuthworker/archive/rzarzynski-2021-09-02_07:44:51-rados-master-distro-basic-smithi/6372357$ less ./remote/smithi183/log/ceph-osd.4.log.gz ... DEBUG 2021-09-02 08:10:45,462 [shard 0] osd - client_request(id=12, detail=m=[osd_op(client.5087.0:93 7.1e 7:7c7084bd:::repobj:head {watch reconnect cookie 94478891024832 gen 1} snapc 0={} ondisk+write+know n_if_redirected e40) v8]): got obc lock ... DEBUG 2021-09-02 08:10:45,462 [shard 0] osd - do_op_watch INFO 2021-09-02 08:10:45,462 [shard 0] osd - found existing watch by client.5087 DEBUG 2021-09-02 08:10:45,462 [shard 0] osd - do_op_watch_subop_watch INFO 2021-09-02 08:10:45,462 [shard 0] osd - found existing watch watch(cookie 94478891024832 30s 172.21.15.150:0/3544196211) by client.5087 ... INFO 2021-09-02 08:10:45,462 [shard 0] osd - op_effect: found existing watcher: 94478891024832,client.5087 ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-7406-g9d30203c/rpm/el8/BUILD/ceph- 17.0.0-7406-g9d30203c/src/seastar/include/seastar/core/timer.hh:95: void seastar::timer::arm_state(seastar::timer::time_point, std::optional) [with Clock = seastar::l owres_clock; seastar::timer::time_point = std::chrono::time_point > >; typename Clock::duration = std::chrono::duration >]: Assertion `!_armed' failed. Aborting on shard 0. 
Backtrace: 0# 0x000055CC052CF0B6 in ceph-osd 1# FatalSignal::signaled(int, siginfo_t const&) in ceph-osd 2# FatalSignal::install_oneshot_signal_handler<6>()::{lambda(int, siginfo_t*, void*)#1}::_FUN(int, siginfo_t*, void*) in ceph-osd 3# 0x00007FA58349FB20 in /lib64/libpthread.so.0 4# gsignal in /lib64/libc.so.6 5# abort in /lib64/libc.so.6 6# 0x00007FA581A98C89 in /lib64/libc.so.6 7# 0x00007FA581AA6A76 in /lib64/libc.so.6 8# 0x000055CC0BEEE9DD in ceph-osd 9# crimson::osd::Watch::connect(seastar::shared_ptr, bool) in ceph-osd 10# 0x000055CC00B1D246 in ceph-osd 11# 0x000055CBFFEF01AE in ceph-osd ... ``` Signed-off-by: Radoslaw Zarzynski --- src/crimson/osd/watch.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crimson/osd/watch.cc b/src/crimson/osd/watch.cc index 1848869d19b..54112963287 100644 --- a/src/crimson/osd/watch.cc +++ b/src/crimson/osd/watch.cc @@ -81,9 +81,9 @@ seastar::future<> Watch::connect(crimson::net::ConnectionRef conn, bool) { if (this->conn == conn) { logger().debug("conn={} already connected", conn); - timeout_timer.cancel(); + return seastar::now(); } - + timeout_timer.cancel(); timeout_timer.arm(std::chrono::seconds{winfo.timeout_seconds}); this->conn = std::move(conn); return seastar::now(); -- 2.39.5