crimson/osd: fix sequencing issues in ClientRequest::process_op.
Author:     Radoslaw Zarzynski <rzarzyns@redhat.com>
AuthorDate: Tue, 30 Nov 2021 19:30:32 +0000
Commit:     Radoslaw Zarzynski <rzarzyns@redhat.com>
CommitDate: Tue, 7 Dec 2021 09:36:24 +0000
The following crash has been observed in one of the runs at Sepia:

```
ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-8898-ge57ad63c/rpm/el8/BUILD/ceph-17.0.0-8898-ge57ad63c/src/crimson/osd/osd_operation_sequencer.h:123: void crimson::osd::OpSequencer::finish_op_in_order(crimson::osd::ClientRequest&): Assertion `op.get_id() > last_completed_id' failed.
Aborting on shard 0.
Backtrace:
Reactor stalled for 1807 ms on shard 0. Backtrace: 0xb14ab 0x46e57428 0x46bc450d 0x46be03bd 0x46be0782 0x46be0946 0x46be0bf6 0x12b1f 0x137341 0x3fdd6a92 0x3fddccdb 0x3fdde1ee 0x3fdde8b3 0x3fdd3f2b 0x3fdd4442 0x3fdd4c3a 0x12b1f 0x3737e 0x21db4 0x21c88 0x2fa75 0x3b769527 0x3b8418af 0x3b8423cb 0x3b842ce0 0x3b84383d 0x3a116220 0x3a143f31 0x3a144bcd 0x46b96271 0x46bde51a 0x46d6891b 0x46d6a8f0 0x4681a7d2 0x4681f03b 0x39fd50f2 0x23492 0x39b7a7dd
 0# gsignal in /lib64/libc.so.6
 1# abort in /lib64/libc.so.6
 2# 0x00007FB9FB946C89 in /lib64/libc.so.6
 3# 0x00007FB9FB954A76 in /lib64/libc.so.6
 4# 0x00005595E98E6528 in ceph-osd
 5# 0x00005595E99BE8B0 in ceph-osd
 6# 0x00005595E99BF3CC in ceph-osd
 7# 0x00005595E99BFCE1 in ceph-osd
 8# 0x00005595E99C083E in ceph-osd
 9# 0x00005595E8293221 in ceph-osd
10# 0x00005595E82C0F32 in ceph-osd
11# 0x00005595E82C1BCE in ceph-osd
12# 0x00005595F4D13272 in ceph-osd
13# 0x00005595F4D5B51B in ceph-osd
14# 0x00005595F4EE591C in ceph-osd
15# 0x00005595F4EE78F1 in ceph-osd
16# 0x00005595F49977D3 in ceph-osd
17# 0x00005595F499C03C in ceph-osd
18# main in ceph-osd
19# __libc_start_main in /lib64/libc.so.6
20# _start in ceph-osd
```

The sequence of events provides at least two clues:
  - op no. 32 finished before op no. 29, which was still waiting
    for its `ObjectContext`;
  - op no. 32 was a short-lived one -- it never even waited
    on `obc`.

```
rzarzynski@teuthology:/home/teuthworker/archive/rzarzynski-2021-11-22_22:01:32-rados-master-distro-basic-smithi$ less ./6520106/remote/smithi115/log/ceph-osd.3.log.gz
...
DEBUG 2021-11-22 22:32:24,531 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4.f0fb5e1d (undecoded) ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]): start
DEBUG 2021-11-22 22:32:24,531 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4.f0fb5e1d (undecoded) ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]): in repeat
...
DEBUG 2021-11-22 22:32:24,546 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4.f0fb5e1d (undecoded) ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]) same_interval_since: 21
DEBUG 2021-11-22 22:32:24,546 [shard 0] osd - OpSequencer::start_op: op=29, last_started=27, last_unblocked=27, last_completed=27
...
DEBUG 2021-11-22 22:32:24,621 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4.81addbad (undecoded) ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]): start
DEBUG 2021-11-22 22:32:24,621 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4.81addbad (undecoded) ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]): in repeat
...
DEBUG 2021-11-22 22:32:24,626 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4.81addbad (undecoded) ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]) same_interval_since: 21
DEBUG 2021-11-22 22:32:24,626 [shard 0] osd - OpSequencer::start_op: op=32, last_started=29, last_unblocked=29, last_completed=27
<note that op 32 is very short-lived>
DEBUG 2021-11-22 22:32:24,669 [shard 0] osd - OpSequencer::finish_op_in_order: op=32, last_started=32, last_unblocked=32, last_completed=27
...
DEBUG 2021-11-22 22:32:24,671 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4:b5dbb581:::smithi11538976-13:head {write 601684~619341 in=619341b, stat} snapc 0={} RETRY=1 ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]): destroying
...
DEBUG 2021-11-22 22:32:24,722 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4:b87adf0f:::smithi11538976-9:head {read 0~1} snapc 0={} RETRY=1 ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]): got obc lock
...
INFO  2021-11-22 22:32:24,723 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4:b87adf0f:::smithi11538976-9:head {read 0~1} snapc 0={} RETRY=1 ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]) obc.get()=0x6190000d5780
...
DEBUG 2021-11-22 22:32:24,753 [shard 0] osd - OpSequencer::finish_op_in_order: op=29, last_started=32, last_unblocked=32, last_completed=32
ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-8898-ge57ad63c/rpm/el8/BUILD/ceph-17.0.0-8898-ge57ad63c/src/crimson/osd/osd_operation_sequencer.h:123: void crimson::osd::OpSequencer::finish_op_in_order(crimson::osd::ClientRequest&): Assertion `op.get_id() > last_completed_id' failed.
Aborting on shard 0.
```

This could be explained by a scenario in which:
  - op no. 32 skipped stages of the execution pipeline, while
  - it wrongly informed `OpSequencer` that the execution was in-order.
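
A minimal sketch of the violated invariant (a toy stand-in for the
real `OpSequencer` in `src/crimson/osd/osd_operation_sequencer.h`;
names and structure here are illustrative only) shows how such a
scenario trips the assertion:

```cpp
// Toy model of the in-order completion invariant (illustration only).
#include <cassert>
#include <cstdint>

struct ToyOpSequencer {
  uint64_t last_completed_id = 0;

  // Mirrors the failing assertion: in-order completions must arrive
  // with strictly increasing op ids.
  void finish_op_in_order(uint64_t id) {
    assert(id > last_completed_id);
    last_completed_id = id;
  }
};

int main() {
  ToyOpSequencer seq;
  // Op 32 takes a shortcut past the pipeline stages but still reports
  // an in-order finish, advancing last_completed_id to 32.
  seq.finish_op_in_order(32);
  // Op 29, which had been blocked on its ObjectContext, finishes
  // later; the check `29 > 32` fails -- exactly the observed abort.
  seq.finish_op_in_order(29);
}
```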

Static analysis shows there are multiple problems of this kind in
`ClientRequest::process_op()` and its callees; the most recently
merged one is the `PG::already_complete()` path.
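
The fix, shown in full in the diff below, makes every completion path
declare how it finished. A condensed, synchronous sketch of that
control flow (the interruptible futures and pipeline plumbing of the
real code are elided; the helper names here are hypothetical):

```cpp
// Condensed sketch of the fix's control flow; the real code returns
// interruptible futures (see the diff below).
enum class seq_mode_t { IN_ORDER, OUT_OF_ORDER };

// Hypothetical stand-in for ClientRequest::process_op().
seq_mode_t process_op_sketch(bool already_completed, bool misdirected) {
  if (already_completed) {
    // PG::already_complete() path: the reply is sent without entering
    // the get_obc/process stages, so it must not count as in-order.
    return seq_mode_t::OUT_OF_ORDER;
  }
  if (misdirected) {
    // Dropped without traversing the pipeline -- also out-of-order.
    return seq_mode_t::OUT_OF_ORDER;
  }
  // Full pipeline (get_obc -> process -> send_reply) was traversed.
  return seq_mode_t::IN_ORDER;
}

// ClientRequest::start() then dispatches on the returned mode instead
// of unconditionally calling finish_op_in_order().
void finish(seq_mode_t mode) {
  if (mode == seq_mode_t::IN_ORDER) {
    // sequencer.finish_op_in_order(*this);
  } else {
    // sequencer.finish_op_out_of_order(*this, registry);
  }
}
```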

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/crimson/osd/osd_operations/client_request.cc
src/crimson/osd/osd_operations/client_request.h

diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc
index 3a5111ce0d4171e2da7d444c84db27fb73e47f17..66cd414ffc81a76ebc2438246f4c109622415627 100644
--- a/src/crimson/osd/osd_operations/client_request.cc
+++ b/src/crimson/osd/osd_operations/client_request.cc
@@ -121,10 +121,13 @@ seastar::future<> ClientRequest::start()
                   sequencer.finish_op_out_of_order(*this, osd.get_shard_services().registry);
                 });
               } else {
-                return process_op(pgref).then_interruptible([this] {
-                  // NOTE: this assumes process_op() handles everything
-                  // in-order which I'm not sure about.
-                  sequencer.finish_op_in_order(*this);
+                return process_op(pgref).then_interruptible([this] (const seq_mode_t mode) {
+                  if (mode == seq_mode_t::IN_ORDER) {
+                    sequencer.finish_op_in_order(*this);
+                  } else {
+                    assert(mode == seq_mode_t::OUT_OF_ORDER);
+                    sequencer.finish_op_out_of_order(*this, osd.get_shard_services().registry);
+                  }
                 });
               }
             });
@@ -154,7 +157,7 @@ ClientRequest::process_pg_op(
     });
 }
 
-ClientRequest::interruptible_future<>
+ClientRequest::interruptible_future<ClientRequest::seq_mode_t>
 ClientRequest::process_op(Ref<PG> &pg)
 {
   return with_blocking_future_interruptible<interruptor::condition>(
@@ -165,52 +168,65 @@ ClientRequest::process_op(Ref<PG> &pg)
   }).then_interruptible([this, pg]() mutable {
     return pg->already_complete(m->get_reqid()).then_unpack_interruptible(
       [this, pg](bool completed, int ret) mutable
-      -> PG::load_obc_iertr::future<> {
+      -> PG::load_obc_iertr::future<seq_mode_t> {
       if (completed) {
         auto reply = crimson::make_message<MOSDOpReply>(
           m.get(), ret, pg->get_osdmap_epoch(),
           CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, false);
-        return conn->send(std::move(reply));
+        return conn->send(std::move(reply)).then([] {
+          return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
+        });
       } else {
         return with_blocking_future_interruptible<interruptor::condition>(
             handle.enter(pp(*pg).get_obc)).then_interruptible(
-          [this, pg]() mutable -> PG::load_obc_iertr::future<> {
+          [this, pg]() mutable -> PG::load_obc_iertr::future<seq_mode_t> {
           logger().debug("{}: got obc lock", *this);
           op_info.set_from_op(&*m, *pg->get_osdmap());
-          return pg->with_locked_obc(m->get_hobj(), op_info,
-                                     [this, pg](auto obc) mutable {
-            return with_blocking_future_interruptible<interruptor::condition>(
-              handle.enter(pp(*pg).process)
-            ).then_interruptible([this, pg, obc]() mutable {
-              return do_process(pg, obc);
+          // XXX: `do_with()` is just a workaround for `with_obc_func_t` imposing
+          // `future<void>`.
+          return seastar::do_with(seq_mode_t{}, [this, &pg] (seq_mode_t& mode) {
+            return pg->with_locked_obc(m->get_hobj(), op_info,
+                                       [this, pg, &mode](auto obc) mutable {
+              return with_blocking_future_interruptible<interruptor::condition>(
+                handle.enter(pp(*pg).process)
+              ).then_interruptible([this, pg, obc, &mode]() mutable {
+                return do_process(pg, obc).then_interruptible([&mode] (seq_mode_t _mode) {
+                  mode = _mode;
+                  return seastar::now();
+                });
+              });
+            }).safe_then_interruptible([&mode] {
+              return PG::load_obc_iertr::make_ready_future<seq_mode_t>(mode);
             });
           });
         });
       }
     });
-  }).safe_then_interruptible([pg=std::move(pg)] {
-    return seastar::now();
+  }).safe_then_interruptible([pg=std::move(pg)] (const seq_mode_t mode) {
+    return seastar::make_ready_future<seq_mode_t>(mode);
   }, PG::load_obc_ertr::all_same_way([](auto &code) {
     logger().error("ClientRequest saw error code {}", code);
-    return seastar::now();
+    return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
   }));
 }
 
-ClientRequest::interruptible_future<>
+ClientRequest::interruptible_future<ClientRequest::seq_mode_t>
 ClientRequest::do_process(Ref<PG>& pg, crimson::osd::ObjectContextRef obc)
 {
   if (!pg->is_primary()) {
     // primary can handle both normal ops and balanced reads
     if (is_misdirected(*pg)) {
       logger().trace("do_process: dropping misdirected op");
-      return seastar::now();
+      return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
     } else if (const hobject_t& hoid = m->get_hobj();
                !pg->get_peering_state().can_serve_replica_read(hoid)) {
       auto reply = crimson::make_message<MOSDOpReply>(
        m.get(), -EAGAIN, pg->get_osdmap_epoch(),
        m->get_flags() & (CEPH_OSD_FLAG_ACK|CEPH_OSD_FLAG_ONDISK),
        !m->has_flag(CEPH_OSD_FLAG_RETURNVEC));
-      return conn->send(std::move(reply));
+      return conn->send(std::move(reply)).then([] {
+        return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
+      });
     }
   }
   return pg->do_osd_ops(m, obc, op_info).safe_then_unpack_interruptible(
@@ -226,7 +242,9 @@ ClientRequest::do_process(Ref<PG>& pg, crimson::osd::ObjectContextRef obc)
         return with_blocking_future_interruptible<interruptor::condition>(
             handle.enter(pp(*pg).send_reply)).then_interruptible(
               [this, reply=std::move(reply)]() mutable{
-              return conn->send(std::move(reply));
+              return conn->send(std::move(reply)).then([] {
+                return seastar::make_ready_future<seq_mode_t>(seq_mode_t::IN_ORDER);
+              });
             });
       }, crimson::ct_error::eagain::handle([this, pg]() mutable {
         return process_op(pg);
diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h
index 9563c10226060d13c8a032b625da48fedadb0986..d695e2c3ee9d2fba8fa633ff20998d547b39840b 100644
--- a/src/crimson/osd/osd_operations/client_request.h
+++ b/src/crimson/osd/osd_operations/client_request.h
@@ -62,14 +62,20 @@ public:
 private:
   template <typename FuncT>
   interruptible_future<> with_sequencer(FuncT&& func);
-  interruptible_future<> do_process(
+
+  enum class seq_mode_t {
+    IN_ORDER,
+    OUT_OF_ORDER
+  };
+
+  interruptible_future<seq_mode_t> do_process(
     Ref<PG>& pg,
     crimson::osd::ObjectContextRef obc);
   ::crimson::interruptible::interruptible_future<
     ::crimson::osd::IOInterruptCondition> process_pg_op(
     Ref<PG> &pg);
   ::crimson::interruptible::interruptible_future<
-    ::crimson::osd::IOInterruptCondition> process_op(
+    ::crimson::osd::IOInterruptCondition, seq_mode_t> process_op(
     Ref<PG> &pg);
   bool is_pg_op() const;