From 7462b1c06e375e57f1d8ff0c5a07c2ce3e53d982 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Wed, 5 Mar 2025 15:58:12 +0000 Subject: [PATCH] crimson/osd/pg: Handle peer replies handling once received Instead of updating last complete on disk and calling complete_write in PG::submit_transacion (chained to all_completed). Move the completion handling earlier as soon as all the peers acked. This essentially means that we move the handling to ReplicatedBackend::get_rep_op_reply, unless replication size is 1 - then we would mark completion in ReplicatedBackend::submit_transaction. Fixes: https://tracker.ceph.com/issues/69439 Signed-off-by: Matan Breizman Signed-off-by: Xuehan Xu --- src/crimson/osd/pg.cc | 11 +---------- src/crimson/osd/replicated_backend.cc | 6 ++++++ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 43cd4661164..d49e056881a 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -930,16 +930,7 @@ PG::submit_transaction( std::move(log_entries)); co_return std::make_tuple( std::move(submitted), - all_completed.then_interruptible( - [this, last_complete=peering_state.get_info().last_complete, at_version] - (auto acked) { - for (const auto& peer : acked) { - peering_state.update_peer_last_complete_ondisk( - peer.shard, peer.last_complete_ondisk); - } - peering_state.complete_write(at_version, last_complete); - return seastar::now(); - }) + std::move(all_completed) ); } diff --git a/src/crimson/osd/replicated_backend.cc b/src/crimson/osd/replicated_backend.cc index 987432e08db..ebb34a52a6f 100644 --- a/src/crimson/osd/replicated_backend.cc +++ b/src/crimson/osd/replicated_backend.cc @@ -166,10 +166,13 @@ ReplicatedBackend::submit_transaction( assert(0 == "impossible"); } if (--peers->pending == 0) { + // no peers other than me, replication size is 1 + pg.complete_write(peers->at_version, peers->last_complete); peers->all_committed.set_value(); peers->all_committed = {}; return seastar::now(); } + // wait for all peers to ack (ReplicatedBackend::got_rep_op_reply) return peers->all_committed.get_shared_future(); }).then_interruptible([pending_txn, this, _new_clone, &hoid, to_push_delete=std::move(to_push_delete), @@ -216,7 +219,10 @@ void ReplicatedBackend::got_rep_op_reply(const MOSDRepOpReply& reply) for (auto& peer : peers.acked_peers) { if (peer.shard == reply.from) { peer.last_complete_ondisk = reply.get_last_complete_ondisk(); + pg.update_peer_last_complete_ondisk( + peer.shard, peer.last_complete_ondisk); if (--peers.pending == 0) { + pg.complete_write(peers.at_version, peers.last_complete); peers.all_committed.set_value(); peers.all_committed = {}; } -- 2.39.5