From 85dff0d19185fa6dfad723ce80b6b3314de9752c Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 18 Sep 2024 17:34:39 +0800 Subject: [PATCH] crimson/osd: purge strays when PGs go clean Signed-off-by: Xuehan Xu --- src/crimson/osd/osd.cc | 24 ++++++++++++++++++++++++ src/crimson/osd/osd.h | 2 ++ src/crimson/osd/pg.cc | 18 +++++++++++++++++- src/crimson/osd/pg.h | 12 ++++++++++-- 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 8d2d10fbd7c45..34ad97ceb068a 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -23,6 +23,7 @@ #include "messages/MOSDOp.h" #include "messages/MOSDPeeringOp.h" #include "messages/MOSDPGCreate2.h" +#include "messages/MOSDPGRemove.h" #include "messages/MOSDPGUpdateLogMissing.h" #include "messages/MOSDPGUpdateLogMissingReply.h" #include "messages/MOSDRepOpReply.h" @@ -863,6 +864,8 @@ OSD::do_ms_dispatch( [[fallthrough]]; case MSG_OSD_PG_LOG: return handle_peering_op(conn, boost::static_pointer_cast(m)); + case MSG_OSD_PG_REMOVE: + return handle_pg_remove(conn, boost::static_pointer_cast(m)); case MSG_OSD_REPOP: return handle_rep_op(conn, boost::static_pointer_cast(m)); case MSG_OSD_REPOPREPLY: @@ -1555,6 +1558,27 @@ seastar::future<> OSD::handle_peering_op( std::move(*evt)).second; } +seastar::future<> OSD::handle_pg_remove( + crimson::net::ConnectionRef conn, + Ref m) +{ + LOG_PREFIX(OSD::handle_pg_remove); + const int from = m->get_source().num(); + std::vector> futs; + for (auto &pg : m->pg_list) { + DEBUG("{} from {}", pg, from); + futs.emplace_back( + pg_shard_manager.start_pg_operation( + conn, + pg_shard_t{from, pg.shard}, + pg, + m->get_epoch(), + m->get_epoch(), + PeeringState::DeleteStart()).second); + } + return seastar::when_all_succeed(std::move(futs)); +} + seastar::future<> OSD::check_osdmap_features() { LOG_PREFIX(OSD::check_osdmap_features); diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index de39d80827494..d7d54d5d2c3c3 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -208,6 +208,8 @@ private: Ref m); seastar::future<> handle_peering_op(crimson::net::ConnectionRef conn, Ref m); + seastar::future<> handle_pg_remove(crimson::net::ConnectionRef conn, + Ref m); seastar::future<> handle_recovery_subreq(crimson::net::ConnectionRef conn, Ref m); seastar::future<> handle_scrub_command(crimson::net::ConnectionRef conn, diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 644cc84513d49..c92978fcfc2b9 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -517,7 +517,8 @@ Context *PG::on_clean() { recovery_handler->on_pg_clean(); scrubber.on_primary_active_clean(); - return nullptr; + recovery_finisher = new C_PG_FinishRecovery(*this); + return recovery_finisher; } seastar::future<> PG::clear_temp_objects() @@ -1883,4 +1884,19 @@ void PG::cancel_pglog_based_recovery_op() { pglog_based_recovery_op->cancel(); reset_pglog_based_recovery_op(); } + +void PG::C_PG_FinishRecovery::finish(int r) { + LOG_PREFIX(PG::C_PG_FinishRecovery::finish); + auto &peering_state = pg.get_peering_state(); + if (peering_state.is_deleting() || !peering_state.is_clean()) { + DEBUGDPP("raced with delete or repair", pg); + return; + } + if (this == pg.recovery_finisher) { + peering_state.purge_strays(); + pg.recovery_finisher = nullptr; + } else { + DEBUGDPP("stale recovery finsher", pg); + } +} } diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 11c0e3668b142..91bd529b95d63 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -375,7 +375,7 @@ public: } void check_blocklisted_watchers() final; void clear_primary_state() final { - // Not needed yet + recovery_finisher = nullptr; } void queue_check_readable(epoch_t last_peering_reset, @@ -394,7 +394,7 @@ public: void on_replica_activate() final; void on_activate_complete() final; void on_new_interval() final { - // Not needed yet + recovery_finisher = nullptr; } Context *on_clean() final; void on_activate_committed() final { @@ -712,9 +712,17 @@ public: } seastar::future<> stop(); private: + class C_PG_FinishRecovery : public Context { + public: + explicit C_PG_FinishRecovery(PG &pg) : pg(pg) {} + void finish(int r) override; + private: + PG& pg; + }; std::unique_ptr backend; std::unique_ptr recovery_backend; std::unique_ptr recovery_handler; + C_PG_FinishRecovery *recovery_finisher; PeeringState peering_state; eversion_t projected_last_update; -- 2.39.5