From 123ff9e18a27c6517a291aab1e0613b27d70d711 Mon Sep 17 00:00:00 2001
From: David Zafman
Date: Thu, 27 Feb 2014 15:27:56 -0800
Subject: [PATCH] osd: stray pg ref on shutdown

Move agent_clear() from only being done when becoming replica
Do it in clear_primary_state() whenever we stop being primary
clear_primary_state() passed whether we are staying a primary
Add asserts in agent_stop() and don't need to clear agent_queue

Fixes: #7458

Signed-off-by: David Zafman
---
 src/osd/OSD.cc          | 8 ++++++--
 src/osd/PG.cc           | 9 +++++----
 src/osd/PG.h            | 2 +-
 src/osd/ReplicatedPG.cc | 4 +++-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index d3d14f96f62f..66c6fc4df596 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -511,12 +511,16 @@ void OSDService::agent_stop()
 {
   {
     Mutex::Locker l(agent_lock);
+
+    // By this time all ops should be cancelled
+    assert(agent_ops == 0);
+    // By this time all PGs are shutdown and dequeued
+    assert(agent_queue.empty());
+
     agent_stop_flag = true;
     agent_cond.Signal();
   }
   agent_thread.join();
-
-  agent_queue.clear();
 }


diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index a525b7195299..ef7d812b1f4f 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -794,7 +794,7 @@ void PG::build_prior(std::auto_ptr &prior_set)
   set_probe_targets(prior_set->probe);
 }

-void PG::clear_primary_state()
+void PG::clear_primary_state(bool staying_primary)
 {
   dout(10) << "clear_primary_state" << dendl;

@@ -827,6 +827,9 @@ void PG::clear_primary_state()

   osd->recovery_wq.dequeue(this);
   osd->snap_trim_wq.dequeue(this);
+
+  if (!staying_primary)
+    agent_clear();
 }

 /**
@@ -4730,7 +4733,7 @@ void PG::start_peering_interval(

   // reset primary state?
   if (was_old_primary || is_primary())
-    clear_primary_state();
+    clear_primary_state(was_old_primary && is_primary());


   // pg->on_*
@@ -6403,8 +6406,6 @@ PG::RecoveryState::ReplicaActive::ReplicaActive(my_context ctx)
     context< RecoveryMachine >().get_cur_transaction(),
     context< RecoveryMachine >().get_on_applied_context_list(),
     context< RecoveryMachine >().get_on_safe_context_list());
-
-  pg->agent_clear();
 }


diff --git a/src/osd/PG.h b/src/osd/PG.h
index df189fa317a1..003f43b53bc7 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -675,7 +675,7 @@ protected:
   void clear_publish_stats();

 public:
-  void clear_primary_state();
+  void clear_primary_state(bool stay_primary);

 public:
   bool is_actingbackfill(pg_shard_t osd) const {
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 32171c3ba4bb..5109ed116ed8 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -8876,7 +8876,7 @@ void ReplicatedPG::on_shutdown()
   osd->remote_reserver.cancel_reservation(info.pgid);
   osd->local_reserver.cancel_reservation(info.pgid);

-  clear_primary_state();
+  clear_primary_state(false); // Not staying primary
   osd->remove_want_pg_temp(info.pgid.pgid);
   cancel_recovery();
 }
@@ -10464,6 +10464,8 @@ void ReplicatedPG::agent_work(int start_max)
     return;
   }

+  assert(!deleting);
+
   if (agent_state->is_idle()) {
     dout(10) << __func__ << " idle, stopping" << dendl;
     unlock();
-- 
2.47.3