From 90b9f6d8251d9665184ee9556dee0f585bc0f0a9 Mon Sep 17 00:00:00 2001 From: Neha Ojha Date: Fri, 19 Jan 2018 09:49:00 -0800 Subject: [PATCH] PG: mark recovery started when fail to get recovery_read_lock Signed-off-by: Neha Ojha --- src/osd/PrimaryLogPG.cc | 19 ++++++++++++------- src/osd/PrimaryLogPG.h | 6 ++++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index d2904eb329b04..cd4285a63fb1a 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -578,6 +578,7 @@ void PrimaryLogPG::maybe_kick_recovery( const hobject_t &soid) { eversion_t v; + bool work_started = false; if (!missing_loc.needs_recovery(soid, &v)) return; @@ -594,7 +595,7 @@ void PrimaryLogPG::maybe_kick_recovery( } else if (missing_loc.is_deleted(soid)) { prep_object_replica_deletes(soid, v, h); } else { - prep_object_replica_pushes(soid, v, h); + prep_object_replica_pushes(soid, v, h, &work_started); } pgbackend->run_recovery_op(h, cct->_conf->osd_client_op_priority); } @@ -11930,6 +11931,7 @@ bool PrimaryLogPG::start_recovery_ops( uint64_t& started = *ops_started; started = 0; bool work_in_progress = false; + bool recovery_started = false; assert(is_primary()); assert(is_peered()); assert(!is_deleting()); @@ -11957,7 +11959,7 @@ bool PrimaryLogPG::start_recovery_ops( if (num_missing == num_unfound) { // All of the missing objects we have are unfound. // Recover the replicas. - started = recover_replicas(max, handle); + started = recover_replicas(max, handle, &recovery_started); } if (!started) { // We still have missing objects that we should grab from replicas. @@ -11965,10 +11967,10 @@ bool PrimaryLogPG::start_recovery_ops( } if (!started && num_unfound != get_num_unfound()) { // second chance to recovery replicas - started = recover_replicas(max, handle); + started = recover_replicas(max, handle, &recovery_started); } - if (started) + if (started || recovery_started) work_in_progress = true; bool deferred_backfill = false; @@ -12282,7 +12284,8 @@ int PrimaryLogPG::prep_object_replica_deletes( int PrimaryLogPG::prep_object_replica_pushes( const hobject_t& soid, eversion_t v, - PGBackend::RecoveryHandle *h) + PGBackend::RecoveryHandle *h, + bool *work_started) { assert(is_primary()); dout(10) << __func__ << ": on " << soid << dendl; @@ -12297,6 +12300,7 @@ int PrimaryLogPG::prep_object_replica_pushes( if (!obc->get_recovery_read()) { dout(20) << "recovery delayed on " << soid << "; could not get rw_manager lock" << dendl; + *work_started = true; return 0; } else { dout(20) << "recovery got recovery read lock on " << soid @@ -12327,7 +12331,8 @@ int PrimaryLogPG::prep_object_replica_pushes( return 1; } -uint64_t PrimaryLogPG::recover_replicas(uint64_t max, ThreadPool::TPHandle &handle) +uint64_t PrimaryLogPG::recover_replicas(uint64_t max, ThreadPool::TPHandle &handle, + bool *work_started) { dout(10) << __func__ << "(" << max << ")" << dendl; uint64_t started = 0; @@ -12418,7 +12423,7 @@ uint64_t PrimaryLogPG::recover_replicas(uint64_t max, ThreadPool::TPHandle &hand dout(10) << __func__ << ": recover_object_replicas(" << soid << ")" << dendl; map::const_iterator r = m.get_items().find(soid); - started += prep_object_replica_pushes(soid, r->second.need, h); + started += prep_object_replica_pushes(soid, r->second.need, h, work_started); } } diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 2ef95a8c6844d..5d36a3b616a4f 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1078,7 +1078,8 @@ protected: bool new_backfill; int prep_object_replica_pushes(const hobject_t& soid, eversion_t v, - PGBackend::RecoveryHandle *h); + PGBackend::RecoveryHandle *h, + bool *work_started); int prep_object_replica_deletes(const hobject_t& soid, eversion_t v, PGBackend::RecoveryHandle *h); @@ -1207,7 +1208,8 @@ protected: ThreadPool::TPHandle &handle, uint64_t *started) override; uint64_t recover_primary(uint64_t max, ThreadPool::TPHandle &handle); - uint64_t recover_replicas(uint64_t max, ThreadPool::TPHandle &handle); + uint64_t recover_replicas(uint64_t max, ThreadPool::TPHandle &handle, + bool *recovery_started); hobject_t earliest_peer_backfill() const; bool all_peer_done() const; /** -- 2.39.5