From 0d98fc6ffdd314bf53d2c4f4ebaf1c4e595c3ee5 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 24 May 2010 13:50:00 -0700
Subject: [PATCH] osd: keep recovery ops in sync with pull

Call start_recovery_op from pull() instead of fixing every caller (some
were wrong). This keeps the recovery state in sync with pulling state,
even when pull() has to pull something different (head, snapdir) first.

Fixes this crash:

osd/PG.cc: In function 'void PG::finish_recovery_op(const sobject_t&, bool)':
osd/PG.cc:1842: FAILED assert(recovering_oids.count(soid))
 1: (PG::finish_recovery_op(sobject_t const&, bool)+0x14e) [0x74caf6]
 2: (ReplicatedPG::sub_op_push(MOSDSubOp*)+0x1da8) [0x669292]
 3: (ReplicatedPG::do_sub_op(MOSDSubOp*)+0x109) [0x671a73]
 4: (OSD::dequeue_op(PG*)+0x23c) [0x6bda00]
 5: (OSD::OpWQ::_process(PG*)+0x21) [0x7387c9]
 6: (ThreadPool::WorkQueue::_void_process(void*)+0x28) [0x6f5e12]
 7: (ThreadPool::worker()+0x23a) [0x7f2404]
 8: (ThreadPool::WorkThread::entry()+0x19) [0x73b783]
 9: (Thread::_entry_func(void*)+0x20) [0x64f92a]
 10: /lib/libpthread.so.0 [0x7f7a12cf473a]
 11: (clone()+0x6d) [0x7f7a11f1e69d]
---
 src/osd/ReplicatedPG.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index eabf0c3b339aa..8c51b986f6792 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -99,7 +99,6 @@ void ReplicatedPG::wait_for_missing_object(const sobject_t& soid, Message *m)
 	    << ", pulling" << dendl;
     pull(soid);
-    start_recovery_op(soid);
   }
   waiting_for_missing_object[soid].push_back(m);
 }
@@ -2974,6 +2973,8 @@ bool ReplicatedPG::pull(const sobject_t& soid)
   assert(pulling.count(soid) == 0);
   pulling[soid].first = v;
   pulling[soid].second = fromosd;
+
+  start_recovery_op(soid);
   return true;
 }
@@ -3643,7 +3644,6 @@ int ReplicatedPG::recover_primary(int max)
     if (pull(soid)) {
       ++started;
-      start_recovery_op(soid);
     } else
       ++skipped;
     if (started >= max)
-- 
2.39.5