git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: keep recovery ops in sync with pull
author Sage Weil <sage@newdream.net>
Mon, 24 May 2010 20:50:00 +0000 (13:50 -0700)
committer Sage Weil <sage@newdream.net>
Mon, 24 May 2010 21:00:43 +0000 (14:00 -0700)
Call start_recovery_op from pull() instead of fixing every caller (some
were wrong).  This keeps the recovery state in sync with pulling state,
even when pull() has to pull something different (head, snapdir) first.

Fixes this crash:
osd/PG.cc: In function 'void PG::finish_recovery_op(const sobject_t&, bool)':
osd/PG.cc:1842: FAILED assert(recovering_oids.count(soid))
 1: (PG::finish_recovery_op(sobject_t const&, bool)+0x14e) [0x74caf6]
 2: (ReplicatedPG::sub_op_push(MOSDSubOp*)+0x1da8) [0x669292]
 3: (ReplicatedPG::do_sub_op(MOSDSubOp*)+0x109) [0x671a73]
 4: (OSD::dequeue_op(PG*)+0x23c) [0x6bda00]
 5: (OSD::OpWQ::_process(PG*)+0x21) [0x7387c9]
 6: (ThreadPool::WorkQueue<PG>::_void_process(void*)+0x28) [0x6f5e12]
 7: (ThreadPool::worker()+0x23a) [0x7f2404]
 8: (ThreadPool::WorkThread::entry()+0x19) [0x73b783]
 9: (Thread::_entry_func(void*)+0x20) [0x64f92a]
 10: /lib/libpthread.so.0 [0x7f7a12cf473a]
 11: (clone()+0x6d) [0x7f7a11f1e69d]

src/osd/ReplicatedPG.cc

index eabf0c3b339aa51ec55ec65cfabd896688440130..8c51b986f679246572bf3ba0310e679e0c7d0240 100644 (file)
@@ -99,7 +99,6 @@ void ReplicatedPG::wait_for_missing_object(const sobject_t& soid, Message *m)
            << ", pulling"
            << dendl;
     pull(soid);
-    start_recovery_op(soid);
   }
   waiting_for_missing_object[soid].push_back(m);
 }
@@ -2974,6 +2973,8 @@ bool ReplicatedPG::pull(const sobject_t& soid)
   assert(pulling.count(soid) == 0);
   pulling[soid].first = v;
   pulling[soid].second = fromosd;
+
+  start_recovery_op(soid);
   return true;
 }
 
@@ -3643,7 +3644,6 @@ int ReplicatedPG::recover_primary(int max)
        
        if (pull(soid)) {
          ++started;
-         start_recovery_op(soid);
        } else
          ++skipped;
        if (started >= max)