]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: don't complete recovery if unfound
authorSage Weil <sage@newdream.net>
Thu, 23 Feb 2012 17:39:50 +0000 (09:39 -0800)
committerSage Weil <sage@newdream.net>
Thu, 23 Feb 2012 17:39:50 +0000 (09:39 -0800)
Otherwise we fail the !needs_recovery() assert.  Because we aren't
recovered.  For example,

2012-02-21 16:16:13.104665 1685c700 osd.5 1217 pg[0.16( v 1215'337 lc 19'2 (0'0,1215'337] n=25 ec=1 les/c 0/1061 1210/1210/1210) [5,3] r=0 lpr=1210 mlcod 0'0 active m=23 u=23 snaptrimq=[1~99,9b~e,aa~72,11d~3d,15b~e,16a~f,17a~5,180~4,185~1a,1a0~a,1ac~10,1bd~4,1c2~8,1cb~1,1cd~1,1cf~1a,1ea~10,1fb~6,202~2,205~2,209~2,20c~8,215~2,218~5,21e~1,220~1,222~9,22c~4,231~3,235~2,238~3,23e~2,241~4,246~1,248~1,24b~1,24d~9,257~6,25e~1,263~1,265~2,268~3,26e~1,273~1,275~5,27e~1,280~2]] needs_recovery osd.3 has 23 missing
osd/PG.cc: In function 'boost::statechart::result PG::RecoveryState::Active::react(const PG::RecoveryState::RecoveryComplete&)' thread 1685c700 time 2012-02-21 16:16:13.108923
osd/PG.cc: 4070: FAILED assert(!pg->needs_recovery())
 ceph version 0.42-70-g0e4367a (commit:0e4367aaac88b99c36386b6ce5e8d816fdd4ada0)
 1: (PG::RecoveryState::Active::react(PG::RecoveryState::RecoveryComplete const&)+0x1b3) [0x6a1173]
 2: (boost::statechart::simple_state<PG::RecoveryState::Active, PG::RecoveryState::Primary, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x121) [0x6c7301]
 3: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x6b) [0x6bfc6b]
 4: (PG::RecoveryState::handle_recovery_complete(PG::RecoveryCtx*)+0x10c) [0x67c03c]
 5: (ReplicatedPG::start_recovery_ops(int, PG::RecoveryCtx*)+0x241) [0x4f83c1]
 6: (OSD::do_recovery(PG*)+0x345) [0x54b3e5]
 7: (ThreadPool::worker()+0xa26) [0x619e66]
 8: (ThreadPool::WorkThread::entry()+0xd) [0x57ad5d]
 9: (()+0x7971) [0x5037971]
 10: (clone()+0x6d) [0x679f92d]

Signed-off-by: Sage Weil <sage@newdream.net>
Reviewed-by: Samuel Just <samuel.just@dreamhost.com>
src/osd/ReplicatedPG.cc

index 10aaf8526bcf25c5c04e02f48e9c6e221e7adbfa..153471bac23797c0da59fa27d90856b89f6d4159 100644 (file)
@@ -5264,6 +5264,12 @@ int ReplicatedPG::start_recovery_ops(int max, RecoveryCtx *prctx)
 
   assert(recovery_ops_active == 0);
 
+  int unfound = get_num_unfound();
+  if (unfound) {
+    dout(10) << " still have " << unfound << " unfound" << dendl;
+    return started;
+  }
+
   handle_recovery_complete(prctx);
 
   return 0;