From 64ca584da12002bc1255db0920fddeb101a9f7b5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 23 Feb 2012 09:39:50 -0800 Subject: [PATCH] osd: don't complete recovery if unfound Otherwise we fail the !needs_recovery() assert, because we aren't actually recovered while objects are still unfound. For example, 2012-02-21 16:16:13.104665 1685c700 osd.5 1217 pg[0.16( v 1215'337 lc 19'2 (0'0,1215'337] n=25 ec=1 les/c 0/1061 1210/1210/1210) [5,3] r=0 lpr=1210 mlcod 0'0 active m=23 u=23 snaptrimq=[1~99,9b~e,aa~72,11d~3d,15b~e,16a~f,17a~5,180~4,185~1a,1a0~a,1ac~10,1bd~4,1c2~8,1cb~1,1cd~1,1cf~1a,1ea~10,1fb~6,202~2,205~2,209~2,20c~8,215~2,218~5,21e~1,220~1,222~9,22c~4,231~3,235~2,238~3,23e~2,241~4,246~1,248~1,24b~1,24d~9,257~6,25e~1,263~1,265~2,268~3,26e~1,273~1,275~5,27e~1,280~2]] needs_recovery osd.3 has 23 missing osd/PG.cc: In function 'boost::statechart::result PG::RecoveryState::Active::react(const PG::RecoveryState::RecoveryComplete&)' thread 1685c700 time 2012-02-21 16:16:13.108923 osd/PG.cc: 4070: FAILED assert(!pg->needs_recovery()) ceph version 0.42-70-g0e4367a (commit:0e4367aaac88b99c36386b6ce5e8d816fdd4ada0) 1: (PG::RecoveryState::Active::react(PG::RecoveryState::RecoveryComplete const&)+0x1b3) [0x6a1173] 2: (boost::statechart::simple_state, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x121) [0x6c7301] 3: (boost::statechart::state_machine, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x6b) [0x6bfc6b] 4: (PG::RecoveryState::handle_recovery_complete(PG::RecoveryCtx*)+0x10c) [0x67c03c] 5: (ReplicatedPG::start_recovery_ops(int, PG::RecoveryCtx*)+0x241) [0x4f83c1] 6: (OSD::do_recovery(PG*)+0x345) [0x54b3e5] 7: (ThreadPool::worker()+0xa26) [0x619e66] 8: (ThreadPool::WorkThread::entry()+0xd) [0x57ad5d] 9: (()+0x7971) [0x5037971] 10: (clone()+0x6d) [0x679f92d] Signed-off-by: Sage Weil Reviewed-by: Samuel Just --- src/osd/ReplicatedPG.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 10aaf8526bcf2..153471bac2379 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5264,6 +5264,12 @@ int ReplicatedPG::start_recovery_ops(int max, RecoveryCtx *prctx) assert(recovery_ops_active == 0); + int unfound = get_num_unfound(); + if (unfound) { + dout(10) << " still have " << unfound << " unfound" << dendl; + return started; + } + handle_recovery_complete(prctx); return 0; -- 2.39.5