From: Sage Weil
Date: Mon, 7 Jan 2013 04:43:21 +0000 (-0800)
Subject: osd: fix race in do_recovery()
X-Git-Tag: v0.48.3argonaut~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=213e3559dd260a2e19324f2a671c808261249f96;p=ceph.git

osd: fix race in do_recovery()

Verify that the PG is still RECOVERING or BACKFILL when we take the pg
lock in the recovery thread. This prevents a crash from an invalid
state machine event when the recovery queue races with a PG state change
(e.g., due to peering).

Signed-off-by: Sage Weil
Reviewed-by: Samuel Just
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index a6e26e7f536..f57f2264f74 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -5217,6 +5217,13 @@ void OSD::do_recovery(PG *pg)
   } else {
     pg->lock();
+
+    if (!pg->state_test(PG_STATE_RECOVERING) &&
+        !pg->state_test(PG_STATE_BACKFILL)) {
+      dout(10) << "do_recovery not recovering|backfill on " << *pg << dendl;
+      pg->unlock();
+      goto out;
+    }
 
     dout(10) << "do_recovery starting " << max
              << " (" << recovery_ops_active << "/" << g_conf->osd_recovery_max_active << " rops) on "
@@ -5269,6 +5276,7 @@ void OSD::do_recovery(PG *pg)
     }
     pg->unlock();
   }
+ out:
   pg->put();
 }
 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index a80d95dcbae..2f38dac426e 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2919,6 +2919,7 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer
     log.last_requested = 0;
   }
 
+  state_set(PG_STATE_RECOVERING);
   osd->queue_for_recovery(this);
 }