git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: tolerate racing threads starting recovery ops
author Sage Weil <sage@inktank.com>
Mon, 24 Jun 2013 23:37:29 +0000 (16:37 -0700)
committer Samuel Just <sam.just@inktank.com>
Tue, 13 Aug 2013 20:31:38 +0000 (13:31 -0700)
We sample the (max - active) recovery ops to know how many to start, but
do not hold the lock over the full duration, such that it is possible to
start too many ops.  This isn't problematic in itself, except that our
condition only checks for the active count being == max and not beyond it
(i.e. the computed budget being exactly 0, not negative), so we will
continue to start recovery ops when we shouldn't.  Fix this by adjusting
the conditional to be <= 0.

Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: David Zafman <david.zafman@inktank.com>
(cherry picked from commit 3791a1e55828ba541f9d3e8e3df0da8e79c375f9)

src/osd/OSD.cc

index 679f0e143bdd2ca6afa97ab2a43bfa702cb1ace6..7d0a0e3e5e633469e15b79c54fabf9dfebe9821f 100644 (file)
@@ -6057,7 +6057,7 @@ void OSD::do_recovery(PG *pg)
   recovery_wq.lock();
   int max = g_conf->osd_recovery_max_active - recovery_ops_active;
   recovery_wq.unlock();
-  if (max == 0) {
+  if (max <= 0) {
     dout(10) << "do_recovery raced and failed to start anything; requeuing " << *pg << dendl;
     recovery_wq.queue(pg);
   } else {