git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: tolerate racing threads starting recovery ops
author: Sage Weil <sage@inktank.com>
Mon, 24 Jun 2013 23:37:29 +0000 (16:37 -0700)
committer: Sage Weil <sage@inktank.com>
Tue, 25 Jun 2013 00:44:06 +0000 (17:44 -0700)
We sample the number of recovery ops we may start (max - active), but we
do not hold the lock over the full duration, so racing threads can start
too many ops.  This isn't problematic in itself, except that our condition
only checks for active being == max, not beyond it, so we continue to start
recovery ops when we shouldn't.  Fix this by changing the conditional
from == to <=.

Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: David Zafman <david.zafman@inktank.com>
src/osd/OSD.cc

index e96e05eb7cd6ebf1a01f9545241f02bbcb54071a..57d6d7245c6f0e41c83ad950f31985262414c39d 100644 (file)
@@ -6493,7 +6493,7 @@ void OSD::do_recovery(PG *pg)
   recovery_wq.lock();
   int max = g_conf->osd_recovery_max_active - recovery_ops_active;
   recovery_wq.unlock();
-  if (max == 0) {
+  if (max <= 0) {
     dout(10) << "do_recovery raced and failed to start anything; requeuing " << *pg << dendl;
     recovery_wq.queue(pg);
   } else {