OSD,PG: fix race between processing scrub and dequeueing scrub
author Samuel Just <samuel.just@dreamhost.com>
Tue, 15 Feb 2011 18:02:19 +0000 (10:02 -0800)
committer Samuel Just <samuel.just@dreamhost.com>
Tue, 22 Feb 2011 23:09:33 +0000 (15:09 -0800)
Previously, a second scrub could be scheduled between when the first is
dequeued and when it is processed, resulting in two scrubs of the PG
running concurrently.

Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
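
In essence, the patch replaces the racy "check is_scrubbing(), then queue"
sequence with a single check-and-set performed while the PG lock is held.
The following is a minimal standalone sketch of that pattern, not Ceph code;
the names FakePG and ScrubQueue are made up for illustration:

// Sketch (hypothetical, not Ceph code) of the check-and-set-under-lock
// pattern this commit introduces: the "already scrubbing" flag is set in
// the same critical section that enqueues the work item, so a second
// request arriving before the first is dequeued and processed cannot
// enqueue a duplicate.
#include <deque>
#include <iostream>
#include <mutex>

struct FakePG;

struct ScrubQueue {
  std::mutex mtx;
  std::deque<FakePG*> items;
  void queue(FakePG *pg) {
    std::lock_guard<std::mutex> l(mtx);
    items.push_back(pg);
  }
};

struct FakePG {
  std::mutex lock;          // stands in for PG::_lock
  bool scrubbing = false;   // stands in for PG_STATE_SCRUBBING
  ScrubQueue *wq;

  explicit FakePG(ScrubQueue *q) : wq(q) {}

  // Analogue of PG::queue_scrub(): the caller must hold `lock`, so the
  // flag is tested and set atomically with respect to other lock holders
  // and only the first caller actually enqueues the PG.
  bool queue_scrub() {
    if (scrubbing)
      return false;
    scrubbing = true;
    wq->queue(this);
    return true;
  }
};

int main() {
  ScrubQueue wq;
  FakePG pg(&wq);

  // Two back-to-back scrub requests, as in OSD::handle_scrub().
  for (int i = 0; i < 2; i++) {
    std::lock_guard<std::mutex> l(pg.lock);
    if (pg.queue_scrub())
      std::cout << "queued for scrub\n";
    else
      std::cout << "already scrubbing, not queued again\n";
  }
  return 0;
}

The second request sees the flag already set and backs off, which mirrors
queue_scrub() returning false in the diff below.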
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 44df6581849fdb47192ad92bcd0197fb5d7b691c..ee538577317410a9421cdeaeab84a9c72d8de6c2 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2529,14 +2529,15 @@ void OSD::handle_scrub(MOSDScrub *m)
         p != pg_map.end();
         p++) {
       PG *pg = p->second;
+      pg->lock();
       if (pg->is_primary()) {
        if (m->repair)
          pg->state_set(PG_STATE_REPAIR);
-       if (!pg->is_scrubbing()) {
+       if (pg->queue_scrub()) {
          dout(10) << "queueing " << *pg << " for scrub" << dendl;
-         scrub_wq.queue(pg);
        }
       }
+      pg->unlock();
     }
   } else {
     for (vector<pg_t>::iterator p = m->scrub_pgs.begin();
@@ -2544,14 +2545,15 @@ void OSD::handle_scrub(MOSDScrub *m)
         p++)
       if (pg_map.count(*p)) {
        PG *pg = pg_map[*p];
+       pg->lock();
        if (pg->is_primary()) {
          if (m->repair)
            pg->state_set(PG_STATE_REPAIR);
-         if (!pg->is_scrubbing()) {
+         if (pg->queue_scrub()) {
            dout(10) << "queueing " << *pg << " for scrub" << dendl;
-           scrub_wq.queue(pg);
          }
        }
+       pg->unlock();
       }
   }
   
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 1e919794c43409c589d26ef902a0017d86943a85..ced3d8014fbf296eaf95e806ada52caaf3a2ec30 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1991,6 +1991,16 @@ void PG::queue_snap_trim()
     dout(10) << "queue_snap_trim -- already trimming" << dendl;
 }
 
+bool PG::queue_scrub()
+{
+  assert(_lock.is_locked());
+  if (is_scrubbing()) {
+    return false;
+  }
+  state_set(PG_STATE_SCRUBBING);
+  osd->scrub_wq.queue(this);
+  return true;
+}
 
 struct C_PG_FinishRecovery : public Context {
   PG *pg;
@@ -2034,7 +2044,7 @@ void PG::_finish_recovery(Context *c)
 
     if (state_test(PG_STATE_INCONSISTENT)) {
       dout(10) << "_finish_recovery requeueing for scrub" << dendl;
-      osd->scrub_wq.queue(this);
+      queue_scrub();
     } else if (log.backlog) {
       ObjectStore::Transaction *t = new ObjectStore::Transaction;
       drop_backlog();
@@ -2774,7 +2784,7 @@ bool PG::sched_scrub()
       ret = true;
     } else if (scrub_reserved_peers.size() == acting.size()) {
       dout(20) << "sched_scrub: success, reserved self and replicas" << dendl;
-      osd->scrub_wq.queue(this);
+      queue_scrub();
       ret = true;
     } else {
       // none declined, since scrub_reserved is set
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 37910bd0e0667b639aab2ce744254ef6ff5fe916..7a56ce9f91eeace1a80380fb959895783f4b9f14 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1001,6 +1001,7 @@ public:
   void adjust_local_snaps(ObjectStore::Transaction &t, interval_set<snapid_t> &to_check);
 
   void queue_snap_trim();
+  bool queue_scrub();
 
   void share_pg_info();
   void share_pg_log(const eversion_t &oldver);