git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: do not mark pg clean until active is durable
author: Sage Weil <sage@inktank.com>
Sat, 5 May 2012 18:24:57 +0000 (11:24 -0700)
committer: Sage Weil <sage@inktank.com>
Sat, 5 May 2012 20:08:09 +0000 (13:08 -0700)
Do not mark a PG CLEAN or set last_epoch_clean until after the PG activate
is stable on all replicas.

This effectively means that last_epoch_clean will never fall in an interval
that follows last_epoch_started's interval.  It *can* be >
last_epoch_started when it falls within the same interval.

Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

index fdceb67064a045d573c922b7a5b99e41b401caca..48d8db53e53e4c9a2fa4eb685ce7300d91c695df 100644 (file)
@@ -5065,7 +5065,6 @@ void OSD::do_recovery(PG *pg)
        recovery_wq.lock();
        pg->recovery_item.remove_myself();      // sigh...
        recovery_wq.unlock();
-
       }
     }
 
@@ -5073,6 +5072,8 @@ void OSD::do_recovery(PG *pg)
     do_queries(query_map);
     do_infos(info_map);
 
+    pg->write_if_dirty(*t);
+
     if (!t->empty()) {
       int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin);
       assert(tr == 0);
@@ -5080,7 +5081,6 @@ void OSD::do_recovery(PG *pg)
       delete t;
       delete fin;
     }
-
     pg->unlock();
   }
   pg->put();
index 5d23cdff70b6ace810943dfa8e08657b368ec072..ba97b968930d39d5c7fa54b845ee10fc8ea621dd 100644 (file)
@@ -1521,9 +1521,17 @@ void PG::all_activated_and_committed()
   assert(peer_activated.size() == acting.size());
 
   info.history.last_epoch_started = get_osdmap()->get_epoch();
-  share_pg_info();
-
   dirty_info = true;
+
+  // make sure CLEAN is marked if we've been clean in this interval
+  if (info.last_complete == info.last_update &&
+      !state_test(PG_STATE_BACKFILL) &&
+      !state_test(PG_STATE_RECOVERING)) {
+    mark_clean();
+  }
+
+  share_pg_info();
+  update_stats();
 }
 
 void PG::queue_snap_trim()
@@ -1555,30 +1563,38 @@ struct C_PG_FinishRecovery : public Context {
   }
 };
 
-void PG::finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin)
+void PG::mark_clean()
 {
-  dout(10) << "finish_recovery" << dendl;
-  state_clear(PG_STATE_BACKFILL);
-  state_clear(PG_STATE_RECOVERING);
-
   // only mark CLEAN if we have the desired number of replicas AND we
   // are not remapped.
   if (acting.size() == get_osdmap()->get_pg_size(info.pgid) &&
       up == acting)
     state_set(PG_STATE_CLEAN);
 
-  assert(info.last_complete == info.last_update);
-
   // NOTE: this is actually a bit premature: we haven't purged the
   // strays yet.
   info.history.last_epoch_clean = get_osdmap()->get_epoch();
-  share_pg_info();
-
-  clear_recovery_state();
 
   trim_past_intervals();
-  
-  write_info(t);
+
+  dirty_info = true;
+}
+
+void PG::finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin)
+{
+  dout(10) << "finish_recovery" << dendl;
+  assert(info.last_complete == info.last_update);
+
+  state_clear(PG_STATE_BACKFILL);
+  state_clear(PG_STATE_RECOVERING);
+
+  // only mark CLEAN if last_epoch_started is already stable.
+  if (info.history.last_epoch_started >= info.history.same_interval_since) {
+    mark_clean();
+    share_pg_info();
+  }
+
+  clear_recovery_state();
 
   /*
    * sync all this before purging strays.  but don't block!
index 027e386dfdef92d3ab49da7e704d705fad517ad3..eec356775bef3e5c071c3483170effd8fde75493 100644 (file)
@@ -638,6 +638,7 @@ public:
   
   bool needs_recovery() const;
 
+  void mark_clean();  ///< mark an active pg clean
   void generate_past_intervals();
   void trim_past_intervals();
   void build_prior(std::auto_ptr<PriorSet> &prior_set);