]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ReplicatedPG,OSD: Track which osds we are pulling from
authorSamuel Just <samuel.just@dreamhost.com>
Thu, 10 Mar 2011 23:37:33 +0000 (15:37 -0800)
committerSage Weil <sage.weil@dreamhost.com>
Mon, 14 Mar 2011 22:14:08 +0000 (15:14 -0700)
Currently, a PG waiting on a pull from a dead OSD cannot continue
recovery.  ReplicatedPG::pull now tracks open pulls by peer in
rec_from_peer (map<int, set<sobject_t> >).

OSD::advance_map now calls check_recovery_op_pulls to allow the PG to
reset pulls from failed peers.

Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
src/osd/OSD.cc
src/osd/PG.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index 0adb2728849f4f24c1f9cdd5708a3f838211fb64..6247961c075b4782819530577c8fd5928c37bc53 100644 (file)
@@ -3315,6 +3315,7 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin)
        it++) {
     PG *pg = it->second;
     pg->lock();
+    pg->check_recovery_op_pulls(*osdmap);
 
     if (pg->is_primary())
       num_pg_primary++;
index fc074d08349eef11f5fd5c2a8af21ab52b722376..74db95dacd9b763eed914329689fb3fe2a585635 100644 (file)
@@ -885,6 +885,7 @@ public:
   void clear_recovery_state();
   virtual void _clear_recovery_state() = 0;
   void defer_recovery();
+  virtual void check_recovery_op_pulls(const OSDMap &newmap) = 0;
   void start_recovery_op(const sobject_t& soid);
   void finish_recovery_op(const sobject_t& soid, bool dequeue=false);
 
index 78474b3cf32576c2c0b4987f4aee5e69d5653ef3..972428e604f3926c7c1239d39e4635c78df3db29 100644 (file)
@@ -3303,6 +3303,7 @@ int ReplicatedPG::pull(const sobject_t& soid)
 
   // take note
   assert(pulling.count(soid) == 0);
+  rec_from_peer[fromosd].insert(soid);
   pull_info_t& p = pulling[soid];
   p.version = v;
   p.from = fromosd;
@@ -3945,6 +3946,7 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op)
     if (complete) {
       // close out pull op
       pulling.erase(soid);
+      rec_from_peer[pi->from].erase(soid);
       finish_recovery_op(soid);
       
       update_stats();
@@ -4015,6 +4017,7 @@ void ReplicatedPG::_failed_push(MOSDSubOp *op)
   }
 
   finish_recovery_op(soid);  // close out this attempt,
+  rec_from_peer[from].erase(soid);
   pulling.erase(soid);
 
   op->put();
@@ -4085,6 +4088,7 @@ void ReplicatedPG::on_change()
   // clear pushing/pulling maps
   pushing.clear();
   pulling.clear();
+  rec_from_peer.clear();
 }
 
 void ReplicatedPG::on_role_change()
@@ -4110,8 +4114,34 @@ void ReplicatedPG::_clear_recovery_state()
 #endif
   pulling.clear();
   pushing.clear();
+  rec_from_peer.clear();
 }
 
+void ReplicatedPG::check_recovery_op_pulls(const OSDMap &osdmap)
+{
+  map<int, set<sobject_t> >::iterator j = rec_from_peer.begin();
+  for (;
+       j != rec_from_peer.end();
+       ) {
+    if (osdmap.is_up(j->first)) {
+      ++j;
+      continue;
+    }
+    dout(10) << "Reseting pulls from osd" << j->first
+            << ", osdmap has it marked down" << dendl;
+    
+    for (set<sobject_t>::iterator i = j->second.begin();
+        i != j->second.end();
+        ++i) {
+      assert(pulling.count(*i) == 1);
+      pulling.erase(*i);
+      finish_recovery_op(*i);
+    }
+    log.last_requested = eversion_t();
+    rec_from_peer.erase(j++);
+  }
+}
+  
 
 int ReplicatedPG::start_recovery_ops(int max)
 {
index 1d1b3a77f159e501d876169c238f1ebe03591790..779a1a52aab95005719ba566ba627a9cf79badba 100644 (file)
@@ -503,6 +503,9 @@ protected:
   };
   map<sobject_t, pull_info_t> pulling;
 
+  // Reverse mapping from osd peer to objects beging pulled from that peer
+  map<int, set<sobject_t> > rec_from_peer;
+
   // push
   struct push_info_t {
     uint64_t size;
@@ -532,6 +535,8 @@ protected:
                   map<sobject_t, interval_set<uint64_t> >& clone_subsets);
   void send_push_op_blank(const sobject_t& soid, int peer);
 
+  // Cancels/resets pulls from peer
+  void check_recovery_op_pulls(const OSDMap &map);
   int pull(const sobject_t& oid);
   void send_pull_op(const sobject_t& soid, eversion_t v, bool first, const interval_set<uint64_t>& data_subset, int fromosd);