git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: try to pull object from other replica(s) on EOF
author Sage Weil <sage@newdream.net>
Wed, 29 Sep 2010 15:24:34 +0000 (08:24 -0700)
committer Sage Weil <sage@newdream.net>
Wed, 29 Sep 2010 15:45:06 +0000 (08:45 -0700)
If during recovery we are unable to pull from a replica due to reaching
EOF (e.g., zeroed out object), pull from the next available replica (if
any).

Eventually this should be extended to do the same when a checksum fails.

Signed-off-by: Sage Weil <sage@newdream.net>
src/osd/ReplicatedPG.cc

index 775c6b06c28f208c9dc51abda27b42e2baddf5f0..343b8a17501a2e3a5a3f5d019b184562beaf0a2a 100644 (file)
@@ -3209,6 +3209,22 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op)
 
       if (op->complete && !complete) {
        dout(0) << " uh oh, we reached EOF on peer before we got everything we wanted" << dendl;
+
+       // hmm, do we have another source?
+       int from = op->get_source().num();
+       set<int>& reps = missing_loc[soid];
+       dout(0) << " we have reps on osds " << reps << dendl;
+       set<int>::iterator q = reps.begin();
+       if (q != reps.end() && *q == from) {
+         q++;
+         if (q != reps.end()) {
+           dout(0) << " trying next replica on osd" << *q << dendl;
+           reps.erase(reps.begin());  // forget about the bad replica...
+           finish_recovery_op(soid);  // close out this attempt,
+           pulling.erase(soid);
+           pull(soid);                // and try again.
+         }
+       }
        op->put();
        return;
       }