From aa04c8fbbdab1615cd2430a729998ac5a7c6fe46 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 29 Sep 2010 08:24:34 -0700 Subject: [PATCH] osd: try to pull object from other replica(s) on EOF If during recovery we are unable to pull from a replica due to reaching EOF (e.g., zeroed out object), pull from the next available replica (if any). Eventually this should be extended to do the same when a checksum fails. Signed-off-by: Sage Weil --- src/osd/ReplicatedPG.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 775c6b06c28f2..343b8a17501a2 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3209,6 +3209,22 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op) if (op->complete && !complete) { dout(0) << " uh oh, we reached EOF on peer before we got everything we wanted" << dendl; + + // hmm, do we have another source? + int from = op->get_source().num(); + set& reps = missing_loc[soid]; + dout(0) << " we have reps on osds " << reps << dendl; + set::iterator q = reps.begin(); + if (q != reps.end() && *q == from) { + q++; + if (q != reps.end()) { + dout(0) << " trying next replica on osd" << *q << dendl; + reps.erase(reps.begin()); // forget about the bad replica... + finish_recovery_op(soid); // close out this attempt, + pulling.erase(soid); + pull(soid); // and try again. + } + } op->put(); return; } -- 2.39.5