From 5997059a604d05ced8566a511c27608e810b81f4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 1 Mar 2011 14:25:59 -0800 Subject: [PATCH] osd: continue recovery after encountering missing objects 1- If we try to pull an object that isn't there, send an empty push in reply. 2- If we get an empty push, call a new failed_push helper. Also called when we pull partial/bad data. 3- Fix the fail behavior to close out our attempt, adjust our missing_loc, but let the calling recovery code handle the retry. Signed-off-by: Sage Weil --- src/osd/ReplicatedPG.cc | 79 +++++++++++++++++++++++++++-------------- src/osd/ReplicatedPG.h | 10 +++--- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d55ce8f38a1fd..e961a9200a109 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3212,6 +3212,12 @@ int ReplicatedPG::pull(const sobject_t& soid) break; } } + if (fromosd < 0) { + dout(7) << "pull " << soid + << " v " << v + << " but it is unfound" << dendl; + return PULL_NONE; + } dout(7) << "pull " << soid << " v " << v @@ -3219,9 +3225,6 @@ int ReplicatedPG::pull(const sobject_t& soid) << " from osd" << fromosd << dendl; - if (fromosd < 0) - return PULL_NONE; - map > clone_subsets; interval_set data_subset; bool need_size = false; @@ -3420,10 +3423,10 @@ void ReplicatedPG::push_start(const sobject_t& soid, int peer, * push - send object to a peer */ -void ReplicatedPG::send_push_op(const sobject_t& soid, eversion_t version, int peer, - uint64_t size, bool first, bool complete, - interval_set &data_subset, - map >& clone_subsets) +int ReplicatedPG::send_push_op(const sobject_t& soid, eversion_t version, int peer, + uint64_t size, bool first, bool complete, + interval_set &data_subset, + map >& clone_subsets) { // read data+attrs bufferlist bl; @@ -3452,7 +3455,7 @@ void ReplicatedPG::send_push_op(const sobject_t& soid, eversion_t version, int p if (oi.version != version) { osd->clog.error() << 
"push " << soid << " v " << version << " to osd" << peer << " failed because local copy is " << oi.version << "\n"; - return; + return -1; } // ok @@ -3484,6 +3487,20 @@ void ReplicatedPG::send_push_op(const sobject_t& soid, eversion_t version, int p subop->complete = complete; osd->cluster_messenger-> send_message(subop, osd->osdmap->get_cluster_inst(peer)); + return 0; +} + +void ReplicatedPG::send_push_op_blank(const sobject_t& soid, int peer) +{ + // send a blank push back to the primary + osd_reqid_t rid; + MOSDSubOp *subop = new MOSDSubOp(rid, info.pgid, soid, false, 0, + osd->osdmap->get_epoch(), osd->get_tid(), eversion_t()); + subop->ops = vector(1); + subop->ops[0].op.op = CEPH_OSD_OP_PUSH; + subop->first = false; + subop->complete = false; + osd->cluster_messenger->send_message(subop, osd->osdmap->get_cluster_inst(peer)); } void ReplicatedPG::sub_op_push_reply(MOSDSubOpReply *reply) @@ -3564,7 +3581,7 @@ void ReplicatedPG::sub_op_pull(MOSDSubOp *op) osd->clog.error() << op->get_source() << " tried to pull " << soid << " in " << info.pgid << " but got " << cpp_strerror(-r) << "\n"; - // FIXME: do something more intelligent.. mark the pg as needing repair? + send_push_op_blank(soid, op->get_source().num()); } else { uint64_t size = st.st_size; @@ -3576,7 +3593,10 @@ void ReplicatedPG::sub_op_pull(MOSDSubOp *op) // complete==false means nothing. we don't know because the primary may // not be pulling the entire object. - send_push_op(soid, op->version, op->get_source().num(), size, op->first, complete, op->data_subset, op->clone_subsets); + r = send_push_op(soid, op->version, op->get_source().num(), size, op->first, complete, + op->data_subset, op->clone_subsets); + if (r < 0) + send_push_op_blank(soid, op->get_source().num()); } op->put(); } @@ -3650,6 +3670,12 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op) << " data len " << op->get_data().length() << dendl; + if (v == eversion_t()) { + // replica doesn't have it! 
+ _failed_push(op); + return; + } + interval_set data_subset; map > clone_subsets; @@ -3748,23 +3774,7 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op) if (op->complete && !complete) { dout(0) << " uh oh, we reached EOF on peer before we got everything we wanted" << dendl; - - // hmm, do we have another source? - int from = op->get_source().num(); - set& reps = missing_loc[soid]; - dout(0) << " we have reps on osds " << reps << dendl; - set::iterator q = reps.begin(); - if (q != reps.end() && *q == from) { - q++; - if (q != reps.end()) { - dout(0) << " trying next replica on osd" << *q << dendl; - reps.erase(reps.begin()); // forget about the bad replica... - finish_recovery_op(soid); // close out this attempt, - pulling.erase(soid); - pull(soid); // and try again. - } - } - op->put(); + _failed_push(op); return; } @@ -3965,6 +3975,21 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op) op->put(); // at the end... soid is a ref to op->soid! } +void ReplicatedPG::_failed_push(MOSDSubOp *op) +{ // called from sub_op_push when the push is blank (v == eversion_t()) or ended before we got everything we wanted; consumes op via op->put() + const sobject_t& soid = op->poid; // reference into *op, so op->put() below has to stay the last statement + int from = op->get_source().num(); // the osd that sent this failed push + set& reps = missing_loc[soid]; // NOTE(review): operator[] -- if missing_loc is a std::map (TODO confirm) this inserts an empty set when soid has no entry, and the entry stays even if reps ends up empty + dout(0) << "_failed_push " << soid << " from osd" << from + << ", reps on " << reps << dendl; + + reps.erase(from); // forget about this (bad) peer replica as a location for soid + + finish_recovery_op(soid); // close out this attempt; unlike the code this replaces, no pull(soid) here -- the retry is left to the calling recovery code + pulling.erase(soid); + + op->put(); // must come last: soid refers to op->poid +} /* diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 884597fee19fc..ab73f7e995113 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -525,10 +525,11 @@ protected: uint64_t size, eversion_t version, interval_set &data_subset, map >& clone_subsets); - void send_push_op(const sobject_t& oid, eversion_t version, int dest, - uint64_t size, bool first, bool complete, - interval_set& data_subset, - map >& clone_subsets); + int send_push_op(const sobject_t& oid, eversion_t version, int dest, + uint64_t size, bool first, bool complete, + interval_set& data_subset, + map >& clone_subsets); + void 
send_push_op_blank(const sobject_t& soid, int peer); int pull(const sobject_t& oid); void send_pull_op(const sobject_t& soid, eversion_t v, bool first, const interval_set& data_subset, int fromosd); @@ -618,6 +619,7 @@ protected: void sub_op_modify_reply(MOSDSubOpReply *reply); void _wrote_pushed_object(ObjectStore::Transaction *t, ObjectContext *obc); void sub_op_push(MOSDSubOp *op); + void _failed_push(MOSDSubOp *op); void sub_op_push_reply(MOSDSubOpReply *reply); void sub_op_pull(MOSDSubOp *op); -- 2.39.5