From: David Zafman Date: Fri, 12 May 2017 23:01:02 +0000 (-0700) Subject: osd: Handle errors on reads of subsequent file segment X-Git-Tag: ses5-milestone8~1^2~19^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=84f6c9c8a09257b5d2f28a3693000067b82f1795;p=ceph.git osd: Handle errors on reads of subsequent file segment Signed-off-by: David Zafman --- diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 323d411dc610..4b2793802068 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -111,6 +111,14 @@ typedef ceph::shared_ptr OSDMapRef; const hobject_t &soid, const object_stat_sum_t &delta_stats) = 0; + /** + * Called when a read on the primary fails when pushing + */ + virtual void on_primary_error( + const hobject_t &oid, + eversion_t v + ) = 0; + /** * Bless a context diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 6d726afccf07..c6c3b74050e9 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -514,6 +514,18 @@ void PrimaryLogPG::send_message_osd_cluster( osd->send_message_osd_cluster(m, con); } +void PrimaryLogPG::on_primary_error( + const hobject_t &oid, + eversion_t v) +{ + dout(0) << __func__ << ": oid " << oid << " version " << v << dendl; + list fl = { pg_whoami }; + failed_push(fl, oid); + primary_error(oid, v); + backfills_in_flight.erase(oid); + missing_loc.add_missing(oid, v, eversion_t()); +} + ConnectionRef PrimaryLogPG::get_con_osd_cluster( int peer, epoch_t from_epoch) { diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 4ef55f035b19..93dacfe1d039 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -268,6 +268,7 @@ public: void apply_stats( const hobject_t &soid, const object_stat_sum_t &delta_stats) override; + void on_primary_error(const hobject_t &oid, eversion_t v) override; template class BlessedGenContext; class BlessedContext; diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 9d533aa21770..81d7e8771f69 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -1929,7 +1929,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info, ObjectRecoveryProgress &new_progress = *out_progress; new_progress = progress; - dout(7) << "send_push_op " << recovery_info.soid + dout(7) << __func__ << " " << recovery_info.soid << " v " << recovery_info.version << " size " << recovery_info.size << " recovery_info: " << recovery_info @@ -2105,8 +2105,9 @@ bool ReplicatedBackend::handle_push_reply( return false; } else { PushInfo *pi = &pushing[soid][peer]; + bool error = pushing[soid].begin()->second.recovery_progress.error; - if (!pi->recovery_progress.data_complete) { + if (!pi->recovery_progress.data_complete && !error) { dout(10) << " pushing more from, " << pi->recovery_progress.data_recovered_to << " of " << pi->recovery_info.copy_subset << dendl; @@ -2115,24 +2116,40 @@ bool ReplicatedBackend::handle_push_reply( pi->recovery_info, pi->recovery_progress, &new_progress, reply, &(pi->stat)); - // XXX: What can we do here? - assert(r == 0); + // Handle the case of a read error right after we wrote, which is + // hopefuilly extremely rare. + if (r < 0) { + dout(5) << __func__ << ": oid " << soid << " error " << r << dendl; + + error = true; + goto done; + } pi->recovery_progress = new_progress; return true; } else { // done! - get_parent()->on_peer_recover( - peer, soid, pi->recovery_info); +done: + if (!error) + get_parent()->on_peer_recover( peer, soid, pi->recovery_info); get_parent()->release_locks(pi->lock_manager); object_stat_sum_t stat = pi->stat; + eversion_t v = pi->recovery_info.version; pushing[soid].erase(peer); pi = NULL; if (pushing[soid].empty()) { - get_parent()->on_global_recover(soid, stat); + if (!error) + get_parent()->on_global_recover(soid, stat); + else + get_parent()->on_primary_error(soid, v); + pushing.erase(soid); } else { + // This looks weird, but we erased the current peer and need to remember + // the error on any other one, while getting more acks. + if (error) + pushing[soid].begin()->second.recovery_progress.error = true; dout(10) << "pushed " << soid << ", still waiting for push ack from " << pushing[soid].size() << " others" << dendl; } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 40d3138f459c..2f09b902ae44 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -5279,6 +5279,7 @@ ostream &ObjectRecoveryProgress::print(ostream &out) const << ", data_complete:" << ( data_complete ? "true" : "false" ) << ", omap_recovered_to:" << omap_recovered_to << ", omap_complete:" << ( omap_complete ? "true" : "false" ) + << ", error:" << ( error ? "true" : "false" ) << ")"; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 92cc7c569876..c06fb5c7cdaf 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -4658,6 +4658,7 @@ struct ObjectRecoveryProgress { bool first; bool data_complete; bool omap_complete; + bool error = false; ObjectRecoveryProgress() : data_recovered_to(0),