From cef725bb47093da4293ae82ca38e7e13866842f7 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Tue, 3 Oct 2017 18:32:20 -0700 Subject: [PATCH] osd: Better handle failure to get enough EC shards to backfill Fixes: http://tracker.ceph.com/issues/18162 Signed-off-by: David Zafman (cherry picked from commit 6a02bfef3d44a13589c1a90bec29ff0ac64f97aa) Conflicts: src/osd/PGBackend.h (trivial) src/osd/ReplicatedPG.cc (trivial) src/osd/ReplicatedPG.h (trivial) --- src/osd/ECBackend.cc | 3 +++ src/osd/PGBackend.h | 6 ++++++ src/osd/ReplicatedPG.cc | 9 +++++++++ src/osd/ReplicatedPG.h | 1 + 4 files changed, 19 insertions(+) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 1453520858930..e1d1d3e0531e2 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -196,6 +196,7 @@ void ECBackend::_failed_push(const hobject_t &hoid, dout(10) << __func__ << ": canceling recovery op for obj " << hoid << dendl; assert(recovery_ops.count(hoid)); + eversion_t v = recovery_ops[hoid].v; recovery_ops.erase(hoid); list<pg_shard_t> fl; @@ -203,6 +204,8 @@ void ECBackend::_failed_push(const hobject_t &hoid, fl.push_back(i.first); } get_parent()->failed_push(fl, hoid); + get_parent()->backfill_add_missing(hoid, v); + get_parent()->finish_degraded_object(hoid); } struct OnRecoveryReadComplete : diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 2fee7a2f66bb1..e73d6afdfbd55 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -97,8 +97,14 @@ struct inconsistent_obj_wrapper; virtual void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) = 0; + virtual void finish_degraded_object(const hobject_t& oid) = 0; virtual void cancel_pull(const hobject_t &soid) = 0; + virtual void backfill_add_missing( + const hobject_t &oid, + eversion_t v + ) = 0; + /** * Bless a context * diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d85c5c4acc379..93586c42c6b59 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -356,6 +356,15 @@ void
ReplicatedPG::send_message_osd_cluster( osd->send_message_osd_cluster(m, con); } +void ReplicatedPG::backfill_add_missing( + const hobject_t &oid, + eversion_t v) +{ + dout(0) << __func__ << ": oid " << oid << " version " << v << dendl; + backfills_in_flight.erase(oid); + missing_loc.add_missing(oid, v, eversion_t()); +} + ConnectionRef ReplicatedPG::get_con_osd_cluster( int peer, epoch_t from_epoch) { diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index d0a217207a728..169d89876805d 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -282,6 +282,7 @@ public: const object_stat_sum_t &stat_diff); void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) override; void cancel_pull(const hobject_t &soid); + void backfill_add_missing(const hobject_t &oid, eversion_t v) override; template<class T> class BlessedGenContext : public GenContext<T> { -- 2.39.5