From 6a02bfef3d44a13589c1a90bec29ff0ac64f97aa Mon Sep 17 00:00:00 2001 From: David Zafman Date: Tue, 3 Oct 2017 18:32:20 -0700 Subject: [PATCH] osd: Better handle failure to get enough EC shards to backfill Fixes: http://tracker.ceph.com/issues/18162 Signed-off-by: David Zafman --- src/osd/ECBackend.cc | 3 +++ src/osd/PGBackend.h | 6 ++++++ src/osd/PrimaryLogPG.cc | 8 ++++++++ src/osd/PrimaryLogPG.h | 1 + 4 files changed, 18 insertions(+) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 780bdb676b331..52fd2e21fe344 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -217,6 +217,7 @@ void ECBackend::_failed_push(const hobject_t &hoid, dout(10) << __func__ << ": canceling recovery op for obj " << hoid << dendl; assert(recovery_ops.count(hoid)); + eversion_t v = recovery_ops[hoid].v; recovery_ops.erase(hoid); list<pg_shard_t> fl; @@ -224,6 +225,8 @@ void ECBackend::_failed_push(const hobject_t &hoid, fl.push_back(i.first); } get_parent()->failed_push(fl, hoid); + get_parent()->backfill_add_missing(hoid, v); + get_parent()->finish_degraded_object(hoid); } struct OnRecoveryReadComplete : diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index b9604dd2729c1..b53c615926fb1 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -106,6 +106,7 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef; const hobject_t oid) = 0; virtual void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) = 0; + virtual void finish_degraded_object(const hobject_t& oid) = 0; virtual void primary_failed(const hobject_t &soid) = 0; virtual bool primary_error(const hobject_t& soid, eversion_t v) = 0; virtual void cancel_pull(const hobject_t &soid) = 0; @@ -122,6 +123,11 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef; eversion_t v ) = 0; + virtual void backfill_add_missing( + const hobject_t &oid, + eversion_t v + ) = 0; + virtual void remove_missing_object(const hobject_t &oid, eversion_t v, Context *on_complete) = 0; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 
99a4230da7895..a9a6fa233425b 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -525,6 +525,14 @@ void PrimaryLogPG::on_primary_error( dout(0) << __func__ << ": oid " << oid << " version " << v << dendl; primary_failed(oid); primary_error(oid, v); + backfill_add_missing(oid, v); +} + +void PrimaryLogPG::backfill_add_missing( + const hobject_t &oid, + eversion_t v) +{ + dout(0) << __func__ << ": oid " << oid << " version " << v << dendl; backfills_in_flight.erase(oid); missing_loc.add_missing(oid, v, eversion_t()); } diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 8606fc4f694ae..fb9541787150d 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -277,6 +277,7 @@ public: const hobject_t &soid, const object_stat_sum_t &delta_stats) override; void on_primary_error(const hobject_t &oid, eversion_t v) override; + void backfill_add_missing(const hobject_t &oid, eversion_t v) override; void remove_missing_object(const hobject_t &oid, eversion_t v, Context *on_complete) override; -- 2.39.5