git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Better handle failure to get enough EC shards to backfill
author David Zafman <dzafman@redhat.com>
Wed, 4 Oct 2017 01:32:20 +0000 (18:32 -0700)
committer David Zafman <dzafman@redhat.com>
Mon, 16 Oct 2017 15:24:52 +0000 (08:24 -0700)
Fixes: http://tracker.ceph.com/issues/18162
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/ECBackend.cc
src/osd/PGBackend.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h

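diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index 780bdb676b3316f9f8d207429d535b412a32f4c7..52fd2e21fe3445fe6cf9ba176051bc802391f5db 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc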
@@ -217,6 +217,7 @@ void ECBackend::_failed_push(const hobject_t &hoid,
   dout(10) << __func__ << ": canceling recovery op for obj " << hoid
           << dendl;
   assert(recovery_ops.count(hoid));
+  eversion_t v = recovery_ops[hoid].v;
   recovery_ops.erase(hoid);
 
   list<pg_shard_t> fl;
@@ -224,6 +225,8 @@ void ECBackend::_failed_push(const hobject_t &hoid,
     fl.push_back(i.first);
   }
   get_parent()->failed_push(fl, hoid);
+  get_parent()->backfill_add_missing(hoid, v);
+  get_parent()->finish_degraded_object(hoid);
 }
 
 struct OnRecoveryReadComplete :
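diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index b9604dd2729c19f25fdc8a260f669c0efb013875..b53c615926fb13503a2f25a2ef5f0c1b331a11b0 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h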
@@ -106,6 +106,7 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
        const hobject_t oid) = 0;
 
      virtual void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) = 0;
+     virtual void finish_degraded_object(const hobject_t& oid) = 0;
      virtual void primary_failed(const hobject_t &soid) = 0;
      virtual bool primary_error(const hobject_t& soid, eversion_t v) = 0;
      virtual void cancel_pull(const hobject_t &soid) = 0;
@@ -122,6 +123,11 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
        eversion_t v
        ) = 0;
 
+     virtual void backfill_add_missing(
+       const hobject_t &oid,
+       eversion_t v
+       ) = 0;
+
      virtual void remove_missing_object(const hobject_t &oid,
                                        eversion_t v,
                                        Context *on_complete) = 0;
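diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 99a4230da78950aca6ca5d91d09fb4c67951be8a..a9a6fa233425bcce410410c7a54a4f59048c91b2 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc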
@@ -525,6 +525,14 @@ void PrimaryLogPG::on_primary_error(
   dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
   primary_failed(oid);
   primary_error(oid, v);
+  backfill_add_missing(oid, v);
+}
+
+void PrimaryLogPG::backfill_add_missing(
+  const hobject_t &oid,
+  eversion_t v)
+{
+  dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
   backfills_in_flight.erase(oid);
   missing_loc.add_missing(oid, v, eversion_t());
 }
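diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index 8606fc4f694ae6126da1a15a998b349bde4af220..fb9541787150d8ab75b3f59649c90b146f1f1169 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h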
@@ -277,6 +277,7 @@ public:
     const hobject_t &soid,
     const object_stat_sum_t &delta_stats) override;
   void on_primary_error(const hobject_t &oid, eversion_t v) override;
+  void backfill_add_missing(const hobject_t &oid, eversion_t v) override;
   void remove_missing_object(const hobject_t &oid,
                             eversion_t v,
                             Context *on_complete) override;