git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Better handle failure to get enough EC shards to backfill
author: David Zafman <dzafman@redhat.com>
Wed, 4 Oct 2017 01:32:20 +0000 (18:32 -0700)
committer: David Zafman <dzafman@redhat.com>
Tue, 23 Jan 2018 19:37:17 +0000 (11:37 -0800)
Fixes: http://tracker.ceph.com/issues/18162
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit 6a02bfef3d44a13589c1a90bec29ff0ac64f97aa)

src/osd/ECBackend.cc
src/osd/PGBackend.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h

index b75948c4fe3cc10f23c81a23069128ffa490b07a..e0cedf47db1a2877d0d18cce8028e0ffbf441089 100644 (file)
@@ -217,6 +217,7 @@ void ECBackend::_failed_push(const hobject_t &hoid,
   dout(10) << __func__ << ": canceling recovery op for obj " << hoid
           << dendl;
   assert(recovery_ops.count(hoid));
+  eversion_t v = recovery_ops[hoid].v;
   recovery_ops.erase(hoid);
 
   list<pg_shard_t> fl;
@@ -224,6 +225,8 @@ void ECBackend::_failed_push(const hobject_t &hoid,
     fl.push_back(i.first);
   }
   get_parent()->failed_push(fl, hoid);
+  get_parent()->backfill_add_missing(hoid, v);
+  get_parent()->finish_degraded_object(hoid);
 }
 
 struct OnRecoveryReadComplete :
index f244f4c2f703dcfc7cbf96e73166b40a2c2c60cd..cb8a1115c06f6e7892930f71934c5cc205ec1a0a 100644 (file)
@@ -106,6 +106,7 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
        const hobject_t oid) = 0;
 
      virtual void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) = 0;
+     virtual void finish_degraded_object(const hobject_t& oid) = 0;
      virtual void primary_failed(const hobject_t &soid) = 0;
      virtual bool primary_error(const hobject_t& soid, eversion_t v) = 0;
      virtual void cancel_pull(const hobject_t &soid) = 0;
@@ -122,6 +123,11 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
        eversion_t v
        ) = 0;
 
+     virtual void backfill_add_missing(
+       const hobject_t &oid,
+       eversion_t v
+       ) = 0;
+
      virtual void remove_missing_object(const hobject_t &oid,
                                        eversion_t v,
                                        Context *on_complete) = 0;
index a0f881a9db353b896245c6323e8e4ce2e28a7ffa..df828787fdafb95debc24be0b25ea92a6c0718cd 100644 (file)
@@ -535,6 +535,14 @@ void PrimaryLogPG::on_primary_error(
   dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
   primary_failed(oid);
   primary_error(oid, v);
+  backfill_add_missing(oid, v);
+}
+
+void PrimaryLogPG::backfill_add_missing(
+  const hobject_t &oid,
+  eversion_t v)
+{
+  dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
   backfills_in_flight.erase(oid);
   missing_loc.add_missing(oid, v, eversion_t());
 }
index 5289e78b0bad19037bd471f5731f1e357051edd8..6a4b3d9776ecb3a022015ba13a58d38096189cbe 100644 (file)
@@ -273,6 +273,7 @@ public:
     const hobject_t &soid,
     const object_stat_sum_t &delta_stats) override;
   void on_primary_error(const hobject_t &oid, eversion_t v) override;
+  void backfill_add_missing(const hobject_t &oid, eversion_t v) override;
   void remove_missing_object(const hobject_t &oid,
                             eversion_t v,
                             Context *on_complete) override;