]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/: unify PGBackend pull error pathways
author: Samuel Just <sjust@redhat.com>
Fri, 12 Apr 2019 18:08:53 +0000 (11:08 -0700)
committer: sjust@redhat.com <sjust@redhat.com>
Wed, 1 May 2019 18:22:27 +0000 (11:22 -0700)
This patch narrows the PGBackend -> PrimaryLogPG recovery
cancel/error interface to on_failed_pull and cancel_pull.

This patch requires careful review.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/ECBackend.cc
src/osd/PGBackend.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h
src/osd/ReplicatedBackend.cc

index 36a77cc7417a908e765f2f8b24153c7f1f4c3762..9fa6ca0ffd2ee207e69fcef1df890a254294d874 100644 (file)
@@ -220,13 +220,11 @@ void ECBackend::_failed_push(const hobject_t &hoid,
   eversion_t v = recovery_ops[hoid].v;
   recovery_ops.erase(hoid);
 
-  list<pg_shard_t> fl;
+  set<pg_shard_t> fl;
   for (auto&& i : res.errors) {
-    fl.push_back(i.first);
+    fl.insert(i.first);
   }
-  get_parent()->failed_push(fl, hoid);
-  get_parent()->backfill_add_missing(hoid, v);
-  get_parent()->finish_degraded_object(hoid);
+  get_parent()->on_failed_pull(fl, hoid, v);
 }
 
 struct OnRecoveryReadComplete :
index 6ea782bd75e1e9ce454ae664de58d1b54d122a12..d8fca3277e05f52084c6f0c6df38743ca8637e24 100644 (file)
@@ -105,35 +105,33 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
        pg_shard_t peer,
        const hobject_t oid) = 0;
 
-     virtual void failed_push(const list<pg_shard_t> &from,
-                              const hobject_t &soid,
-                              const eversion_t &need = eversion_t()) = 0;
-     virtual void finish_degraded_object(const hobject_t& oid) = 0;
-     virtual void primary_failed(const hobject_t &soid) = 0;
-     virtual bool primary_error(const hobject_t& soid, eversion_t v) = 0;
-     virtual void cancel_pull(const hobject_t &soid) = 0;
-
      virtual void apply_stats(
        const hobject_t &soid,
        const object_stat_sum_t &delta_stats) = 0;
 
      /**
-      * Called when a read on the primary fails when pushing
+      * Called when a read from a set of replicas/primary fails
       */
-     virtual void on_primary_error(
-       const hobject_t &oid,
-       eversion_t v
-       ) = 0;
-
-     virtual void backfill_add_missing(
-       const hobject_t &oid,
-       eversion_t v
+     virtual void on_failed_pull(
+       const set<pg_shard_t> &from,
+       const hobject_t &soid,
+       const eversion_t &v
        ) = 0;
 
-     virtual void remove_missing_object(const hobject_t &oid,
-                                       eversion_t v,
-                                       Context *on_complete) = 0;
+     /**
+      * Called when a pull on soid cannot be completed due to
+      * down peers
+      */
+     virtual void cancel_pull(
+       const hobject_t &soid) = 0;
 
+     /**
+      * Called to remove an object.
+      */
+     virtual void remove_missing_object(
+       const hobject_t &oid,
+       eversion_t v,
+       Context *on_complete) = 0;
 
      /**
       * Bless a context
index 8ea49c2031c0d5ffa38c52b05df31c4747f5394a..e2ce5486ba0c19c839e863feb1d1cab6141994c2 100644 (file)
@@ -505,16 +505,6 @@ void PrimaryLogPG::send_message_osd_cluster(
   osd->send_message_osd_cluster(m, con);
 }
 
-void PrimaryLogPG::on_primary_error(
-  const hobject_t &oid,
-  eversion_t v)
-{
-  dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
-  primary_failed(oid);
-  primary_error(oid, v);
-  backfill_add_missing(oid, v);
-}
-
 void PrimaryLogPG::backfill_add_missing(
   const hobject_t &oid,
   eversion_t v)
@@ -11420,14 +11410,10 @@ void PrimaryLogPG::_applied_recovered_object_replica()
   }
 }
 
-void PrimaryLogPG::primary_failed(const hobject_t &soid)
-{
-  list<pg_shard_t> fl = { pg_whoami };
-  failed_push(fl, soid);
-}
-
-void PrimaryLogPG::failed_push(const list<pg_shard_t> &from,
-  const hobject_t &soid, const eversion_t &need)
+void PrimaryLogPG::on_failed_pull(
+  const set<pg_shard_t> &from,
+  const hobject_t &soid,
+  const eversion_t &v)
 {
   dout(20) << __func__ << ": " << soid << dendl;
   ceph_assert(recovering.count(soid));
@@ -11452,6 +11438,12 @@ void PrimaryLogPG::failed_push(const list<pg_shard_t> &from,
          << ", reps on " << missing_loc.get_locations(soid)
          << " unfound? " << missing_loc.is_unfound(soid) << dendl;
   finish_recovery_op(soid);  // close out this attempt,
+  finish_degraded_object(soid);
+
+  if (from.count(pg_whoami)) {
+    primary_error(soid, v);
+    backfill_add_missing(soid, v);
+  }
 }
 
 eversion_t PrimaryLogPG::pick_newest_available(const hobject_t& oid)
@@ -12558,8 +12550,7 @@ int PrimaryLogPG::prep_object_replica_pushes(
     h);
   if (r < 0) {
     dout(0) << __func__ << " Error " << r << " on oid " << soid << dendl;
-    primary_failed(soid);
-    primary_error(soid, v);
+    on_failed_pull({ pg_whoami }, soid, v);
     return 0;
   }
   return 1;
@@ -13115,10 +13106,7 @@ int PrimaryLogPG::prep_backfill_object_push(
     h);
   if (r < 0) {
     dout(0) << __func__ << " Error " << r << " on oid " << oid << dendl;
-    primary_failed(oid);
-    primary_error(oid, v);
-    backfills_in_flight.erase(oid);
-    missing_loc.add_missing(oid, v, eversion_t());
+    on_failed_pull({ pg_whoami }, oid, v);
   }
   return r;
 }
index 884db99fe716eba9209158cf78e1f6aa7b204b99..a2a7ccb9084aa9a36a2e06bebcf4cd86e18edc44 100644 (file)
@@ -294,17 +294,18 @@ public:
     const hobject_t &oid,
     const object_stat_sum_t &stat_diff,
     bool is_delete) override;
-  void failed_push(const list<pg_shard_t> &from,
-                   const hobject_t &soid,
-                   const eversion_t &need = eversion_t()) override;
-  void primary_failed(const hobject_t &soid) override;
-  bool primary_error(const hobject_t& soid, eversion_t v) override;
+  void on_failed_pull(
+    const set<pg_shard_t> &from,
+    const hobject_t &soid,
+    const eversion_t &version) override;
   void cancel_pull(const hobject_t &soid) override;
   void apply_stats(
     const hobject_t &soid,
     const object_stat_sum_t &delta_stats) override;
-  void on_primary_error(const hobject_t &oid, eversion_t v) override;
-  void backfill_add_missing(const hobject_t &oid, eversion_t v) override;
+
+  bool primary_error(const hobject_t& soid, eversion_t v);
+
+  void backfill_add_missing(const hobject_t &oid, eversion_t v);
   void remove_missing_object(const hobject_t &oid,
                             eversion_t v,
                             Context *on_complete) override;
@@ -1134,7 +1135,7 @@ protected:
                                  PGBackend::RecoveryHandle *h,
                                  bool *work_started);
 
-  void finish_degraded_object(const hobject_t& oid) override;
+  void finish_degraded_object(const hobject_t& oid);
 
   // Cancels/resets pulls from peer
   void check_recovery_sources(const OSDMapRef& map) override ;
index 269adf64b1191875ddf79f0094a325954ec4d143..8063e0a9e132a139c1c4941ea631206582694413 100644 (file)
@@ -786,8 +786,9 @@ struct C_ReplicatedBackend_OnPullComplete : GenContext<ThreadPool::TPHandle&> {
       int started = bc->start_pushes(i.hoid, obc, h);
       if (started < 0) {
        bc->pushing[i.hoid].clear();
-       bc->get_parent()->primary_failed(i.hoid);
-       bc->get_parent()->primary_error(i.hoid, obc->obs.oi.version);
+       bc->get_parent()->on_failed_pull(
+         { bc->get_parent()->whoami_shard() },
+         i.hoid, obc->obs.oi.version);
       } else if (!started) {
        bc->get_parent()->on_global_recover(
          i.hoid, i.stat, false);
@@ -2103,7 +2104,10 @@ done:
        if (!error)
          get_parent()->on_global_recover(soid, stat, false);
        else
-         get_parent()->on_primary_error(soid, v);
+         get_parent()->on_failed_pull(
+           std::set<pg_shard_t>{ get_parent()->whoami_shard() },
+           soid,
+           v);
        pushing.erase(soid);
       } else {
        // This looks weird, but we erased the current peer and need to remember
@@ -2193,10 +2197,12 @@ void ReplicatedBackend::trim_pushed_data(
 void ReplicatedBackend::_failed_pull(pg_shard_t from, const hobject_t &soid)
 {
   dout(20) << __func__ << ": " << soid << " from " << from << dendl;
-  list<pg_shard_t> fl = { from };
   auto it = pulling.find(soid);
   assert(it != pulling.end());
-  get_parent()->failed_push(fl, soid, it->second.recovery_info.version);
+  get_parent()->on_failed_pull(
+    { from },
+    soid,
+    it->second.recovery_info.version);
 
   clear_pull(it);
 }