]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Handle errors on reads of subsequent file segment
authorDavid Zafman <dzafman@redhat.com>
Fri, 12 May 2017 23:01:02 +0000 (16:01 -0700)
committerDavid Zafman <dzafman@redhat.com>
Fri, 23 Jun 2017 15:02:51 +0000 (08:02 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PGBackend.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h
src/osd/ReplicatedBackend.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index 323d411dc610476e1c22fd222857d5f6ee36e513..4b279380206831e04ca77e11e62800a46ec6e911 100644 (file)
@@ -111,6 +111,14 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
        const hobject_t &soid,
        const object_stat_sum_t &delta_stats) = 0;
 
+     /**
+      * Called when a read on the primary fails when pushing
+      */
+     virtual void on_primary_error(
+       const hobject_t &oid,
+       eversion_t v
+       ) = 0;
+
 
      /**
       * Bless a context
index 6d726afccf072c33e1394d731d540ed597c3a523..c6c3b74050e95cf2975c9beab5dc61eea08ae6a8 100644 (file)
@@ -514,6 +514,18 @@ void PrimaryLogPG::send_message_osd_cluster(
   osd->send_message_osd_cluster(m, con);
 }
 
+void PrimaryLogPG::on_primary_error(  // backend callback: a read on the primary failed while pushing oid
+  const hobject_t &oid,  // object whose push could not be completed
+  eversion_t v)  // version of that object, as passed in by the backend
+{
+  dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;  // logged at level 0
+  list<pg_shard_t> fl = { pg_whoami };  // the failed-shard list names only this shard
+  failed_push(fl, oid);
+  primary_error(oid, v);
+  backfills_in_flight.erase(oid);  // oid is no longer tracked as an in-flight backfill
+  missing_loc.add_missing(oid, v, eversion_t());  // NOTE(review): third argument is a default-constructed eversion_t -- confirm its meaning against add_missing()
+}
+
 ConnectionRef PrimaryLogPG::get_con_osd_cluster(
   int peer, epoch_t from_epoch)
 {
index 4ef55f035b19b67ab59192c998bbe37fee267bf5..93dacfe1d03900bd8ea735eab0167c83c9b9eda5 100644 (file)
@@ -268,6 +268,7 @@ public:
   void apply_stats(
     const hobject_t &soid,
     const object_stat_sum_t &delta_stats) override;
+  void on_primary_error(const hobject_t &oid, eversion_t v) override;
 
   template<class T> class BlessedGenContext;
   class BlessedContext;
index 9d533aa217700a289759e959d0eeead6d5aebf22..81d7e8771f691eeb4858add358693e2dc4886238 100644 (file)
@@ -1929,7 +1929,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
   ObjectRecoveryProgress &new_progress = *out_progress;
   new_progress = progress;
 
-  dout(7) << "send_push_op " << recovery_info.soid
+  dout(7) << __func__ << " " << recovery_info.soid
          << " v " << recovery_info.version
          << " size " << recovery_info.size
          << " recovery_info: " << recovery_info
@@ -2105,8 +2105,9 @@ bool ReplicatedBackend::handle_push_reply(
     return false;
   } else {
     PushInfo *pi = &pushing[soid][peer];
+    bool error = pushing[soid].begin()->second.recovery_progress.error;
 
-    if (!pi->recovery_progress.data_complete) {
+    if (!pi->recovery_progress.data_complete && !error) {
       dout(10) << " pushing more from, "
               << pi->recovery_progress.data_recovered_to
               << " of " << pi->recovery_info.copy_subset << dendl;
@@ -2115,24 +2116,40 @@ bool ReplicatedBackend::handle_push_reply(
        pi->recovery_info,
        pi->recovery_progress, &new_progress, reply,
        &(pi->stat));
-      // XXX: What can we do here?
-      assert(r == 0);
+      // Handle the case of a read error right after we wrote, which is
+      // hopefully extremely rare.
+      if (r < 0) {
+        dout(5) << __func__ << ": oid " << soid << " error " << r << dendl;
+
+       error = true;
+       goto done;
+      }
       pi->recovery_progress = new_progress;
       return true;
     } else {
       // done!
-      get_parent()->on_peer_recover(
-       peer, soid, pi->recovery_info);
+done:
+      if (!error)
+       get_parent()->on_peer_recover( peer, soid, pi->recovery_info);
 
       get_parent()->release_locks(pi->lock_manager);
       object_stat_sum_t stat = pi->stat;
+      eversion_t v = pi->recovery_info.version;
       pushing[soid].erase(peer);
       pi = NULL;
 
       if (pushing[soid].empty()) {
-       get_parent()->on_global_recover(soid, stat);
+       if (!error)
+         get_parent()->on_global_recover(soid, stat);
+       else
+         get_parent()->on_primary_error(soid, v);
+
        pushing.erase(soid);
       } else {
+       // This looks weird, but we erased the current peer and need to remember
+       // the error on any other one, while getting more acks.
+       if (error)
+         pushing[soid].begin()->second.recovery_progress.error = true;
        dout(10) << "pushed " << soid << ", still waiting for push ack from "
                 << pushing[soid].size() << " others" << dendl;
       }
index 40d3138f459cb6922a7afed637ab6f67ba946fda..2f09b902ae448d4a1e95aa51e77567480ea578c4 100644 (file)
@@ -5279,6 +5279,7 @@ ostream &ObjectRecoveryProgress::print(ostream &out) const
             << ", data_complete:" << ( data_complete ? "true" : "false" )
             << ", omap_recovered_to:" << omap_recovered_to
             << ", omap_complete:" << ( omap_complete ? "true" : "false" )
+            << ", error:" << ( error ? "true" : "false" )
             << ")";
 }
 
index 92cc7c56987688ccc41be8b1247b9bc604b7938d..c06fb5c7cdaf8755b65a9fa2eac21a5119ca174a 100644 (file)
@@ -4658,6 +4658,7 @@ struct ObjectRecoveryProgress {
   bool first;
   bool data_complete;
   bool omap_complete;
+  bool error = false;
 
   ObjectRecoveryProgress()
     : data_recovered_to(0),