]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/ECBackend: optimize remaining read as readop contain multiple objects 35821/head
authorZengran Zhang <zhangzengran@sangfor.com.cn>
Mon, 29 Jun 2020 03:37:16 +0000 (11:37 +0800)
committerZengran Zhang <zhangzengran@sangfor.com.cn>
Mon, 17 Aug 2020 15:05:31 +0000 (23:05 +0800)
in github.com/ceph/ceph/pull/21911
we s/rop.to_read.swap/rop.to_read.erase/ in send_all_remaining_reads(...),
and then introduce a new little issue: if one of to_read objects meet shard error,
we will only change the uncompleted object's to read, but do_read_op will resend the
completed objects shard read, because we did not modify their to_read's need.

Fixes: https://tracker.ceph.com/issues/46876
Signed-off-by: Zengran Zhang <zhangzengran@sangfor.com.cn>
src/osd/ECBackend.cc
src/osd/ECBackend.h

index 6c33e6c405248aadecdbc29a6c3f85bf066faaf0..5d5edd42476f14565bf8a7ddad1e89ba66455316 100644 (file)
@@ -1248,6 +1248,7 @@ void ECBackend::handle_sub_read_reply(
   ceph_assert(rop.in_progress.count(from));
   rop.in_progress.erase(from);
   unsigned is_complete = 0;
+  bool need_resend = false;
   // For redundant reads check for completion as each shard comes in,
   // or in a non-recovery read check for completion once all the shards read.
   if (rop.do_redundant_reads || rop.in_progress.empty()) {
@@ -1274,7 +1275,8 @@ void ECBackend::handle_sub_read_reply(
          if (!rop.do_redundant_reads) {
            int r = send_all_remaining_reads(iter->first, rop);
            if (r == 0) {
-             // We added to in_progress and not incrementing is_complete
+             // We changed the rop's to_read and not incrementing is_complete
+             need_resend = true;
              continue;
            }
            // Couldn't read any additional shards so handle as completed with errors
@@ -1302,11 +1304,17 @@ void ECBackend::handle_sub_read_reply(
            rop.complete[iter->first].errors.clear();
          }
        }
+       // avoid re-read for completed object as we may send remaining reads for uncopmpleted objects
+       rop.to_read.at(iter->first).need.clear();
+       rop.to_read.at(iter->first).want_attrs = false;
        ++is_complete;
       }
     }
   }
-  if (rop.in_progress.empty() || is_complete == rop.complete.size()) {
+  if (need_resend) {
+    do_read_op(rop);
+  } else if (rop.in_progress.empty() || 
+             is_complete == rop.complete.size()) {
     dout(20) << __func__ << " Complete: " << rop << dendl;
     rop.trace.event("ec read complete");
     complete_read_op(rop, m);
@@ -2452,7 +2460,6 @@ int ECBackend::send_all_remaining_reads(
        shards,
        want_attrs,
        c)));
-  do_read_op(rop);
   return 0;
 }
 
index d659d64dff614bae6b7ef5a7e7d0582ae4adf36b..d833b4f00f5d77f056824ab299d57ac80ad237ba 100644 (file)
@@ -352,8 +352,8 @@ public:
   };
   struct read_request_t {
     const std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read;
-    const std::map<pg_shard_t, std::vector<std::pair<int, int>>> need;
-    const bool want_attrs;
+    std::map<pg_shard_t, std::vector<std::pair<int, int>>> need;
+    bool want_attrs;
     GenContext<std::pair<RecoveryMessages *, read_result_t& > &> *cb;
     read_request_t(
       const std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > &to_read,