]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: fix trimming of multistripe partial reads
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Fri, 29 Mar 2024 16:51:57 +0000 (16:51 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Thu, 20 Jun 2024 20:37:57 +0000 (20:37 +0000)
Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/osd/ECBackend.cc
src/osd/ECCommon.cc
src/osd/ECUtil.h
src/test/osd/TestECBackend.cc

index 2bfdda38530eb04aa19cdbc3da10a336efa745db..2a4066d22894b8361aa02663d27987a7040d3555 100644 (file)
@@ -1539,13 +1539,17 @@ int ECBackend::objects_read_sync(
 }
 
 static bool should_partial_read(
+  const ECUtil::stripe_info_t& sinfo,
+  uint64_t off,
+  uint32_t len,
   bool fast_read)
 {
   // Don't partial read if we are doing a fast_read
   if (fast_read) {
     return false;
   }
-  return true;
+  // Same stripe only
+  return sinfo.offset_length_is_same_stripe(off, len);
 }
 
 void ECBackend::objects_read_async(
@@ -1561,7 +1565,8 @@ void ECBackend::objects_read_async(
   extent_set es;
   for (const auto& [read, ctx] : to_read) {
     pair<uint64_t, uint64_t> tmp;
-    if (!cct->_conf->osd_ec_partial_reads || !should_partial_read(fast_read)) {
+    if (!cct->_conf->osd_ec_partial_reads ||
+       !should_partial_read(sinfo, read.offset, read.size, fast_read)) {
       tmp = sinfo.offset_len_to_stripe_bounds(make_pair(read.offset, read.size));
     } else {
       tmp = sinfo.offset_len_to_chunk_bounds(make_pair(read.offset, read.size));
index bac410cec5b6bbcfc20572ecaf03796c89286475..9ea14dd18eca13feaf7a89b8d5e66d8e268217c7 100644 (file)
@@ -552,9 +552,9 @@ struct ClientReadCompleter : ECCommon::ReadCompleter {
         goto out;
       }
       bufferlist trimmed;
+      ceph_assert(aligned.first <= read.offset);
       auto off = read.offset - aligned.first;
-      auto len =
-          std::min(read.size, bl.length() - (read.offset - aligned.first));
+      auto len = std::min(read.size, bl.length() - off);
       trimmed.substr_of(bl, off, len);
       result.insert(
        read.offset, trimmed.length(), std::move(trimmed));
index af21839da23eb69c7b50f6a238a6d5dcf452cc49..c84a87ee3804bd4d60d012f9106d3719c225a9fa 100644 (file)
@@ -106,6 +106,16 @@ public:
     const auto last_chunk_idx = (chunk_size - 1 + off + len) / chunk_size;
     return {first_chunk_idx, last_chunk_idx};
   }
+  bool offset_length_is_same_stripe(
+    uint64_t off, uint64_t len) const {
+    if (len == 0) {
+      return true;
+    }
+    assert(chunk_size > 0);
+    const auto first_stripe_idx = off / stripe_width;
+    const auto last_inc_stripe_idx = (off + len - 1) / stripe_width;
+    return first_stripe_idx == last_inc_stripe_idx;
+  }
 };
 
 int decode(
index 9f7483b0cdfe4917b66e359b33dd8f2a77fa00c1..d28d428fc060f205b59be9303772f243c09be8af 100644 (file)
@@ -74,6 +74,82 @@ TEST(ECUtil, stripe_info_t)
             make_pair((uint64_t)0, 2*swidth));
 }
 
+TEST(ECUtil, offset_length_is_same_stripe)
+{
+  const uint64_t swidth = 4096;
+  const uint64_t schunk = 1024;
+  const uint64_t ssize = 4;
+
+  ECUtil::stripe_info_t s(ssize, swidth);
+  ASSERT_EQ(s.get_stripe_width(), swidth);
+  ASSERT_EQ(s.get_chunk_size(), schunk);
+
+  // read nothing at the very beginning
+  //   +---+---+---+---+
+  //   |  0|   |   |   |
+  //   +---+---+---+---+
+  //   |   |   |   |   |
+  //   +---+---+---+---+
+  ASSERT_TRUE(s.offset_length_is_same_stripe(0, 0));
+
+  // read nothing at the stripe end
+  //   +---+---+---+---+
+  //   |   |   |   |  0|
+  //   +---+---+---+---+
+  //   |   |   |   |   |
+  //   +---+---+---+---+
+  ASSERT_TRUE(s.offset_length_is_same_stripe(swidth, 0));
+
+  // read single byte at the stripe end
+  //   +---+---+---+---+
+  //   |   |   |   | ~1|
+  //   +---+---+---+---+
+  //   |   |   |   |   |
+  //   +---+---+---+---+
+  ASSERT_TRUE(s.offset_length_is_same_stripe(swidth - 1, 1));
+
+  // read single stripe
+  //   +---+---+---+---+
+  //   | 1k| 1k| 1k| 1k|
+  //   +---+---+---+---+
+  //   |   |   |   |   |
+  //   +---+---+---+---+
+  ASSERT_TRUE(s.offset_length_is_same_stripe(0, swidth));
+
+  // read single chunk
+  //   +---+---+---+---+
+  //   | 1k|   |   |   |
+  //   +---+---+---+---+
+  //   |   |   |   |   |
+  //   +---+---+---+---+
+  ASSERT_TRUE(s.offset_length_is_same_stripe(0, schunk));
+
+  // read single stripe except its first chunk
+  //   +---+---+---+---+
+  //   |   | 1k| 1k| 1k|
+  //   +---+---+---+---+
+  //   |   |   |   |   |
+  //   +---+---+---+---+
+  ASSERT_TRUE(s.offset_length_is_same_stripe(schunk, swidth - schunk));
+
+  // read two stripes
+  //   +---+---+---+---+
+  //   | 1k| 1k| 1k| 1k|
+  //   +---+---+---+---+
+  //   | 1k| 1k| 1k| 1k|
+  //   +---+---+---+---+
+  ASSERT_FALSE(s.offset_length_is_same_stripe(0, 2*swidth));
+
+  // multistripe read: 1st stripe without 1st byte + 1st byte of 2nd stripe
+  //   +-----+---+---+---+
+  //   | 1k-1| 1k| 1k| 1k|
+  //   +-----+---+---+---+
+  //   |    1|   |   |   |
+  //   +-----+---+---+---+
+  ASSERT_FALSE(s.offset_length_is_same_stripe(1, swidth));
+}
+
+
 TEST(ECCommon, get_min_want_to_read_shards)
 {
   const uint64_t swidth = 4096;