]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
os/bluestore: enforce extent split on shard boundary
authorIgor Fedotov <igor.fedotov@croit.io>
Fri, 15 Aug 2025 10:15:07 +0000 (13:15 +0300)
committerIgor Fedotov <igor.fedotov@croit.io>
Tue, 2 Sep 2025 13:45:12 +0000 (16:45 +0300)
Partially fixes: https://tracker.ceph.com/issues/70390

Signed-off-by: Igor Fedotov <igor.fedotov@croit.io>
src/os/bluestore/BlueStore.cc

index 24a4e3848fc086e2972c1d68ba36e8f806abf5ae..9cd840519578f0d27f4a7538ae719208cfc223c3 100644 (file)
@@ -3850,7 +3850,7 @@ void BlueStore::ExtentMap::reshard_action(
       if (extent->logical_offset >= needs_reshard_end) {
        break;
       }
-      dout(30) << " extent " << *extent << dendl;
+      dout(30) << __func__ << " extent " << *extent << dendl;
       while (extent->logical_offset >= shard_end) {
        shard_start = shard_end;
        ceph_assert(current_shard != end_shard);
@@ -3865,58 +3865,78 @@ void BlueStore::ExtentMap::reshard_action(
       }
 
       if (extent->blob_escapes_range(shard_start, shard_end - shard_start)) {
-       if (!extent->blob->is_spanning()) {
+       BlobRef b = extent->blob;
+       uint32_t bstart = extent->blob_start();
+       uint32_t bend = extent->blob_end();
+       if (!b->is_spanning()) {
          // We have two options: (1) split the blob into pieces at the
          // shard boundaries (and adjust extents accordingly), or (2)
          // mark it spanning.  We prefer to cut the blob if we can.  Note that
          // we may have to split it multiple times--potentially at every
          // shard boundary.
-         bool must_span = false;
-         BlobRef b = extent->blob;
+         auto _make_spanning = [&](BlobRef& b) {
+           auto bid = allocate_spanning_blob_id();
+           b->id = bid;
+           spanning_blob_map[b->id] = b;
+           dout(20) << __func__ << "    adding spanning " << *b << dendl;
+           if (!was_too_many_blobs_check &&
+             too_many_blobs_threshold &&
+             spanning_blob_map.size() >= size_t(too_many_blobs_threshold)) {
+
+             was_too_many_blobs_check = true;
+             for (size_t i = 0; i < dumped_onodes.size(); ++i) {
+               if (dumped_onodes[i].first == onode->oid) {
+                 oid_slot = &dumped_onodes[i];
+                 break;
+               }
+               if (!oldest_slot || (oldest_slot &&
+                 dumped_onodes[i].second < oldest_slot->second)) {
+                 oldest_slot = &dumped_onodes[i];
+               }
+             }
+           }
+         };
          if (b->can_split()) {
-           uint32_t bstart = extent->blob_start();
-           uint32_t bend = extent->blob_end();
+           auto bstart1 = bstart;
            for (const auto& sh : shards) {
-             if (bstart < sh.shard_info->offset &&
+             if (bstart1 < sh.shard_info->offset &&
                  bend > sh.shard_info->offset) {
-               uint32_t blob_offset = sh.shard_info->offset - bstart;
+               uint32_t blob_offset = sh.shard_info->offset - bstart1;
                if (b->can_split_at(blob_offset)) {
                  dout(20) << __func__ << "    splitting blob, bstart 0x"
-                          << std::hex << bstart << " blob_offset 0x"
+                          << std::hex << bstart1 << " blob_offset 0x"
                           << blob_offset << std::dec << " " << *b << dendl;
                  b = split_blob(b, blob_offset, sh.shard_info->offset);
                  // switch b to the new right-hand side, in case it
                  // *also* has to get split.
-                 bstart += blob_offset;
+                 bstart1 = sh.shard_info->offset;
                  onode->c->store->logger->inc(l_bluestore_blob_split);
                } else {
-                 must_span = true;
+                 _make_spanning(b);
                  break;
                }
              }
            }
          } else {
-           must_span = true;
+           _make_spanning(b);
          }
-         if (must_span) {
-            auto bid = allocate_spanning_blob_id();
-            b->id = bid;
-           spanning_blob_map[b->id] = b;
-           dout(20) << __func__ << "    adding spanning " << *b << dendl;
-           if (!was_too_many_blobs_check &&
-             too_many_blobs_threshold &&
-             spanning_blob_map.size() >= size_t(too_many_blobs_threshold)) {
-
-             was_too_many_blobs_check = true;
-             for (size_t i = 0; i < dumped_onodes.size(); ++i) {
-               if (dumped_onodes[i].first == onode->oid) {
-                 oid_slot = &dumped_onodes[i];
-                 break;
-               }
-               if (!oldest_slot || (oldest_slot &&
-                   dumped_onodes[i].second < oldest_slot->second)) {
-                 oldest_slot = &dumped_onodes[i];
-               }
+       } // if (!extent->blob->is_spanning())
+       // Make sure extent with a spanning blob doesn't span over shard boundary
+       if (extent->blob->is_spanning()) {
+         BlobRef b = extent->blob;
+         uint32_t bstart = extent->blob_start();
+         for (const auto& sh : shards) {
+           if (bstart < sh.shard_info->offset && bend > sh.shard_info->offset) {
+             uint32_t blob_offset = sh.shard_info->offset - bstart;
+             auto pos = sh.shard_info->offset;
+             if (extent->logical_offset < pos && extent->logical_end() > pos) {
+               // split extent
+               size_t left = pos - extent->logical_offset;
+               Extent* ne = new Extent(pos, blob_offset, extent->length - left, b);
+               extent_map.insert(*ne);
+               extent->length = left;
+               dout(20) << __func__ << "  split " << *extent << dendl;
+               dout(20) << __func__ << "     to " << *ne << dendl;
              }
            }
          }
@@ -3925,7 +3945,7 @@ void BlueStore::ExtentMap::reshard_action(
        if (extent->blob->is_spanning()) {
          spanning_blob_map.erase(extent->blob->id);
          extent->blob->id = -1;
-         dout(30) << __func__ << "    un-spanning " << *extent->blob << dendl;
+         dout(20) << __func__ << "    un-spanning " << *extent->blob << dendl;
        }
       }
     }
@@ -3977,7 +3997,6 @@ bool BlueStore::ExtentMap::encode_some(
 
   unsigned n = 0;
   size_t bound = 0;
-  bool must_reshard = false;
   uint32_t prev_offset_end = 0;
   for (auto p = start;
        p != extent_map.end() && p->logical_offset < end;
@@ -3986,25 +4005,28 @@ bool BlueStore::ExtentMap::encode_some(
     if (complain_extent_overlap) {
       if (p->logical_offset < prev_offset_end) {
         using P = BlueStore::printer;
-        dout(-1) << __func__ << " extents overlap: " << std::endl
-                 << onode->print(P::NICK + P::SDISK + P::SUSE + P::SBUF) << dendl;
-        ceph_abort();
+        dout(-1) << __func__ << " extents overlap: "
+                 << std::hex << offset <<"~" << length
+                 << " " << p->logical_offset <<"~" << p->length
+                 << std::dec << std::endl
+                 << onode->print(P::NICK + P::SDISK + P::SUSE + P::SBUF)
+                 << dendl;
+       ceph_abort_msg("extents overlaps");
       }
       prev_offset_end = p->logical_end();
     }
     p->blob->last_encoded_id = -1;
     if (!p->blob->is_spanning() && p->blob_escapes_range(offset, length)) {
-      dout(30) << __func__ << " 0x" << std::hex << offset << "~" << length
+      dout(20) << __func__ << " 0x" << std::hex << offset << "~" << length
               << std::dec << " hit new spanning blob " << *p << dendl;
       request_reshard(p->blob_start(), p->blob_end());
-      must_reshard = true;
+      return true;
     } else if (p->blob->is_spanning() && p->logical_end() > end) {
-      dout(30) << __func__ << std::hex << offset << "~" << length
+      dout(20) << __func__ << std::hex << offset << "~" << length
                << std::dec << " extent stands out " << *p << dendl;
       request_reshard(p->blob_start(), p->blob_end());
-      must_reshard = true;
-    }
-    if (!must_reshard) {
+      return true;
+    } else {
       denc_varint(0, bound); // blobid
       denc_varint(0, bound); // logical_offset
       denc_varint(0, bound); // len
@@ -4017,9 +4039,6 @@ bool BlueStore::ExtentMap::encode_some(
         false);
     }
   }
-  if (must_reshard) {
-    return true;
-  }
 
   denc(struct_v, bound);
   denc_varint(0, bound); // number of extents