From: Igor Fedotov Date: Fri, 15 Aug 2025 10:15:07 +0000 (+0300) Subject: os/bluestore: enforce extent split on shard boundary X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=90a4bdb313756fa839822f3196b9975c498a0b99;p=ceph.git os/bluestore: enforce extent split on shard boundary Partially fixes: https://tracker.ceph.com/issues/70390 Signed-off-by: Igor Fedotov (cherry picked from commit 5beee2ad46cfeb8ffc70d106c1180f531e455e3e) (cherry picked from commit 0611ed6d8980b8b8839bb0d6c7af07b598fcc089) Conflicts: src/os/bluestore/BlueStore.cc The conflict was not a logical one; it stemmed from a refactor that renamed "e" to "extent". --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index bb88c486486..c014a4392ad 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3790,7 +3790,7 @@ void BlueStore::ExtentMap::reshard( if (e->logical_offset >= needs_reshard_end) { break; } - dout(30) << " extent " << *e << dendl; + dout(30) << __func__ << " extent " << *e << dendl; while (e->logical_offset >= shard_end) { shard_start = shard_end; ceph_assert(sp != esp); @@ -3805,58 +3805,78 @@ void BlueStore::ExtentMap::reshard( } if (e->blob_escapes_range(shard_start, shard_end - shard_start)) { - if (!e->blob->is_spanning()) { + BlobRef b = e->blob; + uint32_t bstart = e->blob_start(); + uint32_t bend = e->blob_end(); + if (!b->is_spanning()) { // We have two options: (1) split the blob into pieces at the // shard boundaries (and adjust extents accordingly), or (2) // mark it spanning. We prefer to cut the blob if we can. Note that // we may have to split it multiple times--potentially at every // shard boundary. 
- bool must_span = false; - BlobRef b = e->blob; + auto _make_spanning = [&](BlobRef& b) { + auto bid = allocate_spanning_blob_id(); + b->id = bid; + spanning_blob_map[b->id] = b; + dout(20) << __func__ << " adding spanning " << *b << dendl; + if (!was_too_many_blobs_check && + too_many_blobs_threshold && + spanning_blob_map.size() >= size_t(too_many_blobs_threshold)) { + + was_too_many_blobs_check = true; + for (size_t i = 0; i < dumped_onodes.size(); ++i) { + if (dumped_onodes[i].first == onode->oid) { + oid_slot = &dumped_onodes[i]; + break; + } + if (!oldest_slot || (oldest_slot && + dumped_onodes[i].second < oldest_slot->second)) { + oldest_slot = &dumped_onodes[i]; + } + } + } + }; if (b->can_split()) { - uint32_t bstart = e->blob_start(); - uint32_t bend = e->blob_end(); + auto bstart1 = bstart; for (const auto& sh : shards) { - if (bstart < sh.shard_info->offset && + if (bstart1 < sh.shard_info->offset && bend > sh.shard_info->offset) { - uint32_t blob_offset = sh.shard_info->offset - bstart; + uint32_t blob_offset = sh.shard_info->offset - bstart1; if (b->can_split_at(blob_offset)) { dout(20) << __func__ << " splitting blob, bstart 0x" - << std::hex << bstart << " blob_offset 0x" + << std::hex << bstart1 << " blob_offset 0x" << blob_offset << std::dec << " " << *b << dendl; b = split_blob(b, blob_offset, sh.shard_info->offset); // switch b to the new right-hand side, in case it // *also* has to get split. 
- bstart += blob_offset; + bstart1 = sh.shard_info->offset; onode->c->store->logger->inc(l_bluestore_blob_split); } else { - must_span = true; + _make_spanning(b); break; } } } } else { - must_span = true; + _make_spanning(b); } - if (must_span) { - auto bid = allocate_spanning_blob_id(); - b->id = bid; - spanning_blob_map[b->id] = b; - dout(20) << __func__ << " adding spanning " << *b << dendl; - if (!was_too_many_blobs_check && - too_many_blobs_threshold && - spanning_blob_map.size() >= size_t(too_many_blobs_threshold)) { - - was_too_many_blobs_check = true; - for (size_t i = 0; i < dumped_onodes.size(); ++i) { - if (dumped_onodes[i].first == onode->oid) { - oid_slot = &dumped_onodes[i]; - break; - } - if (!oldest_slot || (oldest_slot && - dumped_onodes[i].second < oldest_slot->second)) { - oldest_slot = &dumped_onodes[i]; - } + } // if (!extent->blob->is_spanning()) + // Make sure extent with a spanning blob doesn't span over shard boundary + if (e->blob->is_spanning()) { + BlobRef b = e->blob; + uint32_t bstart = e->blob_start(); + for (const auto& sh : shards) { + if (bstart < sh.shard_info->offset && bend > sh.shard_info->offset) { + uint32_t blob_offset = sh.shard_info->offset - bstart; + auto pos = sh.shard_info->offset; + if (e->logical_offset < pos && e->logical_end() > pos) { + // split extent + size_t left = pos - e->logical_offset; + Extent* ne = new Extent(pos, blob_offset, e->length - left, b); + extent_map.insert(*ne); + e->length = left; + dout(20) << __func__ << " split " << *e << dendl; + dout(20) << __func__ << " to " << *ne << dendl; } } } @@ -3865,7 +3885,7 @@ void BlueStore::ExtentMap::reshard( if (e->blob->is_spanning()) { spanning_blob_map.erase(e->blob->id); e->blob->id = -1; - dout(30) << __func__ << " un-spanning " << *e->blob << dendl; + dout(20) << __func__ << " un-spanning " << *e->blob << dendl; } } } @@ -3907,24 +3927,22 @@ bool BlueStore::ExtentMap::encode_some( unsigned n = 0; size_t bound = 0; - bool must_reshard = false; for 
(auto p = start; p != extent_map.end() && p->logical_offset < end; ++p, ++n) { ceph_assert(p->logical_offset >= offset); p->blob->last_encoded_id = -1; if (!p->blob->is_spanning() && p->blob_escapes_range(offset, length)) { - dout(30) << __func__ << " 0x" << std::hex << offset << "~" << length + dout(20) << __func__ << " 0x" << std::hex << offset << "~" << length << std::dec << " hit new spanning blob " << *p << dendl; request_reshard(p->blob_start(), p->blob_end()); - must_reshard = true; + return true; } else if (p->blob->is_spanning() && p->logical_end() > end) { - dout(30) << __func__ << std::hex << offset << "~" << length + dout(20) << __func__ << std::hex << offset << "~" << length << std::dec << " extent stands out " << *p << dendl; request_reshard(p->blob_start(), p->blob_end()); - must_reshard = true; - } - if (!must_reshard) { + return true; + } else { denc_varint(0, bound); // blobid denc_varint(0, bound); // logical_offset denc_varint(0, bound); // len @@ -3937,9 +3955,6 @@ bool BlueStore::ExtentMap::encode_some( false); } } - if (must_reshard) { - return true; - } denc(struct_v, bound); denc_varint(0, bound); // number of extents