From b7fdda71c7ca122cfe094528737e0d3dc1b0e39a Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Mon, 3 Mar 2025 15:12:21 +0000 Subject: [PATCH] os/bluestore: Add fault_range_ex Add fault_range_ex, a sibling to fault_range. The new function loads the same shards as the original; the only difference is that it also returns the range encompassed by the affected shards. Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueStore.cc | 43 +++++++++++++++++++++++++++++++---- src/os/bluestore/BlueStore.h | 10 ++++++++ src/os/bluestore/Writer.cc | 2 ++ src/os/bluestore/Writer.h | 5 +++- 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 7dca7f49f1e..cd36ad6eabe 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4178,6 +4178,29 @@ void BlueStore::ExtentMap::init_shards(bool loaded, bool dirty) } } +std::pair<uint32_t, uint32_t> BlueStore::ExtentMap::fault_range_ex( + KeyValueDB *db, + uint32_t offset, + uint32_t length) +{ + dout(30) << __func__ << " 0x" << std::hex << offset << "~" << length + << std::dec << dendl; + if (shards.size() == 0) { + // no sharding yet; everything is loaded + return {0, OBJECT_MAX_SIZE}; + } + auto start = seek_shard(offset); + auto last = seek_shard(offset + length); + maybe_load_shard(db, start, last); + uint32_t left_bound = shards[start].shard_info->offset; + uint32_t right_bound = (size_t)last + 1 < shards.size() ? 
+ shards[last + 1].shard_info->offset : OBJECT_MAX_SIZE; + dout(20) << __func__ << " start=" << start << " last=" << last + << " -> 0x" << std::hex << left_bound << "~" << right_bound + << std::dec << dendl; + return {left_bound, right_bound}; +} + void BlueStore::ExtentMap::fault_range( KeyValueDB *db, uint32_t offset, @@ -4191,6 +4214,14 @@ void BlueStore::ExtentMap::fault_range( } auto start = seek_shard(offset); auto last = seek_shard(offset + length); + maybe_load_shard(db, start, last); +} + +void BlueStore::ExtentMap::maybe_load_shard( + KeyValueDB *db, + int start, + int last) +{ ceph_assert(last >= start); ceph_assert(start >= 0); @@ -4216,9 +4247,10 @@ void BlueStore::ExtentMap::fault_range( ); p->extents = decode_some(v); p->loaded = true; - dout(20) << __func__ << " open shard 0x" << std::hex - << p->shard_info->offset - << " for range 0x" << offset << "~" << length << std::dec + uint32_t shard_end = + (size_t)start + 1 < shards.size() ? (p + 1)->shard_info->offset : OBJECT_MAX_SIZE; + dout(20) << __func__ << " open shard for range 0x" + << std::hex << p->shard_info->offset << "~" << shard_end << std::dec << " (" << v.length() << " bytes)" << dendl; ceph_assert(p->dirty == false); ceph_assert(v.length() == p->shard_info->bytes); @@ -17621,10 +17653,11 @@ int BlueStore::_do_write_v2( if (bl.length() != length) { bl.splice(length, bl.length() - length); } + BlueStore::Writer wr(this, txc, &wctx, o); uint64_t start = p2align(offset, min_alloc_size); uint64_t end = p2roundup(offset + length, min_alloc_size); - o->extent_map.fault_range(db, start, end - start); - BlueStore::Writer wr(this, txc, &wctx, o); + std::tie(wr.left_shard_bound, wr.right_shard_bound) = + o->extent_map.fault_range_ex(db, start, end - start); wr.do_write(offset, bl); return r; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index b535d16bba5..9ca48cea441 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1149,6 +1149,16 @@ 
public: /// ensure that a range of the map is loaded void fault_range(KeyValueDB *db, uint32_t offset, uint32_t length); + /// ensure that a range of the map is loaded + /// returns the range encompassed by the affected shards + std::pair<uint32_t, uint32_t> fault_range_ex( + KeyValueDB *db, + uint32_t offset, + uint32_t length); + void maybe_load_shard( + KeyValueDB *db, + int begin_shard, + int end_shard); /// ensure a range of the map is marked dirty void dirty_range(uint32_t offset, uint32_t length); diff --git a/src/os/bluestore/Writer.cc b/src/os/bluestore/Writer.cc index b2d3195dc9e..a41cab3cb87 100644 --- a/src/os/bluestore/Writer.cc +++ b/src/os/bluestore/Writer.cc @@ -732,6 +732,7 @@ void BlueStore::Writer::_try_reuse_allocated_l( blob_data_t& bd) // modified when consumed { uint32_t search_stop = p2align(logical_offset, (uint32_t)wctx->target_blob_size); + search_stop = std::max(left_shard_bound, search_stop); uint32_t au_size = bstore->min_alloc_size; uint32_t block_size = bstore->block_size; ceph_assert(!bd.is_compressed()); @@ -812,6 +813,7 @@ void BlueStore::Writer::_try_reuse_allocated_r( uint32_t block_size = bstore->block_size; uint32_t blob_size = wctx->target_blob_size; uint32_t search_end = p2roundup(end_offset, blob_size); + search_end = std::min(right_shard_bound, search_end); ceph_assert(!bd.is_compressed()); ceph_assert(p2phase(end_offset, au_size) != 0); BlueStore::ExtentMap& emap = onode->extent_map; diff --git a/src/os/bluestore/Writer.h b/src/os/bluestore/Writer.h index 82b164befbb..f12d25c2f1b 100644 --- a/src/os/bluestore/Writer.h +++ b/src/os/bluestore/Writer.h @@ -50,7 +50,8 @@ public: virtual bufferlist read(uint32_t object_offset, uint32_t object_length) = 0; }; Writer(BlueStore* bstore, TransContext* txc, WriteContext* wctx, OnodeRef o) - :bstore(bstore), txc(txc), wctx(wctx), onode(o) { + :left_shard_bound(0), right_shard_bound(OBJECT_MAX_SIZE) + , bstore(bstore), txc(txc), wctx(wctx), onode(o) { pp_mode = debug_level_to_pp_mode(bstore->cct); } 
public: @@ -67,6 +68,8 @@ public: read_divertor* test_read_divertor = nullptr; std::vector pruned_blobs; volatile_statfs statfs_delta; + uint32_t left_shard_bound; // if sharding is in effect, + uint32_t right_shard_bound; // do not cross this line private: BlueStore* bstore; -- 2.39.5