From: Adam Kupczyk
Date: Wed, 19 Feb 2025 13:13:56 +0000 (+0000)
Subject: os/bluestore: implemented bluestore_blob_t::get_unused_mask
X-Git-Tag: v20.3.0~393^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F61900%2Fhead;p=ceph.git

os/bluestore: implemented bluestore_blob_t::get_unused_mask

The function was used only by writer v2, but it always returned 0.
Now it properly returns the mask of used and unused blob regions.
Changed the return type from unused_t (16 bits) to uint64_t.
The write_v2 path now properly marks regions as unused.
+ new unittest bluestore_blob_t.get_unused_mask
+ fixed unittest ExtentMapFixture.rain

Signed-off-by: Adam Kupczyk
---

diff --git a/src/os/bluestore/Writer.cc b/src/os/bluestore/Writer.cc
index 34a308586ec2..b2d3195dc9e2 100644
--- a/src/os/bluestore/Writer.cc
+++ b/src/os/bluestore/Writer.cc
@@ -500,6 +500,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_with_data(
   _get_disk_space(blob_length - alloc_offset, blob_allocs);
   bblob.allocated(alloc_offset, blob_length - alloc_offset, blob_allocs);
   //^sets also logical_length = blob_length
+  bblob.add_unused_all();
   dout(25) << __func__ << " @0x" << std::hex << in_blob_offset
            << "~" << disk_data.length()
            << " alloc_offset=" << alloc_offset
@@ -508,6 +509,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_with_data(
   _crop_allocs_to_io(disk_extents, in_blob_offset - alloc_offset,
     blob_length - in_blob_offset - disk_data.length());
   _schedule_io(disk_extents, disk_data);
+  bblob.mark_used(in_blob_offset, data_length);
   return blob;
 }
@@ -541,7 +543,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_full(
   _get_disk_space(blob_length, blob_allocs);
   _schedule_io(blob_allocs, disk_data); //have to do before move()
   bblob.allocated_full(blob_length, std::move(blob_allocs));
-  bblob.mark_used(0, blob_length); //todo - optimize; this obviously clears it
+  bblob.mark_used_all();
   return blob;
 }
@@ -610,7 +612,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_full(
 inline void BlueStore::Writer::_schedule_io_masked(
   uint64_t disk_position,
   bufferlist data,
-  bluestore_blob_t::unused_t mask,
+  uint64_t mask,
   uint32_t chunk_size)
 {
   if (test_write_divertor == nullptr) {
@@ -771,7 +773,7 @@ void BlueStore::Writer::_try_reuse_allocated_l(
   uint32_t data_size = want_subau_end - want_subau_begin;
   bufferlist data_at_left = split_left(data, data_size);
   bd.real_length -= data_size;
-  uint32_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
+  uint64_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
   _blob_put_data_subau(b, in_blob_offset, data_at_left); // transfer do disk
   _schedule_io_masked(subau_disk_offset, data_at_left, mask, chunk_size);
@@ -844,9 +846,9 @@ void BlueStore::Writer::_try_reuse_allocated_r(
   uint32_t data_size = want_subau_end - want_subau_begin;
   bufferlist data_at_right = split_right(data, data.length() - data_size);
   bd.real_length -= data_size;
-  uint32_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
+  uint64_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
   _blob_put_data_subau(b, in_blob_offset, data_at_right);
-  //transfer to disk
+  // transfer to disk
   _schedule_io_masked(subau_disk_offset, data_at_right, mask, chunk_size);

   uint32_t ref_end = std::min(ref_end_offset, want_subau_end);
diff --git a/src/os/bluestore/Writer.h b/src/os/bluestore/Writer.h
index aa2a41dd186d..82b164befbb6 100644
--- a/src/os/bluestore/Writer.h
+++ b/src/os/bluestore/Writer.h
@@ -107,7 +107,7 @@ private:
   inline
   void _schedule_io_masked(
     uint64_t disk_offset, bufferlist data,
-    bluestore_blob_t::unused_t mask,
+    uint64_t mask,
     uint32_t chunk_size);
   inline
   void _schedule_io(
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h
index 0d28d2716fc5..f45224769b22 100644
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -728,6 +728,12 @@ public:
     }
   }

+  /// mark everything as unused
+  void add_unused_all() {
+    set_flag(FLAG_HAS_UNUSED);
+    unused = ~0;
+  }
+
   /// indicate that a range has (now) been used.
   void mark_used(uint64_t offset, uint64_t length) {
     if (has_unused()) {
@@ -746,14 +752,58 @@ public:
       }
     }
   }
-  /// todo implement me!
-  unused_t get_unused_mask(uint32_t offset, uint32_t length, uint32_t chunk_size) {
+
+  ///mark everything as used
+  void mark_used_all() {
+    clear_flag(FLAG_HAS_UNUSED);
+  }
+
+  /// create bitmap mask, io_chunk_size per bit
+  /// bit 0 is offset, bit 1 is offset + io_chunk_size, ....
+  uint64_t get_unused_mask(uint32_t offset, uint32_t length, uint32_t io_chunk_size) {
     if (has_unused()) {
-      return 0;
+      uint32_t blob_len = get_logical_length();
+      ceph_assert((blob_len % (sizeof(unused)*8)) == 0);
+      ceph_assert(offset + length <= blob_len);
+      ceph_assert((offset % io_chunk_size) == 0);
+      ceph_assert((length % io_chunk_size) == 0);
+      if (length / io_chunk_size > 64) {
+        // the result cannot fit 64 bits, pretend all is used
+        return 0;
+      }
+      uint32_t chunk_size = blob_len / (sizeof(unused)*8);
+      uint16_t i = offset / chunk_size;
+      uint16_t j = 0;
+      uint64_t io_used = 0;
+      uint64_t next_u = round_down_to(offset + chunk_size, chunk_size);
+      uint64_t next_io = round_down_to(offset + io_chunk_size, io_chunk_size);
+      // The algorithm here is iterating 2 sequences that have different "speeds":
+      // unused bit speed (chunk_size) and output disk region speed (io_chunk_size)
+      // unused_bits : aaaaabbbbbcccccdddddeeeeefffffggggghhhhh
+      // disk_io_chnk: AAABBBCCCDDDEEEFFFGGGHHHIIIJJJ
+      // But we operate on "used" logic, as it allows for easier summation, and return the inverse.
+      // We apply restriction from i-th unused bit to j-th io_chunk.
+      // The relative sizes of chunk_size and io_chunk_size determine
+      // how fast we increase i and j respectively.
+      for (; next_io < offset + length + io_chunk_size; ) {
+        //produce io_mask bit, by copying state from unused bit
+        (!(unused & (1 << i))) ? io_used |= uint64_t(1) << j : 0;
+        auto le = next_u <= next_io;
+        if (next_u >= next_io) {
+          j++;
+          next_io += io_chunk_size;
+        }
+        if (le) {
+          i++;
+          next_u += chunk_size;
+        }
+      }
+      return ~io_used;
     } else {
       return 0;
     }
   }
+
+
   // map_f_invoke templates intended to mask parameters which are not expected
   // by the provided callback
   template 

right) swap(left, right);
+        b.mark_used(left, right - left);
+      }
+
+      for (uint32_t io_chunk_size = 1024; io_chunk_size <= 32 * 1024; io_chunk_size *= 2) {
+        if (size < io_chunk_size || (size % io_chunk_size) != 0) {
+          continue;
+        }
+        if (size / io_chunk_size > 64) continue;
+        uint32_t io_begin = rand() % (size / io_chunk_size + 1) * io_chunk_size;
+        uint32_t io_end = rand() % (size / io_chunk_size + 1) * io_chunk_size;
+        if (io_begin == io_end) continue;
+        if (io_begin > io_end) swap(io_begin, io_end);
+
+        uint64_t mask = 0;
+        uint64_t bit = 1;
+        for (uint32_t i = io_begin; i < io_end; i += io_chunk_size) {
+          mask = mask | (b.is_unused(i, io_chunk_size) ?
+            bit : 0);
+          bit = bit << 1;
+        }
+        uint64_t result = b.get_unused_mask(io_begin, io_end - io_begin, io_chunk_size);
+        auto ref = std::bitset<64>(mask).to_string().substr(64 - (io_end - io_begin) / io_chunk_size);
+        auto uuu = std::bitset<64>(result).to_string().substr(64 - (io_end - io_begin) / io_chunk_size);
+        EXPECT_EQ(ref, uuu);
+      }
+    }
+  }
+}
+
 class bluestore_blob_t_test : public ::testing::Test, public ::testing::WithParamInterface>
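
For readers following the patch, the standalone sketch below models the bit mapping that get_unused_mask performs. It is not code from the commit: it replaces bluestore_blob_t with a plain 16-bit bitmap and hard-codes an illustrative 64 KiB blob, a 4 KiB tracking granule and 4 KiB io chunks. The point it demonstrates is the semantics of the returned mask: bit j is set only when the j-th io_chunk_size-sized chunk of the queried range lies entirely in unused space.

// Standalone illustration (not part of the patch): a simplified model of the
// unused-bitmap -> per-io-chunk mask translation. The 64 KiB blob, 16-bit
// bitmap and 4 KiB sizes are assumptions chosen for the example.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t blob_len = 64 * 1024;      // logical blob length
  const uint32_t granule  = blob_len / 16;  // 4 KiB tracked per unused bit
  uint16_t unused = 0xffff;                 // all 16 granules start unused

  // Mark the first 16 KiB as used: clear unused bits 0..3.
  for (uint32_t off = 0; off < 16 * 1024; off += granule)
    unused &= ~(uint16_t(1) << (off / granule));

  // Build the mask for the range [0, 32 KiB) with 4 KiB io chunks:
  // result bit j is 1 only if io chunk j lies entirely in unused space.
  const uint32_t offset = 0, length = 32 * 1024, io_chunk = 4 * 1024;
  uint64_t mask = 0;
  for (uint32_t j = 0; j < length / io_chunk; ++j) {
    bool chunk_unused = true;
    for (uint32_t off = offset + j * io_chunk;
         off < offset + (j + 1) * io_chunk; off += granule)
      chunk_unused = chunk_unused && (((unused >> (off / granule)) & 1) != 0);
    if (chunk_unused)
      mask |= uint64_t(1) << j;
  }

  // Chunks 0..3 overlap used space, chunks 4..7 are still unused -> 0xf0.
  printf("mask = 0x%llx\n", (unsigned long long)mask);
  assert(mask == 0xf0);
  return 0;
}

The real implementation arrives at the same mask differently: as its comment explains, it walks the unused bitmap and the io chunks as two sequences advancing at different speeds, accumulates "used" bits, and returns the inverse.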