os/bluestore: implemented bluestore_blob_t::get_unused_mask

author Adam Kupczyk <akupczyk@ibm.com>

Wed, 19 Feb 2025 13:13:56 +0000 (13:13 +0000)

committer Adam Kupczyk <akupczyk@ibm.com>

Thu, 6 Mar 2025 19:26:42 +0000 (19:26 +0000)
author Adam Kupczyk <akupczyk@ibm.com>
Wed, 19 Feb 2025 13:13:56 +0000 (13:13 +0000)
committer Adam Kupczyk <akupczyk@ibm.com>
Thu, 6 Mar 2025 19:26:42 +0000 (19:26 +0000)
diff --git a/src/os/bluestore/Writer.cc b/src/os/bluestore/Writer.cc

index 34a308586ec2f48ced51b98fac4cac9be2640bca..b2d3195dc9e2a7c6e1d550f329947587c03bbc04 100644 (file)
--- a/src/os/bluestore/Writer.cc
+++ b/src/os/bluestore/Writer.cc
@@ -500,6 +500,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_with_data(
    _get_disk_space(blob_length - alloc_offset, blob_allocs);
    bblob.allocated(alloc_offset, blob_length - alloc_offset, blob_allocs);
    //^sets also logical_length = blob_length
+  bblob.add_unused_all();
    dout(25) << __func__ << " @0x" << std::hex << in_blob_offset
      << "~" << disk_data.length()
      << " alloc_offset=" << alloc_offset
@@ -508,6 +509,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_with_data(
    _crop_allocs_to_io(disk_extents, in_blob_offset - alloc_offset,
      blob_length - in_blob_offset - disk_data.length());
    _schedule_io(disk_extents, disk_data);
+  bblob.mark_used(in_blob_offset, data_length);
    return blob;
  }
  
@@ -541,7 +543,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_full(
    _get_disk_space(blob_length, blob_allocs);
    _schedule_io(blob_allocs, disk_data); //have to do before move()
    bblob.allocated_full(blob_length, std::move(blob_allocs));
-  bblob.mark_used(0, blob_length); //todo - optimize; this obviously clears it
+  bblob.mark_used_all();
    return blob;
  }
  
@@ -610,7 +612,7 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_full(
  inline void BlueStore::Writer::_schedule_io_masked(
    uint64_t disk_position,
    bufferlist data,
-  bluestore_blob_t::unused_t mask,
+  uint64_t mask,
    uint32_t chunk_size)
  {
    if (test_write_divertor == nullptr) {
@@ -771,7 +773,7 @@ void BlueStore::Writer::_try_reuse_allocated_l(
      uint32_t data_size = want_subau_end - want_subau_begin;
      bufferlist data_at_left = split_left(data, data_size);
      bd.real_length -= data_size;
-    uint32_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
+    uint64_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
      _blob_put_data_subau(b, in_blob_offset, data_at_left);
      // transfer do disk
      _schedule_io_masked(subau_disk_offset, data_at_left, mask, chunk_size);
@@ -844,9 +846,9 @@ void BlueStore::Writer::_try_reuse_allocated_r(
      uint32_t data_size = want_subau_end - want_subau_begin;
      bufferlist data_at_right = split_right(data, data.length() - data_size);
      bd.real_length -= data_size;
-    uint32_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
+    uint64_t mask = bb.get_unused_mask(in_blob_offset, data_size, chunk_size);
      _blob_put_data_subau(b, in_blob_offset, data_at_right);
-    //transfer to disk
+    // transfer to disk
      _schedule_io_masked(subau_disk_offset, data_at_right, mask, chunk_size);
  
      uint32_t ref_end = std::min(ref_end_offset, want_subau_end);
diff --git a/src/os/bluestore/Writer.h b/src/os/bluestore/Writer.h

index aa2a41dd186db160eda6b42cf3c91404cfe89a22..82b164befbb6c530a737b6faec8b52698f86ea0d 100644 (file)
--- a/src/os/bluestore/Writer.h
+++ b/src/os/bluestore/Writer.h
@@ -107,7 +107,7 @@ private:
    inline void _schedule_io_masked(
      uint64_t disk_offset,
      bufferlist data,
-    bluestore_blob_t::unused_t mask,
+    uint64_t mask,
      uint32_t chunk_size);
  
    inline void _schedule_io(
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h

index 0d28d2716fc5e169615d04e06fe26c8d6a55d039..f45224769b22395723b46514849b3a13dd5f0e72 100644 (file)
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -728,6 +728,12 @@ public:
      }
    }
  
+  /// Mark everything as unused: raise FLAG_HAS_UNUSED and set every bit of the `unused` bitmap.
+  void add_unused_all() {
+    set_flag(FLAG_HAS_UNUSED);
+    unused = ~0; // all bits set
+  }
+
    /// indicate that a range has (now) been used.
    void mark_used(uint64_t offset, uint64_t length) {
      if (has_unused()) {
@@ -746,14 +752,58 @@ public:
        }
      }
    }
-  /// todo implement me!
-  unused_t get_unused_mask(uint32_t offset, uint32_t length, uint32_t chunk_size) {
+
+  /// Mark everything as used. Only FLAG_HAS_UNUSED is cleared; `unused` is not modified here.
+  void mark_used_all() {
+    clear_flag(FLAG_HAS_UNUSED);
+  }
+
+  /// Build a bitmap for [offset, offset + length): one bit per io_chunk_size bytes, a set bit means the io chunk is unused.
+  /// Bit 0 covers offset, bit 1 covers offset + io_chunk_size, ... Returns 0 (all used) when nothing is tracked as unused or when more than 64 bits would be needed.
+  uint64_t get_unused_mask(uint32_t offset, uint32_t length, uint32_t io_chunk_size) {
    if (has_unused()) {
-      return 0;
+      uint32_t blob_len = get_logical_length();
+      ceph_assert((blob_len % (sizeof(unused)*8)) == 0); // blob must split evenly, one region per bit of `unused`
+      ceph_assert(offset + length <= blob_len);
+      ceph_assert((offset % io_chunk_size) == 0); // requested range must be aligned to io chunks
+      ceph_assert((length % io_chunk_size) == 0);
+      if (length / io_chunk_size > 64) {
+        // the result would need more than 64 bits; report everything as used (no bit set)
+        return 0;
+      }
+      uint32_t chunk_size = blob_len / (sizeof(unused)*8); // bytes covered by one bit of `unused`
+      uint16_t i = offset / chunk_size; // index of the `unused` bit whose region contains offset
+      uint16_t j = 0; // index of the output bit (io chunk) being produced
+      uint64_t io_used = 0; // bit j set => io chunk j overlaps a region that is in use
+      uint64_t next_u = round_down_to(offset + chunk_size, chunk_size); // end of the region of bit i
+      uint64_t next_io = round_down_to(offset + io_chunk_size, io_chunk_size); // end of io chunk j
+      // The loop walks two sequences that advance at different "speeds":
+      // regions tracked by `unused` bits (chunk_size each) and output io chunks (io_chunk_size each)
+      // unused_bits : aaaaabbbbbcccccdddddeeeeefffffggggghhhhh
+      // disk_io_chnk:    AAABBBCCCDDDEEEFFFGGGHHHIIIJJJ
+      // "Used" bits are accumulated because they can simply be OR-ed together; the inverse is returned.
+      // Each step applies the state of the i-th unused bit to the j-th io chunk.
+      // Whichever of the two current regions ends first is advanced (both, when they end together),
+      // so the relative sizes of chunk_size and io_chunk_size decide how fast i and j grow.
+      for (; next_io < offset + length + io_chunk_size; ) {
+        // if the region of bit i is in use, io chunk j is in use as well
+        (!(unused & (1 << i))) ? io_used |= uint64_t(1) << j : 0;
+        auto le = next_u <= next_io; // captured before next_io changes below
+        if (next_u >= next_io) {
+          j++;
+          next_io += io_chunk_size;
+        }
+        if (le) {
+          i++;
+          next_u += chunk_size;
+        }
+      }
+      return ~io_used; // NOTE(review): bits at positions >= length / io_chunk_size are never set in io_used, so they come back as 1
      } else {
        return 0;
      }
    }
+
    // map_f_invoke templates intended to mask parameters which are not expected
    // by the provided callback
    template<class F, typename std::enable_if<std::is_invocable_r_v<
diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc

index d5d70d14d8337c2e01a2b8f554a17bb6bb71b36b..5361ed9fcc61d5c8a74b7cb6797d367f9a4e2250 100644 (file)
--- a/src/test/objectstore/test_bluestore_types.cc
+++ b/src/test/objectstore/test_bluestore_types.cc
@@ -1864,7 +1864,7 @@ TEST_F(ExtentMapFixture, rain) {
      X.push_back(create());
    }
    for (size_t i = 0; i < H - 1; i++) {
-    write(X[i], (rand() % W - 1) * au_size, au_size);
+    write(X[i], (rand() % (W - 1)) * au_size, au_size);
      dup(X[i], X[i + 1], 0, W * au_size);
    }
    for (size_t i = 0; i < H; i++) {
@@ -3174,6 +3174,54 @@ TEST(bluestore_blob_t, wrong_map_bl_in_51682) {
      ASSERT_EQ(expected_pos, num_expected_entries);
    }
  }
+
+TEST(bluestore_blob_t, get_unused_mask) {
+  uint32_t disk_block = 4 * 1024;
+  for (uint32_t alloc = 4 * 1024; alloc <= 256 * 1024; alloc *= 2) {
+    for (uint32_t t = 0; t < 10000; t++) {
+      bluestore_blob_t b;
+      uint32_t size = (rand() % 10 + 1) * alloc;
+      b.allocated_test({uint64_t(rand() * 0x1000), size});
+      b.add_unused(0, size);
+      // sprinkle used
+      uint32_t regions = (rand() % 4) + 1;
+      for (uint32_t i = 0; i < regions; i++) {
+        uint32_t left = (rand() % 4) ?
+          rand() % (size / disk_block + 1) * disk_block : //aligned to disk block
+          (rand() * 1024 + rand()) % size; // completely free
+        uint32_t right = (rand() % 4) ?
+          rand() % (size / disk_block + 1) * disk_block : //aligned to disk block
+          (rand() * 1024 + rand()) % size; // completely free
+        if (left == right) continue;
+        if (left > right) swap(left, right);
+        b.mark_used(left, right - left);
+      }
+
+      for (uint32_t io_chunk_size = 1024; io_chunk_size <= 32 * 1024; io_chunk_size *= 2) {
+        if (size < io_chunk_size || (size % io_chunk_size) != 0) {
+          continue;
+        }
+        if (size / io_chunk_size > 64) continue;
+        uint32_t io_begin = rand() % (size / io_chunk_size + 1) * io_chunk_size;
+        uint32_t io_end  = rand() % (size / io_chunk_size + 1) * io_chunk_size;
+        if (io_begin == io_end) continue;
+        if (io_begin > io_end) swap(io_begin, io_end);
+
+        uint64_t mask = 0;
+        uint64_t bit = 1;
+        for (uint32_t i = io_begin; i < io_end; i += io_chunk_size) {
+          mask = mask | (b.is_unused(i, io_chunk_size) ? bit : 0);
+          bit = bit << 1;
+        }
+        uint64_t result = b.get_unused_mask(io_begin, io_end - io_begin, io_chunk_size);
+        auto ref = std::bitset<64>(mask).to_string().substr(64 - (io_end - io_begin) / io_chunk_size);
+        auto uuu = std::bitset<64>(result).to_string().substr(64 - (io_end - io_begin) / io_chunk_size);
+        EXPECT_EQ(ref, uuu);
+      }
+    }
+  }
+}
+
  class bluestore_blob_t_test :
    public ::testing::Test,
    public ::testing::WithParamInterface<std::vector<int>>
author	Adam Kupczyk <akupczyk@ibm.com>
	Wed, 19 Feb 2025 13:13:56 +0000 (13:13 +0000)
committer	Adam Kupczyk <akupczyk@ibm.com>
	Thu, 6 Mar 2025 19:26:42 +0000 (19:26 +0000)
src/os/bluestore/Writer.cc		patch \| blob \| history
src/os/bluestore/Writer.h		patch \| blob \| history
src/os/bluestore/bluestore_types.h		patch \| blob \| history
src/test/objectstore/test_bluestore_types.cc		patch \| blob \| history