os/bluestore: introduce locking for the very first DB/WAL alloc unit. 62174/head
author Igor Fedotov <igor.fedotov@croit.io>
Fri, 21 Mar 2025 10:25:41 +0000 (13:25 +0300)
committer Igor Fedotov <igor.fedotov@croit.io>
Mon, 24 Mar 2025 16:07:57 +0000 (19:07 +0300)
Signed-off-by: Igor Fedotov <igor.fedotov@croit.io>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/bluefs_types.cc
src/os/bluestore/bluefs_types.h
src/test/objectstore/store_test.cc
src/test/objectstore/test_bluefs.cc

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 48db417bd6efebd464a3e8e80860c89df18825bf..a3ab0831bc83859c89c654a74a8ed4b16669f043 100644
@@ -640,7 +640,8 @@ uint64_t BlueFS::_get_minimal_reserved(unsigned id) const
 uint64_t BlueFS::get_full_reserved(unsigned id)
 {
   if (!is_shared_alloc(id)) {
-    return locked_alloc[id].length + _get_minimal_reserved(id);
+    return locked_alloc[id].head_length + locked_alloc[id].tail_length +
+      _get_minimal_reserved(id);
   }
   return 0;
 }
@@ -709,6 +710,18 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
 
   _init_alloc();
 
+  // temporarily lock candidate regions to forbid their use during mkfs
+  for (uint8_t i = 0; i < MAX_BDEV; i++) {
+    if (!alloc[i]) continue;
+    bluefs_locked_extents_t res_la = locked_alloc[i].get_merged();
+    if (res_la.head_length) {
+      alloc[i]->init_rm_free(res_la.head_offset, res_la.head_length);
+    }
+    if (res_la.tail_length) {
+      alloc[i]->init_rm_free(res_la.tail_offset, res_la.tail_length);
+    }
+  }
+
   // init log
   FileRef log_file = ceph::make_ref<File>();
   log_file->fnode.ino = 1;
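
For context, the get_merged() call here (see bluefs_types.cc below) folds the tail and gray-tail candidates into a single range, so whatever tail layout eventually wins is kept out of mkfs-time allocations; the final head/tail decision is made at mount after replay. A minimal sketch with made-up geometry, assuming the patched src/os/bluestore/bluefs_types.h is on the include path (the three-argument constructor used here also derives the gray tail, while _init_alloc() below fills head/tail directly):

// sketch only: 8 KiB reserved, 1 MiB alloc unit, 128 MiB + 4 KiB device
#include "os/bluestore/bluefs_types.h"
#include <iostream>

int main() {
  const uint64_t M = 1ull << 20;
  bluefs_locked_extents_t candidates(0x2000, 128 * M + 0x1000, M);
  std::cout << "candidates " << candidates << std::endl;
  // the head stays separate; tail and gray tail are merged into one range
  std::cout << "merged     " << candidates.get_merged() << std::endl;
  return 0;
}
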
@@ -793,7 +806,7 @@ void BlueFS::_init_alloc()
       continue;
     }
     ceph_assert(bdev[id]->get_size());
-    locked_alloc[id] = bluefs_extent_t();
+    locked_alloc[id].reset();
 
     if (is_shared_alloc(id)) {
       dout(1) << __func__ << " shared, id " << id << std::hex
@@ -810,37 +823,37 @@ void BlueFS::_init_alloc()
         name += to_string(uintptr_t(this));
 
       auto reserved = _get_minimal_reserved(id);
-      uint64_t locked_offs = 0;
-      {
-        // Try to lock tailing space at device if allocator controlled space
-        // isn't aligned with recommended alloc unit.
-        // Final decision whether locked tail to be maintained is made after
-        // BlueFS replay depending on existing allocations.
-        uint64_t size0 = _get_block_device_size(id);
-        uint64_t size = size0 - reserved;
-        size = p2align(size, alloc_size[id]) + reserved;
-        if (size < size0) {
-          locked_offs = size;
-          locked_alloc[id] = bluefs_extent_t(id, locked_offs, uint32_t(size0 - size));
-        }
+      uint64_t full_size = _get_block_device_size(id);
+      uint64_t free_end = p2align(full_size, alloc_size[id]);
+
+      // Trying to lock the following extents:
+      // [reserved, alloc_size] and [p2align(dev_size, alloc_size), dev_size]
+      // to make all the allocations aligned to alloc_size if possible.
+      // Final decision whether locked head/tail to be maintained is made after
+      // BlueFS replay depending on existing allocations.
+      auto &locked = locked_alloc[id];
+      locked.head_offset = reserved;
+      locked.head_length = p2nphase(reserved, alloc_size[id]);
+      if (free_end < full_size) {
+        locked.tail_offset = free_end;
+        locked.tail_length = full_size - free_end;
       }
       string alloc_type = cct->_conf->bluefs_allocator;
       dout(1) << __func__ << " new, id " << id << std::hex
               << ", allocator name " << name
               << ", allocator type " << alloc_type
-              << ", capacity 0x" << bdev[id]->get_size()
+              << ", capacity 0x" << full_size
               << ", reserved 0x" << reserved
-              << ", locked 0x" << locked_alloc[id].offset
-              << "~" << locked_alloc[id].length
+              << ", maybe locked " << locked
               << ", block size 0x" << bdev[id]->get_block_size()
               << ", alloc unit 0x" << alloc_size[id]
               << std::dec << dendl;
-      alloc[id] = Allocator::create(cct, alloc_type,
-                                   bdev[id]->get_size(),
+      alloc[id] = Allocator::create(cct,
+                                    alloc_type,
+                                   full_size,
                                    bdev[id]->get_block_size(),
                                    name);
-      uint64_t free_len = locked_offs ? locked_offs : _get_block_device_size(id) - reserved;
-      alloc[id]->init_add_free(reserved, free_len);
+      alloc[id]->init_add_free(reserved, full_size - reserved);
     }
   }
 }
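
The comment in the hunk above describes the intent: pad the reserved area up to the first alloc-unit boundary (head) and lock whatever is left past the last full alloc unit (tail), so the space handed to the allocator is alloc_size-aligned whenever possible. A standalone sketch of that arithmetic, with p2align()/p2nphase() re-derived locally (ceph's versions live in include/intarith.h) and made-up geometry:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

static uint64_t p2align(uint64_t x, uint64_t align)  { return x & ~(align - 1); }
static uint64_t p2nphase(uint64_t x, uint64_t align) { return (align - x) & (align - 1); }

int main() {
  const uint64_t reserved   = 0x2000;                  // reserved head area (made-up)
  const uint64_t alloc_size = 1ull << 20;              // 1 MiB alloc unit
  const uint64_t full_size  = (128ull << 20) + 0x1000; // device not AU-aligned

  // head candidate: pad the reserved area up to the first alloc-unit boundary
  uint64_t head_offset = reserved;
  uint64_t head_length = p2nphase(reserved, alloc_size);   // 0xfe000
  // tail candidate: whatever is left past the last full alloc unit
  uint64_t tail_offset = p2align(full_size, alloc_size);   // 0x8000000
  uint64_t tail_length = full_size - tail_offset;          // 0x1000

  printf("head 0x%" PRIx64 "~0x%" PRIx64 ", tail 0x%" PRIx64 "~0x%" PRIx64 "\n",
         head_offset, head_length, tail_offset, tail_length);
  return 0;
}
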
@@ -1066,7 +1079,7 @@ int BlueFS::mount()
 
   // init freelist
   for (auto& p : nodes.file_map) {
-    dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl;
+    dout(20) << __func__ << " noting alloc for " << p.second->fnode << dendl;
     for (auto& q : p.second->fnode.extents) {
       bool is_shared = is_shared_alloc(q.bdev);
       ceph_assert(!is_shared || (is_shared && shared_alloc));
@@ -1074,24 +1087,26 @@ int BlueFS::mount()
         shared_alloc->bluefs_used += q.length;
         alloc[q.bdev]->init_rm_free(q.offset, q.length);
       } else if (!is_shared) {
-        if (locked_alloc[q.bdev].length) {
-          auto locked_offs = locked_alloc[q.bdev].offset;
-          if (q.offset + q.length > locked_offs) {
-            // we already have allocated extents in locked range,
-            // do not enforce this lock then.
-            bluefs_extent_t dummy;
-            std::swap(locked_alloc[q.bdev], dummy);
-            alloc[q.bdev]->init_add_free(dummy.offset, dummy.length);
-            dout(1) << __func__ << std::hex
-                    << " unlocked at " << q.bdev
-                    << " 0x" << dummy.offset << "~" << dummy.length
-                    << std::dec << dendl;
-          }
-        }
+        locked_alloc[q.bdev].reset_intersected(q);
         alloc[q.bdev]->init_rm_free(q.offset, q.length);
       }
     }
   }
+  // finalize and apply locked allocation regions
+  for (uint8_t i = 0; i < MAX_BDEV; i++) {
+    bluefs_locked_extents_t res_la = locked_alloc[i].finalize();
+    dout(1) << __func__ << std::hex
+            << " final locked allocations " << (int)i
+            << " " << locked_alloc[i] << " => " << res_la
+            << dendl;
+    if (res_la.head_length) {
+      alloc[i]->init_rm_free(res_la.head_offset, res_la.head_length);
+    }
+    if (res_la.tail_length) {
+      alloc[i]->init_rm_free(res_la.tail_offset, res_la.tail_length);
+    }
+  }
+
   if (shared_alloc) {
     shared_alloc->need_init = false;
     dout(1) << __func__ << " shared_bdev_used = "
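
For reference, the new mount-time sequence boils down to: drop any candidate that a replayed extent already intersects (reset_intersected), then finalize() and remove only the surviving regions from the allocator. A minimal sketch for a single non-shared device, assuming the patched src/os/bluestore/bluefs_types.h is on the include path; the geometry and the replayed extents are made up:

#include "os/bluestore/bluefs_types.h"
#include <iostream>
#include <vector>

int main() {
  const uint64_t M = 1ull << 20;
  // candidates as _init_alloc() computes them for an 8 KiB reserved area,
  // a 1 MiB alloc unit and a 128 MiB + 4 KiB device
  bluefs_locked_extents_t locked;
  locked.head_offset = 0x2000;     // reserved
  locked.head_length = 0xfe000;    // p2nphase(0x2000, 1 MiB)
  locked.tail_offset = 128 * M;    // p2align(dev size, 1 MiB)
  locked.tail_length = 0x1000;     // dev size - tail_offset

  // extents recovered by BlueFS log replay (hypothetical); the last one sits
  // inside the tail candidate, so that candidate is dropped rather than locked
  std::vector<bluefs_extent_t> replayed = {
    bluefs_extent_t(1, 1 * M, 1 * M),
    bluefs_extent_t(1, 16 * M, 4 * M),
    bluefs_extent_t(1, 128 * M, 0x800),
  };
  for (auto& e : replayed) {
    locked.reset_intersected(e);
  }

  // only what survived replay would be handed to init_rm_free()
  std::cout << "final lock " << locked.finalize() << std::endl;  // head only
  return 0;
}
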
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index 1932d45e0b72837594d071f7548f233f34a14160..f21c20869fb65a919e689ed0a9e07983a2b0f565 100644
@@ -522,9 +522,11 @@ private:
   std::vector<IOContext*> ioc;                     ///< IOContexts for bdevs
   std::vector<Allocator*> alloc;                   ///< allocators for bdevs
   std::vector<uint64_t> alloc_size;                ///< alloc size for each device
-  std::vector<bluefs_extent_t> locked_alloc;       ///< candidate extents for locked alocations,
-                                                   ///< no alloc/release reqs matching these space
-                                                   ///< to be issued to allocator.
+  std::vector<bluefs_locked_extents_t> locked_alloc;  ///< candidate extents
+                                                      ///< at both dev's head and tail
+                                                      ///< locked for allocations,
+                                                      ///< no alloc/release reqs matching
+                                                      ///< this space are to be issued to the allocator.
 
 
   //std::vector<interval_set<uint64_t>> block_unused_too_granular;
diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc
index 65a25ae50d2da028ae38a83ba789d22cf6487fa8..554ebcfc24709b3cf88d146c36458f36884f4b69 100644
@@ -38,6 +38,102 @@ ostream& operator<<(ostream& out, const bluefs_extent_t& e)
             << std::dec;
 }
 
+bluefs_locked_extents_t::bluefs_locked_extents_t(uint64_t head_reserved,
+  uint64_t full_size, uint64_t alloc_size)
+{
+  // Calculate three extents which are potential candidates for locking,
+  // given as [start, end]:
+  // - head: [reserved, p2roundup(reserved, alloc_size)]
+  // - gray_tail: an area which should be locked if the head becomes void
+  // - tail: [p2align(full_size, alloc_size), full_size]
+  // The final decision on whether the locked extents are to be maintained is
+  // made after BlueFS replay, depending on existing allocations.
+  // This class performs that recalculation on reset_intersected() calls,
+  // which report existing allocations to it.
+  //
+
+  head_offset = head_reserved;
+  head_length = p2nphase(head_reserved, alloc_size);
+  if (head_reserved) {
+    ceph_assert(full_size > head_reserved);
+    uint64_t gray_free_end = p2align(full_size - head_reserved, alloc_size);
+    gray_free_end += head_reserved;
+    if (gray_free_end < full_size) {
+      gray_tail_offset = gray_free_end;
+      gray_tail_length = full_size - gray_free_end;
+    }
+  }
+  uint64_t free_end = p2align(full_size, alloc_size);
+  if (free_end < full_size) {
+    tail_offset = free_end;
+    tail_length = full_size - free_end;
+  }
+}
+
+void bluefs_locked_extents_t::reset_intersected(const bluefs_extent_t& e)
+{
+  if (e.offset < head_end() && e.end() > head_offset) {
+    head_offset = 0;
+    head_length = 0;
+  }
+  if (e.offset < gray_tail_end() && e.end() > gray_tail_offset) {
+    gray_tail_offset = 0;
+    gray_tail_length = 0;
+  }
+  if (e.offset < tail_end() && e.end() > tail_offset) {
+    tail_offset = 0;
+    tail_length = 0;
+  }
+}
+
+bluefs_locked_extents_t bluefs_locked_extents_t::get_merged() const
+{
+  bluefs_locked_extents_t res;
+  res.head_offset = head_offset;
+  res.head_length = head_length;
+  if (gray_tail_length) {
+    if (tail_length) {
+      ceph_assert(gray_tail_offset > 0);
+      ceph_assert(tail_offset > 0);
+      res.tail_offset = std::min(tail_offset, gray_tail_offset);
+      res.tail_length = std::max(tail_end(), gray_tail_end()) - res.tail_offset;
+    } else {
+      res.tail_offset = gray_tail_offset;
+      res.tail_length = gray_tail_length;
+    }
+  } else {
+    res.tail_offset = tail_offset;
+    res.tail_length = tail_length;
+  }
+  return res;
+}
+
+bluefs_locked_extents_t bluefs_locked_extents_t::finalize() const
+{
+  bluefs_locked_extents_t res;
+  if (head_length) {
+    res.head_offset = head_offset;
+    res.head_length = head_length;
+    if (tail_length) {
+      res.tail_offset = tail_offset;
+      res.tail_length = tail_length;
+    }
+  } else {
+    res.tail_offset = gray_tail_offset;
+    res.tail_length = gray_tail_length;
+  }
+  return res;
+}
+
+ostream& operator<<(ostream& out, const bluefs_locked_extents_t& e)
+{
+  return out << std::hex
+             << "<0x" << e.head_offset << "~" << e.head_length
+             << ", [0x"  << e.gray_tail_offset << "~" << e.gray_tail_length
+             << "], 0x"  << e.tail_offset << "~" << e.tail_length << ">"
+            << std::dec;
+}
+
 // bluefs_layout_t
 
 void bluefs_layout_t::encode(bufferlist& bl) const
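
A short sketch of the finalize() decision the constructor comment above alludes to, assuming the patched src/os/bluestore/bluefs_types.h is on the include path; the geometry mirrors the second case of the unit test added below (4 KiB reserved, 1 MiB alloc unit, 128 MiB + 64 KiB device):

#include "os/bluestore/bluefs_types.h"
#include <iostream>

int main() {
  const uint64_t M = 1ull << 20;

  // (a) replay touched none of the candidates: the head survives, so the
  //     alloc-unit-aligned tail is kept and the gray tail is discarded
  bluefs_locked_extents_t a(0x1000, 128 * M + 0x10000, M);
  std::cout << "untouched:   " << a.finalize() << std::endl;

  // (b) a replayed extent overlaps the head: the head is voided, allocations
  //     effectively start at the unaligned reserved offset, so the gray tail
  //     becomes the locked tail instead
  bluefs_locked_extents_t b(0x1000, 128 * M + 0x10000, M);
  b.reset_intersected(bluefs_extent_t(0, 0x1000, 0x1000));
  std::cout << "head in use: " << b.finalize() << std::endl;
  return 0;
}
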
diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h
index 3c2cff02bf33cfcb5833de5598e5141fe6223567..75fc22fb9eaf929d57c8c6272709c35ab345bdd3 100644
@@ -35,6 +35,37 @@ WRITE_CLASS_DENC(bluefs_extent_t)
 
 std::ostream& operator<<(std::ostream& out, const bluefs_extent_t& e);
 
+struct bluefs_locked_extents_t {
+  uint64_t head_offset = 0;
+  uint32_t head_length = 0;
+
+  uint64_t gray_tail_offset = 0;
+  uint32_t gray_tail_length = 0;
+
+  uint64_t tail_offset = 0;
+  uint32_t tail_length = 0;
+
+  bluefs_locked_extents_t() {}
+  bluefs_locked_extents_t(uint64_t head_reserved, uint64_t full_size, uint64_t alloc_size);
+
+  void reset() {
+    *this = bluefs_locked_extents_t();
+  }
+  uint64_t head_end() const { return head_offset + head_length; }
+  uint64_t gray_tail_end() const { return gray_tail_offset + gray_tail_length; }
+  uint64_t tail_end() const { return tail_offset + tail_length; }
+
+  void reset_intersected(const bluefs_extent_t& e);
+
+  // returns extents in a form where tails are merged
+  bluefs_locked_extents_t get_merged() const;
+
+  // returns the final locked extents, with only head/tail populated
+  bluefs_locked_extents_t finalize() const;
+};
+
+std::ostream& operator<<(std::ostream& out, const bluefs_locked_extents_t& e);
+
 struct bluefs_fnode_delta_t {
   uint64_t ino;
   uint64_t size;
diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc
index 5ab0694031c091771aeffb82a696d53fe11217c3..07c824ffbf4b8cd451c78278bb82aaaac996ef9b 100644
@@ -11993,7 +11993,7 @@ TEST_P(StoreTestSpecificAUSize, BlueFSReservedTest) {
             g_conf()->bluefs_alloc_size);
 
   ASSERT_EQ(fs->get_full_reserved(BlueFS::BDEV_WAL),
-            wal_extra);
+            g_conf()->bluefs_alloc_size + wal_extra);
 }
 
 #endif  // WITH_BLUESTORE
diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc
index 8c8bb4c0d28969056e489077f8bf4f2f26adc868..3e55d1392da2adfd38a12d388a90740ab077d00e 100644
@@ -1774,6 +1774,297 @@ TEST(BlueFS, test_69481_truncate_asserts) {
   fs.umount();
 }
 
+TEST(bluefs_locked_extents_t, basics) {
+  const uint64_t M = 1 << 20;
+  {
+    uint64_t reserved = 0x2000;
+    uint64_t au = 1*M;
+    uint64_t fullsize = 128*M;
+    bluefs_locked_extents_t lcke(reserved, fullsize, au);
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - au + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, au - reserved);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    // no ops
+    lcke.reset_intersected(bluefs_extent_t(0, 1*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 10*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 127*M, reserved));
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - au + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, au - reserved);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    // get_merged verification
+    auto e1 = lcke.get_merged();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.gray_tail_offset, 0);
+    ASSERT_EQ(e1.gray_tail_length, 0);
+    ASSERT_EQ(e1.tail_offset, lcke.gray_tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.gray_tail_length);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.gray_tail_offset, 0);
+    ASSERT_EQ(e1.gray_tail_length, 0);
+    ASSERT_EQ(e1.tail_offset, lcke.tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.tail_length);
+
+    // head has intersection
+    lcke.reset_intersected(bluefs_extent_t(0, reserved, au));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - au + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, au - reserved);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, lcke.gray_tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.gray_tail_length);
+
+    // gray_tail has intersections
+    lcke.reset_intersected(bluefs_extent_t(0, 127*M + reserved, 0x1000));
+    lcke.reset_intersected(bluefs_extent_t(0, 128*M - 0x1000, 0x1000));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, 0);
+    ASSERT_EQ(e1.tail_length, 0);
+  }
+  {
+    uint64_t reserved = 0x1000;
+    uint64_t au = 1*M;
+    uint64_t extra_tail = 0x10000;
+    uint64_t fullsize = 128*M + extra_tail;
+    bluefs_locked_extents_t lcke(reserved, fullsize, au);
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - extra_tail + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, extra_tail - reserved);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    // no ops
+    lcke.reset_intersected(bluefs_extent_t(0, 1*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 10*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 127*M, reserved));
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - extra_tail + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, extra_tail - reserved);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    // get_merged verification
+    auto e1 = lcke.get_merged();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.gray_tail_offset, 0);
+    ASSERT_EQ(e1.gray_tail_length, 0);
+    ASSERT_EQ(e1.tail_offset, std::min(lcke.gray_tail_offset, lcke.tail_offset));
+    ASSERT_EQ(e1.tail_length, fullsize - e1.tail_offset);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.tail_offset, lcke.tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.tail_length);
+
+    // head has intersection
+    lcke.reset_intersected(bluefs_extent_t(0, reserved, au));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - extra_tail + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, extra_tail - reserved);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, lcke.gray_tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.gray_tail_length);
+
+    // tail has intersections
+    lcke.reset_intersected(bluefs_extent_t(0, 128*M, 0x1000));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - extra_tail + reserved);
+    ASSERT_EQ(lcke.gray_tail_length, extra_tail - reserved);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    // gray_tail has intersections
+    lcke.reset_intersected(bluefs_extent_t(0, 128*M + reserved, 0x1000));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, 0);
+    ASSERT_EQ(e1.tail_length, 0);
+  }
+  {
+    uint64_t reserved = 0x2000;
+    uint64_t au = 1*M;
+    uint64_t extra_tail = 0x1000;
+    uint64_t fullsize = 128*M + extra_tail;
+    bluefs_locked_extents_t lcke(reserved, fullsize, au);
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - au + reserved - extra_tail);
+    ASSERT_EQ(lcke.gray_tail_length, au - reserved + extra_tail);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    // no ops
+    lcke.reset_intersected(bluefs_extent_t(0, 1*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 10*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 127*M, reserved));
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - au + reserved - extra_tail);
+    ASSERT_EQ(lcke.gray_tail_length, au - reserved + extra_tail);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    // get_merged verification
+    auto e1 = lcke.get_merged();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.gray_tail_offset, 0);
+    ASSERT_EQ(e1.gray_tail_length, 0);
+    ASSERT_EQ(e1.tail_offset, std::min(lcke.gray_tail_offset, lcke.tail_offset));
+    ASSERT_EQ(e1.tail_length, fullsize - e1.tail_offset);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.tail_offset, lcke.tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.tail_length);
+
+    // head has an intersection (the extent only partially lies within it)
+    lcke.reset_intersected(bluefs_extent_t(reserved - 0x1000, reserved, au));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, fullsize - au + reserved - extra_tail);
+    ASSERT_EQ(lcke.gray_tail_length, au - reserved + extra_tail);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, lcke.gray_tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.gray_tail_length);
+
+    // tail&gray_tail have intersections
+    lcke.reset_intersected(bluefs_extent_t(0, 128*M, 0x1000));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, 0);
+    ASSERT_EQ(e1.tail_length, 0);
+  }
+  {
+    uint64_t reserved = 0x2000;
+    uint64_t au = 1*M;
+    uint64_t extra_tail = 0x2000;
+    uint64_t fullsize = 128*M + extra_tail;
+    bluefs_locked_extents_t lcke(reserved, fullsize, au);
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    // no ops
+    lcke.reset_intersected(bluefs_extent_t(0, 1*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 10*M, 1*M));
+    lcke.reset_intersected(bluefs_extent_t(0, 127*M, reserved));
+    ASSERT_EQ(lcke.head_offset, reserved);
+    ASSERT_EQ(lcke.head_length, au - reserved);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    // get_merged verification
+    auto e1 = lcke.get_merged();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.gray_tail_offset, 0);
+    ASSERT_EQ(e1.gray_tail_length, 0);
+    ASSERT_EQ(e1.tail_offset, lcke.tail_offset);
+    ASSERT_EQ(e1.tail_length, fullsize - e1.tail_offset);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, lcke.head_offset);
+    ASSERT_EQ(e1.head_length, lcke.head_length);
+    ASSERT_EQ(e1.tail_offset, lcke.tail_offset);
+    ASSERT_EQ(e1.tail_length, lcke.tail_length);
+
+    // head has an intersection (the extent only partially lies within it)
+    lcke.reset_intersected(bluefs_extent_t(reserved - 0x1000, reserved, au));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, fullsize - extra_tail);
+    ASSERT_EQ(lcke.tail_length, extra_tail);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, 0);
+    ASSERT_EQ(e1.tail_length, 0);
+
+    // tail has an intersection
+    lcke.reset_intersected(bluefs_extent_t(0, 128*M, 0x1000));
+    ASSERT_EQ(lcke.head_offset, 0);
+    ASSERT_EQ(lcke.head_length, 0);
+    ASSERT_EQ(lcke.gray_tail_offset, 0);
+    ASSERT_EQ(lcke.gray_tail_length, 0);
+    ASSERT_EQ(lcke.tail_offset, 0);
+    ASSERT_EQ(lcke.tail_length, 0);
+
+    e1 = lcke.finalize();
+    ASSERT_EQ(e1.head_offset, 0);
+    ASSERT_EQ(e1.head_length, 0);
+    ASSERT_EQ(e1.tail_offset, 0);
+    ASSERT_EQ(e1.tail_length, 0);
+  }
+}
+
 int main(int argc, char **argv) {
   auto args = argv_to_vec(argc, argv);
   map<string,string> defaults = {