os/bluestore: improve adjustent extents merge in hybrid allocatoc

author Igor Fedotov <ifedotov@suse.com>

Wed, 25 Mar 2020 14:19:12 +0000 (17:19 +0300)

committer Igor Fedotov <ifedotov@suse.com>

Mon, 13 Apr 2020 13:16:07 +0000 (16:16 +0300)
author Igor Fedotov <ifedotov@suse.com>
Wed, 25 Mar 2020 14:19:12 +0000 (17:19 +0300)
committer Igor Fedotov <ifedotov@suse.com>
Mon, 13 Apr 2020 13:16:07 +0000 (16:16 +0300)
diff --git a/src/os/bluestore/AvlAllocator.h b/src/os/bluestore/AvlAllocator.h

index a68d4c190ac0280d34ecf81f1f4c93ec4f26bc26..571784a6c155b1a8a1ff90d725ae30f562fbc0ae 100644 (file)
--- a/src/os/bluestore/AvlAllocator.h
+++ b/src/os/bluestore/AvlAllocator.h
@@ -212,6 +212,10 @@ private:
      // i.e. (range_count_cap > 0)
      ceph_assert(false);
    }
+protected:
+  // called when extent to be released/marked free
+  virtual void _add_to_tree(uint64_t start, uint64_t size);
+
  protected:
    CephContext* cct;
    std::mutex lock;
@@ -241,7 +245,6 @@ protected:
    void _release(const PExtentVector&  release_set);
    void _shutdown();
  
-  void _add_to_tree(uint64_t start, uint64_t size);
    void _process_range_removal(uint64_t start, uint64_t end, range_tree_t::iterator& rs);
    void _remove_from_tree(uint64_t start, uint64_t size);
    void _try_remove_from_tree(uint64_t start, uint64_t size,
diff --git a/src/os/bluestore/BitmapAllocator.h b/src/os/bluestore/BitmapAllocator.h

index cbf8c007591a505eef2c36e4fad7f870fedb2df0..51ebaa4208c84a49293a39845df80ec0ed25e678 100644 (file)
--- a/src/os/bluestore/BitmapAllocator.h
+++ b/src/os/bluestore/BitmapAllocator.h
@@ -22,7 +22,6 @@ public:
    {
    }
  
-
    int64_t allocate(
      uint64_t want_size, uint64_t alloc_unit, uint64_t max_alloc_size,
      int64_t hint, PExtentVector *extents) override;
diff --git a/src/os/bluestore/HybridAllocator.cc b/src/os/bluestore/HybridAllocator.cc

index f3fdca158af53dd316987bef58213a58eb593525..484a664bf0d4422853a1a8521d11a9731f39d5ee 100644 (file)
--- a/src/os/bluestore/HybridAllocator.cc
+++ b/src/os/bluestore/HybridAllocator.cc
@@ -187,7 +187,8 @@ void HybridAllocator::shutdown()
    }
  }
  
-void HybridAllocator::_spillover_range(uint64_t start, uint64_t end) {
+void HybridAllocator::_spillover_range(uint64_t start, uint64_t end)
+{
    auto size = end - start;
    dout(20) << __func__
      << std::hex << " "
@@ -207,3 +208,15 @@ void HybridAllocator::_spillover_range(uint64_t start, uint64_t end) {
    }
    bmap_alloc->init_add_free(start, size);
  }
+
+void HybridAllocator::_add_to_tree(uint64_t start, uint64_t size)
+{
+  if (bmap_alloc) {
+    uint64_t head = bmap_alloc->claim_free_to_left(start);
+    uint64_t tail = bmap_alloc->claim_free_to_right(start + size);
+    ceph_assert(head <= start);
+    start -= head;
+    size += head + tail;
+  }
+  AvlAllocator::_add_to_tree(start, size);
+}
diff --git a/src/os/bluestore/HybridAllocator.h b/src/os/bluestore/HybridAllocator.h

index 2b9af918f7c1eb1c221e37acd88539a948d99b15..e8246cf4dfcb25621bbccb43c8e9ab850b16ae20 100644 (file)
--- a/src/os/bluestore/HybridAllocator.h
+++ b/src/os/bluestore/HybridAllocator.h
@@ -42,4 +42,7 @@ protected:
  private:
  
    void _spillover_range(uint64_t start, uint64_t end) override;
+
+  // called when extent to be released/marked free
+  void _add_to_tree(uint64_t start, uint64_t size) override;
  };
diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc

index 441a185d5c72146339077bbb2fd6396b7a523a8c..bb8bdf0499a4c1ae27cebf9b66277522f95f7dd3 100644 (file)
--- a/src/os/bluestore/fastbmap_allocator_impl.cc
+++ b/src/os/bluestore/fastbmap_allocator_impl.cc
@@ -629,3 +629,86 @@ void AllocatorLevel01Loose::dump(
      notify(off, len);
  }
  
+uint64_t AllocatorLevel01Loose::_claim_free_to_left_l0(int64_t l0_pos_start)
+{
+  int64_t d0 = L0_ENTRIES_PER_SLOT;
+
+  int64_t pos = l0_pos_start - 1;
+  slot_t bits = (slot_t)1 << (pos % d0);
+  int64_t idx = pos / d0;
+  slot_t* val_s = l0.data() + idx;
+
+  int64_t pos_e = p2align<int64_t>(pos, d0);
+
+  while (pos >= pos_e) {
+    if (0 == ((*val_s) & bits))
+      return pos + 1;
+    (*val_s) &= ~bits;
+    bits >>= 1;
+    --pos;
+  }
+  --idx;
+  val_s = l0.data() + idx;
+  while (idx >= 0 && (*val_s) == all_slot_set) {
+    *val_s = all_slot_clear;
+    --idx;
+    pos -= d0;
+    val_s = l0.data() + idx;
+  }
+
+  if (idx >= 0 &&
+      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
+    int64_t pos_e = p2align<int64_t>(pos, d0);
+    slot_t bits = (slot_t)1 << (pos % d0);
+    while (pos >= pos_e) {
+      if (0 == ((*val_s) & bits))
+        return pos + 1;
+      (*val_s) &= ~bits;
+      bits >>= 1;
+      --pos;
+    }
+  }
+  return pos + 1;
+}
+
+uint64_t AllocatorLevel01Loose::_claim_free_to_right_l0(int64_t l0_pos_start)
+{
+  auto d0 = L0_ENTRIES_PER_SLOT;
+
+  int64_t pos = l0_pos_start;
+  slot_t bits = (slot_t)1 << (pos % d0);
+  size_t idx = pos / d0;
+  slot_t* val_s = l0.data() + idx;
+
+  int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);
+
+  while (pos < pos_e) {
+    if (0 == ((*val_s) & bits))
+      return pos;
+    (*val_s) &= ~bits;
+    bits <<= 1;
+    ++pos;
+  }
+  ++idx;
+  val_s = l0.data() + idx;
+  while (idx < l0.size() && (*val_s) == all_slot_set) {
+    *val_s = all_slot_clear;
+    ++idx;
+    pos += d0;
+    val_s = l0.data() + idx;
+  }
+
+  if (idx < l0.size() &&
+      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
+    int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);
+    slot_t bits = (slot_t)1 << (pos % d0);
+    while (pos < pos_e) {
+      if (0 == ((*val_s) & bits))
+        return pos;
+      (*val_s) &= ~bits;
+      bits <<= 1;
+      ++pos;
+    }
+  }
+  return pos;
+}
diff --git a/src/os/bluestore/fastbmap_allocator_impl.h b/src/os/bluestore/fastbmap_allocator_impl.h

index 7dcbec30ec54f3361ce12a774120cc42f12be1e8..ed2d4c8cc98ce4be65d864e1dc42474e5ea24c56 100644 (file)
--- a/src/os/bluestore/fastbmap_allocator_impl.h
+++ b/src/os/bluestore/fastbmap_allocator_impl.h
@@ -322,6 +322,9 @@ protected:
  
    void _mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end);
    void _mark_alloc_l0(int64_t l0_pos_start, int64_t l0_pos_end);
+  uint64_t _claim_free_to_left_l0(int64_t l0_pos_start);
+  uint64_t _claim_free_to_right_l0(int64_t l0_pos_start);
+
  
    void _mark_alloc_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
    {
@@ -424,6 +427,34 @@ protected:
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
  
+  uint64_t claim_free_to_left_l1(uint64_t offs)
+  {
+    uint64_t l0_pos_end = offs / l0_granularity;
+    uint64_t l0_pos_start = _claim_free_to_left_l0(l0_pos_end);
+    if (l0_pos_start < l0_pos_end) {
+      _mark_l1_on_l0(
+        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
+        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
+      return l0_granularity * (l0_pos_end - l0_pos_start);
+    }
+    return 0;
+  }
+
+  uint64_t claim_free_to_right_l1(uint64_t offs)
+  {
+    uint64_t l0_pos_start = offs / l0_granularity;
+    uint64_t l0_pos_end = _claim_free_to_right_l0(l0_pos_start);
+
+    if (l0_pos_start < l0_pos_end) {
+      _mark_l1_on_l0(
+        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
+        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
+      return l0_granularity * (l0_pos_end - l0_pos_start);
+    }
+    return 0;
+  }
+
+
  public:
    uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
    {
@@ -522,7 +553,31 @@ public:
        std::lock_guard l(lock);
        l1.collect_stats(bins_overall);
    }
+  uint64_t claim_free_to_left(uint64_t offset) {
+    std::lock_guard l(lock);
+    auto allocated = l1.claim_free_to_left_l1(offset);
+    ceph_assert(available >= allocated);
+    available -= allocated;
+
+    uint64_t l2_pos = (offset - allocated) / l2_granularity;
+    uint64_t l2_pos_end =
+      p2roundup(int64_t(offset), int64_t(l2_granularity)) / l2_granularity;
+    _mark_l2_on_l1(l2_pos, l2_pos_end);
+    return allocated;
+  }
  
+  uint64_t claim_free_to_right(uint64_t offset) {
+    std::lock_guard l(lock);
+    auto allocated = l1.claim_free_to_right_l1(offset);
+    ceph_assert(available >= allocated);
+    available -= allocated;
+
+    uint64_t l2_pos = (offset) / l2_granularity;
+    int64_t end = offset + allocated;
+    uint64_t l2_pos_end = p2roundup(end, int64_t(l2_granularity)) / l2_granularity;
+    _mark_l2_on_l1(l2_pos, l2_pos_end);
+    return allocated;
+  }
  protected:
    ceph::mutex lock = ceph::make_mutex("AllocatorLevel02::lock");
    L1 l1;
diff --git a/src/test/objectstore/fastbmap_allocator_test.cc b/src/test/objectstore/fastbmap_allocator_test.cc

index 14752422b29b3fcb69aaead2582b0e3f988a67b4..c3af73706569c8cd40f052ca8d81b2506134d391 100644 (file)
--- a/src/test/objectstore/fastbmap_allocator_test.cc
+++ b/src/test/objectstore/fastbmap_allocator_test.cc
@@ -44,6 +44,14 @@ public:
    {
      _free_l2(r);
    }
+  void mark_free(uint64_t o, uint64_t len)
+  {
+    _mark_free(o, len);
+  }
+  void mark_allocated(uint64_t o, uint64_t len)
+  {
+    _mark_allocated(o, len);
+  }
  };
  
  const uint64_t _1m = 1024 * 1024;
@@ -931,3 +939,123 @@ TEST(TestAllocatorLevel01, test_4G_alloc_bug3)
        ASSERT_EQ(a4[1].length, 2048ull * _1m);
    }
  }
+
+TEST(TestAllocatorLevel01, test_claim_free_l2)
+{
+  TestAllocatorLevel02 al2;
+  uint64_t num_l2_entries = 64;// *512;
+  uint64_t capacity = num_l2_entries * 256 * 512 * 4096;
+  al2.init(capacity, 0x1000);
+  std::cout << "Init L2" << std::endl;
+
+  uint64_t max_available = 0x20000;
+  al2.mark_allocated(max_available, capacity - max_available);
+
+  uint64_t allocated1 = 0;
+  interval_vector_t a1;
+  al2.allocate_l2(0x2000, 0x2000, &allocated1, &a1);
+  ASSERT_EQ(allocated1, 0x2000u);
+  ASSERT_EQ(a1[0].offset, 0u);
+  ASSERT_EQ(a1[0].length, 0x2000u);
+
+  uint64_t allocated2 = 0;
+  interval_vector_t a2;
+  al2.allocate_l2(0x2000, 0x2000, &allocated2, &a2);
+  ASSERT_EQ(allocated2, 0x2000u);
+  ASSERT_EQ(a2[0].offset, 0x2000u);
+  ASSERT_EQ(a2[0].length, 0x2000u);
+
+  uint64_t allocated3 = 0;
+  interval_vector_t a3;
+  al2.allocate_l2(0x3000, 0x3000, &allocated3, &a3);
+  ASSERT_EQ(allocated3, 0x3000u);
+  ASSERT_EQ(a3[0].offset, 0x4000u);
+  ASSERT_EQ(a3[0].length, 0x3000u);
+
+  al2.free_l2(a1);
+  al2.free_l2(a3);
+  ASSERT_EQ(max_available - 0x2000, al2.debug_get_free());
+
+  auto claimed = al2.claim_free_to_right(0x4000);
+  ASSERT_EQ(max_available - 0x4000u, claimed);
+  ASSERT_EQ(0x2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x4000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0x2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x2000);
+  ASSERT_EQ(0x2000u, claimed);
+  ASSERT_EQ(0, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x2000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0, al2.debug_get_free());
+
+
+  al2.mark_free(0x3000, 0x4000);
+  ASSERT_EQ(0x4000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x7000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0x4000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x6000);
+  ASSERT_EQ(0x1000, claimed);
+  ASSERT_EQ(0x3000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x6000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0x3000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x3000);
+  ASSERT_EQ(0u, claimed);
+  ASSERT_EQ(0x3000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x4000);
+  ASSERT_EQ(0x1000, claimed);
+  ASSERT_EQ(0x2000, al2.debug_get_free());
+
+  // extend allocator space up to 64M
+  auto max_available2 = 64 * 1024 * 1024;
+  al2.mark_free(max_available, max_available2 - max_available);
+  ASSERT_EQ(max_available2 - max_available + 0x2000, al2.debug_get_free());
+
+  // pin some allocations
+  al2.mark_allocated(0x400000 + 0x2000, 1000);
+  al2.mark_allocated(0x400000 + 0x5000, 1000);
+  al2.mark_allocated(0x400000 + 0x20000, 1000);
+  ASSERT_EQ(max_available2 - max_available - 0x1000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x403000);
+  ASSERT_EQ(0x0, claimed);
+
+  claimed = al2.claim_free_to_left(0x404000);
+  ASSERT_EQ(0x1000, claimed);
+  ASSERT_EQ(max_available2 - max_available - 0x2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(max_available);
+  ASSERT_EQ(0, claimed);
+
+  claimed = al2.claim_free_to_left(0x400000);
+  ASSERT_EQ(0x3e0000, claimed);
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000, al2.get_available());
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x407000);
+  ASSERT_EQ(0x19000, claimed);
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000 - 0x19000,
+    al2.get_available());
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000 - 0x19000,
+    al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x407000);
+  ASSERT_EQ(0, claimed);
+
+  claimed = al2.claim_free_to_right(0x430000);
+  ASSERT_EQ(max_available2 - 0x430000, claimed);
+  ASSERT_EQ(0x15000,
+    al2.get_available());
+  ASSERT_EQ(0x15000,
+    al2.debug_get_free());
+}
diff --git a/src/test/objectstore/hybrid_allocator_test.cc b/src/test/objectstore/hybrid_allocator_test.cc

index 5643d47d9b7983f57f1c851836107258d186bf58..e43d28b28442f6c48a3d935abcf3cf178c4aa4ce 100755 (executable)
--- a/src/test/objectstore/hybrid_allocator_test.cc
+++ b/src/test/objectstore/hybrid_allocator_test.cc
@@ -149,35 +149,35 @@ TEST(HybridAllocator, basic)
      // we have at avl: 2M~2M, 8M~7M
      // and at bmap: 0~1M, 16M~1M, 18M~2M
  
-    // this will go to avl making 16M~4M span broken between both allocators
+    // this will be merged with neighbors from bmap and go to avl
      ha.init_add_free(17 * _1m, _1m);
-    ASSERT_EQ(_1m * 10, ha.get_avl_free());
-    ASSERT_EQ(_1m * 4, ha.get_bmap_free());
+    ASSERT_EQ(_1m * 1, ha.get_bmap_free());
+    ASSERT_EQ(_1m * 13, ha.get_avl_free());
  
-    // we have at avl: 2M~2M, 8M~7M, 17M~1M
-    // and at bmap: 0~1M, 16M~1M, 18M~2M
+    // we have at avl: 2M~2M, 8M~7M, 16M~4M
+    // and at bmap: 0~1M
  
-    // and now do some cutoffs from this "broken" 16M~4M span
+    // and now do some cutoffs from 0~1M span
  
      //cut off 4K from bmap
-    ha.init_rm_free(16 * _1m, 0x1000);
-    ASSERT_EQ(_1m * 10, ha.get_avl_free());
-    ASSERT_EQ(_1m * 4 - 0x1000, ha.get_bmap_free());
+    ha.init_rm_free(0 * _1m, 0x1000);
+    ASSERT_EQ(_1m * 13, ha.get_avl_free());
+    ASSERT_EQ(_1m * 1 - 0x1000, ha.get_bmap_free());
  
-    //cut off 1M-4K from bmap and 4K from avl
-    ha.init_rm_free(16 * _1m + 0x1000, _1m);
-    ASSERT_EQ(_1m * 10 - 0x1000, ha.get_avl_free());
-    ASSERT_EQ(_1m * 3, ha.get_bmap_free());
+    //cut off 1M-4K from bmap
+    ha.init_rm_free(0 * _1m + 0x1000, _1m - 0x1000);
+    ASSERT_EQ(_1m * 13, ha.get_avl_free());
+    ASSERT_EQ(0, ha.get_bmap_free());
  
      //cut off 512K avl
      ha.init_rm_free(17 * _1m + 0x1000, _1m / 2);
-    ASSERT_EQ(_1m * 10 - 0x1000 - _1m / 2, ha.get_avl_free());
-    ASSERT_EQ(_1m * 3, ha.get_bmap_free());
+    ASSERT_EQ(_1m * 13 - _1m / 2, ha.get_avl_free());
+    ASSERT_EQ(0, ha.get_bmap_free());
  
-    //cut off the rest from avl and 8K from bmap
-    ha.init_rm_free(17 * _1m + 0x1000 + _1m / 2, _1m / 2 + 0x1000);
-    ASSERT_EQ(_1m * 9, ha.get_avl_free());
-    ASSERT_EQ(_1m * 3 - 0x2000, ha.get_bmap_free());
+    //cut off the rest from avl
+    ha.init_rm_free(17 * _1m + 0x1000 + _1m / 2, _1m / 2);
+    ASSERT_EQ(_1m * 12, ha.get_avl_free());
+    ASSERT_EQ(0, ha.get_bmap_free());
    }
  
    {
author	Igor Fedotov <ifedotov@suse.com>
	Wed, 25 Mar 2020 14:19:12 +0000 (17:19 +0300)
committer	Igor Fedotov <ifedotov@suse.com>
	Mon, 13 Apr 2020 13:16:07 +0000 (16:16 +0300)
src/os/bluestore/AvlAllocator.h		patch \| blob \| history
src/os/bluestore/BitmapAllocator.h		patch \| blob \| history
src/os/bluestore/HybridAllocator.cc		patch \| blob \| history
src/os/bluestore/HybridAllocator.h		patch \| blob \| history
src/os/bluestore/fastbmap_allocator_impl.cc		patch \| blob \| history
src/os/bluestore/fastbmap_allocator_impl.h		patch \| blob \| history
src/test/objectstore/fastbmap_allocator_test.cc		patch \| blob \| history
src/test/objectstore/hybrid_allocator_test.cc		patch \| blob \| history