]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/transaction_manager: add the max_data_allocation_size
authorXuehan Xu <xuxuehan@qianxin.com>
Wed, 21 Feb 2024 06:53:33 +0000 (14:53 +0800)
committerXuehan Xu <xuxuehan@qianxin.com>
Mon, 22 Apr 2024 03:26:45 +0000 (11:26 +0800)
configuration

Limit the max size of extents in seastore, which can avoid much read
amplification in case of remapping extents when extents integrity check
is mandatory

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/common/options/crimson.yaml.in
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_transaction_manager.cc

index ed670e4e1e55e0e277d8803f6a2fa2c95438c260..4a0eb5627b818009cffef7809510181761fb8d8c 100644 (file)
@@ -83,6 +83,11 @@ options:
   level: dev
   desc: default logical address space reservation for seastore objects' metadata
   default: 16777216
+- name: seastore_max_data_allocation_size
+  type: size
+  level: advanced
+  desc: Max size in bytes that an extent can be
+  default: 32_K
 - name: seastore_cache_lru_size
   type: size
   level: advanced
index c08576bf45ae7228d4d8e1fe031db55b2664b3d8..db3b8b3766c532a2fef75b8fb5c29a378985830a 100644 (file)
@@ -1015,13 +1015,15 @@ std::vector<CachedExtentRef> Cache::alloc_new_data_extents_by_type(
   switch (type) {
   case extent_types_t::OBJECT_DATA_BLOCK:
     {
-      auto extents = alloc_new_data_extents<ObjectDataBlock>(t, length, hint, gen);
+      auto extents = alloc_new_data_extents<
+       ObjectDataBlock>(t, length, hint, gen);
       res.insert(res.begin(), extents.begin(), extents.end());
     }
     return res;
   case extent_types_t::TEST_BLOCK:
     {
-      auto extents = alloc_new_data_extents<TestBlock>(t, length, hint, gen);
+      auto extents = alloc_new_data_extents<
+       TestBlock>(t, length, hint, gen);
       res.insert(res.begin(), extents.begin(), extents.end());
     }
     return res;
index 141a019c7932b0bb919ea3c942db3ae94c3fc38b..e6a5c76291c4dab58ae03cf73576e5f0bb28efa0 100644 (file)
@@ -223,7 +223,9 @@ class ExtentPlacementManager {
 public:
   ExtentPlacementManager()
     : ool_segment_seq_allocator(
-          std::make_unique<SegmentSeqAllocator>(segment_type_t::OOL))
+          std::make_unique<SegmentSeqAllocator>(segment_type_t::OOL)),
+      max_data_allocation_size(crimson::common::get_conf<Option::size_t>(
+         "seastore_max_data_allocation_size"))
   {
     devices_by_id.resize(DEVICE_ID_MAX, nullptr);
   }
@@ -352,6 +354,7 @@ public:
     rewrite_gen_t gen
 #endif
   ) {
+    LOG_PREFIX(ExtentPlacementManager::alloc_new_data_extents);
     assert(hint < placement_hint_t::NUM_HINTS);
     assert(is_target_rewrite_generation(gen));
     assert(gen == INIT_GENERATION || hint == placement_hint_t::REWRITE);
@@ -379,10 +382,20 @@ public:
       auto addrs = data_writers_by_gen[
           generation_to_writer(gen)]->alloc_paddrs(length);
       for (auto &ext : addrs) {
-        auto bp = ceph::bufferptr(
-          buffer::create_page_aligned(ext.len));
-        bp.zero();
-        allocs.emplace_back(alloc_result_t{ext.start, std::move(bp), gen});
+        auto left = ext.len;
+        while (left > 0) {
+          auto len = std::min(max_data_allocation_size, left);
+          auto bp = ceph::bufferptr(buffer::create_page_aligned(len));
+          bp.zero();
+          auto start = ext.start.is_delayed()
+                        ? ext.start
+                        : ext.start + (ext.len - left);
+          allocs.emplace_back(alloc_result_t{start, std::move(bp), gen});
+          SUBDEBUGT(seastore_epm,
+                    "allocated {} {}B extent at {}, hint={}, gen={}",
+                    t, type, len, start, hint, gen);
+          left -= len;
+        }
       }
     }
     return allocs;
@@ -1012,6 +1025,7 @@ private:
   BackgroundProcess background_process;
   // TODO: drop once paddr->journal_seq_t is introduced
   SegmentSeqAllocatorRef ool_segment_seq_allocator;
+  extent_len_t max_data_allocation_size = 0;
 
   friend class ::transaction_manager_test_t;
 };
index d7f143688ca5c9f0e7c326d692941f4ee0f03de9..bec1189e8c732ae4c72a53b81207ee22cd32dc05 100644 (file)
@@ -346,27 +346,18 @@ public:
     if (exts.empty()) {
       return crimson::ct_error::enospc::make();
     }
-    return seastar::do_with(
-      std::move(exts),
+    return lba_manager->alloc_extents(
+      t,
       laddr_hint,
-      [this, &t](auto &exts, auto &laddr_hint) {
-      return trans_intr::do_for_each(
-        exts,
-        [this, &t, &laddr_hint](auto &ext) {
-        return lba_manager->alloc_extent(
-          t,
-          laddr_hint,
-          *ext
-        ).si_then([&ext, &laddr_hint, &t](auto &&) mutable {
-          LOG_PREFIX(TransactionManager::alloc_extents);
-          SUBDEBUGT(seastore_tm, "new extent: {}, laddr_hint: {}", t, *ext, laddr_hint);
-          laddr_hint += ext->get_length();
-          return alloc_extent_iertr::now();
-        });
-      }).si_then([&exts] {
-        return alloc_extent_iertr::make_ready_future<
-          std::vector<TCachedExtentRef<T>>>(std::move(exts));
-      });
+      std::vector<LogicalCachedExtentRef>(
+       exts.begin(), exts.end()),
+      EXTENT_DEFAULT_REF_COUNT
+    ).si_then([exts=std::move(exts), &t, FNAME](auto &&) mutable {
+      for (auto &ext : exts) {
+       SUBDEBUGT(seastore_tm, "new extent: {}", t, *ext);
+      }
+      return alloc_extent_iertr::make_ready_future<
+       std::vector<TCachedExtentRef<T>>>(std::move(exts));
     });
   }
 
index b7389456bf12e581abe7d2278c0c7d10bf0540eb..977b8c386506946cc23acbdf0743be9ba2f5cefa 100644 (file)
@@ -309,14 +309,20 @@ struct lba_btree_test : btree_test_base {
          placement_hint_t::HOT,
          0,
          get_paddr());
-      assert(extents.size() == 1);
-      auto extent = extents.front();
-      return btree.insert(
-       get_op_context(t), addr, get_map_val(len), extent.get()
-      ).si_then([addr, extent](auto p){
-       auto& [iter, inserted] = p;
-       assert(inserted);
-       extent->set_laddr(addr);
+      return seastar::do_with(
+       std::move(extents),
+       [this, addr, &t, len, &btree](auto &extents) {
+       return trans_intr::do_for_each(
+         extents,
+         [this, addr, len, &t, &btree](auto &extent) {
+         return btree.insert(
+           get_op_context(t), addr, get_map_val(len), extent.get()
+         ).si_then([addr, extent](auto p){
+           auto& [iter, inserted] = p;
+           assert(inserted);
+           extent->set_laddr(addr);
+         });
+       });
       });
     });
   }
@@ -505,11 +511,11 @@ struct btree_lba_manager_test : btree_test_base {
     return make_fake_paddr(next_off);
   }
 
-  auto alloc_mapping(
+  auto alloc_mappings(
     test_transaction_t &t,
     laddr_t hint,
     size_t len) {
-    auto ret = with_trans_intr(
+    auto rets = with_trans_intr(
       *t.t,
       [=, this](auto &t) {
        auto extents = cache->alloc_new_data_extents<TestBlock>(
@@ -518,25 +524,29 @@ struct btree_lba_manager_test : btree_test_base {
            placement_hint_t::HOT,
            0,
            get_paddr());
-       assert(extents.size() == 1);
-       auto extent = extents.front();
-       return lba_manager->alloc_extent(
-         t, hint, *extent);
+       return seastar::do_with(
+         std::vector<LogicalCachedExtentRef>(
+           extents.begin(), extents.end()),
+         [this, &t, hint](auto &extents) {
+         return lba_manager->alloc_extents(t, hint, std::move(extents), EXTENT_DEFAULT_REF_COUNT);
+       });
       }).unsafe_get0();
-    logger().debug("alloc'd: {}", *ret);
-    EXPECT_EQ(len, ret->get_length());
-    auto [b, e] = get_overlap(t, ret->get_key(), len);
-    EXPECT_EQ(b, e);
-    t.mappings.emplace(
-      std::make_pair(
-       ret->get_key(),
-       test_extent_t{
-         ret->get_val(),
-         ret->get_length(),
-         1
-        }
-      ));
-    return ret;
+    for (auto &ret : rets) {
+      logger().debug("alloc'd: {}", *ret);
+      EXPECT_EQ(len, ret->get_length());
+      auto [b, e] = get_overlap(t, ret->get_key(), len);
+      EXPECT_EQ(b, e);
+      t.mappings.emplace(
+       std::make_pair(
+         ret->get_key(),
+         test_extent_t{
+           ret->get_val(),
+           ret->get_length(),
+           1
+         }
+       ));
+    }
+    return rets;
   }
 
   auto decref_mapping(
@@ -676,7 +686,7 @@ TEST_F(btree_lba_manager_test, basic)
       auto t = create_transaction();
       check_mappings(t);  // check in progress transaction sees mapping
       check_mappings();   // check concurrent does not
-      auto ret = alloc_mapping(t, laddr, block_size);
+      alloc_mappings(t, laddr, block_size);
       submit_test_transaction(std::move(t));
     }
     check_mappings();     // check new transaction post commit sees it
@@ -690,7 +700,7 @@ TEST_F(btree_lba_manager_test, force_split)
       auto t = create_transaction();
       logger().debug("opened transaction");
       for (unsigned j = 0; j < 5; ++j) {
-       auto ret = alloc_mapping(t, 0, block_size);
+       alloc_mappings(t, 0, block_size);
        if ((i % 10 == 0) && (j == 3)) {
          check_mappings(t);
          check_mappings();
@@ -710,14 +720,16 @@ TEST_F(btree_lba_manager_test, force_split_merge)
       auto t = create_transaction();
       logger().debug("opened transaction");
       for (unsigned j = 0; j < 5; ++j) {
-       auto ret = alloc_mapping(t, 0, block_size);
+       auto rets = alloc_mappings(t, 0, block_size);
        // just to speed things up a bit
        if ((i % 100 == 0) && (j == 3)) {
          check_mappings(t);
          check_mappings();
        }
-       incref_mapping(t, ret->get_key());
-       decref_mapping(t, ret->get_key());
+       for (auto &ret : rets) {
+         incref_mapping(t, ret->get_key());
+         decref_mapping(t, ret->get_key());
+       }
       }
       logger().debug("submitting transaction");
       submit_test_transaction(std::move(t));
@@ -767,7 +779,7 @@ TEST_F(btree_lba_manager_test, single_transaction_split_merge)
     {
       auto t = create_transaction();
       for (unsigned i = 0; i < 400; ++i) {
-       alloc_mapping(t, 0, block_size);
+       alloc_mappings(t, 0, block_size);
       }
       check_mappings(t);
       submit_test_transaction(std::move(t));
@@ -790,7 +802,7 @@ TEST_F(btree_lba_manager_test, single_transaction_split_merge)
     {
       auto t = create_transaction();
       for (unsigned i = 0; i < 600; ++i) {
-       alloc_mapping(t, 0, block_size);
+       alloc_mappings(t, 0, block_size);
       }
       auto addresses = get_mapped_addresses(t);
       for (unsigned i = 0; i != addresses.size(); ++i) {
@@ -818,7 +830,7 @@ TEST_F(btree_lba_manager_test, split_merge_multi)
       }
     };
     iterate([&](auto &t, auto idx) {
-      alloc_mapping(t, idx * block_size, block_size);
+      alloc_mappings(t, idx * block_size, block_size);
     });
     check_mappings();
     iterate([&](auto &t, auto idx) {
@@ -829,7 +841,7 @@ TEST_F(btree_lba_manager_test, split_merge_multi)
     check_mappings();
     iterate([&](auto &t, auto idx) {
       if ((idx % 32) > 0) {
-       alloc_mapping(t, idx * block_size, block_size);
+       alloc_mappings(t, idx * block_size, block_size);
       }
     });
     check_mappings();
index de8dce930ddc526aba8a97559f88c2c8a0f4579d..d7c500e3ee9aa847f019a80f08256ffbec032824 100644 (file)
@@ -391,10 +391,12 @@ struct transaction_manager_test_t :
     }).unsafe_get0();
     assert(extents.size() == 1);
     auto extent = extents.front();
+    extent_len_t allocated_len = 0;
     extent->set_contents(contents);
     EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta));
-    EXPECT_EQ(len, extent->get_length());
     test_mappings.alloced(hint, *extent, t.mapping_delta);
+    allocated_len += extent->get_length();
+    EXPECT_EQ(len, allocated_len);
     return extent;
   }
 
@@ -407,14 +409,16 @@ struct transaction_manager_test_t :
       return tm->alloc_data_extents<TestBlock>(trans, hint, len);
     }).unsafe_get0();
     size_t length = 0;
+    std::vector<TestBlockRef> exts;
     for (auto &extent : extents) {
       extent->set_contents(contents);
       length += extent->get_length();
       EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta));
       test_mappings.alloced(hint, *extent, t.mapping_delta);
+      exts.push_back(extent->template cast<TestBlock>());
     }
     EXPECT_EQ(len, length);
-    return extents;
+    return exts;
   }
 
   void alloc_extents_deemed_fail(
@@ -443,6 +447,17 @@ struct transaction_manager_test_t :
       get_random_contents());
   }
 
+  std::vector<TestBlockRef> alloc_extents(
+    test_transaction_t &t,
+    laddr_t hint,
+    extent_len_t len) {
+    return alloc_extents(
+      t,
+      hint,
+      len,
+      get_random_contents());
+  }
+
   bool check_usage() {
     return epm->check_usage();
   }
@@ -762,13 +777,16 @@ struct transaction_manager_test_t :
                  return tm->alloc_data_extents<TestBlock>(
                    *(t.t), L_ADDR_MIN, size
                  ).si_then([&t, this, size](auto extents) {
-                   assert(extents.size() == 1);
-                   auto extent = extents.front();
-                   extent->set_contents(get_random_contents());
-                   EXPECT_FALSE(
-                     test_mappings.contains(extent->get_laddr(), t.mapping_delta));
-                   EXPECT_EQ(size, extent->get_length());
-                   test_mappings.alloced(extent->get_laddr(), *extent, t.mapping_delta);
+                   extent_len_t length = 0;
+                   for (auto &extent : extents) {
+                     extent->set_contents(get_random_contents());
+                     EXPECT_FALSE(
+                       test_mappings.contains(extent->get_laddr(), t.mapping_delta));
+                     test_mappings.alloced(
+                       extent->get_laddr(), *extent, t.mapping_delta);
+                     length += extent->get_length();
+                   }
+                   EXPECT_EQ(size, length);
                    return seastar::now();
                  });
                }).si_then([&t, this] {
@@ -2048,11 +2066,13 @@ TEST_P(tm_single_device_test_t, random_writes)
            BSIZE);
          auto mut = mutate_extent(t, ext);
          // pad out transaction
-         auto padding = alloc_extent(
+         auto paddings = alloc_extents(
            t,
            TOTAL + (k * PADDING_SIZE),
            PADDING_SIZE);
-         dec_ref(t, padding->get_laddr());
+         for (auto &padding : paddings) {
+           dec_ref(t, padding->get_laddr());
+         }
        }
        submit_transaction(std::move(t));
       }
@@ -2086,12 +2106,14 @@ TEST_P(tm_single_device_test_t, remap_lazy_read)
    run_async([this, offset] {
     {
       auto t = create_transaction();
-      auto extent = alloc_extent(
+      auto extents = alloc_extents(
        t,
        offset,
        length,
        'a');
-      ASSERT_EQ(offset, extent->get_laddr());
+      for (auto &extent : extents) {
+       ASSERT_EQ(offset, extent->get_laddr());
+      }
       check_mappings(t);
       submit_transaction(std::move(t));
       check();