From a2f6c7b2a8607298be18e8d4e06644da05b2bb03 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 21 Feb 2024 14:53:33 +0800 Subject: [PATCH] crimson/os/seastore/transaction_manager: add the max_data_allocation_size configuration Limit the max size of extents in seastore, which can avoid much read amplification in case of remapping extents when extents integrity check is mandatory Signed-off-by: Xuehan Xu --- src/common/options/crimson.yaml.in | 5 ++ src/crimson/os/seastore/cache.cc | 6 +- .../os/seastore/extent_placement_manager.h | 24 ++++-- src/crimson/os/seastore/transaction_manager.h | 31 +++---- .../seastore/test_btree_lba_manager.cc | 86 +++++++++++-------- .../seastore/test_transaction_manager.cc | 48 ++++++++--- 6 files changed, 123 insertions(+), 77 deletions(-) diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in index ed670e4e1e55e..4a0eb5627b818 100644 --- a/src/common/options/crimson.yaml.in +++ b/src/common/options/crimson.yaml.in @@ -83,6 +83,11 @@ options: level: dev desc: default logical address space reservation for seastore objects' metadata default: 16777216 +- name: seastore_max_data_allocation_size + type: size + level: advanced + desc: Max size in bytes that an extent can be + default: 32_K - name: seastore_cache_lru_size type: size level: advanced diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index c08576bf45ae7..db3b8b3766c53 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1015,13 +1015,15 @@ std::vector Cache::alloc_new_data_extents_by_type( switch (type) { case extent_types_t::OBJECT_DATA_BLOCK: { - auto extents = alloc_new_data_extents(t, length, hint, gen); + auto extents = alloc_new_data_extents< + ObjectDataBlock>(t, length, hint, gen); res.insert(res.begin(), extents.begin(), extents.end()); } return res; case extent_types_t::TEST_BLOCK: { - auto extents = alloc_new_data_extents(t, length, hint, gen); + auto extents = alloc_new_data_extents< + TestBlock>(t, length, hint, gen); res.insert(res.begin(), extents.begin(), extents.end()); } return res; diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 141a019c7932b..e6a5c76291c4d 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -223,7 +223,9 @@ class ExtentPlacementManager { public: ExtentPlacementManager() : ool_segment_seq_allocator( - std::make_unique(segment_type_t::OOL)) + std::make_unique(segment_type_t::OOL)), + max_data_allocation_size(crimson::common::get_conf( + "seastore_max_data_allocation_size")) { devices_by_id.resize(DEVICE_ID_MAX, nullptr); } @@ -352,6 +354,7 @@ public: rewrite_gen_t gen #endif ) { + LOG_PREFIX(ExtentPlacementManager::alloc_new_data_extents); assert(hint < placement_hint_t::NUM_HINTS); assert(is_target_rewrite_generation(gen)); assert(gen == INIT_GENERATION || hint == placement_hint_t::REWRITE); @@ -379,10 +382,20 @@ public: auto addrs = data_writers_by_gen[ generation_to_writer(gen)]->alloc_paddrs(length); for (auto &ext : addrs) { - auto bp = ceph::bufferptr( - buffer::create_page_aligned(ext.len)); - bp.zero(); - allocs.emplace_back(alloc_result_t{ext.start, std::move(bp), gen}); + auto left = ext.len; + while (left > 0) { + auto len = std::min(max_data_allocation_size, left); + auto bp = ceph::bufferptr(buffer::create_page_aligned(len)); + bp.zero(); + auto start = ext.start.is_delayed() + ? ext.start + : ext.start + (ext.len - left); + allocs.emplace_back(alloc_result_t{start, std::move(bp), gen}); + SUBDEBUGT(seastore_epm, + "allocated {} {}B extent at {}, hint={}, gen={}", + t, type, len, start, hint, gen); + left -= len; + } } } return allocs; @@ -1012,6 +1025,7 @@ private: BackgroundProcess background_process; // TODO: drop once paddr->journal_seq_t is introduced SegmentSeqAllocatorRef ool_segment_seq_allocator; + extent_len_t max_data_allocation_size = 0; friend class ::transaction_manager_test_t; }; diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index d7f143688ca5c..bec1189e8c732 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -346,27 +346,18 @@ public: if (exts.empty()) { return crimson::ct_error::enospc::make(); } - return seastar::do_with( - std::move(exts), + return lba_manager->alloc_extents( + t, laddr_hint, - [this, &t](auto &exts, auto &laddr_hint) { - return trans_intr::do_for_each( - exts, - [this, &t, &laddr_hint](auto &ext) { - return lba_manager->alloc_extent( - t, - laddr_hint, - *ext - ).si_then([&ext, &laddr_hint, &t](auto &&) mutable { - LOG_PREFIX(TransactionManager::alloc_extents); - SUBDEBUGT(seastore_tm, "new extent: {}, laddr_hint: {}", t, *ext, laddr_hint); - laddr_hint += ext->get_length(); - return alloc_extent_iertr::now(); - }); - }).si_then([&exts] { - return alloc_extent_iertr::make_ready_future< - std::vector>>(std::move(exts)); - }); + std::vector( + exts.begin(), exts.end()), + EXTENT_DEFAULT_REF_COUNT + ).si_then([exts=std::move(exts), &t, FNAME](auto &&) mutable { + for (auto &ext : exts) { + SUBDEBUGT(seastore_tm, "new extent: {}", t, *ext); + } + return alloc_extent_iertr::make_ready_future< + std::vector>>(std::move(exts)); }); } diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index b7389456bf12e..977b8c3865069 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -309,14 +309,20 @@ struct lba_btree_test : btree_test_base { placement_hint_t::HOT, 0, get_paddr()); - assert(extents.size() == 1); - auto extent = extents.front(); - return btree.insert( - get_op_context(t), addr, get_map_val(len), extent.get() - ).si_then([addr, extent](auto p){ - auto& [iter, inserted] = p; - assert(inserted); - extent->set_laddr(addr); + return seastar::do_with( + std::move(extents), + [this, addr, &t, len, &btree](auto &extents) { + return trans_intr::do_for_each( + extents, + [this, addr, len, &t, &btree](auto &extent) { + return btree.insert( + get_op_context(t), addr, get_map_val(len), extent.get() + ).si_then([addr, extent](auto p){ + auto& [iter, inserted] = p; + assert(inserted); + extent->set_laddr(addr); + }); + }); }); }); } @@ -505,11 +511,11 @@ struct btree_lba_manager_test : btree_test_base { return make_fake_paddr(next_off); } - auto alloc_mapping( + auto alloc_mappings( test_transaction_t &t, laddr_t hint, size_t len) { - auto ret = with_trans_intr( + auto rets = with_trans_intr( *t.t, [=, this](auto &t) { auto extents = cache->alloc_new_data_extents( @@ -518,25 +524,29 @@ struct btree_lba_manager_test : btree_test_base { placement_hint_t::HOT, 0, get_paddr()); - assert(extents.size() == 1); - auto extent = extents.front(); - return lba_manager->alloc_extent( - t, hint, *extent); + return seastar::do_with( + std::vector( + extents.begin(), extents.end()), + [this, &t, hint](auto &extents) { + return lba_manager->alloc_extents(t, hint, std::move(extents), EXTENT_DEFAULT_REF_COUNT); + }); }).unsafe_get0(); - logger().debug("alloc'd: {}", *ret); - EXPECT_EQ(len, ret->get_length()); - auto [b, e] = get_overlap(t, ret->get_key(), len); - EXPECT_EQ(b, e); - t.mappings.emplace( - std::make_pair( - ret->get_key(), - test_extent_t{ - ret->get_val(), - ret->get_length(), - 1 - } - )); - return ret; + for (auto &ret : rets) { + logger().debug("alloc'd: {}", *ret); + EXPECT_EQ(len, ret->get_length()); + auto [b, e] = get_overlap(t, ret->get_key(), len); + EXPECT_EQ(b, e); + t.mappings.emplace( + std::make_pair( + ret->get_key(), + test_extent_t{ + ret->get_val(), + ret->get_length(), + 1 + } + )); + } + return rets; } auto decref_mapping( @@ -676,7 +686,7 @@ TEST_F(btree_lba_manager_test, basic) auto t = create_transaction(); check_mappings(t); // check in progress transaction sees mapping check_mappings(); // check concurrent does not - auto ret = alloc_mapping(t, laddr, block_size); + alloc_mappings(t, laddr, block_size); submit_test_transaction(std::move(t)); } check_mappings(); // check new transaction post commit sees it @@ -690,7 +700,7 @@ TEST_F(btree_lba_manager_test, force_split) auto t = create_transaction(); logger().debug("opened transaction"); for (unsigned j = 0; j < 5; ++j) { - auto ret = alloc_mapping(t, 0, block_size); + alloc_mappings(t, 0, block_size); if ((i % 10 == 0) && (j == 3)) { check_mappings(t); check_mappings(); @@ -710,14 +720,16 @@ TEST_F(btree_lba_manager_test, force_split_merge) auto t = create_transaction(); logger().debug("opened transaction"); for (unsigned j = 0; j < 5; ++j) { - auto ret = alloc_mapping(t, 0, block_size); + auto rets = alloc_mappings(t, 0, block_size); // just to speed things up a bit if ((i % 100 == 0) && (j == 3)) { check_mappings(t); check_mappings(); } - incref_mapping(t, ret->get_key()); - decref_mapping(t, ret->get_key()); + for (auto &ret : rets) { + incref_mapping(t, ret->get_key()); + decref_mapping(t, ret->get_key()); + } } logger().debug("submitting transaction"); submit_test_transaction(std::move(t)); @@ -767,7 +779,7 @@ TEST_F(btree_lba_manager_test, single_transaction_split_merge) { auto t = create_transaction(); for (unsigned i = 0; i < 400; ++i) { - alloc_mapping(t, 0, block_size); + alloc_mappings(t, 0, block_size); } check_mappings(t); submit_test_transaction(std::move(t)); @@ -790,7 +802,7 @@ TEST_F(btree_lba_manager_test, single_transaction_split_merge) { auto t = create_transaction(); for (unsigned i = 0; i < 600; ++i) { - alloc_mapping(t, 0, block_size); + alloc_mappings(t, 0, block_size); } auto addresses = get_mapped_addresses(t); for (unsigned i = 0; i != addresses.size(); ++i) { @@ -818,7 +830,7 @@ TEST_F(btree_lba_manager_test, split_merge_multi) } }; iterate([&](auto &t, auto idx) { - alloc_mapping(t, idx * block_size, block_size); + alloc_mappings(t, idx * block_size, block_size); }); check_mappings(); iterate([&](auto &t, auto idx) { @@ -829,7 +841,7 @@ TEST_F(btree_lba_manager_test, split_merge_multi) check_mappings(); iterate([&](auto &t, auto idx) { if ((idx % 32) > 0) { - alloc_mapping(t, idx * block_size, block_size); + alloc_mappings(t, idx * block_size, block_size); } }); check_mappings(); diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index de8dce930ddc5..d7c500e3ee9aa 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -391,10 +391,12 @@ struct transaction_manager_test_t : }).unsafe_get0(); assert(extents.size() == 1); auto extent = extents.front(); + extent_len_t allocated_len = 0; extent->set_contents(contents); EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta)); - EXPECT_EQ(len, extent->get_length()); test_mappings.alloced(hint, *extent, t.mapping_delta); + allocated_len += extent->get_length(); + EXPECT_EQ(len, allocated_len); return extent; } @@ -407,14 +409,16 @@ struct transaction_manager_test_t : return tm->alloc_data_extents(trans, hint, len); }).unsafe_get0(); size_t length = 0; + std::vector exts; for (auto &extent : extents) { extent->set_contents(contents); length += extent->get_length(); EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta)); test_mappings.alloced(hint, *extent, t.mapping_delta); + exts.push_back(extent->template cast()); } EXPECT_EQ(len, length); - return extents; + return exts; } void alloc_extents_deemed_fail( @@ -443,6 +447,17 @@ struct transaction_manager_test_t : get_random_contents()); } + std::vector alloc_extents( + test_transaction_t &t, + laddr_t hint, + extent_len_t len) { + return alloc_extents( + t, + hint, + len, + get_random_contents()); + } + bool check_usage() { return epm->check_usage(); } @@ -762,13 +777,16 @@ struct transaction_manager_test_t : return tm->alloc_data_extents( *(t.t), L_ADDR_MIN, size ).si_then([&t, this, size](auto extents) { - assert(extents.size() == 1); - auto extent = extents.front(); - extent->set_contents(get_random_contents()); - EXPECT_FALSE( - test_mappings.contains(extent->get_laddr(), t.mapping_delta)); - EXPECT_EQ(size, extent->get_length()); - test_mappings.alloced(extent->get_laddr(), *extent, t.mapping_delta); + extent_len_t length = 0; + for (auto &extent : extents) { + extent->set_contents(get_random_contents()); + EXPECT_FALSE( + test_mappings.contains(extent->get_laddr(), t.mapping_delta)); + test_mappings.alloced( + extent->get_laddr(), *extent, t.mapping_delta); + length += extent->get_length(); + } + EXPECT_EQ(size, length); return seastar::now(); }); }).si_then([&t, this] { @@ -2048,11 +2066,13 @@ TEST_P(tm_single_device_test_t, random_writes) BSIZE); auto mut = mutate_extent(t, ext); // pad out transaction - auto padding = alloc_extent( + auto paddings = alloc_extents( t, TOTAL + (k * PADDING_SIZE), PADDING_SIZE); - dec_ref(t, padding->get_laddr()); + for (auto &padding : paddings) { + dec_ref(t, padding->get_laddr()); + } } submit_transaction(std::move(t)); } @@ -2086,12 +2106,14 @@ TEST_P(tm_single_device_test_t, remap_lazy_read) run_async([this, offset] { { auto t = create_transaction(); - auto extent = alloc_extent( + auto extents = alloc_extents( t, offset, length, 'a'); - ASSERT_EQ(offset, extent->get_laddr()); + for (auto &extent : extents) { + ASSERT_EQ(offset, extent->get_laddr()); + } check_mappings(t); submit_transaction(std::move(t)); check(); -- 2.39.5