From: Xuehan Xu Date: Sun, 12 Dec 2021 08:43:30 +0000 (+0800) Subject: crimson/os/seastore: make onode data/metadata laddr space reservation configurable X-Git-Tag: v17.1.0~245^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d2235ba3b97c387d678c12d86cec63f61b29109c;p=ceph.git crimson/os/seastore: make onode data/metadata laddr space reservation configurable Signed-off-by: Xuehan Xu --- diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in index d8072a94677..d110842c759 100644 --- a/src/common/options/crimson.yaml.in +++ b/src/common/options/crimson.yaml.in @@ -65,3 +65,13 @@ options: level: dev desc: The record fullness threshold to flush a journal batch default: 0.95 +- name: seastore_default_max_object_size + type: uint + level: dev + desc: default logical address space reservation for seastore objects' data + default: 16777216 +- name: seastore_default_object_metadata_reservation + type: uint + level: dev + desc: default logical address space reservation for seastore objects' metadata + default: 16777216 diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index a3367d0668f..2b91cf4835b 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -15,16 +15,6 @@ namespace { } namespace crimson::os::seastore { - -/** - * MAX_OBJECT_SIZE - * - * For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE - * for any object. In the future, once we have the ability to remap logical - * mappings (necessary for clone), we'll add the ability to grow and shrink - * these regions and remove this assumption. - */ -static constexpr extent_len_t MAX_OBJECT_SIZE = Onode::DEFAULT_DATA_RESERVATION; #define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0) using context_t = ObjectDataHandler::context_t; @@ -261,9 +251,9 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation( extent_len_t size) { LOG_PREFIX(ObjectDataHandler::prepare_data_reservation); - ceph_assert(size <= MAX_OBJECT_SIZE); + ceph_assert(size <= max_object_size); if (!object_data.is_null()) { - ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE); + ceph_assert(object_data.get_reserved_data_len() == max_object_size); DEBUGT("reservation present: {}~{}", ctx.t, object_data.get_reserved_data_base(), @@ -273,13 +263,13 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation( DEBUGT("reserving: {}~{}", ctx.t, ctx.onode.get_data_hint(), - MAX_OBJECT_SIZE); + max_object_size); return ctx.tm.reserve_region( ctx.t, ctx.onode.get_data_hint(), - MAX_OBJECT_SIZE - ).si_then([&object_data](auto pin) { - ceph_assert(pin->get_length() == MAX_OBJECT_SIZE); + max_object_size + ).si_then([max_object_size=max_object_size, &object_data](auto pin) { + ceph_assert(pin->get_length() == max_object_size); object_data.update_reserved( pin->get_laddr(), pin->get_length()); diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h index 677094a25db..c397245125c 100644 --- a/src/crimson/os/seastore/object_data_handler.h +++ b/src/crimson/os/seastore/object_data_handler.h @@ -50,6 +50,8 @@ class ObjectDataHandler { public: using base_iertr = TransactionManager::base_iertr; + ObjectDataHandler(uint32_t mos) : max_object_size(mos) {} + struct context_t { TransactionManager &tm; Transaction &t; @@ -104,6 +106,16 @@ private: context_t ctx, object_data_t &object_data, extent_len_t size); +private: + /** + * max_object_size + * + * For now, we allocate a fixed region of laddr space of size max_object_size + * for any object. In the future, once we have the ability to remap logical + * mappings (necessary for clone), we'll add the ability to grow and shrink + * these regions and remove this assumption. + */ + const uint32_t max_object_size = 0; }; } diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index db7aec06b47..c9c31c3a0b5 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h @@ -54,19 +54,23 @@ class Onode : public boost::intrusive_ref_counter< { protected: virtual laddr_t get_hint() const = 0; + const uint32_t default_metadata_offset = 0; + const uint32_t default_metadata_range = 0; public: - static constexpr uint32_t DEFAULT_DATA_RESERVATION = 16<<20; - static constexpr uint32_t DEFAULT_METADATA_OFFSET = - DEFAULT_DATA_RESERVATION; - static constexpr uint32_t DEFAULT_METADATA_RANGE = 16<<20; + Onode(uint32_t ddr, uint32_t dmr) + : default_metadata_offset(ddr), + default_metadata_range(dmr) + {} virtual const onode_layout_t &get_layout() const = 0; virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0; virtual ~Onode() = default; laddr_t get_metadata_hint() const { - return get_hint() + DEFAULT_METADATA_OFFSET + - ((uint32_t)std::rand() % DEFAULT_METADATA_RANGE); + assert(default_metadata_offset); + assert(default_metadata_range); + return get_hint() + default_metadata_offset + + ((uint32_t)std::rand() % default_metadata_range); } laddr_t get_data_hint() const { return get_hint(); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc index ad6d1d19cc8..2865cba4b97 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc @@ -27,7 +27,10 @@ FLTreeOnodeManager::get_onode_ret FLTreeOnodeManager::get_onode( DEBUGT("no entry for {}", trans, hoid); return crimson::ct_error::enoent::make(); } - auto val = OnodeRef(new FLTreeOnode(cursor.value())); + auto val = OnodeRef(new FLTreeOnode( + default_data_reservation, + default_metadata_range, + cursor.value())); return get_onode_iertr::make_ready_future( val ); @@ -43,10 +46,13 @@ FLTreeOnodeManager::get_or_create_onode( return tree.insert( trans, hoid, OnodeTree::tree_value_config_t{sizeof(onode_layout_t)} - ).si_then([&trans, &hoid, FNAME](auto p) + ).si_then([this, &trans, &hoid, FNAME](auto p) -> get_or_create_onode_ret { auto [cursor, created] = std::move(p); - auto val = OnodeRef(new FLTreeOnode(cursor.value())); + auto val = OnodeRef(new FLTreeOnode( + default_data_reservation, + default_metadata_range, + cursor.value())); if (created) { DEBUGT("created onode for entry for {}", trans, hoid); val->get_mutable_layout(trans) = onode_layout_t{}; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index 814471fffbc..0367b823f36 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -37,7 +37,14 @@ struct FLTreeOnode final : Onode, Value { FLTreeOnode& operator=(const FLTreeOnode&) = delete; template - FLTreeOnode(T&&... args) : Value(std::forward(args)...) {} + FLTreeOnode(uint32_t ddr, uint32_t dmr, T&&... args) + : Onode(ddr, dmr), + Value(std::forward(args)...) {} + + template + FLTreeOnode(T&&... args) + : Onode(0, 0), + Value(std::forward(args)...) {} struct Recorder : public ValueDeltaRecorder { Recorder(bufferlist &bl) : ValueDeltaRecorder(bl) {} @@ -102,12 +109,23 @@ struct FLTreeOnode final : Onode, Value { using OnodeTree = Btree; +using crimson::common::get_conf; + class FLTreeOnodeManager : public crimson::os::seastore::OnodeManager { OnodeTree tree; + uint32_t default_data_reservation = 0; + uint32_t default_metadata_offset = 0; + uint32_t default_metadata_range = 0; public: FLTreeOnodeManager(TransactionManager &tm) : - tree(NodeExtentManager::create_seastore(tm)) {} + tree(NodeExtentManager::create_seastore(tm)), + default_data_reservation( + get_conf("seastore_default_max_object_size")), + default_metadata_offset(default_data_reservation), + default_metadata_range( + get_conf("seastore_default_object_metadata_reservation")) + {} mkfs_ret mkfs(Transaction &t) { return tree.mkfs(t); diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 88c12b19bf7..6c0c198b8a5 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -68,6 +68,8 @@ public: } }; +using crimson::common::get_conf; + SeaStore::SeaStore( const std::string& root, MDStoreRef mdstore, @@ -80,7 +82,9 @@ SeaStore::SeaStore( segment_manager(std::move(sm)), transaction_manager(std::move(tm)), collection_manager(std::move(cm)), - onode_manager(std::move(om)) + onode_manager(std::move(om)), + max_object_size( + get_conf("seastore_default_max_object_size")) { register_metrics(); } @@ -459,7 +463,7 @@ SeaStore::read_errorator::future SeaStore::read( size - offset : std::min(size - offset, len); - return ObjectDataHandler().read( + return ObjectDataHandler(max_object_size).read( ObjectDataHandler::context_t{ *transaction_manager, t, @@ -1069,7 +1073,7 @@ SeaStore::tm_ret SeaStore::_write( return seastar::do_with( std::move(_bl), [=, &ctx, &onode](auto &bl) { - return ObjectDataHandler().write( + return ObjectDataHandler(max_object_size).write( ObjectDataHandler::context_t{ *transaction_manager, *ctx.transaction, @@ -1199,7 +1203,7 @@ SeaStore::tm_ret SeaStore::_truncate( LOG_PREFIX(SeaStore::_truncate); DEBUGT("onode={} size={}", *ctx.transaction, *onode, size); onode->get_mutable_layout(*ctx.transaction).size = size; - return ObjectDataHandler().truncate( + return ObjectDataHandler(max_object_size).truncate( ObjectDataHandler::context_t{ *transaction_manager, *ctx.transaction, diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index b205872141d..f1d96cbaa6f 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -305,6 +305,7 @@ private: TransactionManagerRef transaction_manager; CollectionManagerRef collection_manager; OnodeManagerRef onode_manager; + const uint32_t max_object_size = 0; using tm_iertr = TransactionManager::base_iertr; using tm_ret = tm_iertr::future<>; diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc index e2fac7f68f1..162dc1c19cd 100644 --- a/src/test/crimson/seastore/test_object_data_handler.cc +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -11,6 +11,10 @@ using namespace crimson; using namespace crimson::os; using namespace crimson::os::seastore; +#define MAX_OBJECT_SIZE (16<<20) +#define DEFAULT_OBJECT_DATA_RESERVATION (16<<20) +#define DEFAULT_OBJECT_METADATA_RESERVATION (16<<20) + namespace { [[maybe_unused]] seastar::logger& logger() { return crimson::get_logger(ceph_subsys_test); @@ -22,6 +26,7 @@ class TestOnode final : public Onode { bool dirty = false; public: + TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {} const onode_layout_t &get_layout() const final { return layout; } @@ -58,7 +63,7 @@ struct object_data_handler_test_t: offset, len)); with_trans_intr(t, [&](auto &t) { - return ObjectDataHandler().write( + return ObjectDataHandler(MAX_OBJECT_SIZE).write( ObjectDataHandler::context_t{ *tm, t, @@ -81,7 +86,7 @@ struct object_data_handler_test_t: 0, size - offset); with_trans_intr(t, [&](auto &t) { - return ObjectDataHandler().truncate( + return ObjectDataHandler(MAX_OBJECT_SIZE).truncate( ObjectDataHandler::context_t{ *tm, t, @@ -100,7 +105,7 @@ struct object_data_handler_test_t: void read(Transaction &t, objaddr_t offset, extent_len_t len) { bufferlist bl = with_trans_intr(t, [&](auto &t) { - return ObjectDataHandler().read( + return ObjectDataHandler(MAX_OBJECT_SIZE).read( ObjectDataHandler::context_t{ *tm, t, @@ -132,7 +137,9 @@ struct object_data_handler_test_t: } seastar::future<> set_up_fut() final { - onode = new TestOnode{}; + onode = new TestOnode( + DEFAULT_OBJECT_DATA_RESERVATION, + DEFAULT_OBJECT_METADATA_RESERVATION); known_contents = buffer::create(4<<20 /* 4MB */); memset(known_contents.c_str(), 0, known_contents.length()); size = 0;