From: myoungwon oh Date: Mon, 5 Sep 2022 06:05:48 +0000 (+0900) Subject: crimson/os/seastore/rbm: add alloc/free extent regarding rbm X-Git-Tag: v18.1.0~794^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5485640f7a7a2bb4299ae620d4d9ee604f729dea;p=ceph.git crimson/os/seastore/rbm: add alloc/free extent regarding rbm This commit also adds alloc_paddr interface to ExtentOolWriter Signed-off-by: Myoungwon Oh --- diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index 84d5260a1ff4..08c755f3ea59 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -1561,16 +1561,36 @@ void RBMCleaner::mark_space_used( paddr_t addr, extent_len_t len) { - // TODO - return; + // Need to mark the space as used at startup time. + // We do not use mark_space_used after complete_commit() + // because alloc_extent() already reserved the space. + assert(addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK); + if (background_callback->get_state() == + BackgroundListener::state_t::SCAN_SPACE) { + auto rbms = rb_group->get_rb_managers(); + for (auto rbm : rbms) { + if (addr.get_device_id() == rbm->get_device_id()) { + if (rbm->get_start() <= addr) { + return rbm->mark_space_used(addr, len); + } + } + } + } } void RBMCleaner::mark_space_free( paddr_t addr, extent_len_t len) { - // TODO - return; + assert(addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK); + auto rbms = rb_group->get_rb_managers(); + for (auto rbm : rbms) { + if (addr.get_device_id() == rbm->get_device_id()) { + if (rbm->get_start() <= addr) { + return rbm->mark_space_free(addr, len); + } + } + } } void RBMCleaner::reserve_projected_usage(std::size_t projected_usage) diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 3f5a312f292d..493d88146724 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ 
-1357,6 +1357,12 @@ public: return nullptr; } + paddr_t alloc_paddr(extent_len_t length) { + // TODO: implement allocation strategy (dirty metadata and multiple devices) + auto rbs = rb_group->get_rb_managers(); + return rbs[0]->alloc_extent(length); + } + // Testing interfaces bool check_usage() final { diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index cb0dde5b0c0b..fe1642b17261 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -30,6 +30,8 @@ public: using open_ertr = base_ertr; virtual open_ertr::future<> open() = 0; + virtual paddr_t alloc_paddr(extent_len_t length) = 0; + using alloc_write_ertr = base_ertr; using alloc_write_iertr = trans_iertr<alloc_write_ertr>; virtual alloc_write_iertr::future<> alloc_write_ool_extents( @@ -70,6 +72,10 @@ public: }); } + paddr_t alloc_paddr(extent_len_t length) final { + return make_delayed_temp_paddr(0); + } + private: alloc_write_iertr::future<> do_write( Transaction& t, @@ -106,6 +112,12 @@ public: return close_ertr::now(); }); } + + paddr_t alloc_paddr(extent_len_t length) final { + assert(rb_cleaner); + return rb_cleaner->alloc_paddr(length); + } + private: alloc_write_iertr::future<> do_write( Transaction& t, @@ -184,36 +196,49 @@ public: // XXX: bp might be extended to point to differnt memory (e.g. PMem) // according to the allocator. 
- auto bp = ceph::bufferptr( + auto alloc_paddr = [this](rewrite_gen_t gen, + data_category_t category, extent_len_t length) + -> alloc_result_t { + auto bp = ceph::bufferptr( buffer::create_page_aligned(length)); - bp.zero(); + bp.zero(); + paddr_t addr; + if (gen == INLINE_GENERATION) { + addr = make_record_relative_paddr(0); + } else if (category == data_category_t::DATA) { + assert(data_writers_by_gen[generation_to_writer(gen)]); + addr = data_writers_by_gen[ + generation_to_writer(gen)]->alloc_paddr(length); + } else { + assert(category == data_category_t::METADATA); + assert(md_writers_by_gen[generation_to_writer(gen)]); + addr = md_writers_by_gen[ + generation_to_writer(gen)]->alloc_paddr(length); + } + return {addr, + std::move(bp), + gen}; + }; if (!is_logical_type(type)) { // TODO: implement out-of-line strategy for physical extent. - return {make_record_relative_paddr(0), - std::move(bp), - INLINE_GENERATION}; + assert(get_extent_category(type) == data_category_t::METADATA); + return alloc_paddr(INLINE_GENERATION, data_category_t::METADATA, length); } if (hint == placement_hint_t::COLD) { assert(gen == INIT_GENERATION); - return {make_delayed_temp_paddr(0), - std::move(bp), - MIN_REWRITE_GENERATION}; + return alloc_paddr(MIN_REWRITE_GENERATION, get_extent_category(type), length); } if (get_extent_category(type) == data_category_t::METADATA && gen == INIT_GENERATION) { if (prefer_ool) { - return {make_delayed_temp_paddr(0), - std::move(bp), - OOL_GENERATION}; + return alloc_paddr(OOL_GENERATION, get_extent_category(type), length); } else { // default not to ool metadata extents to reduce padding overhead. // TODO: improve padding so we can default to the prefer_ool path. 
- return {make_record_relative_paddr(0), - std::move(bp), - INLINE_GENERATION}; + return alloc_paddr(INLINE_GENERATION, get_extent_category(type), length); } } else { assert(get_extent_category(type) == data_category_t::DATA || @@ -223,9 +248,7 @@ public: } else if (gen == INIT_GENERATION) { gen = OOL_GENERATION; } - return {make_delayed_temp_paddr(0), - std::move(bp), - gen}; + return alloc_paddr(gen, get_extent_category(type), length); } } diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h index 056bc9a1bcf7..9829707a03e6 100644 --- a/src/crimson/os/seastore/random_block_manager.h +++ b/src/crimson/os/seastore/random_block_manager.h @@ -84,7 +84,10 @@ public: >; using allocate_ret = allocate_ertr::future<paddr_t>; // allocator, return start addr of allocated blocks - virtual allocate_ret alloc_extent(Transaction &t, size_t size) = 0; + virtual paddr_t alloc_extent(size_t size) = 0; + + virtual void mark_space_used(paddr_t paddr, size_t len) = 0; + virtual void mark_space_free(paddr_t paddr, size_t len) = 0; using abort_allocation_ertr = crimson::errorator< crimson::ct_error::input_output_error, @@ -106,6 +109,7 @@ public: virtual device_id_t get_device_id() const = 0; virtual const seastore_meta_t &get_meta() const = 0; virtual Device* get_device() = 0; + virtual paddr_t get_start() = 0; virtual ~RandomBlockManager() {} }; using RandomBlockManagerRef = std::unique_ptr<RandomBlockManager>; diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index 65af58d01f11..54fdb5e593a8 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -46,24 +46,20 @@ device_config_t get_rbm_ephemeral_device_config( secondary_devices}; } -/* TODO : block allocator */ -BlockRBManager::allocate_ret BlockRBManager::alloc_extent( - Transaction &t, size_t size) +paddr_t 
BlockRBManager::alloc_extent(size_t size) { - - /* - * 1. find free blocks using block allocator - * 2. add free blocks to transaction - * (the free block is reserved state, not stored) - * 3. link free blocks to onode - * Due to in-memory block allocator is the next work to do, - * just read the block bitmap directly to find free blocks. - * - */ - // TODO: block allocation using in-memory block allocator - return allocate_ret( - allocate_ertr::ready_future_marker{}, - paddr_t{}); + LOG_PREFIX(BlockRBManager::alloc_extent); + assert(allocator); + auto alloc = allocator->alloc_extent(size); + ceph_assert((*alloc).num_intervals() == 1); + auto extent = (*alloc).begin(); + ceph_assert(size == extent.get_len()); + paddr_t paddr = convert_abs_addr_to_paddr( + extent.get_start(), + device->get_device_id()); + DEBUG("allocated addr: {}, size: {}, requested size: {}", + paddr, extent.get_len(), size); + return paddr; } @@ -84,9 +80,16 @@ BlockRBManager::write_ertr::future<> BlockRBManager::complete_allocation( BlockRBManager::open_ertr::future<> BlockRBManager::open() { + assert(device); return device->read_rbm_header(RBM_START_ADDRESS - ).safe_then([](auto s) + ).safe_then([this](auto s) -> open_ertr::future<> { + auto ool_start = device->get_journal_start() + device->get_journal_size(); + allocator->init( + ool_start, + device->get_available_size() - + ool_start, + device->get_block_size()); return open_ertr::now(); }).handle_error( open_ertr::pass_further{}, @@ -137,10 +140,10 @@ BlockRBManager::read_ertr::future<> BlockRBManager::read( BlockRBManager::close_ertr::future<> BlockRBManager::close() { ceph_assert(device); + allocator->close(); return device->close(); } - BlockRBManager::write_ertr::future<> BlockRBManager::write( rbm_abs_addr addr, bufferlist &bl) diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h index 210de43c59ab..ecb358ed50c0 100644 --- 
a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h @@ -21,6 +21,7 @@ #include "crimson/common/layout.h" #include "include/buffer.h" #include "include/uuid.h" +#include "avlallocator.h" namespace crimson::os::seastore { @@ -56,8 +57,7 @@ public: * TODO: multiple allocation * */ - allocate_ret alloc_extent( - Transaction &t, size_t size) final; // allocator, return blocks + paddr_t alloc_extent(size_t size) final; // allocator, return blocks abort_allocation_ertr::future<> abort_allocation(Transaction &t) final; write_ertr::future<> complete_allocation(Transaction &t) final; @@ -71,7 +71,9 @@ public: * support such case. */ BlockRBManager(RBMDevice * device, std::string path) - : device(device), path(path) {} + : device(device), path(path) { + allocator.reset(new AvlAllocator); + } write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl); @@ -92,12 +94,34 @@ public: return device; } + void mark_space_used(paddr_t paddr, size_t len) final { + assert(allocator); + rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); + assert(addr >= device->get_journal_size() + device->get_journal_start() && + addr + len <= device->get_available_size()); + allocator->mark_extent_used(addr, len); + } + + void mark_space_free(paddr_t paddr, size_t len) final { + assert(allocator); + rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); + assert(addr >= device->get_journal_size() + device->get_journal_start() && + addr + len <= device->get_available_size()); + allocator->free_extent(addr, len); + } + + paddr_t get_start() final { + return convert_abs_addr_to_paddr( + device->get_journal_start() + device->get_journal_size(), + device->get_device_id()); + } + private: /* * this contains the number of bitmap blocks, free blocks and * rbm specific information */ - //FreelistManager free_manager; // TODO: block management + ExtentAllocatorRef allocator; RBMDevice * device; std::string path; int 
stream_id; // for multi-stream diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index d89feb2b390f..eef9be98719b 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -157,6 +157,10 @@ public: delayed_temp_offset += ref->get_length(); delayed_alloc_list.emplace_back(ref->cast<LogicalCachedExtent>()); fresh_block_stats.increment(ref->get_length()); + } else if (ref->get_paddr().is_absolute()) { + assert(ref->is_logical()); + delayed_alloc_list.emplace_back(ref->cast<LogicalCachedExtent>()); + fresh_block_stats.increment(ref->get_length()); } else { assert(ref->get_paddr() == make_record_relative_paddr(0)); ref->set_paddr(make_record_relative_paddr(offset)); @@ -175,6 +179,7 @@ public: void mark_delayed_extent_inline(LogicalCachedExtentRef& ref) { write_set.erase(*ref); + assert(ref->get_paddr().is_delayed()); ref->set_paddr(make_record_relative_paddr(offset)); offset += ref->get_length(); inline_block_list.push_back(ref); @@ -183,6 +188,7 @@ public: void mark_delayed_extent_ool(LogicalCachedExtentRef& ref, paddr_t final_addr) { write_set.erase(*ref); + assert(ref->get_paddr().is_delayed()); ref->set_paddr(final_addr); assert(!ref->get_paddr().is_null()); assert(!ref->is_inline());