From: myoungwon oh Date: Sat, 29 Oct 2022 02:44:04 +0000 (+0900) Subject: crimson/os/seastore/rbm: add RBMSpaceTracker to track allocated space X-Git-Tag: v18.1.0~794^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=458a0771f26b8aef531c7bfd0874e98974ad3238;p=ceph.git crimson/os/seastore/rbm: add RBMSpaceTracker to track allocated space Signed-off-by: Myoungwon Oh --- diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index 10c6a9dd222d..0cf7ac69e770 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -1561,13 +1561,16 @@ void RBMCleaner::mark_space_used( paddr_t addr, extent_len_t len) { + LOG_PREFIX(RBMCleaner::mark_space_used); assert(addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK); auto rbms = rb_group->get_rb_managers(); for (auto rbm : rbms) { if (addr.get_device_id() == rbm->get_device_id()) { if (rbm->get_start() <= addr) { + INFO("allocate addr: {} len: {}", addr, len); rbm->mark_space_used(addr, len); } + return; } } } @@ -1576,13 +1579,16 @@ void RBMCleaner::mark_space_free( paddr_t addr, extent_len_t len) { + LOG_PREFIX(RBMCleaner::mark_space_free); assert(addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK); auto rbms = rb_group->get_rb_managers(); for (auto rbm : rbms) { if (addr.get_device_id() == rbm->get_device_id()) { if (rbm->get_start() <= addr) { - return rbm->mark_space_free(addr, len); + INFO("free addr: {} len: {}", addr, len); + rbm->mark_space_free(addr, len); } + return; } } } @@ -1640,4 +1646,80 @@ RBMCleaner::mount_ret RBMCleaner::mount() }); } +bool RBMCleaner::check_usage() +{ + assert(detailed); + const auto& rbms = rb_group->get_rb_managers(); + RBMSpaceTracker tracker(rbms); + extent_callback->with_transaction_weak( + "check_usage", + [this, &tracker, &rbms](auto &t) { + return backref_manager.scan_mapped_space( + t, + [&tracker, &rbms]( + paddr_t paddr, + extent_len_t len, + extent_types_t 
type, + laddr_t laddr) + { + for (auto rbm : rbms) { + if (rbm->get_device_id() == paddr.get_device_id()) { + if (is_backref_node(type)) { + assert(laddr == L_ADDR_NULL); + tracker.allocate( + paddr, + len); + } else if (laddr == L_ADDR_NULL) { + tracker.release( + paddr, + len); + } else { + tracker.allocate( + paddr, + len); + } + } + } + }); + }).unsafe_get0(); + return equals(tracker); +} + +bool RBMCleaner::equals(const RBMSpaceTracker &_other) const +{ + LOG_PREFIX(RBMSpaceTracker::equals); + const auto &other = static_cast(_other); + auto rbs = rb_group->get_rb_managers(); + //TODO: multiple rbm allocator + auto rbm = rbs[0]; + assert(rbm); + + if (rbm->get_device()->get_available_size() / rbm->get_block_size() + != other.block_usage.size()) { + assert(0 == "block counts should match"); + return false; + } + bool all_match = true; + for (auto i = other.block_usage.begin(); + i != other.block_usage.end(); ++i) { + if (i->first < rbm->get_start().as_blk_paddr().get_device_off()) { + continue; + } + auto addr = i->first; + auto state = rbm->get_extent_state( + convert_abs_addr_to_paddr(addr, rbm->get_device_id()), + rbm->get_block_size()); + if ((i->second.used && state == rbm_extent_state_t::ALLOCATED) || + (!i->second.used && (state == rbm_extent_state_t::FREE || + state == rbm_extent_state_t::RESERVED))) { + // pass + } else { + all_match = false; + ERROR("block addr {} mismatch other used: {}", + addr, i->second.used); + } + } + return all_match; +} + } diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index b6cab8d60017..c7b6f9fb531d 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -820,6 +820,247 @@ public: bool equals(const SpaceTrackerI &other) const; }; +template +class block_map_t { +public: + block_map_t() { + device_to_blocks.resize(DEVICE_ID_MAX_VALID); + device_block_size.resize(DEVICE_ID_MAX_VALID); + } + void add_device(device_id_t device, 
std::size_t blocks, const T& init, + size_t block_size) { + ceph_assert(device <= DEVICE_ID_MAX_VALID); + ceph_assert(device_to_blocks[device].size() == 0); + ceph_assert(blocks > 0); + device_to_blocks[device].resize(blocks, init); + total_blocks += blocks; + device_block_size[device] = block_size; + } + void clear() { + device_to_blocks.clear(); + device_to_blocks.resize(DEVICE_ID_MAX_VALID); + total_blocks = 0; + } + + T& operator[](paddr_t block) { + ceph_assert(device_to_blocks[block.get_device_id()].size() != 0); + auto &blk = block.as_blk_paddr(); + auto block_id = get_block_id(block.get_device_id(), blk.get_device_off()); + return device_to_blocks[block.get_device_id()][block_id]; + } + const T& operator[](paddr_t block) const { + ceph_assert(device_to_blocks[block.get_device_id()].size() != 0); + auto &blk = block.as_blk_paddr(); + auto block_id = get_block_id(block.get_device_id(), blk.get_device_off()); + return device_to_blocks[block.get_device_id()][block_id]; + } + + auto begin() { + return iterator::lower_bound(*this, 0, 0); + } + auto begin() const { + return iterator::lower_bound(*this, 0, 0); + } + + auto end() { + return iterator::end_iterator(*this); + } + auto end() const { + return iterator::end_iterator(*this); + } + + size_t size() const { + return total_blocks; + } + + uint64_t get_block_size(device_id_t device_id) { + return device_block_size[device_id]; + } + + uint32_t get_block_id(device_id_t device_id, device_off_t blk_off) const { + auto block_size = device_block_size[device_id]; + return blk_off == 0 ? 
0 : blk_off/block_size; + } + + template + class iterator { + /// points at set being iterated over + std::conditional_t< + is_const, + const block_map_t &, + block_map_t &> parent; + + /// points at current device, or DEVICE_ID_MAX_VALID if is_end() + device_id_t device_id; + + /// block offset at which we are pointing, 0 if is_end() + device_off_t blk_off; + + /// holds referent for operator* and operator-> when !is_end() + std::optional< + std::pair< + const device_off_t, + std::conditional_t + >> current; + + bool is_end() const { + return device_id == DEVICE_ID_MAX_VALID; + } + + uint32_t get_block_id() { + return parent.get_block_id(device_id, blk_off); + } + + void find_valid() { + assert(!is_end()); + auto &device_vec = parent.device_to_blocks[device_id]; + if (device_vec.size() == 0 || + get_block_id() == device_vec.size()) { + while (++device_id < DEVICE_ID_MAX_VALID&& + parent.device_to_blocks[device_id].size() == 0); + blk_off = 0; + } + if (is_end()) { + current = std::nullopt; + } else { + current.emplace( + blk_off, + parent.device_to_blocks[device_id][get_block_id()] + ); + } + } + + iterator( + decltype(parent) &parent, + device_id_t device_id, + device_off_t device_block_off) + : parent(parent), device_id(device_id), + blk_off(device_block_off) {} + + public: + static iterator lower_bound( + decltype(parent) &parent, + device_id_t device_id, + device_off_t block_off) { + if (device_id == DEVICE_ID_MAX_VALID) { + return end_iterator(parent); + } else { + auto ret = iterator{parent, device_id, block_off}; + ret.find_valid(); + return ret; + } + } + + static iterator end_iterator( + decltype(parent) &parent) { + return iterator{parent, DEVICE_ID_MAX_VALID, 0}; + } + + iterator& operator++() { + assert(!is_end()); + auto block_size = parent.device_block_size[device_id]; + blk_off += block_size; + find_valid(); + return *this; + } + + bool operator==(iterator rit) { + return (device_id == rit.device_id && + blk_off == rit.blk_off); + } + + bool 
operator!=(iterator rit) { + return !(*this == rit); + } + template = 0> + const std::pair *operator->() { + assert(!is_end()); + return &*current; + } + template = 0> + std::pair *operator->() { + assert(!is_end()); + return &*current; + } + template = 0> + const std::pair &operator*() { + assert(!is_end()); + return *current; + } + template = 0> + std::pair &operator*() { + assert(!is_end()); + return *current; + } + }; + std::vector> device_to_blocks; + std::vector device_block_size; + size_t total_blocks = 0; +}; + +class RBMSpaceTracker { + struct random_block_t { + bool used = false; + void allocate() { + used = true; + } + void release() { + used = false; + } + }; + block_map_t block_usage; + +public: + RBMSpaceTracker(const RBMSpaceTracker &) = default; + RBMSpaceTracker(const std::vector &rbms) { + for (auto rbm : rbms) { + block_usage.add_device( + rbm->get_device_id(), + rbm->get_device()->get_available_size() / rbm->get_block_size(), + {false}, + rbm->get_block_size()); + } + } + + void allocate( + paddr_t addr, + extent_len_t len) { + paddr_t cursor = addr; + paddr_t end = addr.add_offset(len); + do { + block_usage[cursor].allocate(); + cursor = cursor.add_offset( + block_usage.get_block_size(addr.get_device_id())); + } while (cursor < end); + } + + void release( + paddr_t addr, + extent_len_t len) { + paddr_t cursor = addr; + paddr_t end = addr.add_offset(len); + do { + block_usage[cursor].release(); + cursor = cursor.add_offset( + block_usage.get_block_size(addr.get_device_id())); + } while (cursor < end); + } + + void reset() { + for (auto &i : block_usage) { + i.second = {false}; + } + } + + std::unique_ptr make_empty() const { + auto ret = std::make_unique(*this); + ret->reset(); + return ret; + } + friend class RBMCleaner; +}; +using RBMSpaceTrackerRef = std::unique_ptr; + /* * AsyncCleaner * @@ -1373,10 +1614,7 @@ public: // Testing interfaces - bool check_usage() final { - // TODO - return true; - } + bool check_usage() final; bool 
check_usage_is_empty() const final { // TODO @@ -1384,11 +1622,12 @@ public: } private: + bool equals(const RBMSpaceTracker &other) const; + const bool detailed; RBMDeviceGroupRef rb_group; BackrefManager &backref_manager; - struct { /** * used_bytes diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h index 9829707a03e6..1e11abdae7e8 100644 --- a/src/crimson/os/seastore/random_block_manager.h +++ b/src/crimson/os/seastore/random_block_manager.h @@ -44,6 +44,12 @@ struct rbm_metadata_header_t { }; +enum class rbm_extent_state_t { + FREE, // not allocated + RESERVED, // extent is reserved by alloc_new_extent, but is not persistent + ALLOCATED, // extent is persistent +}; + class Device; using rbm_abs_addr = uint64_t; constexpr rbm_abs_addr RBM_START_ADDRESS = 0; @@ -95,13 +101,7 @@ public: >; virtual abort_allocation_ertr::future<> abort_allocation(Transaction &t) = 0; - using complete_allocation_ertr = crimson::errorator< - crimson::ct_error::input_output_error, - crimson::ct_error::invarg, - crimson::ct_error::enoent, - crimson::ct_error::erange - >; - virtual write_ertr::future<> complete_allocation(Transaction &t) = 0; + virtual void complete_allocation(paddr_t addr, size_t size) = 0; virtual size_t get_size() const = 0; virtual extent_len_t get_block_size() const = 0; @@ -110,6 +110,7 @@ public: virtual const seastore_meta_t &get_meta() const = 0; virtual Device* get_device() = 0; virtual paddr_t get_start() = 0; + virtual rbm_extent_state_t get_extent_state(paddr_t addr, size_t size) = 0; virtual ~RandomBlockManager() {} }; using RandomBlockManagerRef = std::unique_ptr; diff --git a/src/crimson/os/seastore/random_block_manager/avlallocator.cc b/src/crimson/os/seastore/random_block_manager/avlallocator.cc index 699687c0fa35..28137a23d798 100644 --- a/src/crimson/os/seastore/random_block_manager/avlallocator.cc +++ b/src/crimson/os/seastore/random_block_manager/avlallocator.cc @@ -166,6 +166,10 @@ 
std::optional> AvlAllocator::alloc_extent( assert(result.num_intervals() == 1); for (auto p : result) { INFO("result start: {}, end: {}", p.first, p.first + p.second); + if (detailed) { + assert(!reserved_extent_tracker.contains(p.first, p.second)); + reserved_extent_tracker.insert(p.first, p.second); + } } return result; } @@ -175,5 +179,23 @@ void AvlAllocator::free_extent(rbm_abs_addr addr, size_t size) assert(total_size); assert(total_size > available_size); _add_to_tree(addr, size); + if (detailed && reserved_extent_tracker.contains(addr, size)) { + reserved_extent_tracker.erase(addr, size); + } +} + +bool AvlAllocator::is_free_extent(rbm_abs_addr start, size_t size) +{ + rbm_abs_addr end = start + size; + ceph_assert(size != 0); + if (start < base_addr || base_addr + total_size < end) { + return false; + } + + auto rs = extent_tree.find(extent_range_t{start, end}, extent_tree.key_comp()); + if (rs != extent_tree.end() && rs->start <= start && rs->end >= end) { + return true; + } + return false; } } diff --git a/src/crimson/os/seastore/random_block_manager/avlallocator.h b/src/crimson/os/seastore/random_block_manager/avlallocator.h index d6d2e95c589a..d1a4fabca5a4 100644 --- a/src/crimson/os/seastore/random_block_manager/avlallocator.h +++ b/src/crimson/os/seastore/random_block_manager/avlallocator.h @@ -60,8 +60,8 @@ struct extent_range_t { */ class AvlAllocator : public ExtentAllocator { public: - AvlAllocator(uint64_t block_size = 0, uint64_t available_size = 0) : - block_size(block_size), available_size(available_size) {} + AvlAllocator(bool detailed) : + detailed(detailed) {} std::optional> alloc_extent( size_t size) final; @@ -81,6 +81,9 @@ public: } void close() { + if (!detailed) { + assert(reserved_extent_tracker.size() == 0); + } extent_size_tree.clear(); extent_tree.clear_and_dispose(dispose_rs{}); total_size = 0; @@ -97,6 +100,31 @@ public: return max_alloc_size; } + bool is_free_extent(rbm_abs_addr start, size_t size); + + void 
complete_allocation(rbm_abs_addr start, size_t size) final { + if (detailed) { + assert(reserved_extent_tracker.contains(start, size)); + reserved_extent_tracker.erase(start, size); + } + } + + bool is_reserved_extent(rbm_abs_addr start, size_t size) { + if (detailed) { + return reserved_extent_tracker.contains(start, size); + } + return false; + } + + rbm_extent_state_t get_extent_state(rbm_abs_addr addr, size_t size) final { + if (is_reserved_extent(addr, size)) { + return rbm_extent_state_t::RESERVED; + } else if (is_free_extent(addr, size)) { + return rbm_extent_state_t::FREE; + } + return rbm_extent_state_t::ALLOCATED; + } + private: void _add_to_tree(rbm_abs_addr start, size_t size); @@ -139,6 +167,8 @@ private: uint64_t total_size = 0; uint64_t base_addr = 0; uint64_t max_alloc_size = 4 << 20; + bool detailed; + interval_set reserved_extent_tracker; }; } diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index 54fdb5e593a8..78bacf28763d 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -72,10 +72,12 @@ BlockRBManager::abort_allocation_ertr::future<> BlockRBManager::abort_allocation return abort_allocation_ertr::now(); } -BlockRBManager::write_ertr::future<> BlockRBManager::complete_allocation( - Transaction &t) +void BlockRBManager::complete_allocation( + paddr_t paddr, size_t size) { - return write_ertr::now(); + assert(allocator); + rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); + allocator->complete_allocation(addr, size); } BlockRBManager::open_ertr::future<> BlockRBManager::open() @@ -84,7 +86,7 @@ BlockRBManager::open_ertr::future<> BlockRBManager::open() return device->read_rbm_header(RBM_START_ADDRESS ).safe_then([this](auto s) -> open_ertr::future<> { - auto ool_start = device->get_journal_start() + device->get_journal_size(); + auto ool_start = 
get_start_rbm_addr(); allocator->init( ool_start, device->get_available_size() - diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h index ecb358ed50c0..3bfc78dce60f 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h @@ -60,9 +60,14 @@ public: paddr_t alloc_extent(size_t size) final; // allocator, return blocks abort_allocation_ertr::future<> abort_allocation(Transaction &t) final; - write_ertr::future<> complete_allocation(Transaction &t) final; + void complete_allocation(paddr_t addr, size_t size) final; - size_t get_size() const final { return device->get_available_size(); }; + size_t get_start_rbm_addr() const { + return device->get_journal_start() + device->get_journal_size(); + } + size_t get_size() const final { + return device->get_available_size() - get_start_rbm_addr(); + }; extent_len_t get_block_size() const final { return device->get_block_size(); } /* @@ -70,9 +75,9 @@ public: * on a device, so start and end location of the device are needed to * support such case. 
*/ - BlockRBManager(RBMDevice * device, std::string path) + BlockRBManager(RBMDevice * device, std::string path, bool detailed) : device(device), path(path) { - allocator.reset(new AvlAllocator); + allocator.reset(new AvlAllocator(detailed)); } write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl); @@ -97,7 +102,7 @@ public: void mark_space_used(paddr_t paddr, size_t len) final { assert(allocator); rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); - assert(addr >= device->get_journal_size() + device->get_journal_start() && + assert(addr >= get_start_rbm_addr() && addr + len <= device->get_available_size()); allocator->mark_extent_used(addr, len); } @@ -105,17 +110,25 @@ public: void mark_space_free(paddr_t paddr, size_t len) final { assert(allocator); rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); - assert(addr >= device->get_journal_size() + device->get_journal_start() && + assert(addr >= get_start_rbm_addr() && addr + len <= device->get_available_size()); allocator->free_extent(addr, len); } paddr_t get_start() final { return convert_abs_addr_to_paddr( - device->get_journal_start() + device->get_journal_size(), + get_start_rbm_addr(), device->get_device_id()); } + rbm_extent_state_t get_extent_state(paddr_t paddr, size_t size) final { + assert(allocator); + rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); + assert(addr >= get_start_rbm_addr() && + addr + size <= device->get_available_size()); + return allocator->get_extent_state(addr, size); + } + private: /* * this contains the number of bitmap blocks, free blocks and diff --git a/src/crimson/os/seastore/random_block_manager/extent_allocator.h b/src/crimson/os/seastore/random_block_manager/extent_allocator.h index 82d84a7af14c..8a3e62c6d050 100644 --- a/src/crimson/os/seastore/random_block_manager/extent_allocator.h +++ b/src/crimson/os/seastore/random_block_manager/extent_allocator.h @@ -57,6 +57,16 @@ public: virtual uint64_t get_available_size() const = 0; virtual uint64_t 
get_max_alloc_size() const = 0; virtual void close() = 0; + /** + * complete_allocation + * + * This changes this extent state from RESERVED to ALLOCATED + * + * @param start address + * @param size + */ + virtual void complete_allocation(rbm_abs_addr start, size_t size) = 0; + virtual rbm_extent_state_t get_extent_state(rbm_abs_addr addr, size_t size) = 0; virtual ~ExtentAllocator() {} }; using ExtentAllocatorRef = std::unique_ptr; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index cc11ea9ee4ee..7ac6295ee2de 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -647,7 +647,7 @@ TransactionManagerRef make_transaction_manager( sms->add_segment_manager(static_cast(primary_device)); } else { auto rbm = std::make_unique( - static_cast(primary_device), ""); + static_cast(primary_device), "", is_test); rbs->add_rb_manager(std::move(rbm)); } @@ -655,7 +655,8 @@ TransactionManagerRef make_transaction_manager( if (p_dev->get_backend_type() == backend_type_t::SEGMENTED) { sms->add_segment_manager(static_cast(p_dev)); } else { - auto rbm = std::make_unique(static_cast(p_dev), ""); + auto rbm = std::make_unique( + static_cast(p_dev), "", is_test); rbs->add_rb_manager(std::move(rbm)); } } diff --git a/src/test/crimson/seastore/test_extent_allocator.cc b/src/test/crimson/seastore/test_extent_allocator.cc index 6bda48257419..8217e5a660f1 100644 --- a/src/test/crimson/seastore/test_extent_allocator.cc +++ b/src/test/crimson/seastore/test_extent_allocator.cc @@ -35,7 +35,7 @@ struct allocator_test_t : seastar::future<> set_up_fut() final { std::string a_type = GetParam(); if (a_type == "avl") { - allocator.reset(new AvlAllocator); + allocator.reset(new AvlAllocator(false)); return seastar::now(); } ceph_assert(0 == "no support"); diff --git a/src/test/crimson/seastore/test_randomblock_manager.cc b/src/test/crimson/seastore/test_randomblock_manager.cc index 
58ec88a4ba2a..efb1fb4b790d 100644 --- a/src/test/crimson/seastore/test_randomblock_manager.cc +++ b/src/test/crimson/seastore/test_randomblock_manager.cc @@ -55,7 +55,7 @@ struct rbm_test_t : 0, DEFAULT_TEST_SIZE); block_size = device->get_block_size(); size = device->get_available_size(); - rbm_manager.reset(new BlockRBManager(device.get(), std::string())); + rbm_manager.reset(new BlockRBManager(device.get(), std::string(), false)); config = get_rbm_ephemeral_device_config(0, 1); return device->mount().handle_error(crimson::ct_error::assert_all{} ).then([this] { diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 5bbd2e57daa5..0e88aa355758 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -399,11 +399,6 @@ struct transaction_manager_test_t : } bool check_usage() { - std::string j_type = GetParam(); - if (j_type == "circularbounded") { - // TODO: add check_usage for RBM - return true; - } return epm->check_usage(); }