paddr_t addr,
extent_len_t len)
{
+ LOG_PREFIX(RBMCleaner::mark_space_used);
assert(addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK);
auto rbms = rb_group->get_rb_managers();
for (auto rbm : rbms) {
if (addr.get_device_id() == rbm->get_device_id()) {
if (rbm->get_start() <= addr) {
+ INFO("allocate addr: {} len: {}", addr, len);
rbm->mark_space_used(addr, len);
}
+ return;
}
}
}
paddr_t addr,
extent_len_t len)
{
+ LOG_PREFIX(RBMCleaner::mark_space_free);
assert(addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK);
auto rbms = rb_group->get_rb_managers();
for (auto rbm : rbms) {
if (addr.get_device_id() == rbm->get_device_id()) {
if (rbm->get_start() <= addr) {
- return rbm->mark_space_free(addr, len);
+ INFO("free addr: {} len: {}", addr, len);
+ rbm->mark_space_free(addr, len);
}
+ return;
}
}
}
});
}
+bool RBMCleaner::check_usage()
+{
+ assert(detailed);
+ const auto& rbms = rb_group->get_rb_managers();
+ RBMSpaceTracker tracker(rbms);
+ extent_callback->with_transaction_weak(
+ "check_usage",
+ [this, &tracker, &rbms](auto &t) {
+ return backref_manager.scan_mapped_space(
+ t,
+ [&tracker, &rbms](
+ paddr_t paddr,
+ extent_len_t len,
+ extent_types_t type,
+ laddr_t laddr)
+ {
+ for (auto rbm : rbms) {
+ if (rbm->get_device_id() == paddr.get_device_id()) {
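+ // scan_mapped_space reports three kinds of entries: backref tree
+ // nodes (live, but with no logical address), retired extents
+ // (laddr == L_ADDR_NULL), and live logical extents; replay each
+ // into the tracker accordingly.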
+ if (is_backref_node(type)) {
+ assert(laddr == L_ADDR_NULL);
+ tracker.allocate(
+ paddr,
+ len);
+ } else if (laddr == L_ADDR_NULL) {
+ tracker.release(
+ paddr,
+ len);
+ } else {
+ tracker.allocate(
+ paddr,
+ len);
+ }
+ }
+ }
+ });
+ }).unsafe_get0();
+ return equals(tracker);
+}
+
+bool RBMCleaner::equals(const RBMSpaceTracker &other) const
+{
+ LOG_PREFIX(RBMCleaner::equals);
+ auto rbs = rb_group->get_rb_managers();
+ // TODO: support multiple RBM allocators
+ auto rbm = rbs[0];
+ assert(rbm);
+
+ if (rbm->get_device()->get_available_size() / rbm->get_block_size()
+ != other.block_usage.size()) {
+ assert(0 == "block counts should match");
+ return false;
+ }
+ bool all_match = true;
+ for (auto i = other.block_usage.begin();
+ i != other.block_usage.end(); ++i) {
+ if (i->first < rbm->get_start().as_blk_paddr().get_device_off()) {
+ continue;
+ }
+ auto addr = i->first;
+ auto state = rbm->get_extent_state(
+ convert_abs_addr_to_paddr(addr, rbm->get_device_id()),
+ rbm->get_block_size());
+ if ((i->second.used && state == rbm_extent_state_t::ALLOCATED) ||
+ (!i->second.used && (state == rbm_extent_state_t::FREE ||
+ state == rbm_extent_state_t::RESERVED))) {
+ // pass
+ } else {
+ all_match = false;
+ ERROR("block addr {} mismatch other used: {}",
+ addr, i->second.used);
+ }
+ }
+ return all_match;
+}
+
}
bool equals(const SpaceTrackerI &other) const;
};
+template <typename T>
+class block_map_t {
+public:
+ block_map_t() {
+ device_to_blocks.resize(DEVICE_ID_MAX_VALID);
+ device_block_size.resize(DEVICE_ID_MAX_VALID);
+ }
+ void add_device(device_id_t device, std::size_t blocks, const T& init,
+ size_t block_size) {
+ ceph_assert(device < DEVICE_ID_MAX_VALID);
+ ceph_assert(device_to_blocks[device].size() == 0);
+ ceph_assert(blocks > 0);
+ device_to_blocks[device].resize(blocks, init);
+ total_blocks += blocks;
+ device_block_size[device] = block_size;
+ }
+ void clear() {
+ device_to_blocks.clear();
+ device_to_blocks.resize(DEVICE_ID_MAX_VALID);
+ device_block_size.clear();
+ device_block_size.resize(DEVICE_ID_MAX_VALID);
+ total_blocks = 0;
+ }
+
+ T& operator[](paddr_t block) {
+ ceph_assert(device_to_blocks[block.get_device_id()].size() != 0);
+ auto &blk = block.as_blk_paddr();
+ auto block_id = get_block_id(block.get_device_id(), blk.get_device_off());
+ return device_to_blocks[block.get_device_id()][block_id];
+ }
+ const T& operator[](paddr_t block) const {
+ ceph_assert(device_to_blocks[block.get_device_id()].size() != 0);
+ auto &blk = block.as_blk_paddr();
+ auto block_id = get_block_id(block.get_device_id(), blk.get_device_off());
+ return device_to_blocks[block.get_device_id()][block_id];
+ }
+
+ auto begin() {
+ return iterator<false>::lower_bound(*this, 0, 0);
+ }
+ auto begin() const {
+ return iterator<true>::lower_bound(*this, 0, 0);
+ }
+
+ auto end() {
+ return iterator<false>::end_iterator(*this);
+ }
+ auto end() const {
+ return iterator<true>::end_iterator(*this);
+ }
+
+ size_t size() const {
+ return total_blocks;
+ }
+
+ uint64_t get_block_size(device_id_t device_id) const {
+ return device_block_size[device_id];
+ }
+
+ uint32_t get_block_id(device_id_t device_id, device_off_t blk_off) const {
+ auto block_size = device_block_size[device_id];
+ return blk_off / block_size;
+ }
+
+ template <bool is_const = false>
+ class iterator {
+ /// points at set being iterated over
+ std::conditional_t<
+ is_const,
+ const block_map_t &,
+ block_map_t &> parent;
+
+ /// points at current device, or DEVICE_ID_MAX_VALID if is_end()
+ device_id_t device_id;
+
+ /// block offset at which we are pointing, 0 if is_end()
+ device_off_t blk_off;
+
+ /// holds referent for operator* and operator-> when !is_end()
+ std::optional<
+ std::pair<
+ const device_off_t,
+ std::conditional_t<is_const, const T&, T&>
+ >> current;
+
+ bool is_end() const {
+ return device_id == DEVICE_ID_MAX_VALID;
+ }
+
+ uint32_t get_block_id() {
+ return parent.get_block_id(device_id, blk_off);
+ }
+
+ void find_valid() {
+ assert(!is_end());
+ auto &device_vec = parent.device_to_blocks[device_id];
+ if (device_vec.size() == 0 ||
+ get_block_id() == device_vec.size()) {
+ while (++device_id < DEVICE_ID_MAX_VALID &&
+ parent.device_to_blocks[device_id].size() == 0);
+ blk_off = 0;
+ }
+ if (is_end()) {
+ current = std::nullopt;
+ } else {
+ current.emplace(
+ blk_off,
+ parent.device_to_blocks[device_id][get_block_id()]
+ );
+ }
+ }
+
+ iterator(
+ decltype(parent) &parent,
+ device_id_t device_id,
+ device_off_t device_block_off)
+ : parent(parent), device_id(device_id),
+ blk_off(device_block_off) {}
+
+ public:
+ static iterator lower_bound(
+ decltype(parent) &parent,
+ device_id_t device_id,
+ device_off_t block_off) {
+ if (device_id == DEVICE_ID_MAX_VALID) {
+ return end_iterator(parent);
+ } else {
+ auto ret = iterator{parent, device_id, block_off};
+ ret.find_valid();
+ return ret;
+ }
+ }
+
+ static iterator end_iterator(
+ decltype(parent) &parent) {
+ return iterator{parent, DEVICE_ID_MAX_VALID, 0};
+ }
+
+ iterator<is_const>& operator++() {
+ assert(!is_end());
+ auto block_size = parent.device_block_size[device_id];
+ blk_off += block_size;
+ find_valid();
+ return *this;
+ }
+
+ bool operator==(const iterator<is_const> &rit) const {
+ return (device_id == rit.device_id &&
+ blk_off == rit.blk_off);
+ }
+
+ bool operator!=(const iterator<is_const> &rit) const {
+ return !(*this == rit);
+ }
+ template <bool c = is_const, std::enable_if_t<c, int> = 0>
+ const std::pair<const device_off_t, const T&> *operator->() {
+ assert(!is_end());
+ return &*current;
+ }
+ template <bool c = is_const, std::enable_if_t<!c, int> = 0>
+ std::pair<const device_off_t, T&> *operator->() {
+ assert(!is_end());
+ return &*current;
+ }
+ template <bool c = is_const, std::enable_if_t<c, int> = 0>
+ const std::pair<const device_off_t, const T&> &operator*() {
+ assert(!is_end());
+ return *current;
+ }
+ template <bool c = is_const, std::enable_if_t<!c, int> = 0>
+ std::pair<const device_off_t, T&> &operator*() {
+ assert(!is_end());
+ return *current;
+ }
+ };
+ std::vector<std::vector<T>> device_to_blocks;
+ std::vector<size_t> device_block_size;
+ size_t total_blocks = 0;
+};
+
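+// A minimal usage sketch of block_map_t (illustrative only, not part of
+// this patch): one vector of T per device, indexed by
+// device_off / block_size; convert_abs_addr_to_paddr() is assumed from
+// the surrounding RBM code.
+//
+//   block_map_t<bool> used;
+//   used.add_device(0 /* device_id */, 4 /* blocks */, false, 4096);
+//   paddr_t p = convert_abs_addr_to_paddr(4096, 0); // second block
+//   used[p] = true;                                 // mark it in use
+//   assert(used.get_block_id(0, 4096) == 1);
+//   for (auto &i : used) {
+//     // i.first: device offset, i.second: per-block value of type T
+//   }
+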
+class RBMSpaceTracker {
+ struct random_block_t {
+ bool used = false;
+ void allocate() {
+ used = true;
+ }
+ void release() {
+ used = false;
+ }
+ };
+ block_map_t<random_block_t> block_usage;
+
+public:
+ RBMSpaceTracker(const RBMSpaceTracker &) = default;
+ RBMSpaceTracker(const std::vector<RandomBlockManager*> &rbms) {
+ for (auto rbm : rbms) {
+ block_usage.add_device(
+ rbm->get_device_id(),
+ rbm->get_device()->get_available_size() / rbm->get_block_size(),
+ {false},
+ rbm->get_block_size());
+ }
+ }
+
+ void allocate(
+ paddr_t addr,
+ extent_len_t len) {
+ paddr_t cursor = addr;
+ paddr_t end = addr.add_offset(len);
+ do {
+ block_usage[cursor].allocate();
+ cursor = cursor.add_offset(
+ block_usage.get_block_size(addr.get_device_id()));
+ } while (cursor < end);
+ }
+
+ void release(
+ paddr_t addr,
+ extent_len_t len) {
+ paddr_t cursor = addr;
+ paddr_t end = addr.add_offset(len);
+ do {
+ block_usage[cursor].release();
+ cursor = cursor.add_offset(
+ block_usage.get_block_size(addr.get_device_id()));
+ } while (cursor < end);
+ }
+
+ void reset() {
+ for (auto &i : block_usage) {
+ i.second = {false};
+ }
+ }
+
+ std::unique_ptr<RBMSpaceTracker> make_empty() const {
+ auto ret = std::make_unique<RBMSpaceTracker>(*this);
+ ret->reset();
+ return ret;
+ }
+ friend class RBMCleaner;
+};
+using RBMSpaceTrackerRef = std::unique_ptr<RBMSpaceTracker>;
+
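+// How the tracker is meant to be driven (a sketch of the
+// RBMCleaner::check_usage() flow in this patch; paddr/len come from
+// backref_manager.scan_mapped_space):
+//
+//   RBMSpaceTracker tracker(rb_group->get_rb_managers());
+//   tracker.allocate(paddr, len);       // live extent or backref node
+//   tracker.release(paddr, len);        // retired extent (L_ADDR_NULL)
+//   bool consistent = equals(tracker);  // compare with allocator state
+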
/*
* AsyncCleaner
*
// Testing interfaces
- bool check_usage() final {
- // TODO
- return true;
- }
+ bool check_usage() final;
bool check_usage_is_empty() const final {
// TODO
return true;
}
private:
+ bool equals(const RBMSpaceTracker &other) const;
+
const bool detailed;
RBMDeviceGroupRef rb_group;
BackrefManager &backref_manager;
-
struct {
/**
* used_bytes
};
+enum class rbm_extent_state_t {
+ FREE, // not allocated
+ RESERVED, // extent is reserved by alloc_new_extent, but is not persistent
+ ALLOCATED, // extent is persistent
+};
+
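+// Expected transitions, as wired up in this patch (a sketch; `alloc`
+// stands for the ExtentAllocator owned by a BlockRBManager):
+//
+//   auto r = alloc->alloc_extent(4096);       // FREE      -> RESERVED
+//   alloc->complete_allocation(start, 4096);  // RESERVED  -> ALLOCATED
+//   alloc->free_extent(start, 4096);          // ALLOCATED -> FREE
+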
class Device;
using rbm_abs_addr = uint64_t;
constexpr rbm_abs_addr RBM_START_ADDRESS = 0;
>;
virtual abort_allocation_ertr::future<> abort_allocation(Transaction &t) = 0;
- using complete_allocation_ertr = crimson::errorator<
- crimson::ct_error::input_output_error,
- crimson::ct_error::invarg,
- crimson::ct_error::enoent,
- crimson::ct_error::erange
- >;
- virtual write_ertr::future<> complete_allocation(Transaction &t) = 0;
+ virtual void complete_allocation(paddr_t addr, size_t size) = 0;
virtual size_t get_size() const = 0;
virtual extent_len_t get_block_size() const = 0;
virtual const seastore_meta_t &get_meta() const = 0;
virtual Device* get_device() = 0;
virtual paddr_t get_start() = 0;
+ virtual rbm_extent_state_t get_extent_state(paddr_t addr, size_t size) = 0;
virtual ~RandomBlockManager() {}
};
using RandomBlockManagerRef = std::unique_ptr<RandomBlockManager>;
assert(result.num_intervals() == 1);
for (auto p : result) {
INFO("result start: {}, end: {}", p.first, p.first + p.second);
+ if (detailed) {
+ assert(!reserved_extent_tracker.contains(p.first, p.second));
+ reserved_extent_tracker.insert(p.first, p.second);
+ }
}
return result;
}
assert(total_size);
assert(total_size > available_size);
_add_to_tree(addr, size);
+ if (detailed && reserved_extent_tracker.contains(addr, size)) {
+ reserved_extent_tracker.erase(addr, size);
+ }
+}
+
+bool AvlAllocator::is_free_extent(rbm_abs_addr start, size_t size)
+{
+ rbm_abs_addr end = start + size;
+ ceph_assert(size != 0);
+ if (start < base_addr || base_addr + total_size < end) {
+ return false;
+ }
+
+ auto rs = extent_tree.find(extent_range_t{start, end}, extent_tree.key_comp());
+ if (rs != extent_tree.end() && rs->start <= start && rs->end >= end) {
+ return true;
+ }
+ return false;
}
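+
+// For example, assuming the allocator spans [0, 8192) and that whole
+// range is currently free:
+//   is_free_extent(0, 4096)    -> true   (wholly inside a free range)
+//   is_free_extent(4096, 8192) -> false  (extends beyond the free range)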
}
*/
class AvlAllocator : public ExtentAllocator {
public:
- AvlAllocator(uint64_t block_size = 0, uint64_t available_size = 0) :
- block_size(block_size), available_size(available_size) {}
+ explicit AvlAllocator(bool detailed) :
+ detailed(detailed) {}
std::optional<interval_set<rbm_abs_addr>> alloc_extent(
size_t size) final;
}
void close() {
+ if (!detailed) {
+ assert(reserved_extent_tracker.size() == 0);
+ }
extent_size_tree.clear();
extent_tree.clear_and_dispose(dispose_rs{});
total_size = 0;
return max_alloc_size;
}
+ bool is_free_extent(rbm_abs_addr start, size_t size);
+
+ void complete_allocation(rbm_abs_addr start, size_t size) final {
+ if (detailed) {
+ assert(reserved_extent_tracker.contains(start, size));
+ reserved_extent_tracker.erase(start, size);
+ }
+ }
+
+ bool is_reserved_extent(rbm_abs_addr start, size_t size) {
+ if (detailed) {
+ return reserved_extent_tracker.contains(start, size);
+ }
+ return false;
+ }
+
+ rbm_extent_state_t get_extent_state(rbm_abs_addr addr, size_t size) final {
+ if (is_reserved_extent(addr, size)) {
+ return rbm_extent_state_t::RESERVED;
+ } else if (is_free_extent(addr, size)) {
+ return rbm_extent_state_t::FREE;
+ }
+ return rbm_extent_state_t::ALLOCATED;
+ }
+
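+ // Note: the extent state is derived rather than stored: RESERVED while
+ // the extent is still in reserved_extent_tracker, FREE while it is
+ // wholly contained in the free-extent tree, and ALLOCATED otherwise.
+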
private:
void _add_to_tree(rbm_abs_addr start, size_t size);
uint64_t total_size = 0;
uint64_t base_addr = 0;
uint64_t max_alloc_size = 4 << 20;
+ bool detailed;
+ interval_set<rbm_abs_addr> reserved_extent_tracker;
};
}
return abort_allocation_ertr::now();
}
-BlockRBManager::write_ertr::future<> BlockRBManager::complete_allocation(
- Transaction &t)
+void BlockRBManager::complete_allocation(
+ paddr_t paddr, size_t size)
{
- return write_ertr::now();
+ assert(allocator);
+ rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
+ allocator->complete_allocation(addr, size);
}
BlockRBManager::open_ertr::future<> BlockRBManager::open()
return device->read_rbm_header(RBM_START_ADDRESS
).safe_then([this](auto s)
-> open_ertr::future<> {
- auto ool_start = device->get_journal_start() + device->get_journal_size();
+ auto ool_start = get_start_rbm_addr();
allocator->init(
ool_start,
device->get_available_size() -
paddr_t alloc_extent(size_t size) final; // allocator, return blocks
abort_allocation_ertr::future<> abort_allocation(Transaction &t) final;
- write_ertr::future<> complete_allocation(Transaction &t) final;
+ void complete_allocation(paddr_t addr, size_t size) final;
- size_t get_size() const final { return device->get_available_size(); };
+ size_t get_start_rbm_addr() const {
+ return device->get_journal_start() + device->get_journal_size();
+ }
+ size_t get_size() const final {
+ return device->get_available_size() - get_start_rbm_addr();
+ }
extent_len_t get_block_size() const final { return device->get_block_size(); }
/*
* on a device, so start and end location of the device are needed to
* support such case.
*/
- BlockRBManager(RBMDevice * device, std::string path)
+ BlockRBManager(RBMDevice * device, std::string path, bool detailed)
: device(device), path(path) {
- allocator.reset(new AvlAllocator);
+ allocator.reset(new AvlAllocator(detailed));
}
write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl);
void mark_space_used(paddr_t paddr, size_t len) final {
assert(allocator);
rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
- assert(addr >= device->get_journal_size() + device->get_journal_start() &&
+ assert(addr >= get_start_rbm_addr() &&
addr + len <= device->get_available_size());
allocator->mark_extent_used(addr, len);
}
void mark_space_free(paddr_t paddr, size_t len) final {
assert(allocator);
rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
- assert(addr >= device->get_journal_size() + device->get_journal_start() &&
+ assert(addr >= get_start_rbm_addr() &&
addr + len <= device->get_available_size());
allocator->free_extent(addr, len);
}
paddr_t get_start() final {
return convert_abs_addr_to_paddr(
- device->get_journal_start() + device->get_journal_size(),
+ get_start_rbm_addr(),
device->get_device_id());
}
+ rbm_extent_state_t get_extent_state(paddr_t paddr, size_t size) final {
+ assert(allocator);
+ rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
+ assert(addr >= get_start_rbm_addr() &&
+ addr + size <= device->get_available_size());
+ return allocator->get_extent_state(addr, size);
+ }
+
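+ // Illustrative use (mirrors RBMCleaner::equals in this patch): query
+ // per-block state to cross-check an external tracker:
+ //   auto state = rbm->get_extent_state(paddr, rbm->get_block_size());
+ //   bool in_use = (state == rbm_extent_state_t::ALLOCATED);
+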
private:
/*
* this contains the number of bitmap blocks, free blocks and
virtual uint64_t get_available_size() const = 0;
virtual uint64_t get_max_alloc_size() const = 0;
virtual void close() = 0;
+ /**
+ * complete_allocation
+ *
+ * Transitions the state of the given extent from RESERVED to ALLOCATED.
+ *
+ * @param start start address of the extent
+ * @param size size of the extent in bytes
+ */
+ virtual void complete_allocation(rbm_abs_addr start, size_t size) = 0;
+ virtual rbm_extent_state_t get_extent_state(rbm_abs_addr addr, size_t size) = 0;
virtual ~ExtentAllocator() {}
};
using ExtentAllocatorRef = std::unique_ptr<ExtentAllocator>;
sms->add_segment_manager(static_cast<SegmentManager*>(primary_device));
} else {
auto rbm = std::make_unique<BlockRBManager>(
- static_cast<RBMDevice*>(primary_device), "");
+ static_cast<RBMDevice*>(primary_device), "", is_test);
rbs->add_rb_manager(std::move(rbm));
}
if (p_dev->get_backend_type() == backend_type_t::SEGMENTED) {
sms->add_segment_manager(static_cast<SegmentManager*>(p_dev));
} else {
- auto rbm = std::make_unique<BlockRBManager>(static_cast<RBMDevice*>(p_dev), "");
+ auto rbm = std::make_unique<BlockRBManager>(
+ static_cast<RBMDevice*>(p_dev), "", is_test);
rbs->add_rb_manager(std::move(rbm));
}
}
seastar::future<> set_up_fut() final {
std::string a_type = GetParam();
if (a_type == "avl") {
- allocator.reset(new AvlAllocator);
+ allocator.reset(new AvlAllocator(false));
return seastar::now();
}
ceph_assert(0 == "no support");
0, DEFAULT_TEST_SIZE);
block_size = device->get_block_size();
size = device->get_available_size();
- rbm_manager.reset(new BlockRBManager(device.get(), std::string()));
+ rbm_manager.reset(new BlockRBManager(device.get(), std::string(), false));
config = get_rbm_ephemeral_device_config(0, 1);
return device->mount().handle_error(crimson::ct_error::assert_all{}
).then([this] {
}
bool check_usage() {
- std::string j_type = GetParam();
- if (j_type == "circularbounded") {
- // TODO: add check_usage for RBM
- return true;
- }
return epm->check_usage();
}