From 5fa8598c5cba85af4677a05e0137dbd27154d4bb Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Wed, 12 Oct 2022 12:25:37 +0900 Subject: [PATCH] crimson/os/seastore/rbm: move mkfs to RBMDevice Signed-off-by: Myoungwon Oh --- .../os/seastore/random_block_manager.h | 53 ++++-- .../random_block_manager/block_rb_manager.cc | 151 +++++------------- .../random_block_manager/block_rb_manager.h | 66 ++------ .../random_block_manager/nvme_block_device.cc | 100 ++++++++++++ .../random_block_manager/nvme_block_device.h | 4 - .../random_block_manager/rbm_device.h | 22 ++- src/crimson/os/seastore/seastore_types.h | 3 + .../seastore/test_randomblock_manager.cc | 19 +-- 8 files changed, 214 insertions(+), 204 deletions(-) diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h index 722685e80a4..1158e32befd 100644 --- a/src/crimson/os/seastore/random_block_manager.h +++ b/src/crimson/os/seastore/random_block_manager.h @@ -23,24 +23,42 @@ namespace crimson::os::seastore { +struct rbm_metadata_header_t { + size_t size = 0; + size_t block_size = 0; + uint64_t start; // start location of the device + uint64_t end; // end location of the device + uint64_t magic; // to indicate randomblock_manager + uuid_d uuid; + uint32_t start_data_area; + uint64_t flag; // reserved + uint64_t feature; + device_id_t device_id; + checksum_t crc; + + DENC(rbm_metadata_header_t, v, p) { + DENC_START(1, 1, p); + denc(v.size, p); + denc(v.block_size, p); + denc(v.start, p); + denc(v.end, p); + denc(v.magic, p); + denc(v.uuid, p); + denc(v.start_data_area, p); + denc(v.flag, p); + denc(v.feature, p); + denc(v.device_id, p); + + denc(v.crc, p); + DENC_FINISH(p); + } + +}; + +class Device; class RandomBlockManager { public: - struct mkfs_config_t { - std::string path; - paddr_t start; - paddr_t end; - size_t block_size = 0; - size_t total_size = 0; - device_id_t device_id = 0; - seastore_meta_t meta; - }; - using mkfs_ertr = crimson::errorator< - crimson::ct_error::input_output_error, - crimson::ct_error::invarg - >; - virtual mkfs_ertr::future<> mkfs(mkfs_config_t) = 0; - using read_ertr = crimson::errorator< crimson::ct_error::input_output_error, crimson::ct_error::invarg, @@ -109,4 +127,9 @@ inline rbm_abs_addr convert_paddr_to_abs_addr(const paddr_t& paddr) { inline paddr_t convert_abs_addr_to_paddr(rbm_abs_addr addr, device_id_t d_id) { return paddr_t::make_blk_paddr(d_id, addr); } +std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header); } + +WRITE_CLASS_DENC_BOUNDED( + crimson::os::seastore::rbm_metadata_header_t +) diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index 33dff0c614e..80acffd726d 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -16,34 +16,35 @@ SET_SUBSYS(seastore_device); namespace crimson::os::seastore { -BlockRBManager::mkfs_ertr::future<> BlockRBManager::mkfs(mkfs_config_t config) +device_config_t get_rbm_ephemeral_device_config( + std::size_t index, std::size_t num_devices) { - LOG_PREFIX(BlockRBManager::mkfs); - super.uuid = uuid_d(); // TODO - super.magic = 0xFF; // TODO - super.start = convert_paddr_to_abs_addr( - config.start); - super.end = convert_paddr_to_abs_addr( - config.end); - super.block_size = config.block_size; - super.size = config.total_size; - super.start_data_area = 0; - super.crc = 0; - super.feature |= RBM_BITMAP_BLOCK_CRC; - super.device_id = config.device_id; - - DEBUG("super {} ", super); - // write super block - return write_rbm_header( - ).safe_then([] { - return mkfs_ertr::now(); - }).handle_error( - mkfs_ertr::pass_further{}, - crimson::ct_error::assert_all{ - "Invalid error write_rbm_header in BlockRBManager::mkfs" - }); -} + assert(num_devices > index); + magic_t magic = 0xfffa; + auto type = device_type_t::RANDOM_BLOCK_EPHEMERAL; + bool is_major_device; + secondary_device_set_t secondary_devices; + if (index == 0) { + is_major_device = true; + for (std::size_t secondary_index = index + 1; + secondary_index < num_devices; + ++secondary_index) { + device_id_t secondary_id = static_cast(secondary_index); + secondary_devices.insert({ + secondary_index, device_spec_t{magic, type, secondary_id} + }); + } + } else { // index > 0 + is_major_device = false; + } + device_id_t id = static_cast(DEVICE_ID_RANDOM_BLOCK_MIN + index); + seastore_meta_t meta = {}; + return {is_major_device, + device_spec_t{magic, type, id}, + meta, + secondary_devices}; +} /* TODO : block allocator */ BlockRBManager::allocate_ret BlockRBManager::alloc_extent( @@ -83,13 +84,9 @@ BlockRBManager::write_ertr::future<> BlockRBManager::complete_allocation( BlockRBManager::open_ertr::future<> BlockRBManager::open() { - return read_rbm_header(RBM_START_ADDRESS + return device->read_rbm_header(RBM_START_ADDRESS ).safe_then([&](auto s) -> open_ertr::future<> { - if (s.magic != 0xFF) { - return crimson::ct_error::enoent::make(); - } - super = s; return open_ertr::now(); }).handle_error( open_ertr::pass_further{}, @@ -103,10 +100,14 @@ BlockRBManager::write_ertr::future<> BlockRBManager::write( paddr_t paddr, bufferptr &bptr) { + LOG_PREFIX(BlockRBManager::write); ceph_assert(device); rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); - if (addr > super.end || addr < super.start || - bptr.length() > super.end - super.start) { + rbm_abs_addr start = 0; + rbm_abs_addr end = device->get_available_size(); + if (addr < start || addr + bptr.length() > end) { + ERROR("out of range: start {}, end {}, addr {}, length {}", + start, end, addr, bptr.length()); return crimson::ct_error::erange::make(); } return device->write( @@ -118,10 +119,14 @@ BlockRBManager::read_ertr::future<> BlockRBManager::read( paddr_t paddr, bufferptr &bptr) { + LOG_PREFIX(BlockRBManager::read); ceph_assert(device); rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr); - if (addr > super.end || addr < super.start || - bptr.length() > super.end - super.start) { + rbm_abs_addr start = 0; + rbm_abs_addr end = device->get_available_size(); + if (addr < start || addr + bptr.length() > end) { + ERROR("out of range: start {}, end {}, addr {}, length {}", + start, end, addr, bptr.length()); return crimson::ct_error::erange::make(); } return device->read( @@ -136,82 +141,6 @@ BlockRBManager::close_ertr::future<> BlockRBManager::close() } -BlockRBManager::write_ertr::future<> BlockRBManager::write_rbm_header() -{ - bufferlist meta_b_header; - super.crc = 0; - encode(super, meta_b_header); - // If NVMeDevice supports data protection, CRC for checksum is not required - // NVMeDevice is expected to generate and store checksum internally. - // CPU overhead for CRC might be saved. - if (device->is_data_protection_enabled()) { - super.crc = -1; - } - else { - super.crc = meta_b_header.crc32c(-1); - } - - bufferlist bl; - encode(super, bl); - auto iter = bl.begin(); - auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size)); - assert(bl.length() < super.block_size); - iter.copy(bl.length(), bp.c_str()); - - return device->write(super.start, bp); -} - -BlockRBManager::read_ertr::future BlockRBManager::read_rbm_header( - rbm_abs_addr addr) -{ - LOG_PREFIX(BlockRBManager::read_rbm_header); - ceph_assert(device); - bufferptr bptr = - bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE)); - bptr.zero(); - return device->read( - addr, - bptr - ).safe_then([length=bptr.length(), this, bptr, FNAME]() - -> read_ertr::future { - bufferlist bl; - bl.append(bptr); - auto p = bl.cbegin(); - rbm_metadata_header_t super_block; - try { - decode(super_block, p); - } - catch (ceph::buffer::error& e) { - DEBUG("read_rbm_header: unable to decode rbm super block {}", - e.what()); - return crimson::ct_error::enoent::make(); - } - checksum_t crc = super_block.crc; - bufferlist meta_b_header; - super_block.crc = 0; - encode(super_block, meta_b_header); - - // Do CRC verification only if data protection is not supported. - if (device->is_data_protection_enabled() == false) { - if (meta_b_header.crc32c(-1) != crc) { - DEBUG("bad crc on super block, expected {} != actual {} ", - meta_b_header.crc32c(-1), crc); - return crimson::ct_error::input_output_error::make(); - } - } - DEBUG("got {} ", super); - return read_ertr::future( - read_ertr::ready_future_marker{}, - super_block - ); - }).handle_error( - read_ertr::pass_further{}, - crimson::ct_error::assert_all{ - "Invalid error in BlockRBManager::read_rbm_header" - } - ); -} - BlockRBManager::write_ertr::future<> BlockRBManager::write( rbm_abs_addr addr, bufferlist &bl) diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h index bd6927b258b..fb208c21573 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h @@ -22,61 +22,16 @@ #include "include/buffer.h" #include "include/uuid.h" + namespace crimson::os::seastore { constexpr rbm_abs_addr RBM_START_ADDRESS = 0; -constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096; using RBMDevice = random_block_device::RBMDevice; using RBMDeviceRef = std::unique_ptr; -enum { - // TODO: This allows the device to manage crc on a block by itself - RBM_NVME_END_TO_END_PROTECTION = 1, - RBM_BITMAP_BLOCK_CRC = 2, -}; - -struct rbm_metadata_header_t { - size_t size = 0; - size_t block_size = 0; - uint64_t start; // start location of the device - uint64_t end; // end location of the device - uint64_t magic; // to indicate randomblock_manager - uuid_d uuid; - uint32_t start_data_area; - uint64_t flag; // reserved - uint64_t feature; - device_id_t device_id; - checksum_t crc; - - DENC(rbm_metadata_header_t, v, p) { - DENC_START(1, 1, p); - denc(v.size, p); - denc(v.block_size, p); - denc(v.start, p); - denc(v.end, p); - denc(v.magic, p); - denc(v.uuid, p); - denc(v.start_data_area, p); - denc(v.flag, p); - denc(v.feature, p); - denc(v.device_id, p); - - denc(v.crc, p); - DENC_FINISH(p); - } - -}; - -std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header); - -} - -WRITE_CLASS_DENC_BOUNDED( - crimson::os::seastore::rbm_metadata_header_t -) - -namespace crimson::os::seastore { +device_config_t get_rbm_ephemeral_device_config( + std::size_t index, std::size_t num_devices); class BlockRBManager final : public RandomBlockManager { public: @@ -88,7 +43,6 @@ public: * --------------------------------------------------------------------------- */ - mkfs_ertr::future<> mkfs(mkfs_config_t) final; read_ertr::future<> read(paddr_t addr, bufferptr &buffer) final; write_ertr::future<> write(paddr_t addr, bufferptr &buf) final; open_ertr::future<> open() final; @@ -110,11 +64,8 @@ public: abort_allocation_ertr::future<> abort_allocation(Transaction &t) final; write_ertr::future<> complete_allocation(Transaction &t) final; - read_ertr::future read_rbm_header(rbm_abs_addr addr); - write_ertr::future<> write_rbm_header(); - - size_t get_size() const final { return super.size; }; - extent_len_t get_block_size() const final { return super.block_size; } + size_t get_size() const final { return device->get_available_size(); }; + extent_len_t get_block_size() const final { return device->get_block_size(); } /* * We will have mulitple partitions (circularjournals and randbomblockmanagers) @@ -127,12 +78,14 @@ public: write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl); device_id_t get_device_id() const final { - return super.device_id; + assert(device); + return device->get_device_id(); } uint64_t get_free_blocks() const final { // TODO: return correct free blocks after block allocator is introduced - return super.size / super.block_size; + assert(device); + return get_size() / get_block_size(); } private: @@ -140,7 +93,6 @@ private: * this contains the number of bitmap blocks, free blocks and * rbm specific information */ - rbm_metadata_header_t super; //FreelistManager free_manager; // TODO: block management RBMDevice * device; std::string path; diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc index 42541356c5a..2a3d52947a3 100644 --- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc +++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc @@ -18,6 +18,106 @@ namespace { return crimson::get_logger(ceph_subsys_seastore_tm); } } +namespace crimson::os::seastore::random_block_device { +#include "crimson/os/seastore/logging.h" +SET_SUBSYS(seastore_device); + +RBMDevice::mkfs_ret RBMDevice::mkfs(device_config_t config) { + LOG_PREFIX(RBMDevice::mkfs); + super.start = 0; + super.end = get_available_size(); + super.block_size = get_block_size(); + super.size = get_available_size(); + + super.start_data_area = 0; + super.feature |= RBM_BITMAP_BLOCK_CRC; + super.device_id = config.spec.id; + DEBUG("super {} ", super); + // write super block + return write_rbm_header( + ).safe_then([] { + return mkfs_ertr::now(); + }).handle_error( + mkfs_ertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error write_rbm_header in RBMDevice::mkfs" + }); +} + +write_ertr::future<> RBMDevice::write_rbm_header() +{ + bufferlist meta_b_header; + super.crc = 0; + encode(super, meta_b_header); + // If NVMeDevice supports data protection, CRC for checksum is not required + // NVMeDevice is expected to generate and store checksum internally. + // CPU overhead for CRC might be saved. + if (is_data_protection_enabled()) { + super.crc = -1; + } else { + super.crc = meta_b_header.crc32c(-1); + } + + bufferlist bl; + encode(super, bl); + auto iter = bl.begin(); + auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size)); + assert(bl.length() < super.block_size); + iter.copy(bl.length(), bp.c_str()); + + return write(super.start, bp); +} + +read_ertr::future RBMDevice::read_rbm_header( + rbm_abs_addr addr) +{ + LOG_PREFIX(RBMDevice::read_rbm_header); + bufferptr bptr = + bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE)); + bptr.zero(); + return read( + addr, + bptr + ).safe_then([length=bptr.length(), this, bptr, FNAME]() + -> read_ertr::future { + bufferlist bl; + bl.append(bptr); + auto p = bl.cbegin(); + rbm_metadata_header_t super_block; + try { + decode(super_block, p); + } + catch (ceph::buffer::error& e) { + DEBUG("read_rbm_header: unable to decode rbm super block {}", + e.what()); + return crimson::ct_error::enoent::make(); + } + checksum_t crc = super_block.crc; + bufferlist meta_b_header; + super_block.crc = 0; + encode(super_block, meta_b_header); + + // Do CRC verification only if data protection is not supported. + if (is_data_protection_enabled() == false) { + if (meta_b_header.crc32c(-1) != crc) { + DEBUG("bad crc on super block, expected {} != actual {} ", + meta_b_header.crc32c(-1), crc); + return crimson::ct_error::input_output_error::make(); + } + } else { + ceph_assert_always(crc == (checksum_t)-1); + } + super_block.crc = crc; + super = super_block; + DEBUG("got {} ", super); + return read_ertr::future( + read_ertr::ready_future_marker{}, + super_block + ); + }); +} + +} namespace crimson::os::seastore::random_block_device::nvme { diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h index d3acb057aea..84417129934 100644 --- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h +++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h @@ -207,10 +207,6 @@ public: uint64_t offset, uint64_t len) override; - mkfs_ret mkfs(device_config_t) final { - return mkfs_ertr::now(); - } - mount_ret mount() final { return mount_ertr::now(); } diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.h b/src/crimson/os/seastore/random_block_manager/rbm_device.h index 4a4b720ea2e..9264b104414 100644 --- a/src/crimson/os/seastore/random_block_manager/rbm_device.h +++ b/src/crimson/os/seastore/random_block_manager/rbm_device.h @@ -65,6 +65,13 @@ using nvme_command_ertr = crimson::errorator< using discard_ertr = crimson::errorator< crimson::ct_error::input_output_error>; +constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096; +enum { + // TODO: This allows the device to manage crc on a block by itself + RBM_NVME_END_TO_END_PROTECTION = 1, + RBM_BITMAP_BLOCK_CRC = 2, +}; + class RBMDevice : public Device { public: using Device::read; @@ -84,6 +91,7 @@ protected: device_id_t device_id; seastore_meta_t meta; secondary_device_set_t devices; + rbm_metadata_header_t super; public: RBMDevice() {} virtual ~RBMDevice() = default; @@ -100,6 +108,10 @@ public: device_id = id; } + void set_block_size(uint64_t bs) { + block_size = bs; + } + magic_t get_magic() const final { return magic_t(); } @@ -152,6 +164,12 @@ public: uint16_t stream = 0) = 0; bool is_data_protection_enabled() const { return false; } + + mkfs_ret mkfs(device_config_t) final; + + write_ertr::future<> write_rbm_header(); + + read_ertr::future read_rbm_header(rbm_abs_addr addr); }; @@ -168,10 +186,6 @@ public: } } - mkfs_ret mkfs(device_config_t) final { - return mkfs_ertr::now(); - } - mount_ret mount() final { return open("", seastar::open_flags::rw ).safe_then([]() { diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 17a9fc825b5..28d12ebb957 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -68,6 +68,9 @@ constexpr device_id_t DEVICE_ID_ZERO = DEVICE_ID_MAX - 5; constexpr device_id_t DEVICE_ID_ROOT = DEVICE_ID_MAX - 6; constexpr device_id_t DEVICE_ID_MAX_VALID = DEVICE_ID_MAX - 7; constexpr device_id_t DEVICE_ID_MAX_VALID_SEGMENT = DEVICE_ID_MAX >> 1; +constexpr device_id_t DEVICE_ID_SEGMENTED_MIN = 0; +constexpr device_id_t DEVICE_ID_RANDOM_BLOCK_MIN = + 1 << (std::numeric_limits::digits - 1); struct device_id_printer_t { device_id_t id; diff --git a/src/test/crimson/seastore/test_randomblock_manager.cc b/src/test/crimson/seastore/test_randomblock_manager.cc index d2bda86ee38..be748393cf3 100644 --- a/src/test/crimson/seastore/test_randomblock_manager.cc +++ b/src/test/crimson/seastore/test_randomblock_manager.cc @@ -46,24 +46,18 @@ struct rbm_test_t : const uint64_t block_size = DEFAULT_BLOCK_SIZE; - RandomBlockManager::mkfs_config_t config; + device_config_t config; paddr_t current; rbm_test_t() = default; seastar::future<> set_up_fut() final { device.reset(new random_block_device::TestMemory(DEFAULT_TEST_SIZE)); - device_id_t d_id = 1 << (std::numeric_limits::digits - 1); - device->set_device_id(d_id); rbm_manager.reset(new BlockRBManager(device.get(), std::string())); - config.start = paddr_t::make_blk_paddr(d_id, 0); - config.end = paddr_t::make_blk_paddr(d_id, DEFAULT_TEST_SIZE); - config.block_size = DEFAULT_BLOCK_SIZE; - config.total_size = DEFAULT_TEST_SIZE; - config.device_id = d_id; + config = get_rbm_ephemeral_device_config(0, 1); return device->mount().handle_error(crimson::ct_error::assert_all{} ).then([this] { - return rbm_manager->mkfs(config).handle_error(crimson::ct_error::assert_all{} + return device->mkfs(config).handle_error(crimson::ct_error::assert_all{} ).then([this] { return rbm_manager->open().handle_error(crimson::ct_error::assert_all{}); }); @@ -79,12 +73,11 @@ struct rbm_test_t : } auto mkfs() { - return rbm_manager->mkfs(config).unsafe_get0(); + return device->mkfs(config).unsafe_get0(); } auto read_rbm_header() { - rbm_abs_addr addr = convert_paddr_to_abs_addr(config.start); - return rbm_manager->read_rbm_header(addr).unsafe_get0(); + return device->read_rbm_header(RBM_START_ADDRESS).unsafe_get0(); } auto open() { @@ -130,7 +123,7 @@ TEST_F(rbm_test_t, mkfs_test) super.block_size == DEFAULT_BLOCK_SIZE && super.end == DEFAULT_TEST_SIZE ); - config.block_size = 8196; + device->set_block_size(8196); mkfs(); super = read_rbm_header(); ASSERT_TRUE( -- 2.39.5