namespace crimson::os::seastore {
+// Superblock ("rbm header") of a random-block device.
+// RBMDevice keeps one instance as its `super` member; it is serialized and
+// parsed field by field, in declaration order, by the DENC block below.
+//
+// Every scalar field carries a default initializer: RBMDevice::mkfs() only
+// assigns some of the fields (and OR-s into `feature`) before the whole
+// struct is encoded, so a field without an initializer would be read and
+// persisted with an indeterminate value.
+struct rbm_metadata_header_t {
+ size_t size = 0;
+ size_t block_size = 0;
+ uint64_t start = 0; // start location of the device
+ uint64_t end = 0; // end location of the device
+ uint64_t magic = 0; // to indicate randomblock_manager
+ uuid_d uuid;
+ uint32_t start_data_area = 0;
+ uint64_t flag = 0; // reserved
+ uint64_t feature = 0; // mkfs() OR-s RBM_BITMAP_BLOCK_CRC into this
+ device_id_t device_id = 0;
+ checksum_t crc = 0; // written by write_rbm_header(), verified by read_rbm_header()
+
+ DENC(rbm_metadata_header_t, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.size, p);
+ denc(v.block_size, p);
+ denc(v.start, p);
+ denc(v.end, p);
+ denc(v.magic, p);
+ denc(v.uuid, p);
+ denc(v.start_data_area, p);
+ denc(v.flag, p);
+ denc(v.feature, p);
+ denc(v.device_id, p);
+
+ // crc stays last, matching the original field order.
+ denc(v.crc, p);
+ DENC_FINISH(p);
+ }
+
+};
+
+class Device;
class RandomBlockManager {
public:
- struct mkfs_config_t {
- std::string path;
- paddr_t start;
- paddr_t end;
- size_t block_size = 0;
- size_t total_size = 0;
- device_id_t device_id = 0;
- seastore_meta_t meta;
- };
- using mkfs_ertr = crimson::errorator<
- crimson::ct_error::input_output_error,
- crimson::ct_error::invarg
- >;
- virtual mkfs_ertr::future<> mkfs(mkfs_config_t) = 0;
-
using read_ertr = crimson::errorator<
crimson::ct_error::input_output_error,
crimson::ct_error::invarg,
// Builds the block paddr_t for address `addr` on device `d_id` by forwarding
// both values to paddr_t::make_blk_paddr (note the swapped argument order).
inline paddr_t convert_abs_addr_to_paddr(rbm_abs_addr addr, device_id_t d_id) {
return paddr_t::make_blk_paddr(d_id, addr);
}
+std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header);
}
+
+WRITE_CLASS_DENC_BOUNDED(
+ crimson::os::seastore::rbm_metadata_header_t
+)
namespace crimson::os::seastore {
-BlockRBManager::mkfs_ertr::future<> BlockRBManager::mkfs(mkfs_config_t config)
+// Builds the device_config_t for the `index`-th of `num_devices` ephemeral
+// random-block devices.
+//
+// index        position of this device; must be < num_devices.
+// num_devices  total number of ephemeral random-block devices.
+//
+// Device 0 is the major device and lists every other device as a secondary.
+// Every device id is DEVICE_ID_RANDOM_BLOCK_MIN plus the device's index.
+device_config_t get_rbm_ephemeral_device_config(
+ std::size_t index, std::size_t num_devices)
{
- LOG_PREFIX(BlockRBManager::mkfs);
- super.uuid = uuid_d(); // TODO
- super.magic = 0xFF; // TODO
- super.start = convert_paddr_to_abs_addr(
- config.start);
- super.end = convert_paddr_to_abs_addr(
- config.end);
- super.block_size = config.block_size;
- super.size = config.total_size;
- super.start_data_area = 0;
- super.crc = 0;
- super.feature |= RBM_BITMAP_BLOCK_CRC;
- super.device_id = config.device_id;
-
- DEBUG("super {} ", super);
- // write super block
- return write_rbm_header(
- ).safe_then([] {
- return mkfs_ertr::now();
- }).handle_error(
- mkfs_ertr::pass_further{},
- crimson::ct_error::assert_all{
- "Invalid error write_rbm_header in BlockRBManager::mkfs"
- });
-}
+ assert(num_devices > index);
+ const magic_t magic = 0xfffa;
+ const auto type = device_type_t::RANDOM_BLOCK_EPHEMERAL;
+ // Only the first device is the major one, and only it records secondaries.
+ const bool is_major_device = (index == 0);
+ secondary_device_set_t secondary_devices;
+ if (is_major_device) {
+ for (std::size_t secondary_index = index + 1;
+ secondary_index < num_devices;
+ ++secondary_index) {
+ // A secondary must be recorded under the id it receives when this
+ // function is called with its own index, i.e. offset by
+ // DEVICE_ID_RANDOM_BLOCK_MIN. The bare index was used before, which
+ // disagrees with the id computed below for that same device.
+ const device_id_t secondary_id = static_cast<device_id_t>(
+ DEVICE_ID_RANDOM_BLOCK_MIN + secondary_index);
+ secondary_devices.insert({
+ secondary_id, device_spec_t{magic, type, secondary_id}
+ });
+ }
+ }
+ const device_id_t id = static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN + index);
+ seastore_meta_t meta = {};
+ return {is_major_device,
+ device_spec_t{magic, type, id},
+ meta,
+ secondary_devices};
+}
/* TODO : block allocator */
BlockRBManager::allocate_ret BlockRBManager::alloc_extent(
BlockRBManager::open_ertr::future<> BlockRBManager::open()
{
- return read_rbm_header(RBM_START_ADDRESS
+ return device->read_rbm_header(RBM_START_ADDRESS
).safe_then([&](auto s)
-> open_ertr::future<> {
- if (s.magic != 0xFF) {
- return crimson::ct_error::enoent::make();
- }
- super = s;
return open_ertr::now();
}).handle_error(
open_ertr::pass_further{},
paddr_t paddr,
bufferptr &bptr)
{
+ LOG_PREFIX(BlockRBManager::write);
ceph_assert(device);
rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
- if (addr > super.end || addr < super.start ||
- bptr.length() > super.end - super.start) {
+ rbm_abs_addr start = 0;
+ rbm_abs_addr end = device->get_available_size();
+ if (addr < start || addr + bptr.length() > end) {
+ ERROR("out of range: start {}, end {}, addr {}, length {}",
+ start, end, addr, bptr.length());
return crimson::ct_error::erange::make();
}
return device->write(
paddr_t paddr,
bufferptr &bptr)
{
+ LOG_PREFIX(BlockRBManager::read);
ceph_assert(device);
rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
- if (addr > super.end || addr < super.start ||
- bptr.length() > super.end - super.start) {
+ rbm_abs_addr start = 0;
+ rbm_abs_addr end = device->get_available_size();
+ if (addr < start || addr + bptr.length() > end) {
+ ERROR("out of range: start {}, end {}, addr {}, length {}",
+ start, end, addr, bptr.length());
return crimson::ct_error::erange::make();
}
return device->read(
}
-BlockRBManager::write_ertr::future<> BlockRBManager::write_rbm_header()
-{
- bufferlist meta_b_header;
- super.crc = 0;
- encode(super, meta_b_header);
- // If NVMeDevice supports data protection, CRC for checksum is not required
- // NVMeDevice is expected to generate and store checksum internally.
- // CPU overhead for CRC might be saved.
- if (device->is_data_protection_enabled()) {
- super.crc = -1;
- }
- else {
- super.crc = meta_b_header.crc32c(-1);
- }
-
- bufferlist bl;
- encode(super, bl);
- auto iter = bl.begin();
- auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
- assert(bl.length() < super.block_size);
- iter.copy(bl.length(), bp.c_str());
-
- return device->write(super.start, bp);
-}
-
-BlockRBManager::read_ertr::future<rbm_metadata_header_t> BlockRBManager::read_rbm_header(
- rbm_abs_addr addr)
-{
- LOG_PREFIX(BlockRBManager::read_rbm_header);
- ceph_assert(device);
- bufferptr bptr =
- bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE));
- bptr.zero();
- return device->read(
- addr,
- bptr
- ).safe_then([length=bptr.length(), this, bptr, FNAME]()
- -> read_ertr::future<rbm_metadata_header_t> {
- bufferlist bl;
- bl.append(bptr);
- auto p = bl.cbegin();
- rbm_metadata_header_t super_block;
- try {
- decode(super_block, p);
- }
- catch (ceph::buffer::error& e) {
- DEBUG("read_rbm_header: unable to decode rbm super block {}",
- e.what());
- return crimson::ct_error::enoent::make();
- }
- checksum_t crc = super_block.crc;
- bufferlist meta_b_header;
- super_block.crc = 0;
- encode(super_block, meta_b_header);
-
- // Do CRC verification only if data protection is not supported.
- if (device->is_data_protection_enabled() == false) {
- if (meta_b_header.crc32c(-1) != crc) {
- DEBUG("bad crc on super block, expected {} != actual {} ",
- meta_b_header.crc32c(-1), crc);
- return crimson::ct_error::input_output_error::make();
- }
- }
- DEBUG("got {} ", super);
- return read_ertr::future<rbm_metadata_header_t>(
- read_ertr::ready_future_marker{},
- super_block
- );
- }).handle_error(
- read_ertr::pass_further{},
- crimson::ct_error::assert_all{
- "Invalid error in BlockRBManager::read_rbm_header"
- }
- );
-}
-
BlockRBManager::write_ertr::future<> BlockRBManager::write(
rbm_abs_addr addr,
bufferlist &bl)
#include "include/buffer.h"
#include "include/uuid.h"
+
namespace crimson::os::seastore {
constexpr rbm_abs_addr RBM_START_ADDRESS = 0;
-constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;
using RBMDevice = random_block_device::RBMDevice;
using RBMDeviceRef = std::unique_ptr<RBMDevice>;
-enum {
- // TODO: This allows the device to manage crc on a block by itself
- RBM_NVME_END_TO_END_PROTECTION = 1,
- RBM_BITMAP_BLOCK_CRC = 2,
-};
-
-struct rbm_metadata_header_t {
- size_t size = 0;
- size_t block_size = 0;
- uint64_t start; // start location of the device
- uint64_t end; // end location of the device
- uint64_t magic; // to indicate randomblock_manager
- uuid_d uuid;
- uint32_t start_data_area;
- uint64_t flag; // reserved
- uint64_t feature;
- device_id_t device_id;
- checksum_t crc;
-
- DENC(rbm_metadata_header_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.size, p);
- denc(v.block_size, p);
- denc(v.start, p);
- denc(v.end, p);
- denc(v.magic, p);
- denc(v.uuid, p);
- denc(v.start_data_area, p);
- denc(v.flag, p);
- denc(v.feature, p);
- denc(v.device_id, p);
-
- denc(v.crc, p);
- DENC_FINISH(p);
- }
-
-};
-
-std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header);
-
-}
-
-WRITE_CLASS_DENC_BOUNDED(
- crimson::os::seastore::rbm_metadata_header_t
-)
-
-namespace crimson::os::seastore {
+device_config_t get_rbm_ephemeral_device_config(
+ std::size_t index, std::size_t num_devices);
class BlockRBManager final : public RandomBlockManager {
public:
* ---------------------------------------------------------------------------
*/
- mkfs_ertr::future<> mkfs(mkfs_config_t) final;
read_ertr::future<> read(paddr_t addr, bufferptr &buffer) final;
write_ertr::future<> write(paddr_t addr, bufferptr &buf) final;
open_ertr::future<> open() final;
abort_allocation_ertr::future<> abort_allocation(Transaction &t) final;
write_ertr::future<> complete_allocation(Transaction &t) final;
- read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
- write_ertr::future<> write_rbm_header();
-
- size_t get_size() const final { return super.size; };
- extent_len_t get_block_size() const final { return super.block_size; }
+ size_t get_size() const final {
+ // Same null-device guard as get_device_id() and get_free_blocks();
+ // the size is now whatever the device reports as available.
+ assert(device);
+ return device->get_available_size();
+ }
+ extent_len_t get_block_size() const final {
+ // Same null-device guard as get_device_id() and get_free_blocks();
+ // the block size is taken from the device.
+ assert(device);
+ return device->get_block_size();
+ }
/*
* We will have multiple partitions (circularjournals and randomblockmanagers)
write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl);
device_id_t get_device_id() const final {
- return super.device_id;
+ assert(device); // debug-only guard: the id is read through the device pointer
+ return device->get_device_id(); // the id comes from the device, not from a manager-held superblock
}
uint64_t get_free_blocks() const final {
// TODO: return correct free blocks after block allocator is introduced
- return super.size / super.block_size;
+ assert(device); // debug-only guard; get_size()/get_block_size() dereference device
+ return get_size() / get_block_size(); // placeholder: available size divided by block size. NOTE(review): divides by zero if the device reports a block size of 0 - confirm that cannot happen
}
private:
* this contains the number of bitmap blocks, free blocks and
* rbm specific information
*/
- rbm_metadata_header_t super;
//FreelistManager free_manager; // TODO: block management
RBMDevice * device;
std::string path;
return crimson::get_logger(ceph_subsys_seastore_tm);
}
}
+namespace crimson::os::seastore::random_block_device {
+#include "crimson/os/seastore/logging.h"
+SET_SUBSYS(seastore_device);
+
+// Formats the device: fills the in-memory header `super` from the device's
+// own geometry and from config.spec.id, then persists it through
+// write_rbm_header(). Errors listed in mkfs_ertr are passed on to the caller;
+// any other error from the header write trips assert_all.
+RBMDevice::mkfs_ret RBMDevice::mkfs(device_config_t config) {
+ LOG_PREFIX(RBMDevice::mkfs);
+
+ // Fixed offsets: the managed range and the data area both start at 0.
+ super.start = 0;
+ super.start_data_area = 0;
+
+ // Geometry as reported by the device itself.
+ super.end = get_available_size();
+ super.block_size = get_block_size();
+ super.size = get_available_size();
+
+ // Feature bit and identity.
+ super.feature = super.feature | RBM_BITMAP_BLOCK_CRC;
+ super.device_id = config.spec.id;
+
+ DEBUG("super {} ", super);
+
+ // write super block
+ return write_rbm_header().safe_then(
+ [] {
+ return mkfs_ertr::now();
+ }
+ ).handle_error(
+ mkfs_ertr::pass_further{},
+ crimson::ct_error::assert_all{
+ "Invalid error write_rbm_header in RBMDevice::mkfs"
+ }
+ );
+}
+
+// Serializes the in-memory header `super` and writes it to the device at
+// super.start, padded to one block (super.block_size bytes).
+//
+// Side effect: super.crc is rewritten - to the crc32c of the header encoded
+// with crc == 0, or to -1 when the device reports data protection.
+write_ertr::future<> RBMDevice::write_rbm_header()
+{
+ // The checksum covers the header as encoded with a zero crc field;
+ // read_rbm_header() re-encodes the same way to verify it.
+ bufferlist meta_b_header;
+ super.crc = 0;
+ encode(super, meta_b_header);
+ // If NVMeDevice supports data protection, CRC for checksum is not required
+ // NVMeDevice is expected to generate and store checksum internally.
+ // CPU overhead for CRC might be saved.
+ if (is_data_protection_enabled()) {
+ super.crc = -1;
+ } else {
+ super.crc = meta_b_header.crc32c(-1);
+ }
+
+ bufferlist bl;
+ encode(super, bl);
+ // ceph_assert rather than assert: if the encoded header did not fit into
+ // one block, the copy below would overrun the buffer in an NDEBUG build.
+ // Checked before allocating so a zero block_size is caught here as well.
+ ceph_assert(bl.length() < super.block_size);
+ auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+ // Clear the block first so the bytes after the encoded header are written
+ // as zeros instead of whatever the fresh allocation happened to contain.
+ bp.zero();
+ auto iter = bl.begin();
+ iter.copy(bl.length(), bp.c_str());
+
+ return write(super.start, bp);
+}
+
+// Reads RBM_SUPERBLOCK_SIZE bytes at `addr`, decodes them as an
+// rbm_metadata_header_t and validates the stored checksum.
+//
+// Returns the decoded header and also stores it in `super`.
+// Fails with enoent when the bytes cannot be decoded and with
+// input_output_error when the checksum does not match; the checksum is only
+// compared when the device does not report data protection.
+read_ertr::future<rbm_metadata_header_t> RBMDevice::read_rbm_header(
+ rbm_abs_addr addr)
+{
+ LOG_PREFIX(RBMDevice::read_rbm_header);
+ // Zero-filled, page-aligned buffer the raw header is read into.
+ auto bptr = bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE));
+ bptr.zero();
+ return read(addr, bptr).safe_then([this, bptr, FNAME]()
+ -> read_ertr::future<rbm_metadata_header_t> {
+ bufferlist raw;
+ raw.append(bptr);
+ auto cursor = raw.cbegin();
+ rbm_metadata_header_t header;
+ try {
+ decode(header, cursor);
+ } catch (const ceph::buffer::error& e) {
+ DEBUG("read_rbm_header: unable to decode rbm super block {}",
+ e.what());
+ return crimson::ct_error::enoent::make();
+ }
+
+ // The checksum was taken over the header encoded with crc == 0, so the
+ // header is re-encoded in that form before comparing.
+ const checksum_t stored_crc = header.crc;
+ header.crc = 0;
+ bufferlist crc_input;
+ encode(header, crc_input);
+
+ // Do CRC verification only if data protection is not supported.
+ if (!is_data_protection_enabled()) {
+ const auto computed_crc = crc_input.crc32c(-1);
+ if (computed_crc != stored_crc) {
+ DEBUG("bad crc on super block, expected {} != actual {} ",
+ computed_crc, stored_crc);
+ return crimson::ct_error::input_output_error::make();
+ }
+ } else {
+ ceph_assert_always(stored_crc == (checksum_t)-1);
+ }
+
+ // Put the stored crc back before caching and returning the header.
+ header.crc = stored_crc;
+ super = header;
+ DEBUG("got {} ", super);
+ return read_ertr::future<rbm_metadata_header_t>(
+ read_ertr::ready_future_marker{},
+ header
+ );
+ });
+}
+
+}
namespace crimson::os::seastore::random_block_device::nvme {
uint64_t offset,
uint64_t len) override;
- mkfs_ret mkfs(device_config_t) final {
- return mkfs_ertr::now();
- }
-
mount_ret mount() final {
return mount_ertr::now();
}
using discard_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
+constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096; // length of the page-aligned buffer read_rbm_header() reads the header into
+enum { // feature values; RBMDevice::mkfs() OR-s RBM_BITMAP_BLOCK_CRC into the header's `feature` field
+ // TODO: This allows the device to manage crc on a block by itself
+ RBM_NVME_END_TO_END_PROTECTION = 1,
+ RBM_BITMAP_BLOCK_CRC = 2,
+};
+
class RBMDevice : public Device {
public:
using Device::read;
device_id_t device_id;
seastore_meta_t meta;
secondary_device_set_t devices;
+ rbm_metadata_header_t super;
public:
RBMDevice() {}
virtual ~RBMDevice() = default;
device_id = id;
}
+ void set_block_size(uint64_t bs) { // stores bs in the block_size member; the unit test calls this before re-running mkfs()
+ block_size = bs;
+ }
+
magic_t get_magic() const final {
return magic_t();
}
uint16_t stream = 0) = 0;
bool is_data_protection_enabled() const { return false; }
+
+ mkfs_ret mkfs(device_config_t) final; // fills in `super` and persists it via write_rbm_header()
+
+ write_ertr::future<> write_rbm_header(); // encodes `super` (updating super.crc) and writes it at super.start
+
+ read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr); // reads and decodes the header at addr, checks its crc, stores it in `super` and returns it
};
}
}
- mkfs_ret mkfs(device_config_t) final {
- return mkfs_ertr::now();
- }
-
mount_ret mount() final {
return open("", seastar::open_flags::rw
).safe_then([]() {
constexpr device_id_t DEVICE_ID_ROOT = DEVICE_ID_MAX - 6;
constexpr device_id_t DEVICE_ID_MAX_VALID = DEVICE_ID_MAX - 7;
constexpr device_id_t DEVICE_ID_MAX_VALID_SEGMENT = DEVICE_ID_MAX >> 1;
+constexpr device_id_t DEVICE_ID_SEGMENTED_MIN = 0;
+constexpr device_id_t DEVICE_ID_RANDOM_BLOCK_MIN = // 1 shifted to the highest value bit of device_id_t; get_rbm_ephemeral_device_config() adds the device index to this
+ 1 << (std::numeric_limits<device_id_t>::digits - 1);
struct device_id_printer_t {
device_id_t id;
const uint64_t block_size = DEFAULT_BLOCK_SIZE;
- RandomBlockManager::mkfs_config_t config;
+ device_config_t config;
paddr_t current;
rbm_test_t() = default;
seastar::future<> set_up_fut() final {
device.reset(new random_block_device::TestMemory(DEFAULT_TEST_SIZE));
- device_id_t d_id = 1 << (std::numeric_limits<device_id_t>::digits - 1);
- device->set_device_id(d_id);
rbm_manager.reset(new BlockRBManager(device.get(), std::string()));
- config.start = paddr_t::make_blk_paddr(d_id, 0);
- config.end = paddr_t::make_blk_paddr(d_id, DEFAULT_TEST_SIZE);
- config.block_size = DEFAULT_BLOCK_SIZE;
- config.total_size = DEFAULT_TEST_SIZE;
- config.device_id = d_id;
+ config = get_rbm_ephemeral_device_config(0, 1);
return device->mount().handle_error(crimson::ct_error::assert_all{}
).then([this] {
- return rbm_manager->mkfs(config).handle_error(crimson::ct_error::assert_all{}
+ return device->mkfs(config).handle_error(crimson::ct_error::assert_all{}
).then([this] {
return rbm_manager->open().handle_error(crimson::ct_error::assert_all{});
});
}
auto mkfs() {
- return rbm_manager->mkfs(config).unsafe_get0();
+ return device->mkfs(config).unsafe_get0(); // mkfs is issued on the device, no longer on rbm_manager
}
auto read_rbm_header() {
- rbm_abs_addr addr = convert_paddr_to_abs_addr(config.start);
- return rbm_manager->read_rbm_header(addr).unsafe_get0();
+ return device->read_rbm_header(RBM_START_ADDRESS).unsafe_get0(); // the header is read from the fixed RBM_START_ADDRESS through the device
}
auto open() {
super.block_size == DEFAULT_BLOCK_SIZE &&
super.end == DEFAULT_TEST_SIZE
);
- config.block_size = 8196;
+ device->set_block_size(8196);
mkfs();
super = read_rbm_header();
ASSERT_TRUE(