git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/rbm: move mkfs to RBMDevice
author myoungwon oh <ohmyoungwon@gmail.com>
Wed, 12 Oct 2022 03:25:37 +0000 (12:25 +0900)
committer myoungwon oh <ohmyoungwon@gmail.com>
Tue, 18 Oct 2022 02:32:24 +0000 (11:32 +0900)
Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
src/crimson/os/seastore/random_block_manager.h
src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
src/crimson/os/seastore/random_block_manager/block_rb_manager.h
src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
src/crimson/os/seastore/random_block_manager/nvme_block_device.h
src/crimson/os/seastore/random_block_manager/rbm_device.h
src/crimson/os/seastore/seastore_types.h
src/test/crimson/seastore/test_randomblock_manager.cc

index 722685e80a47f303f780771a26a730e0d1488edd..1158e32befd71bb1cbb062cc9a9597d7385b341f 100644 (file)
 
 namespace crimson::os::seastore {
 
+struct rbm_metadata_header_t {
+  size_t size = 0;
+  size_t block_size = 0;
+  uint64_t start; // start location of the device
+  uint64_t end;   // end location of the device
+  uint64_t magic; // to indicate randomblock_manager
+  uuid_d uuid;
+  uint32_t start_data_area;
+  uint64_t flag; // reserved
+  uint64_t feature;
+  device_id_t device_id;
+  checksum_t crc;
+
+  DENC(rbm_metadata_header_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.size, p);
+    denc(v.block_size, p);
+    denc(v.start, p);
+    denc(v.end, p);
+    denc(v.magic, p);
+    denc(v.uuid, p);
+    denc(v.start_data_area, p);
+    denc(v.flag, p);
+    denc(v.feature, p);
+    denc(v.device_id, p);
+
+    denc(v.crc, p);
+    DENC_FINISH(p);
+  }
+
+};
+
+class Device;
 class RandomBlockManager {
 public:
 
-  struct mkfs_config_t {
-    std::string path;
-    paddr_t start;
-    paddr_t end;
-    size_t block_size = 0;
-    size_t total_size = 0;
-    device_id_t device_id = 0;
-    seastore_meta_t meta;
-  };
-  using mkfs_ertr = crimson::errorator<
-       crimson::ct_error::input_output_error,
-       crimson::ct_error::invarg
-       >;
-  virtual mkfs_ertr::future<> mkfs(mkfs_config_t) = 0;
-
   using read_ertr = crimson::errorator<
     crimson::ct_error::input_output_error,
     crimson::ct_error::invarg,
@@ -109,4 +127,9 @@ inline rbm_abs_addr convert_paddr_to_abs_addr(const paddr_t& paddr) {
 inline paddr_t convert_abs_addr_to_paddr(rbm_abs_addr addr, device_id_t d_id) { // wraps absolute address `addr` on device `d_id` into a paddr_t
   return paddr_t::make_blk_paddr(d_id, addr);
 }
+std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header);
 }
+
+WRITE_CLASS_DENC_BOUNDED(
+  crimson::os::seastore::rbm_metadata_header_t
+)
index 33dff0c614e3bf962600cc9f0f1b0b1ab37927fd..80acffd726d70734e5cd5c178e94132a84266d85 100644 (file)
@@ -16,34 +16,35 @@ SET_SUBSYS(seastore_device);
 
 namespace crimson::os::seastore {
 
-BlockRBManager::mkfs_ertr::future<> BlockRBManager::mkfs(mkfs_config_t config)
+device_config_t get_rbm_ephemeral_device_config( // builds the device_config_t for ephemeral RBM device number `index` out of `num_devices`
+    std::size_t index, std::size_t num_devices)
 {
-  LOG_PREFIX(BlockRBManager::mkfs);
-  super.uuid = uuid_d(); // TODO
-  super.magic = 0xFF; // TODO
-  super.start = convert_paddr_to_abs_addr(
-    config.start);
-  super.end = convert_paddr_to_abs_addr(
-    config.end);
-  super.block_size = config.block_size;
-  super.size = config.total_size;
-  super.start_data_area = 0;
-  super.crc = 0;
-  super.feature |= RBM_BITMAP_BLOCK_CRC;
-  super.device_id = config.device_id;
-
-  DEBUG("super {} ", super);
-  // write super block
-  return write_rbm_header(
-  ).safe_then([] {
-    return mkfs_ertr::now();
-  }).handle_error(
-    mkfs_ertr::pass_further{},
-    crimson::ct_error::assert_all{
-    "Invalid error write_rbm_header in BlockRBManager::mkfs"
-  });
-}
+  assert(num_devices > index); // `index` must name one of the `num_devices` devices
+  magic_t magic = 0xfffa; // same magic is used for this device and every secondary spec
+  auto type = device_type_t::RANDOM_BLOCK_EPHEMERAL;
+  bool is_major_device;
+  secondary_device_set_t secondary_devices;
+  if (index == 0) { // device 0 is the major device and records all other devices as secondaries
+    is_major_device = true;
+    for (std::size_t secondary_index = index + 1;
+         secondary_index < num_devices;
+         ++secondary_index) {
+      device_id_t secondary_id = static_cast<device_id_t>(secondary_index); // NOTE(review): no DEVICE_ID_RANDOM_BLOCK_MIN offset here, unlike `id` below -- confirm this is intended
+      secondary_devices.insert({
+        secondary_index, device_spec_t{magic, type, secondary_id}
+      });
+    }
+  } else { // index > 0
+    is_major_device = false; // non-major devices return an empty secondary set
+  }
 
+  device_id_t id = static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN + index); // this device's own id: DEVICE_ID_RANDOM_BLOCK_MIN + index
+  seastore_meta_t meta = {}; // value-initialized meta
+  return {is_major_device,
+          device_spec_t{magic, type, id},
+          meta,
+          secondary_devices};
+}
 
 /* TODO : block allocator */
 BlockRBManager::allocate_ret BlockRBManager::alloc_extent(
@@ -83,13 +84,9 @@ BlockRBManager::write_ertr::future<> BlockRBManager::complete_allocation(
 
 BlockRBManager::open_ertr::future<> BlockRBManager::open()
 {
-  return read_rbm_header(RBM_START_ADDRESS
+  return device->read_rbm_header(RBM_START_ADDRESS
   ).safe_then([&](auto s)
     -> open_ertr::future<> {
-    if (s.magic != 0xFF) {
-      return crimson::ct_error::enoent::make();
-    }
-    super = s;
     return open_ertr::now();
   }).handle_error(
     open_ertr::pass_further{},
@@ -103,10 +100,14 @@ BlockRBManager::write_ertr::future<> BlockRBManager::write(
   paddr_t paddr,
   bufferptr &bptr)
 {
+  LOG_PREFIX(BlockRBManager::write);
   ceph_assert(device);
   rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
-  if (addr > super.end || addr < super.start ||
-      bptr.length() > super.end - super.start) {
+  rbm_abs_addr start = 0;
+  rbm_abs_addr end = device->get_available_size();
+  if (addr < start || addr + bptr.length() > end) {
+    ERROR("out of range: start {}, end {}, addr {}, length {}",
+      start, end, addr, bptr.length());
     return crimson::ct_error::erange::make();
   }
   return device->write(
@@ -118,10 +119,14 @@ BlockRBManager::read_ertr::future<> BlockRBManager::read(
   paddr_t paddr,
   bufferptr &bptr)
 {
+  LOG_PREFIX(BlockRBManager::read);
   ceph_assert(device);
   rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
-  if (addr > super.end || addr < super.start ||
-      bptr.length() > super.end - super.start) {
+  rbm_abs_addr start = 0;
+  rbm_abs_addr end = device->get_available_size();
+  if (addr < start || addr + bptr.length() > end) {
+    ERROR("out of range: start {}, end {}, addr {}, length {}",
+      start, end, addr, bptr.length());
     return crimson::ct_error::erange::make();
   }
   return device->read(
@@ -136,82 +141,6 @@ BlockRBManager::close_ertr::future<> BlockRBManager::close()
 }
 
 
-BlockRBManager::write_ertr::future<> BlockRBManager::write_rbm_header()
-{
-  bufferlist meta_b_header;
-  super.crc = 0;
-  encode(super, meta_b_header);
-  // If NVMeDevice supports data protection, CRC for checksum is not required
-  // NVMeDevice is expected to generate and store checksum internally.
-  // CPU overhead for CRC might be saved.
-  if (device->is_data_protection_enabled()) {
-    super.crc = -1;
-  }
-  else {
-    super.crc = meta_b_header.crc32c(-1);
-  }
-
-  bufferlist bl;
-  encode(super, bl);
-  auto iter = bl.begin();
-  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-  assert(bl.length() < super.block_size);
-  iter.copy(bl.length(), bp.c_str());
-
-  return device->write(super.start, bp);
-}
-
-BlockRBManager::read_ertr::future<rbm_metadata_header_t> BlockRBManager::read_rbm_header(
-    rbm_abs_addr addr)
-{
-  LOG_PREFIX(BlockRBManager::read_rbm_header);
-  ceph_assert(device);
-  bufferptr bptr =
-    bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE));
-  bptr.zero();
-  return device->read(
-    addr,
-    bptr
-  ).safe_then([length=bptr.length(), this, bptr, FNAME]()
-    -> read_ertr::future<rbm_metadata_header_t> {
-    bufferlist bl;
-    bl.append(bptr);
-    auto p = bl.cbegin();
-    rbm_metadata_header_t super_block;
-    try {
-      decode(super_block, p);
-    }
-    catch (ceph::buffer::error& e) {
-      DEBUG("read_rbm_header: unable to decode rbm super block {}",
-           e.what());
-      return crimson::ct_error::enoent::make();
-    }
-    checksum_t crc = super_block.crc;
-    bufferlist meta_b_header;
-    super_block.crc = 0;
-    encode(super_block, meta_b_header);
-
-    // Do CRC verification only if data protection is not supported.
-    if (device->is_data_protection_enabled() == false) {
-      if (meta_b_header.crc32c(-1) != crc) {
-        DEBUG("bad crc on super block, expected {} != actual {} ",
-              meta_b_header.crc32c(-1), crc);
-        return crimson::ct_error::input_output_error::make();
-      }
-    }
-    DEBUG("got {} ", super);
-    return read_ertr::future<rbm_metadata_header_t>(
-      read_ertr::ready_future_marker{},
-      super_block
-    );
-  }).handle_error(
-    read_ertr::pass_further{},
-    crimson::ct_error::assert_all{
-      "Invalid error in BlockRBManager::read_rbm_header"
-    }
-  );
-}
-
 BlockRBManager::write_ertr::future<> BlockRBManager::write(
   rbm_abs_addr addr,
   bufferlist &bl)
index bd6927b258b12297d996e6e8c1e4482f9890d1c6..fb208c2157324c28c9478f260b300cdfbc0447e1 100644 (file)
 #include "include/buffer.h"
 #include "include/uuid.h"
 
+
 namespace crimson::os::seastore {
 
 constexpr rbm_abs_addr RBM_START_ADDRESS = 0;
-constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;
 
 using RBMDevice = random_block_device::RBMDevice;
 using RBMDeviceRef = std::unique_ptr<RBMDevice>;
 
-enum {
-  // TODO: This allows the device to manage crc on a block by itself
-  RBM_NVME_END_TO_END_PROTECTION = 1,
-  RBM_BITMAP_BLOCK_CRC = 2,
-};
-
-struct rbm_metadata_header_t {
-  size_t size = 0;
-  size_t block_size = 0;
-  uint64_t start; // start location of the device
-  uint64_t end;   // end location of the device
-  uint64_t magic; // to indicate randomblock_manager
-  uuid_d uuid;
-  uint32_t start_data_area;
-  uint64_t flag; // reserved
-  uint64_t feature;
-  device_id_t device_id;
-  checksum_t crc;
-
-  DENC(rbm_metadata_header_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.block_size, p);
-    denc(v.start, p);
-    denc(v.end, p);
-    denc(v.magic, p);
-    denc(v.uuid, p);
-    denc(v.start_data_area, p);
-    denc(v.flag, p);
-    denc(v.feature, p);
-    denc(v.device_id, p);
-
-    denc(v.crc, p);
-    DENC_FINISH(p);
-  }
-
-};
-
-std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header);
-
-}
-
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::rbm_metadata_header_t
-)
-
-namespace crimson::os::seastore {
+device_config_t get_rbm_ephemeral_device_config(
+    std::size_t index, std::size_t num_devices);
 
 class BlockRBManager final : public RandomBlockManager {
 public:
@@ -88,7 +43,6 @@ public:
    * ---------------------------------------------------------------------------
    */
 
-  mkfs_ertr::future<> mkfs(mkfs_config_t) final;
   read_ertr::future<> read(paddr_t addr, bufferptr &buffer) final;
   write_ertr::future<> write(paddr_t addr, bufferptr &buf) final;
   open_ertr::future<> open() final;
@@ -110,11 +64,8 @@ public:
   abort_allocation_ertr::future<> abort_allocation(Transaction &t) final;
   write_ertr::future<> complete_allocation(Transaction &t) final;
 
-  read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
-  write_ertr::future<> write_rbm_header();
-
-  size_t get_size() const final { return super.size; };
-  extent_len_t get_block_size() const final { return super.block_size; }
+  size_t get_size() const final { return device->get_available_size(); }; // size is now taken from the device rather than from a cached super block
+  extent_len_t get_block_size() const final { return device->get_block_size(); } // block size is now taken from the device rather than from a cached super block
 
   /*
    * We will have multiple partitions (circularjournals and randomblockmanagers)
@@ -127,12 +78,14 @@ public:
   write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl);
 
   device_id_t get_device_id() const final { // the id is delegated to the underlying RBMDevice
-    return super.device_id;
+    assert(device); // `device` is a raw pointer; it must be set before use
+    return device->get_device_id();
   }
 
   uint64_t get_free_blocks() const final { 
     // TODO: return correct free blocks after block allocator is introduced
-    return super.size / super.block_size;
+    assert(device); // get_size()/get_block_size() below dereference `device`
+    return get_size() / get_block_size(); // for now: device size divided by block size, not an actual free count
   }
 
 private:
@@ -140,7 +93,6 @@ private:
    * this contains the number of bitmap blocks, free blocks and
    * rbm specific information
    */
-  rbm_metadata_header_t super;
   //FreelistManager free_manager; // TODO: block management
   RBMDevice * device;
   std::string path;
index 42541356c5ad86d232bd28fa1650819cbebede34..2a3d52947a306c196fbb6a0d5cfadb58ebfcbe72 100644 (file)
@@ -18,6 +18,106 @@ namespace {
     return crimson::get_logger(ceph_subsys_seastore_tm);
   }
 }
+namespace crimson::os::seastore::random_block_device {
+#include "crimson/os/seastore/logging.h"
+SET_SUBSYS(seastore_device);
+
+RBMDevice::mkfs_ret RBMDevice::mkfs(device_config_t config) { // fills `super` from the device geometry and `config`, then writes it with write_rbm_header()
+  LOG_PREFIX(RBMDevice::mkfs);
+  super.start = 0;
+  super.end = get_available_size(); // end and size are both set to the device's available size
+  super.block_size = get_block_size();
+  super.size = get_available_size();
+
+  super.start_data_area = 0;
+  super.feature |= RBM_BITMAP_BLOCK_CRC; // NOTE(review): ORs into whatever `super.feature` already holds -- confirm it starts from a known value
+  super.device_id = config.spec.id; // spec.id is the only field of `config` read in this function
+  DEBUG("super {} ", super);
+  // write super block
+  return write_rbm_header(
+  ).safe_then([] {
+    return mkfs_ertr::now();
+  }).handle_error(
+    mkfs_ertr::pass_further{},
+    crimson::ct_error::assert_all{
+    "Invalid error write_rbm_header in RBMDevice::mkfs"
+  });
+}
+
+write_ertr::future<> RBMDevice::write_rbm_header() // encodes `super` (with its crc filled in) into one block and writes it at super.start
+{
+  bufferlist meta_b_header;
+  super.crc = 0; // the checksum is computed over the header encoded with crc == 0
+  encode(super, meta_b_header);
+  // If NVMeDevice supports data protection, CRC for checksum is not required
+  // NVMeDevice is expected to generate and store checksum internally.
+  // CPU overhead for CRC might be saved.
+  if (is_data_protection_enabled()) {
+    super.crc = -1; // sentinel; read_rbm_header() asserts this value when data protection is enabled
+  } else {
+    super.crc = meta_b_header.crc32c(-1);
+  }
+
+  bufferlist bl;
+  encode(super, bl); // second encode, now carrying the final crc
+  auto iter = bl.begin();
+  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+  bp.zero(); // bytes past the encoded header would otherwise be uninitialized memory written to the device
+  assert(bl.length() < super.block_size); // the encoded header must fit in one block
+  iter.copy(bl.length(), bp.c_str());
+
+  return write(super.start, bp);
+}
+
+read_ertr::future<rbm_metadata_header_t> RBMDevice::read_rbm_header(
+  rbm_abs_addr addr)
+{
+  LOG_PREFIX(RBMDevice::read_rbm_header);
+  bufferptr bptr =
+    bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE));
+  bptr.zero();
+  return read(
+    addr,
+    bptr
+  ).safe_then([length=bptr.length(), this, bptr, FNAME]()
+    -> read_ertr::future<rbm_metadata_header_t> {
+    bufferlist bl;
+    bl.append(bptr);
+    auto p = bl.cbegin();
+    rbm_metadata_header_t super_block;
+    try {
+      decode(super_block, p);
+    }
+    catch (ceph::buffer::error& e) {
+      DEBUG("read_rbm_header: unable to decode rbm super block {}",
+           e.what());
+      return crimson::ct_error::enoent::make();
+    }
+    checksum_t crc = super_block.crc;
+    bufferlist meta_b_header;
+    super_block.crc = 0;
+    encode(super_block, meta_b_header);
+
+    // Do CRC verification only if data protection is not supported.
+    if (is_data_protection_enabled() == false) {
+      if (meta_b_header.crc32c(-1) != crc) {
+       DEBUG("bad crc on super block, expected {} != actual {} ",
+             meta_b_header.crc32c(-1), crc);
+       return crimson::ct_error::input_output_error::make();
+      }
+    } else {
+      ceph_assert_always(crc == (checksum_t)-1);
+    }
+    super_block.crc = crc;
+    super = super_block;
+    DEBUG("got {} ", super);
+    return read_ertr::future<rbm_metadata_header_t>(
+      read_ertr::ready_future_marker{},
+      super_block
+    );
+  });
+}
+
+}
 
 namespace crimson::os::seastore::random_block_device::nvme {
 
index d3acb057aeaf509daf0a4ccd303ff9076f226d4b..8441712993417c4d3c286307157c9935b428be54 100644 (file)
@@ -207,10 +207,6 @@ public:
     uint64_t offset,
     uint64_t len) override;
 
-  mkfs_ret mkfs(device_config_t) final {
-    return mkfs_ertr::now();
-  }
-
   mount_ret mount() final {
     return mount_ertr::now();
   }
index 4a4b720ea2ecb7d6df2956ea0d4dab27f88f307b..9264b104414333299632b025b8484511855b27f4 100644 (file)
@@ -65,6 +65,13 @@ using nvme_command_ertr = crimson::errorator<
 using discard_ertr = crimson::errorator<
   crimson::ct_error::input_output_error>;
 
+constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;
+enum { // anonymous enum of RBM flag values
+  // TODO: This allows the device to manage crc on a block by itself
+  RBM_NVME_END_TO_END_PROTECTION = 1,
+  RBM_BITMAP_BLOCK_CRC = 2, // OR-ed into rbm_metadata_header_t::feature by RBMDevice::mkfs
+};
+
 class RBMDevice : public Device {
 public:
   using Device::read;
@@ -84,6 +91,7 @@ protected:
   device_id_t device_id;
   seastore_meta_t meta;
   secondary_device_set_t devices;
+  rbm_metadata_header_t super;
 public:
   RBMDevice() {}
   virtual ~RBMDevice() = default;
@@ -100,6 +108,10 @@ public:
     device_id = id;
   }
 
+  void set_block_size(uint64_t bs) { // overwrites the block_size member; the mkfs unit test uses it to change the block size before re-running mkfs
+    block_size = bs;
+  }
+
   magic_t get_magic() const final {
     return magic_t();
   }
@@ -152,6 +164,12 @@ public:
     uint16_t stream = 0) = 0;
 
   bool is_data_protection_enabled() const { return false; }
+
+  mkfs_ret mkfs(device_config_t) final;
+
+  write_ertr::future<> write_rbm_header();
+
+  read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
 };
 
 
@@ -168,10 +186,6 @@ public:
     }
   }
 
-  mkfs_ret mkfs(device_config_t) final {
-    return mkfs_ertr::now();
-  }
-
   mount_ret mount() final {
     return open("", seastar::open_flags::rw
     ).safe_then([]() {
index 17a9fc825b5d2f504b3d69eaa64f6096589125bb..28d12ebb9573b1ff983736b3a4e5d0da6044050c 100644 (file)
@@ -68,6 +68,9 @@ constexpr device_id_t DEVICE_ID_ZERO = DEVICE_ID_MAX - 5;
 constexpr device_id_t DEVICE_ID_ROOT = DEVICE_ID_MAX - 6;
 constexpr device_id_t DEVICE_ID_MAX_VALID = DEVICE_ID_MAX - 7;
 constexpr device_id_t DEVICE_ID_MAX_VALID_SEGMENT = DEVICE_ID_MAX >> 1;
+constexpr device_id_t DEVICE_ID_SEGMENTED_MIN = 0;
+constexpr device_id_t DEVICE_ID_RANDOM_BLOCK_MIN = 
+  1 << (std::numeric_limits<device_id_t>::digits - 1); // 1 shifted to the highest value bit of device_id_t
 
 struct device_id_printer_t {
   device_id_t id;
index d2bda86ee38ec1aebabad4dbe28d85e4b7fe8edc..be748393cf3f60ba6aa588ec5dac36bd061a6f30 100644 (file)
@@ -46,24 +46,18 @@ struct rbm_test_t :
 
   const uint64_t block_size = DEFAULT_BLOCK_SIZE;
 
-  RandomBlockManager::mkfs_config_t config;
+  device_config_t config;
   paddr_t current;
 
   rbm_test_t() = default;
 
   seastar::future<> set_up_fut() final {
     device.reset(new random_block_device::TestMemory(DEFAULT_TEST_SIZE));
-    device_id_t d_id = 1 << (std::numeric_limits<device_id_t>::digits - 1);
-    device->set_device_id(d_id);
     rbm_manager.reset(new BlockRBManager(device.get(), std::string()));
-    config.start = paddr_t::make_blk_paddr(d_id, 0);
-    config.end = paddr_t::make_blk_paddr(d_id, DEFAULT_TEST_SIZE);
-    config.block_size = DEFAULT_BLOCK_SIZE;
-    config.total_size = DEFAULT_TEST_SIZE;
-    config.device_id = d_id;
+    config = get_rbm_ephemeral_device_config(0, 1);
     return device->mount().handle_error(crimson::ct_error::assert_all{}
     ).then([this] {
-      return rbm_manager->mkfs(config).handle_error(crimson::ct_error::assert_all{}
+      return device->mkfs(config).handle_error(crimson::ct_error::assert_all{}
       ).then([this] {
        return rbm_manager->open().handle_error(crimson::ct_error::assert_all{});
       });
@@ -79,12 +73,11 @@ struct rbm_test_t :
   }
 
   auto mkfs() {
-    return rbm_manager->mkfs(config).unsafe_get0();
+    return device->mkfs(config).unsafe_get0(); // mkfs is now invoked on the device instead of the manager
   }
 
   auto read_rbm_header() {
-    rbm_abs_addr addr = convert_paddr_to_abs_addr(config.start);
-    return rbm_manager->read_rbm_header(addr).unsafe_get0();
+    return device->read_rbm_header(RBM_START_ADDRESS).unsafe_get0(); // header is read from the device at the fixed RBM_START_ADDRESS
   }
 
   auto open() {
@@ -130,7 +123,7 @@ TEST_F(rbm_test_t, mkfs_test)
        super.block_size == DEFAULT_BLOCK_SIZE &&
        super.end == DEFAULT_TEST_SIZE 
    );
-   config.block_size = 8196;
+   device->set_block_size(8196);
    mkfs();
    super = read_rbm_header();
    ASSERT_TRUE(