git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
seastore/rbm: rename NVMeManager to BlockRBManager
author myoungwon oh <ohmyoungwon@gmail.com>
Tue, 26 Jul 2022 05:25:27 +0000 (14:25 +0900)
committer myoungwon oh <ohmyoungwon@gmail.com>
Wed, 27 Jul 2022 05:24:36 +0000 (14:24 +0900)
Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/random_block_manager/block_rb_manager.cc [new file with mode: 0644]
src/crimson/os/seastore/random_block_manager/block_rb_manager.h [new file with mode: 0644]
src/crimson/os/seastore/random_block_manager/nvme_manager.cc [deleted file]
src/crimson/os/seastore/random_block_manager/nvme_manager.h [deleted file]
src/test/crimson/seastore/test_randomblock_manager.cc

index ee3aa47cc533226ca2ca8173637b5792ad3e7648..408f7b494dc7afce5ebec5b7ec9cabe9d0049c14 100644 (file)
@@ -36,7 +36,7 @@ set(crimson_seastore_srcs
   extent_placement_manager.cc
   object_data_handler.cc
   seastore.cc
-  random_block_manager/nvme_manager.cc
+  random_block_manager/block_rb_manager.cc
   random_block_manager/nvmedevice.cc
   journal/segmented_journal.cc
   journal/segment_allocator.cc
diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
new file mode 100644 (file)
index 0000000..eff58f8
--- /dev/null
@@ -0,0 +1,708 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include "crimson/os/seastore/logging.h"
+
+#include "include/buffer.h"
+#include "nvmedevice.h"
+#include "include/interval_set.h"
+#include "include/intarith.h"
+#include "block_rb_manager.h"
+
+SET_SUBSYS(seastore_device);
+
+namespace crimson::os::seastore {
+
+/*
+ * rbm_sync_block_bitmap
+ *
+ * Encode one rbm_bitmap_block_t and write it to the device.
+ *
+ * The block is encoded into a bufferlist, block.get_size() bytes of that
+ * encoding are copied into a freshly created page-aligned buffer, and the
+ * buffer is written at
+ *   super.start_alloc_area + bitmap_block_no * super.block_size
+ * where bitmap_block_no = convert_block_no_to_bitmap_block(block_no).
+ *
+ * A std::exception thrown while building the buffer is logged and then
+ * escalated to an assertion failure.
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::rbm_sync_block_bitmap(
+    rbm_bitmap_block_t &block, blk_no_t block_no)
+{
+  LOG_PREFIX(BlockRBManager::rbm_sync_block_bitmap);
+  bufferptr bptr;
+  try {
+    bptr = bufferptr(ceph::buffer::create_page_aligned(block.get_size()));
+    bufferlist bl;
+    encode(block, bl);
+    auto iter = bl.cbegin();
+    // copy exactly get_size() bytes of the encoding into the aligned buffer
+    iter.copy(block.get_size(), bptr.c_str());
+  } catch (const std::exception &e) {
+    DEBUG("rbm_sync_block_bitmap: exception creating aligned buffer {}", e);
+    ceph_assert(0 == "unhandled exception");
+  }
+  // block_no is translated to the index of the bitmap block that covers it
+  uint64_t bitmap_block_no = convert_block_no_to_bitmap_block(block_no);
+  return device->write(super.start_alloc_area +
+                      bitmap_block_no * super.block_size,
+                      bptr);
+}
+
+/*
+ * initialize_blk_alloc_area
+ *
+ * Build the initial on-disk allocation bitmap (called from mkfs()):
+ *  1. set the bits of every block in front of the data area (block numbers
+ *     below super.start_data_area / super.block_size) and write that bitmap
+ *     block,
+ *  2. clear the bits from the first data block up to the end of the last
+ *     bitmap block,
+ *  3. if the number of device blocks is not a multiple of the number of bits
+ *     held by one bitmap block, set the unused tail bits of the last bitmap
+ *     block.
+ */
+BlockRBManager::mkfs_ertr::future<> BlockRBManager::initialize_blk_alloc_area()
+{
+  LOG_PREFIX(BlockRBManager::initialize_blk_alloc_area);
+  auto start = super.start_data_area / super.block_size;
+  DEBUG("initialize_alloc_area: start to read at {} ", start);
+
+  /* write allocated bitmap info to rbm meta block */
+  rbm_bitmap_block_t b_block(super.block_size);
+  alloc_rbm_bitmap_block_buf(b_block);
+  for (uint64_t i = 0; i < start; i++) {
+    b_block.set_bit(i);
+  }
+
+  // CRC calculation is offloaded to NVMeDevice if data protection is enabled.
+  if (device->is_data_protection_enabled() == false) {
+    b_block.set_crc();
+  }
+
+  // b_block is copied into do_with() storage so that the continuations below
+  // can keep using it after this function has returned.
+  return seastar::do_with(
+    b_block,
+    [this, start, FNAME](auto &b_block) {
+    return rbm_sync_block_bitmap(b_block,
+      super.start_alloc_area / super.block_size
+    ).safe_then([this, &b_block, start, FNAME]() {
+
+      /* initialize bitmap blocks as unused */
+      auto max = max_block_by_bitmap_block();
+      auto max_block = super.size / super.block_size;
+      blk_no_t end = round_up_to(max_block, max) - 1;
+      DEBUG("init start {} end {} ", start, end);
+      return rbm_sync_block_bitmap_by_range(
+        start,
+        end,
+        bitmap_op_types_t::ALL_CLEAR
+      ).safe_then([this, &b_block, FNAME]() {
+        /*
+         * Set rest of the block bitmap, which is not used, to 1
+         * To do so, we only mark 1 to empty bitmap blocks
+         */
+        uint64_t na_block_no = super.size/super.block_size;
+        uint64_t remain_block = na_block_no % max_block_by_bitmap_block();
+        DEBUG("na_block_no: {}, remain_block: {} ",
+              na_block_no, remain_block);
+        if (remain_block) {
+          DEBUG("try to remained write alloc info ");
+          if (na_block_no > max_block_by_bitmap_block()) {
+            // the tail lives in a different bitmap block than the first one:
+            // start from a fresh buffer
+            b_block.buf.clear();
+            alloc_rbm_bitmap_block_buf(b_block);
+          }
+          for (uint64_t i = remain_block; i < max_block_by_bitmap_block(); i++) {
+            b_block.set_bit(i);
+          }
+          // NOTE(review): unlike the first bitmap block above, the CRC is set
+          // here regardless of is_data_protection_enabled() -- confirm intent.
+          b_block.set_crc();
+          return rbm_sync_block_bitmap(b_block, na_block_no
+          ).handle_error(
+            mkfs_ertr::pass_further{},
+            crimson::ct_error::assert_all{
+              "Invalid error rbm_sync_block_bitmap to update last bitmap block "
+              "in BlockRBManager::initialize_blk_alloc_area"
+            }
+          );
+        }
+        return mkfs_ertr::now();
+      }).handle_error(
+        // guards the rbm_sync_block_bitmap_by_range() chain
+        mkfs_ertr::pass_further{},
+        crimson::ct_error::assert_all{
+          "Invalid error rbm_sync_block_bitmap_by_range "
+          "in BlockRBManager::initialize_blk_alloc_area"
+        }
+      );
+    }).handle_error(
+      // guards the first rbm_sync_block_bitmap() call
+      mkfs_ertr::pass_further{},
+      crimson::ct_error::assert_all{
+        "Invalid error rbm_sync_block_bitmap "
+        "in BlockRBManager::initialize_blk_alloc_area"
+      }
+    );
+  });
+}
+
+/*
+ * mkfs
+ *
+ * Open the device at `path`, and, if no RBM header can be read at
+ * config.start (read_rbm_header() reports enoent), fill in `super` from
+ * config, write the header and initialize the allocation bitmap.
+ * If a header is already present nothing is written.
+ * On success the device is closed again before the future resolves.
+ */
+BlockRBManager::mkfs_ertr::future<> BlockRBManager::mkfs(mkfs_config_t config)
+{
+  LOG_PREFIX(BlockRBManager::mkfs);
+  DEBUG("path {}", path);
+  // config is a by-value parameter whose lifetime ends when mkfs() returns,
+  // while the continuations below run later.  Park it in do_with() storage
+  // so that the references captured by the lambdas stay valid.
+  return seastar::do_with(
+    std::move(config),
+    [this, FNAME](auto &cfg) {
+    return _open_device(path).safe_then([this, &cfg, FNAME]() {
+      rbm_abs_addr addr = convert_paddr_to_abs_addr(
+        cfg.start);
+      return read_rbm_header(addr).safe_then([FNAME](auto super) {
+        DEBUG("already exists ");
+        return mkfs_ertr::now();
+      }).handle_error(
+        // no (decodable) header on the device: create a new one
+        crimson::ct_error::enoent::handle([this, &cfg, FNAME](auto) {
+          super.uuid = uuid_d(); // TODO
+          super.magic = 0xFF; // TODO
+          super.start = convert_paddr_to_abs_addr(
+            cfg.start);
+          super.end = convert_paddr_to_abs_addr(
+            cfg.end);
+          super.block_size = cfg.block_size;
+          super.size = cfg.total_size;
+          super.free_block_count = cfg.total_size/cfg.block_size - 2;
+          // get_alloc_area_size() reads super.size and super.block_size,
+          // so it must come after the two assignments above
+          super.alloc_area_size = get_alloc_area_size();
+          super.start_alloc_area = RBM_SUPERBLOCK_SIZE;
+          super.start_data_area =
+            super.start_alloc_area + super.alloc_area_size;
+          super.crc = 0;
+          super.feature |= RBM_BITMAP_BLOCK_CRC;
+          super.device_id = cfg.device_id;
+
+          DEBUG(" super {} ", super);
+          // write super block
+          return write_rbm_header().safe_then([this] {
+            return initialize_blk_alloc_area();
+          }).handle_error(
+            mkfs_ertr::pass_further{},
+            crimson::ct_error::assert_all{
+            "Invalid error write_rbm_header in BlockRBManager::mkfs"
+          });
+        }),
+        mkfs_ertr::pass_further{},
+        crimson::ct_error::assert_all{
+          "Invalid error read_rbm_header in BlockRBManager::mkfs"
+        }
+      );
+    }).safe_then([this]() {
+      if (device) {
+        return device->close(
+        ).safe_then([]() {
+          return mkfs_ertr::now();
+        });
+      }
+      return mkfs_ertr::now();
+    }).handle_error(
+      mkfs_ertr::pass_further{},
+      crimson::ct_error::assert_all{
+      "Invalid error open_device in BlockRBManager::mkfs"
+    });
+  });
+}
+
+/*
+ * find_free_block
+ *
+ * Scan the on-disk bitmap blocks sequentially, starting at
+ * super.start_alloc_area, for size / super.block_size contiguous free
+ * blocks.  Resolves to an interval_set holding the blocks found, or to an
+ * empty set if the bitmap area was exhausted first.  Nothing is marked as
+ * allocated here; `t` is currently unused.
+ */
+BlockRBManager::find_block_ret BlockRBManager::find_free_block(Transaction &t, size_t size)
+{
+  LOG_PREFIX(BlockRBManager::find_free_block);
+  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+  // `size` is a by-value parameter that is gone once this function returns,
+  // but the repeat() body runs again after each asynchronous read: capture
+  // it by value everywhere instead of through a default [&] capture.
+  return seastar::do_with(uint64_t(0),
+    uint64_t(super.start_alloc_area),
+    interval_set<blk_no_t>(),
+    bp,
+    [size, this, FNAME](auto &allocated, auto &addr, auto &alloc_extent, auto &bp) mutable {
+    return crimson::repeat(
+      [&allocated, &addr, &alloc_extent, &bp, size, this, FNAME]() mutable {
+      return device->read(
+        addr,
+        bp
+      ).safe_then(
+        [&bp, &addr, size, &allocated, &alloc_extent, this, FNAME]() mutable {
+        DEBUG("find_free_list: allocate {}, addr {}", allocated, addr);
+        rbm_bitmap_block_t b_block(super.block_size);
+        bufferlist bl_bitmap_block;
+        bl_bitmap_block.append(bp);
+        decode(b_block, bl_bitmap_block);
+        auto max = max_block_by_bitmap_block();
+        for (uint64_t i = 0;
+            i < max && (uint64_t)size/super.block_size > allocated; i++) {
+          auto block_id = convert_bitmap_block_no_to_block_id(i, addr);
+          if (b_block.is_allocated(i)) {
+            continue;
+          }
+          DEBUG("find_free_list: allocated block no {} i {}",
+                convert_bitmap_block_no_to_block_id(i, addr), i);
+          if (allocated != 0 && alloc_extent.range_end() != block_id) {
+            /*
+             * if not continuous block, just restart to find continuous blocks
+             * at the next block.
+             * in-memory allocator can handle this efficiently.
+             */
+            allocated = 0;
+            alloc_extent.clear(); // a range of block allocation
+            DEBUG("find_free_list: rety to find continuous blocks");
+            continue;
+          }
+          allocated += 1;
+          alloc_extent.insert(block_id);
+        }
+        // advance to the next bitmap block
+        addr += super.block_size;
+        DEBUG("find_free_list: allocated: {} alloc_extent {}",
+              allocated, alloc_extent);
+        if (((uint64_t)size)/super.block_size == allocated) {
+          return seastar::stop_iteration::yes;
+        } else if (addr >= super.start_data_area) {
+          // ran off the end of the bitmap area without finding enough blocks
+          alloc_extent.clear();
+          return seastar::stop_iteration::yes;
+        }
+        return seastar::stop_iteration::no;
+      });
+    }).safe_then([&allocated, &alloc_extent, size, this, FNAME]() {
+      DEBUG(" allocated: {} size {} ",
+            allocated * super.block_size, size);
+      if (allocated * super.block_size < size) {
+        alloc_extent.clear();
+      }
+      return find_block_ret(
+        find_block_ertr::ready_future_marker{},
+        alloc_extent);
+    }).handle_error(
+      find_block_ertr::pass_further{},
+      crimson::ct_error::assert_all{
+        "Invalid error in BlockRBManager::find_free_block"
+      }
+    );
+  });
+}
+
+/* TODO : block allocator */
+/*
+ * alloc_extent
+ *
+ * Look for `size` bytes worth of free blocks via find_free_block() and
+ * resolve to the paddr of the first block of the range found
+ * (range_start() * super.block_size, on super.device_id).
+ * Resolves to enospc when find_free_block() comes back with an empty set.
+ * The bitmap itself is not modified by this function.
+ */
+BlockRBManager::allocate_ret BlockRBManager::alloc_extent(
+    Transaction &t, size_t size)
+{
+
+  /*
+   * 1. find free blocks using block allocator
+   * 2. add free blocks to transaction
+   *    (the free block is reserved state, not stored)
+   * 3. link free blocks to onode
+   * Due to in-memory block allocator is the next work to do,
+   * just read the block bitmap directly to find free blocks.
+   *
+   */
+  LOG_PREFIX(BlockRBManager::alloc_extent);
+  return find_free_block(t, size
+  ).safe_then([this, FNAME](auto alloc_extent) mutable
+    -> allocate_ertr::future<paddr_t> {
+    DEBUG("after find_free_block: allocated {}", alloc_extent);
+    // an empty set means no suitable run of free blocks was found
+    if (alloc_extent.empty()) {
+      return crimson::ct_error::enospc::make();
+    }
+    // block number -> absolute byte address -> paddr
+    paddr_t paddr = convert_abs_addr_to_paddr(
+      alloc_extent.range_start() * super.block_size,
+      super.device_id);
+    return allocate_ret(
+      allocate_ertr::ready_future_marker{},
+      paddr);
+  }).handle_error(
+    allocate_ertr::pass_further{},
+    crimson::ct_error::assert_all{
+      "Invalid error find_free_block in BlockRBManager::alloc_extent"
+    }
+  );
+}
+
+/*
+ * add_free_extent
+ *
+ * Append a CLEAR delta covering [from, from + len) to v.
+ * len must be a multiple of super.block_size.
+ */
+void BlockRBManager::add_free_extent(
+    std::vector<alloc_delta_t>& v, rbm_abs_addr from, size_t len)
+{
+  ceph_assert(len % super.block_size == 0);
+
+  alloc_delta_t delta;
+  delta.op = alloc_delta_t::op_types_t::CLEAR;
+
+  paddr_t freed_paddr = convert_abs_addr_to_paddr(from, super.device_id);
+  delta.alloc_blk_ranges.emplace_back(
+    freed_paddr, L_ADDR_NULL, len, extent_types_t::ROOT);
+
+  v.push_back(delta);
+}
+
+/*
+ * rbm_sync_block_bitmap_by_range
+ *
+ * Set (ALL_SET) or clear (any other op) the bitmap bits of the blocks in
+ * the inclusive range [start, end] and write the affected bitmap blocks.
+ *
+ * If the range covers whole bitmap blocks only, the blocks are generated
+ * and written in one go.  Otherwise the first bitmap block is read and
+ * patched, whole middle blocks are generated, and, if the range does not
+ * end on a bitmap block boundary, the last bitmap block is read and patched
+ * as well before everything is written with a single write().
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::rbm_sync_block_bitmap_by_range(
+    blk_no_t start, blk_no_t end, bitmap_op_types_t op)
+{
+  LOG_PREFIX(BlockRBManager::rbm_sync_block_bitmap_by_range);
+  auto addr = super.start_alloc_area +
+             (start / max_block_by_bitmap_block())
+             * super.block_size;
+  // aligned write: start is the first bit of a bitmap block and end is the
+  // last bit of a bitmap block.  The end test must be (end + 1) % max == 0
+  // (same test as the "partially aligned" branch below); testing
+  // end % (max - 1) would also match e.g. end == 0 or end == 2 * (max - 1)
+  // and overwrite bits outside of [start, end].
+  if (start % max_block_by_bitmap_block() == 0 &&
+      (end + 1) % max_block_by_bitmap_block() == 0) {
+    auto num_block = num_block_between_blk_ids(start, end);
+    bufferlist bl_bitmap_block;
+    add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block, op);
+    return write(
+      addr,
+      bl_bitmap_block);
+  }
+  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+  // try to read first block, then check the block is aligned
+  return device->read(
+    addr,
+    bp
+  ).safe_then([bp, start, end, op, addr, this, FNAME]() {
+    rbm_bitmap_block_t b_block(super.block_size);
+    bufferlist bl_bitmap_block;
+    bl_bitmap_block.append(bp);
+    decode(b_block, bl_bitmap_block);
+    auto max = max_block_by_bitmap_block();
+    // last bit to touch in the first bitmap block: either the bit of `end`
+    // (range ends inside this block) or the last bit of the block
+    auto loop_end = end < (start / max + 1) * max ?
+                   end % max : max - 1;
+    for (uint64_t i = (start % max); i <= loop_end; i++) {
+      if (op == bitmap_op_types_t::ALL_SET) {
+        b_block.set_bit(i);
+      } else {
+        b_block.clear_bit(i);
+      }
+    }
+    auto num_block = num_block_between_blk_ids(start, end);
+    DEBUG("rbm_sync_block_bitmap_by_range: start {}, end {}, \
+         loop_end {}, num_block {}",
+         start, end, loop_end, num_block);
+
+    bl_bitmap_block.clear();
+    encode(b_block, bl_bitmap_block);
+    if (num_block == 1) {
+      // | front (unaligned) |
+      return write(
+          addr,
+          bl_bitmap_block);
+    } else if (!((end + 1) % max)) {
+      // | front (unaligned) | middle (aligned) |
+      add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block - 1, op);
+      DEBUG("partially aligned write: addr {} length {}",
+           addr, bl_bitmap_block.length());
+      return write(
+          addr,
+          bl_bitmap_block);
+    } else if (num_block > 2) {
+      // | front (unaligned) | middle | end (unaligned) |
+      // fill up the middle
+      add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block - 2, op);
+    }
+
+    auto next_addr = super.start_alloc_area +
+                (end / max_block_by_bitmap_block())
+                * super.block_size;
+    auto bptr = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+    // | front (unaligned) | middle | end (unaligned) | or
+    // | front (unaligned) | end (unaligned) |
+    return device->read(
+      next_addr,
+      bptr
+    ).safe_then(
+      [bptr, bl_bitmap_block, end, op, addr, this, FNAME]() mutable {
+      rbm_bitmap_block_t b_block(super.block_size);
+      bufferlist block;
+      block.append(bptr);
+      decode(b_block, block);
+      auto max = max_block_by_bitmap_block();
+      // (end - (end % max)) % max is always 0: patch bits 0..(end % max)
+      for (uint64_t i = (end - (end % max)) % max;
+          i <= (end % max); i++) {
+        if (op == bitmap_op_types_t::ALL_SET) {
+          b_block.set_bit(i);
+        } else {
+          b_block.clear_bit(i);
+        }
+      }
+      DEBUG("start {} end {} ", end - (end % max), end);
+      // NOTE(review): the buffer that was read (`block`) is appended here,
+      // not a re-encoding of b_block as done for the front block above --
+      // presumably this relies on b_block.buf sharing memory with `block`
+      // after decode(); confirm.
+      bl_bitmap_block.claim_append(block);
+      return write(
+        addr,
+        bl_bitmap_block);
+    }).handle_error(
+      write_ertr::pass_further{},
+      crimson::ct_error::assert_all{
+        "Invalid error in BlockRBManager::rbm_sync_block_bitmap_by_range"
+      }
+    );
+  }).handle_error(
+    write_ertr::pass_further{},
+    crimson::ct_error::assert_all{
+      "Invalid error in BlockRBManager::rbm_sync_block_bitmap_by_range"
+    }
+  );
+}
+
+/*
+ * abort_allocation
+ *
+ * Placeholder: currently does nothing with `t` and resolves immediately.
+ */
+BlockRBManager::abort_allocation_ertr::future<> BlockRBManager::abort_allocation(
+    Transaction &t)
+{
+  /*
+   * TODO: clear all allocation infos associated with transaction in in-memory allocator
+   */
+  return abort_allocation_ertr::now();
+}
+
+/*
+ * complete_allocation
+ *
+ * Currently a no-op: `t` is not inspected and the returned future is
+ * already resolved.
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::complete_allocation(
+    Transaction &t)
+{
+  return write_ertr::now();
+}
+
+/*
+ * sync_allocation
+ *
+ * Apply a batch of allocation deltas to the on-disk bitmap: every range of
+ * every delta is translated to an inclusive block range and passed to
+ * rbm_sync_block_bitmap_by_range() (SET deltas set bits, all others clear
+ * them).  Afterwards the in-memory super.free_block_count is adjusted by
+ * the net number of blocks allocated; the superblock is not written here.
+ *
+ * Note that alloc_blocks is moved from, i.e. the caller's vector is left in
+ * a moved-from state.
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::sync_allocation(
+    std::vector<alloc_delta_t> &alloc_blocks)
+{
+  LOG_PREFIX(BlockRBManager::sync_allocation);
+  if (alloc_blocks.empty()) {
+    return write_ertr::now();
+  }
+  // keep the deltas alive in do_with() storage for the whole async chain
+  return seastar::do_with(move(alloc_blocks),
+    [&, this, FNAME](auto &alloc_blocks) mutable {
+    return crimson::do_for_each(alloc_blocks,
+      [this, FNAME](auto &alloc) {
+      return crimson::do_for_each(alloc.alloc_blk_ranges,
+        [this, &alloc, FNAME](auto &range) -> write_ertr::future<> {
+        DEBUG("range {} ~ {}", range.paddr, range.len);
+       bitmap_op_types_t op =
+         (alloc.op == alloc_delta_t::op_types_t::SET) ?
+         bitmap_op_types_t::ALL_SET :
+         bitmap_op_types_t::ALL_CLEAR;
+       rbm_abs_addr addr = convert_paddr_to_abs_addr(
+         range.paddr);
+       // inclusive block range; the length is rounded up to whole blocks
+       blk_no_t start = addr / super.block_size;
+       blk_no_t end = start +
+         (round_up_to(range.len, super.block_size)) / super.block_size
+          - 1;
+       return rbm_sync_block_bitmap_by_range(
+         start,
+         end,
+         op);
+      });
+    }).safe_then([this, &alloc_blocks, FNAME]() mutable {
+      // net number of blocks allocated by this batch (negative if more
+      // blocks were released than allocated)
+      int alloc_block_count = 0;
+      for (const auto& b : alloc_blocks) {
+       for (auto r : b.alloc_blk_ranges) {
+         if (b.op == alloc_delta_t::op_types_t::SET) {
+           alloc_block_count +=
+             round_up_to(r.len, super.block_size) / super.block_size;
+           DEBUG("complete alloc block: start {} len {} ",
+                 r.paddr, r.len);
+         } else {
+           alloc_block_count -=
+             round_up_to(r.len, super.block_size) / super.block_size;
+           DEBUG("complete alloc block: start {} len {} ",
+                 r.paddr, r.len);
+         }
+       }
+      }
+      DEBUG("complete_alloction: complete to allocate {} blocks",
+           alloc_block_count);
+      super.free_block_count -= alloc_block_count;
+      return write_ertr::now();
+    });
+  });
+}
+
+/*
+ * open
+ *
+ * Open the device at `path`, read the RBM header located at `paddr` and,
+ * if its magic is 0xFF, adopt it as `super` and recount the free blocks
+ * via check_bitmap_blocks().  A header with any other magic resolves to
+ * enoent.
+ */
+BlockRBManager::open_ertr::future<> BlockRBManager::open(
+    const std::string &path, paddr_t paddr)
+{
+  LOG_PREFIX(BlockRBManager::open);
+  DEBUG("open: path{}", path);
+  rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
+  return _open_device(path
+  ).safe_then([this, addr]() {
+    return read_rbm_header(addr).safe_then([&](auto s)
+      -> open_ertr::future<> {
+      // 0xFF is the magic value mkfs() writes into the header
+      if (s.magic != 0xFF) {
+       return crimson::ct_error::enoent::make();
+      }
+      super = s;
+      return check_bitmap_blocks().safe_then([]() {
+       return open_ertr::now();
+         });
+    }).handle_error(
+      open_ertr::pass_further{},
+      crimson::ct_error::assert_all{
+       "Invalid error read_rbm_header in BlockRBManager::open"
+      }
+    );
+  });
+}
+
+/*
+ * write
+ *
+ * Write bptr to the device at the absolute address that paddr maps to.
+ * Resolves to erange if that address lies outside [super.start, super.end]
+ * or if bptr is longer than super.end - super.start.
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::write(
+  paddr_t paddr,
+  bufferptr &bptr)
+{
+  ceph_assert(device);
+  rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
+
+  const bool addr_in_range = !(addr > super.end) && !(addr < super.start);
+  const bool len_fits = !(bptr.length() > super.end - super.start);
+  if (!addr_in_range || !len_fits) {
+    return crimson::ct_error::erange::make();
+  }
+
+  return device->write(addr, bptr);
+}
+
+/*
+ * read
+ *
+ * Fill bptr from the device, starting at the absolute address that paddr
+ * maps to.  Resolves to erange if that address lies outside
+ * [super.start, super.end] or if bptr is longer than
+ * super.end - super.start.
+ */
+BlockRBManager::read_ertr::future<> BlockRBManager::read(
+  paddr_t paddr,
+  bufferptr &bptr)
+{
+  ceph_assert(device);
+  rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
+
+  const bool addr_in_range = !(addr > super.end) && !(addr < super.start);
+  const bool len_fits = !(bptr.length() > super.end - super.start);
+  if (!addr_in_range || !len_fits) {
+    return crimson::ct_error::erange::make();
+  }
+
+  return device->read(addr, bptr);
+}
+
+/*
+ * close
+ *
+ * Close the underlying device; asserts that a device is set.
+ */
+BlockRBManager::close_ertr::future<> BlockRBManager::close()
+{
+  ceph_assert(device);
+  return device->close();
+}
+
+/*
+ * _open_device
+ *
+ * Open the underlying device at `path` with seastar::open_flags::rw;
+ * asserts that a device is set.
+ */
+BlockRBManager::open_ertr::future<> BlockRBManager::_open_device(
+    const std::string path)
+{
+  ceph_assert(device);
+  return device->open(path, seastar::open_flags::rw);
+}
+
+/*
+ * write_rbm_header
+ *
+ * Encode `super` and write it to the device at super.start.
+ *
+ * super.crc is recomputed first: it is the crc32c of the header encoded
+ * with crc == 0, or -1 when the device reports that data protection is
+ * enabled.  The header is written as one block of super.block_size bytes,
+ * zero-padded after the encoded header.
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::write_rbm_header()
+{
+  ceph_assert(device);
+  bufferlist meta_b_header;
+  super.crc = 0;
+  encode(super, meta_b_header);
+  // If NVMeDevice supports data protection, CRC for checksum is not required
+  // NVMeDevice is expected to generate and store checksum internally.
+  // CPU overhead for CRC might be saved.
+  if (device->is_data_protection_enabled()) {
+    super.crc = -1;
+  }
+  else {
+    super.crc = meta_b_header.crc32c(-1);
+  }
+
+  bufferlist bl;
+  encode(super, bl);
+  auto iter = bl.begin();
+  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+  // This check protects the copy below from overrunning bp, so it must not
+  // be compiled out with NDEBUG: use ceph_assert like the rest of the file.
+  ceph_assert(bl.length() < super.block_size);
+  // do not write uninitialized memory behind the encoded header to disk
+  bp.zero();
+  iter.copy(bl.length(), bp.c_str());
+
+  return device->write(super.start, bp);
+}
+
+/*
+ * read_rbm_header
+ *
+ * Read RBM_SUPERBLOCK_SIZE bytes at addr and decode them as
+ * rbm_metadata_header_t.
+ *
+ * Resolves to
+ *  - enoent if the bytes cannot be decoded,
+ *  - input_output_error if data protection is not enabled on the device and
+ *    the stored crc does not match the crc32c of the header re-encoded with
+ *    crc == 0,
+ *  - the decoded header otherwise.  Note that the crc field of the returned
+ *    header has been reset to 0.
+ */
+BlockRBManager::read_ertr::future<rbm_metadata_header_t> BlockRBManager::read_rbm_header(
+    rbm_abs_addr addr)
+{
+  LOG_PREFIX(BlockRBManager::read_rbm_header);
+  ceph_assert(device);
+  bufferptr bptr =
+    bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE));
+  bptr.zero();
+  return device->read(
+    addr,
+    bptr
+  ).safe_then([this, bptr, FNAME]()
+    -> read_ertr::future<rbm_metadata_header_t> {
+    bufferlist bl;
+    bl.append(bptr);
+    auto p = bl.cbegin();
+    rbm_metadata_header_t super_block;
+    try {
+      decode(super_block, p);
+    }
+    catch (ceph::buffer::error& e) {
+      DEBUG("read_rbm_header: unable to decode rbm super block {}",
+           e.what());
+      return crimson::ct_error::enoent::make();
+    }
+    // the crc was computed over the header encoded with crc == 0
+    // (see write_rbm_header()), so re-encode it the same way
+    checksum_t crc = super_block.crc;
+    bufferlist meta_b_header;
+    super_block.crc = 0;
+    encode(super_block, meta_b_header);
+
+    // Do CRC verification only if data protection is not supported.
+    if (device->is_data_protection_enabled() == false) {
+      if (meta_b_header.crc32c(-1) != crc) {
+        DEBUG("bad crc on super block, expected {} != actual {} ",
+              meta_b_header.crc32c(-1), crc);
+        return crimson::ct_error::input_output_error::make();
+      }
+    }
+    // log the header that was just read, not the (possibly stale) member
+    DEBUG("got {} ", super_block);
+    return read_ertr::future<rbm_metadata_header_t>(
+      read_ertr::ready_future_marker{},
+      super_block
+    );
+  }).handle_error(
+    read_ertr::pass_further{},
+    crimson::ct_error::assert_all{
+      "Invalid error in BlockRBManager::read_rbm_header"
+    }
+  );
+}
+
+/*
+ * check_bitmap_blocks
+ *
+ * Read every bitmap block between super.start_alloc_area and
+ * super.start_data_area, count the bits that are not set and store the
+ * total in super.free_block_count.
+ */
+BlockRBManager::check_bitmap_blocks_ertr::future<> BlockRBManager::check_bitmap_blocks()
+{
+  LOG_PREFIX(BlockRBManager::check_bitmap_blocks);
+  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
+  return seastar::do_with(uint64_t(super.start_alloc_area), uint64_t(0), bp,
+    [&, this, FNAME](auto &addr, auto &free_blocks, auto &bp) mutable {
+    return crimson::repeat([&, this, FNAME]() mutable {
+      return device->read(addr, bp
+      ).safe_then(
+        [&bp, &addr, &free_blocks, this, FNAME]() mutable {
+        DEBUG("verify_bitmap_blocks: addr {}", addr);
+        rbm_bitmap_block_t b_block(super.block_size);
+        bufferlist bl_bitmap_block;
+        bl_bitmap_block.append(bp);
+        decode(b_block, bl_bitmap_block);
+        auto max = max_block_by_bitmap_block();
+        for (uint64_t i = 0; i < max; i++) {
+          if (!b_block.is_allocated(i)) {
+            free_blocks++;
+          }
+        }
+        // advance to the next bitmap block; stop at the data area
+        addr += super.block_size;
+        if (addr >= super.start_data_area) {
+          return seastar::stop_iteration::yes;
+        }
+        return seastar::stop_iteration::no;
+      });
+    }).safe_then([&free_blocks, this, FNAME]() {
+      DEBUG("free_blocks: {} ", free_blocks);
+      super.free_block_count = free_blocks;
+      return check_bitmap_blocks_ertr::now();
+    }).handle_error(
+      check_bitmap_blocks_ertr::pass_further{},
+      crimson::ct_error::assert_all{
+        "Invalid error in BlockRBManager::check_bitmap_blocks"
+      }
+    );
+  });
+}
+
+/*
+ * write
+ *
+ * Copy the contents of bl into one freshly created page-aligned buffer of
+ * bl.length() bytes and write that buffer to the device at addr.
+ * Unlike write(paddr_t, bufferptr&) no range check is done here.
+ *
+ * A std::exception thrown while building the buffer is logged and then
+ * escalated to an assertion failure.
+ */
+BlockRBManager::write_ertr::future<> BlockRBManager::write(
+  rbm_abs_addr addr,
+  bufferlist &bl)
+{
+  LOG_PREFIX(BlockRBManager::write);
+  ceph_assert(device);
+  bufferptr bptr;
+  try {
+    bptr = bufferptr(ceph::buffer::create_page_aligned(bl.length()));
+    auto iter = bl.cbegin();
+    iter.copy(bl.length(), bptr.c_str());
+  } catch (const std::exception &e) {
+    DEBUG("write: exception creating aligned buffer {}", e);
+    ceph_assert(0 == "unhandled exception");
+  }
+  return device->write(
+    addr,
+    bptr);
+}
+
+// Print the fields of an rbm_metadata_header_t as
+// " rbm_metadata_header_t(name=value, ...)".
+std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header)
+{
+  out << " rbm_metadata_header_t(size=" << header.size;
+  out << ", block_size=" << header.block_size;
+  out << ", start=" << header.start;
+  out << ", end=" << header.end;
+  out << ", magic=" << header.magic;
+  out << ", uuid=" << header.uuid;
+  out << ", free_block_count=" << header.free_block_count;
+  out << ", alloc_area_size=" << header.alloc_area_size;
+  out << ", start_alloc_area=" << header.start_alloc_area;
+  out << ", start_data_area=" << header.start_data_area;
+  out << ", flag=" << header.flag;
+  out << ", feature=" << header.feature;
+  out << ", crc=" << header.crc;
+  out << ")";
+  return out;
+}
+
+// Print an rbm_bitmap_block_header_t as
+// " rbm_bitmap_block_header_t(size=..., checksum=...)".
+std::ostream &operator<<(std::ostream &out,
+    const rbm_bitmap_block_header_t &header)
+{
+  out << " rbm_bitmap_block_header_t(size=" << header.size;
+  out << ", checksum=" << header.checksum;
+  out << ")";
+  return out;
+}
+
+}
diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h
new file mode 100644 (file)
index 0000000..0b46355
--- /dev/null
@@ -0,0 +1,379 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <iosfwd>
+
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+#include <seastar/core/future.hh>
+
+#include "include/ceph_assert.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "include/buffer_fwd.h"
+#include "crimson/osd/exceptions.h"
+
+#include "crimson/os/seastore/transaction.h"
+#include "nvmedevice.h"
+#include "crimson/os/seastore/random_block_manager.h"
+
+#include "crimson/common/layout.h"
+#include "include/buffer.h"
+#include "include/uuid.h"
+
+namespace crimson::os::seastore {
+
+constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;
+
+using NVMeBlockDevice = nvme_device::NVMeBlockDevice;
+using NVMeBlockDeviceRef = std::unique_ptr<NVMeBlockDevice>;
+
+// Values for rbm_metadata_header_t::feature (mkfs() ORs RBM_BITMAP_BLOCK_CRC
+// into it).
+enum {
+  // TODO: This allows the device to manage crc on a block by itself
+  RBM_NVME_END_TO_END_PROTECTION = 1,
+  RBM_BITMAP_BLOCK_CRC = 2,
+};
+
+// Number of bitmap bits stored per byte.
+constexpr uint32_t BITS_PER_CHAR = 8;
+
+// Mask selecting, within its byte, the bit that represents bitmap bit nr
+// (bit nr % BITS_PER_CHAR, counted from the least significant bit).
+inline char BIT_CHAR_MASK(uint64_t nr)
+{
+  const uint64_t bit_in_byte = nr % BITS_PER_CHAR;
+  return char(1) << bit_in_byte;
+}
+
+/*
+ * rbm_metadata_header_t
+ *
+ * Superblock of the random block manager; it is encoded with DENC and
+ * stored at `start`.
+ *
+ * Every field has an initializer so that a default-constructed header never
+ * carries indeterminate values: mkfs() ORs into `feature` and never
+ * assigns `flag`, yet both are encoded and written to disk.
+ */
+struct rbm_metadata_header_t {
+  size_t size = 0;
+  size_t block_size = 0;
+  uint64_t start = 0; // start location of the device
+  uint64_t end = 0;   // end location of the device
+  uint64_t magic = 0; // to indicate randomblock_manager
+  uuid_d uuid;
+  uint64_t free_block_count = 0;
+  uint64_t alloc_area_size = 0; // size of the bitmap area in bytes
+  // byte offset of the first bitmap block (mkfs() sets it to
+  // RBM_SUPERBLOCK_SIZE and it is used as a device address)
+  uint32_t start_alloc_area = 0;
+  // byte offset of the first data block
+  // (start_alloc_area + alloc_area_size)
+  // NOTE(review): only 32 bits wide although alloc_area_size is 64 bits.
+  uint32_t start_data_area = 0;
+  uint64_t flag = 0; // reserved
+  uint64_t feature = 0;
+  device_id_t device_id{};
+  checksum_t crc = 0;
+
+  DENC(rbm_metadata_header_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.size, p);
+    denc(v.block_size, p);
+    denc(v.start, p);
+    denc(v.end, p);
+    denc(v.magic, p);
+    denc(v.uuid, p);
+    denc(v.free_block_count, p);
+    denc(v.alloc_area_size, p);
+    denc(v.start_alloc_area, p);
+    denc(v.start_data_area, p);
+    denc(v.flag, p);
+    denc(v.feature, p);
+    denc(v.device_id, p);
+
+    denc(v.crc, p);
+    DENC_FINISH(p);
+  }
+
+};
+
+/*
+ * rbm_bitmap_block_header_t
+ *
+ * Header encoded in front of the bitmap bits of a bitmap block.
+ * Both fields are zero-initialized: `checksum` is encoded even when
+ * set_crc() is never called on the owning block, and `size` is left alone
+ * by the default constructor of rbm_bitmap_block_t.
+ */
+struct rbm_bitmap_block_header_t {
+  uint32_t size = 0;
+  checksum_t checksum = 0;
+  DENC(rbm_bitmap_block_header_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.size, p);
+    denc(v.checksum, p);
+    DENC_FINISH(p);
+  }
+};
+
+std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header);
+std::ostream &operator<<(std::ostream &out, const rbm_bitmap_block_header_t &header);
+
+// Operation applied to a range of bitmap bits: clear them all or set them
+// all.
+enum class bitmap_op_types_t : uint8_t {
+  ALL_CLEAR = 1,
+  ALL_SET = 2
+};
+
+/*
+ * rbm_bitmap_block_t
+ *
+ * One block of the allocation bitmap: a small header followed by the
+ * bitmap bits held in `buf`.  Bit nr lives in byte nr / BITS_PER_CHAR of
+ * buf, at the position selected by BIT_CHAR_MASK(nr).  A set bit means
+ * "allocated".
+ */
+struct rbm_bitmap_block_t {
+  rbm_bitmap_block_header_t header;
+  bufferlist buf;
+
+  uint64_t get_size() {
+    return header.size;
+  }
+
+  // store the crc32c of the bitmap bits in the header
+  void set_crc() {
+    header.checksum = buf.crc32c(-1);
+  }
+
+  // true if the crc32c of the bitmap bits matches the header
+  bool is_correct_crc() {
+    ceph_assert(buf.length());
+    return buf.crc32c(-1) == header.checksum;
+  }
+
+  // The per-bit accessors assert that the byte holding bit nr lies inside
+  // buf instead of only checking that buf is non-empty; an out-of-range nr
+  // would otherwise read or write memory past the buffer.
+  void set_bit(uint64_t nr) {
+    ceph_assert(nr / BITS_PER_CHAR < buf.length());
+    char mask = BIT_CHAR_MASK(nr);
+    char *p = buf.c_str() + (nr / BITS_PER_CHAR);
+    *p |= mask;
+  }
+
+  void set_all_bits() {
+    ceph_assert(buf.length());
+    ::memset(buf.c_str(), std::numeric_limits<unsigned char>::max(), buf.length());
+  }
+
+  void set_clear_bits() {
+    ceph_assert(buf.length());
+    ::memset(buf.c_str(), 0, buf.length());
+  }
+
+  void clear_bit(uint64_t nr) {
+    ceph_assert(nr / BITS_PER_CHAR < buf.length());
+    char mask = ~BIT_CHAR_MASK(nr);
+    char *p = buf.c_str() + (nr / BITS_PER_CHAR);
+    *p &= mask;
+  }
+
+  bool is_allocated(uint64_t nr) {
+    ceph_assert(nr / BITS_PER_CHAR < buf.length());
+    char mask = BIT_CHAR_MASK(nr);
+    char *p = buf.c_str() + (nr / BITS_PER_CHAR);
+    return *p & mask;
+  }
+
+  // size is recorded in the header only; buf is not allocated here
+  rbm_bitmap_block_t(size_t size) {
+    header.size = size;
+  }
+
+  rbm_bitmap_block_t() = default;
+
+  DENC(rbm_bitmap_block_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.header, p);
+    denc(v.buf, p);
+    DENC_FINISH(p);
+  }
+};
+
+}
+
+WRITE_CLASS_DENC_BOUNDED(
+  crimson::os::seastore::rbm_metadata_header_t
+)
+WRITE_CLASS_DENC_BOUNDED(
+  crimson::os::seastore::rbm_bitmap_block_t
+)
+WRITE_CLASS_DENC_BOUNDED(
+  crimson::os::seastore::rbm_bitmap_block_header_t
+)
+
+namespace crimson::os::seastore {
+
+class BlockRBManager final : public RandomBlockManager {
+public:
+  /*
+   * Ondisk layout
+   *
+   * ---------------------------------------------------------------------------
+   * | rbm_metadata_header_t | rbm_bitmap_block_t 1 |  ... |    data blocks    |
+   * ---------------------------------------------------------------------------
+   */
+
+  mkfs_ertr::future<> mkfs(mkfs_config_t) final;
+  read_ertr::future<> read(paddr_t addr, bufferptr &buffer) final;
+  write_ertr::future<> write(paddr_t addr, bufferptr &buf) final;
+  open_ertr::future<> open(const std::string &path, paddr_t start) final;
+  close_ertr::future<> close() final;
+
+  /*
+   * alloc_extent
+   *
+   * The role of this function is to find out free blocks the transaction requires.
+   * To do so, alloc_extent() looks into both in-memory allocator
+   * and freebitmap blocks.
+   * But, in-memory allocator is the future work, and is not implemented yet,
+   * we use freebitmap directly to allocate freeblocks for now.
+   *
+   * Each bit in freebitmap block represents whether a block is allocated or not.
+   *
+   * TODO: multiple allocation
+   *
+   */
+  allocate_ret alloc_extent(
+      Transaction &t, size_t size) final; // allocator, return blocks
+
+  /*
+   * free_extent
+   *
+   * add a range of free blocks to transaction
+   *
+   */
+  abort_allocation_ertr::future<> abort_allocation(Transaction &t) final;
+  write_ertr::future<> complete_allocation(Transaction &t) final;
+
+  open_ertr::future<> _open_device(const std::string path);
+  read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
+  write_ertr::future<> write_rbm_header();
+
+  size_t get_size() const final { return super.size; };
+  size_t get_block_size() const final { return super.block_size; }
+
+  // max block number a block can represent using bitmap:
+  // the number of bits in what is left of a block after subtracting the
+  // bounded encoded size of rbm_bitmap_block_t
+  uint64_t max_block_by_bitmap_block() {
+    return (super.block_size - ceph::encoded_sizeof_bounded<rbm_bitmap_block_t>()) * 8;
+  }
+
+  // index (0-based, within the bitmap area) of the bitmap block whose bits
+  // cover block_no
+  uint64_t convert_block_no_to_bitmap_block(blk_no_t block_no)
+  {
+    ceph_assert(super.block_size);
+    return block_no / max_block_by_bitmap_block();
+  }
+
+  /*
+   * convert_bitmap_block_no_to_block_id
+   *
+   * Translates a bit position inside a bitmap block into a block id.
+   *
+   * @param offset bit index within the bitmap block
+   * @param addr   address at which that bitmap block is stored
+   * @return block id represented by the bit
+   */
+  blk_no_t convert_bitmap_block_no_to_block_id(uint64_t offset, rbm_abs_addr addr)
+  {
+    ceph_assert(super.block_size);
+    // freebitmap begins at block 1
+    const auto bitmap_block_index = addr / super.block_size - 1;
+    return bitmap_block_index * max_block_by_bitmap_block() + offset;
+  }
+
+  uint64_t get_alloc_area_size() {
+    ceph_assert(super.size);
+    ceph_assert(super.block_size);
+    uint64_t total_block_num = super.size / super.block_size;
+    uint64_t need_blocks = (total_block_num % max_block_by_bitmap_block()) ?
+                 (total_block_num / max_block_by_bitmap_block() + 1) :
+                 (total_block_num / max_block_by_bitmap_block());
+    ceph_assert(need_blocks);
+    return need_blocks * super.block_size;
+  }
+
+  using find_block_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error,
+    crimson::ct_error::enoent>;
+  using find_block_ret = find_block_ertr::future<interval_set<blk_no_t>>;
+  /*
+   * find_free_block
+   *
+   * Try to find free blocks by reading bitmap blocks on the disk sequentially.
+   * The free blocks found are returned as an interval_set of block numbers.
+   * NOTE(review): this comment previously stated that the free blocks are
+   * added to allocated_blocks in Transaction -- confirm whether the
+   * implementation actually records anything in `t`.
+   * This needs to be improved after in-memory block allocation is introduced.
+   *
+   * @param t    transaction on whose behalf the blocks are searched
+   * @param size requested size (presumably in bytes -- confirm)
+   *
+   */
+  find_block_ret find_free_block(Transaction &t, size_t size);
+
+  /*
+   * rbm_sync_block_bitmap
+   *
+   * Write rbm_bitmap_block_t to the device
+   *
+   * @param block    the rbm_bitmap_block_t to write
+   * @param block_no block number used to select where the bitmap block is
+   *                 stored (presumably a data block number that is mapped to
+   *                 its covering bitmap block -- confirm against the
+   *                 implementation)
+   *
+   */
+  write_ertr::future<> rbm_sync_block_bitmap(
+      rbm_bitmap_block_t &block, blk_no_t block_no);
+
+  using check_bitmap_blocks_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error,
+    crimson::ct_error::invarg>;
+  // Examines the on-disk bitmap blocks (presumably to recompute the free
+  // block count at open time -- confirm against the implementation).
+  check_bitmap_blocks_ertr::future<> check_bitmap_blocks();
+  // Free block count currently recorded in the in-memory header.
+  uint64_t get_free_blocks() const {
+    return super.free_block_count;
+  }
+  /*
+   * We will have multiple partitions (circular journals and random block
+   * managers) on a device, so the start and end locations of the device are
+   * needed to support such a case.
+   * NOTE(review): the constructor itself only takes the device and its path;
+   * the start/end locations are kept in the header (`super`).
+   */
+  BlockRBManager(NVMeBlockDevice * device, std::string path)
+    : device(device), path(path) {}
+
+  /*
+   * bitmap block area (freebitmap) layout
+   *
+   * -----------------------------------------------------------
+   * | header   1 |   bitmap  1   | header  2 |    bitmap  2   |
+   * -----------------------------------------------------------
+   *  <--       1 block        --> <--     1 block          -->
+   *
+   * 1 block contains both bitmap header and bitmap.
+   * We use this layout as the default layout here.
+   * However, we will consider exploiting end-to-end data protection.
+   * If we use end-to-end data protection, which is a feature specified in
+   * NVMe, we can avoid any checksum calculation: the checksum for the block
+   * will be managed by the NVMe device.
+   *
+   */
+  mkfs_ertr::future<> initialize_blk_alloc_area();
+  // Start of the bitmap (allocation) area as recorded in the header.
+  uint64_t get_start_block_alloc_area() {
+    return super.start_alloc_area;
+  }
+
+  // Appends a zero-filled, page-aligned bitmap buffer to b_block.buf. The
+  // buffer length is the block size minus the bounded encoded size of
+  // rbm_bitmap_block_t.
+  void alloc_rbm_bitmap_block_buf(rbm_bitmap_block_t &b_block) {
+    const auto bitmap_len =
+      super.block_size - ceph::encoded_sizeof_bounded<rbm_bitmap_block_t>();
+    ceph::bufferptr zeroed_bitmap(buffer::create_page_aligned(bitmap_len));
+    zeroed_bitmap.zero();
+    b_block.buf.append(zeroed_bitmap);
+  }
+
+  // Absolute address of block `id`: its byte offset (id * block_size) added
+  // to the start location stored in the header.
+  rbm_abs_addr get_blk_paddr_by_block_no(blk_no_t id) {
+    const auto byte_offset = id * super.block_size;
+    return byte_offset + super.start;
+  }
+
+  // Number of bitmap blocks touched by the inclusive block-id range
+  // [start, end].
+  int num_block_between_blk_ids(blk_no_t start, blk_no_t end) {
+    const auto blocks_per_bitmap = max_block_by_bitmap_block();
+    const auto first_bitmap_block = start / blocks_per_bitmap;
+    const auto last_bitmap_block = end / blocks_per_bitmap;
+    return last_bitmap_block - first_bitmap_block + 1;
+  }
+
+  // Applies `op` to the bitmap bits of the block range start..end
+  // (presumably inclusive of `end` -- confirm against the implementation)
+  // and writes the affected bitmap blocks to the device.
+  write_ertr::future<> rbm_sync_block_bitmap_by_range(
+      blk_no_t start, blk_no_t end, bitmap_op_types_t op);
+  // Encodes `num_block` identical bitmap blocks into `buf`. Each block has
+  // every bit set when op is ALL_SET, and is passed through set_clear_bits()
+  // for any other op.
+  void add_cont_bitmap_blocks_to_buf(
+      bufferlist& buf, int num_block, bitmap_op_types_t op) {
+    rbm_bitmap_block_t uniform_block(super.block_size);
+    alloc_rbm_bitmap_block_buf(uniform_block);
+    if (op != bitmap_op_types_t::ALL_SET) {
+      uniform_block.set_clear_bits();
+    } else {
+      uniform_block.set_all_bits();
+    }
+    // the same block image is appended once per requested block
+    for (int remaining = num_block; remaining > 0; --remaining) {
+      encode(uniform_block, buf);
+    }
+  }
+
+  // Writes the contents of `bl` to the device at absolute address `addr`.
+  write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl);
+  // Applies the given allocation deltas (presumably to the on-disk bitmap
+  // and the free block count -- confirm against the implementation).
+  write_ertr::future<> sync_allocation(
+      std::vector<alloc_delta_t>& alloc_blocks);
+  // Appends to `v` a delta describing the range [from, from + len) as free
+  // (presumably a CLEAR operation -- confirm against the implementation).
+  void add_free_extent(
+      std::vector<alloc_delta_t>& v, rbm_abs_addr from, size_t len);
+
+  // Device id recorded in the RBM header.
+  device_id_t get_device_id() const final {
+    return super.device_id;
+  }
+
+private:
+  /*
+   * this contains the number of bitmap blocks, free blocks and
+   * rbm specific information
+   */
+  rbm_metadata_header_t super;
+  //FreelistManager free_manager; // TODO: block management
+  // Device used for all I/O; set by the constructor (presumably non-owning,
+  // since it is received as a raw pointer -- confirm).
+  NVMeBlockDevice * device = nullptr;
+  std::string path;
+  // The constructor does not set this member, so give it a defined value
+  // instead of leaving it indeterminate.
+  int stream_id = 0; // for multi-stream
+};
+using BlockRBManagerRef = std::unique_ptr<BlockRBManager>;
+
+}
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_manager.cc b/src/crimson/os/seastore/random_block_manager/nvme_manager.cc
deleted file mode 100644 (file)
index 51a8fde..0000000
+++ /dev/null
@@ -1,708 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <sys/mman.h>
-#include <string.h>
-
-#include "crimson/os/seastore/logging.h"
-
-#include "include/buffer.h"
-#include "nvmedevice.h"
-#include "include/interval_set.h"
-#include "include/intarith.h"
-#include "nvme_manager.h"
-
-SET_SUBSYS(seastore_device);
-
-namespace crimson::os::seastore {
-
-/*
- * Encodes `block` into a page-aligned buffer of block.get_size() bytes and
- * writes it to the bitmap block that covers `block_no`, i.e. at
- * start_alloc_area + (block_no / max_block_by_bitmap_block()) * block_size.
- */
-NVMeManager::write_ertr::future<> NVMeManager::rbm_sync_block_bitmap(
-    rbm_bitmap_block_t &block, blk_no_t block_no)
-{
-  LOG_PREFIX(NVMeManager::rbm_sync_block_bitmap);
-  bufferptr bptr;
-  try {
-    bptr = bufferptr(ceph::buffer::create_page_aligned(block.get_size()));
-    bufferlist bl;
-    encode(block, bl);
-    auto iter = bl.cbegin();
-    // copy exactly block.get_size() bytes of the encoding into the aligned buffer
-    iter.copy(block.get_size(), bptr.c_str());
-  } catch (const std::exception &e) {
-    // any failure while building the buffer is treated as fatal
-    DEBUG("rbm_sync_block_bitmap: exception creating aligned buffer {}", e);
-    ceph_assert(0 == "unhandled exception");
-  }
-  // translate the block number into the index of its bitmap block
-  uint64_t bitmap_block_no = convert_block_no_to_bitmap_block(block_no);
-  return device->write(super.start_alloc_area +
-                      bitmap_block_no * super.block_size,
-                      bptr);
-}
-
-/*
- * Initializes the on-disk bitmap area in three steps:
- *  1. write a bitmap block in which the first start_data_area / block_size
- *     bits are set;
- *  2. clear the bits from the first data block up to the end of the last
- *     bitmap block (rbm_sync_block_bitmap_by_range with ALL_CLEAR);
- *  3. if the number of blocks is not a multiple of the bits per bitmap
- *     block, set the trailing bits that do not correspond to any block.
- */
-NVMeManager::mkfs_ertr::future<> NVMeManager::initialize_blk_alloc_area()
-{
-  LOG_PREFIX(NVMeManager::initialize_blk_alloc_area);
-  // block number at which the data area begins
-  auto start = super.start_data_area / super.block_size;
-  DEBUG("initialize_alloc_area: start to read at {} ", start);
-
-  /* write allocated bitmap info to rbm meta block */
-  rbm_bitmap_block_t b_block(super.block_size);
-  alloc_rbm_bitmap_block_buf(b_block);
-  for (uint64_t i = 0; i < start; i++) {
-    b_block.set_bit(i);
-  }
-
-  // CRC calculation is offloaded to NVMeDevice if data protection is enabled.
-  if (device->is_data_protection_enabled() == false) {
-    b_block.set_crc();
-  }
-
-  // do_with keeps a copy of b_block alive for the whole continuation chain
-  return seastar::do_with(
-    b_block,
-    [this, start, FNAME](auto &b_block) {
-    return rbm_sync_block_bitmap(b_block,
-      super.start_alloc_area / super.block_size
-    ).safe_then([this, &b_block, start, FNAME]() {
-
-      /* initialize bitmap blocks as unused */
-      auto max = max_block_by_bitmap_block();
-      auto max_block = super.size / super.block_size;
-      // last bit of the last bitmap block needed to cover max_block blocks
-      blk_no_t end = round_up_to(max_block, max) - 1;
-      DEBUG("init start {} end {} ", start, end);
-      return rbm_sync_block_bitmap_by_range(
-       start,
-       end,
-       bitmap_op_types_t::ALL_CLEAR
-      ).safe_then([this, &b_block, FNAME]() {
-       /*
-        * Set rest of the block bitmap, which is not used, to 1
-        * To do so, we only mark 1 to empty bitmap blocks
-        */
-       uint64_t na_block_no = super.size/super.block_size;
-       uint64_t remain_block = na_block_no % max_block_by_bitmap_block();
-       DEBUG("na_block_no: {}, remain_block: {} ",
-             na_block_no, remain_block);
-       if (remain_block) {
-         DEBUG("try to remained write alloc info ");
-         // with more than one bitmap block, start from a fresh zeroed buffer;
-         // otherwise b_block (the first bitmap block) is reused as is
-         if (na_block_no > max_block_by_bitmap_block()) {
-           b_block.buf.clear();
-           alloc_rbm_bitmap_block_buf(b_block);
-         }
-         for (uint64_t i = remain_block; i < max_block_by_bitmap_block(); i++) {
-           b_block.set_bit(i);
-         }
-         // NOTE(review): the CRC is set unconditionally here, whereas the
-         // first block above only sets it when data protection is disabled.
-         b_block.set_crc();
-         return rbm_sync_block_bitmap(b_block, na_block_no
-         ).handle_error(
-           mkfs_ertr::pass_further{},
-           crimson::ct_error::assert_all{
-             "Invalid error rbm_sync_block_bitmap to update \
-             last bitmap block in NVMeManager::initialize_blk_alloc_area"
-           }
-         );
-       }
-       return mkfs_ertr::now();
-      }).handle_error(
-       mkfs_ertr::pass_further{},
-       crimson::ct_error::assert_all{
-         "Invalid error rbm_sync_block_bitmap \
-           in NVMeManager::initialize_blk_alloc_area"
-       }
-      );
-    }).handle_error(
-      mkfs_ertr::pass_further{},
-      crimson::ct_error::assert_all{
-       "Invalid error rbm_sync_block_bitmap_by_range \
-         in NVMeManager::initialize_blk_alloc_area"
-      }
-    );
-  });
-}
-
-NVMeManager::mkfs_ertr::future<> NVMeManager::mkfs(mkfs_config_t config)
-{
-  LOG_PREFIX(NVMeManager::mkfs);
-  DEBUG("path {}", path);
-  return _open_device(path).safe_then([this, &config, FNAME]() {
-    rbm_abs_addr addr = convert_paddr_to_abs_addr(
-      config.start);
-    return read_rbm_header(addr).safe_then([FNAME](auto super) {
-      DEBUG("already exists ");
-      return mkfs_ertr::now();
-    }).handle_error(
-      crimson::ct_error::enoent::handle([this, &config, FNAME](auto) {
-       super.uuid = uuid_d(); // TODO
-       super.magic = 0xFF; // TODO
-       super.start = convert_paddr_to_abs_addr(
-         config.start);
-       super.end = convert_paddr_to_abs_addr(
-         config.end);
-       super.block_size = config.block_size;
-       super.size = config.total_size;
-       super.free_block_count = config.total_size/config.block_size - 2;
-       super.alloc_area_size = get_alloc_area_size();
-       super.start_alloc_area = RBM_SUPERBLOCK_SIZE;
-       super.start_data_area =
-         super.start_alloc_area + super.alloc_area_size;
-       super.crc = 0;
-       super.feature |= RBM_BITMAP_BLOCK_CRC;
-       super.device_id = config.device_id;
-
-       DEBUG(" super {} ", super);
-       // write super block
-       return write_rbm_header().safe_then([this] {
-         return initialize_blk_alloc_area();
-       }).handle_error(
-         mkfs_ertr::pass_further{},
-         crimson::ct_error::assert_all{
-         "Invalid error write_rbm_header in NVMeManager::mkfs"
-       });
-      }),
-      mkfs_ertr::pass_further{},
-      crimson::ct_error::assert_all{
-        "Invalid error read_rbm_header in NVMeManager::mkfs"
-      }
-    );
-  }).safe_then([this]() {
-    if (device) {
-      return device->close(
-      ).safe_then([]() {
-       return mkfs_ertr::now();
-      });
-    }
-    return mkfs_ertr::now();
-  }).handle_error(
-    mkfs_ertr::pass_further{},
-    crimson::ct_error::assert_all{
-    "Invalid error open_device in NVMeManager::mkfs"
-  });
-}
-
-NVMeManager::find_block_ret NVMeManager::find_free_block(Transaction &t, size_t size)
-{
-  LOG_PREFIX(NVMeManager::find_free_block);
-  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-  return seastar::do_with(uint64_t(0),
-    uint64_t(super.start_alloc_area),
-    interval_set<blk_no_t>(),
-    bp,
-    [&, this, FNAME](auto &allocated, auto &addr, auto &alloc_extent, auto &bp) mutable {
-    return crimson::repeat(
-      [&, this, FNAME]() mutable {
-      return device->read(
-       addr,
-       bp
-      ).safe_then(
-       [&bp, &addr, size, &allocated, &alloc_extent, this, FNAME]() mutable {
-       DEBUG("find_free_list: allocate {}, addr {}", allocated, addr);
-       rbm_bitmap_block_t b_block(super.block_size);
-       bufferlist bl_bitmap_block;
-       bl_bitmap_block.append(bp);
-       decode(b_block, bl_bitmap_block);
-       auto max = max_block_by_bitmap_block();
-       for (uint64_t i = 0;
-           i < max && (uint64_t)size/super.block_size > allocated; i++) {
-         auto block_id = convert_bitmap_block_no_to_block_id(i, addr);
-         if (b_block.is_allocated(i)) {
-           continue;
-         }
-         DEBUG("find_free_list: allocated block no {} i {}",
-               convert_bitmap_block_no_to_block_id(i, addr), i);
-         if (allocated != 0 && alloc_extent.range_end() != block_id) {
-           /*
-            * if not continous block, just restart to find continuous blocks
-            * at the next block.
-            * in-memory allocator can handle this efficiently.
-            */
-           allocated = 0;
-           alloc_extent.clear(); // a range of block allocation
-           DEBUG("find_free_list: rety to find continuous blocks");
-           continue;
-         }
-         allocated += 1;
-         alloc_extent.insert(block_id);
-       }
-       addr += super.block_size;
-       DEBUG("find_free_list: allocated: {} alloc_extent {}",
-             allocated, alloc_extent);
-       if (((uint64_t)size)/super.block_size == allocated) {
-         return seastar::stop_iteration::yes;
-       } else if (addr >= super.start_data_area) {
-         alloc_extent.clear();
-         return seastar::stop_iteration::yes;
-       }
-       return seastar::stop_iteration::no;
-      });
-    }).safe_then([&allocated, &alloc_extent, size, this, FNAME]() {
-      DEBUG(" allocated: {} size {} ",
-           allocated * super.block_size, size);
-      if (allocated * super.block_size < size) {
-       alloc_extent.clear();
-      }
-      return find_block_ret(
-       find_block_ertr::ready_future_marker{},
-       alloc_extent);
-    }).handle_error(
-      find_block_ertr::pass_further{},
-      crimson::ct_error::assert_all{
-       "Invalid error in NVMeManager::find_free_block"
-      }
-    );
-  });
-}
-
-/* TODO : block allocator */
-NVMeManager::allocate_ret NVMeManager::alloc_extent(
-    Transaction &t, size_t size)
-{
-
-  /*
-   * 1. find free blocks using block allocator
-   * 2. add free blocks to transaction
-   *    (the free block is reserved state, not stored)
-   * 3. link free blocks to onode
-   * Due to in-memory block allocator is the next work to do,
-   * just read the block bitmap directly to find free blocks.
-   *
-   */
-  LOG_PREFIX(NVMeManager::alloc_extent);
-  return find_free_block(t, size
-  ).safe_then([this, FNAME](auto alloc_extent) mutable
-    -> allocate_ertr::future<paddr_t> {
-    DEBUG("after find_free_block: allocated {}", alloc_extent);
-    if (alloc_extent.empty()) {
-      return crimson::ct_error::enospc::make();
-    }
-    paddr_t paddr = convert_abs_addr_to_paddr(
-      alloc_extent.range_start() * super.block_size,
-      super.device_id);
-    return allocate_ret(
-      allocate_ertr::ready_future_marker{},
-      paddr);
-  }).handle_error(
-    allocate_ertr::pass_further{},
-    crimson::ct_error::assert_all{
-      "Invalid error find_free_block in NVMeManager::alloc_extent"
-    }
-  );
-}
-
-// Appends to `v` a CLEAR delta covering `len` bytes starting at absolute
-// address `from`. `len` must be a multiple of the block size.
-void NVMeManager::add_free_extent(
-    std::vector<alloc_delta_t>& v, rbm_abs_addr from, size_t len)
-{
-  ceph_assert(!(len % super.block_size));
-  alloc_delta_t free_delta;
-  free_delta.op = alloc_delta_t::op_types_t::CLEAR;
-  paddr_t range_start = convert_abs_addr_to_paddr(
-    from,
-    super.device_id);
-  free_delta.alloc_blk_ranges.emplace_back(
-    range_start, L_ADDR_NULL, len, extent_types_t::ROOT);
-  v.push_back(free_delta);
-}
-
-NVMeManager::write_ertr::future<> NVMeManager::rbm_sync_block_bitmap_by_range(
-    blk_no_t start, blk_no_t end, bitmap_op_types_t op)
-{
-  LOG_PREFIX(NVMeManager::rbm_sync_block_bitmap_by_range);
-  auto addr = super.start_alloc_area +
-             (start / max_block_by_bitmap_block())
-             * super.block_size;
-  // aligned write
-  if (start % max_block_by_bitmap_block() == 0 &&
-      end % (max_block_by_bitmap_block() - 1) == 0) {
-    auto num_block = num_block_between_blk_ids(start, end);
-    bufferlist bl_bitmap_block;
-    add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block, op);
-    return write(
-      addr,
-      bl_bitmap_block);
-  }
-  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-  // try to read first block, then check the block is aligned
-  return device->read(
-    addr,
-    bp
-  ).safe_then([bp, start, end, op, addr, this, FNAME]() {
-    rbm_bitmap_block_t b_block(super.block_size);
-    bufferlist bl_bitmap_block;
-    bl_bitmap_block.append(bp);
-    decode(b_block, bl_bitmap_block);
-    auto max = max_block_by_bitmap_block();
-    auto loop_end = end < (start / max + 1) * max ?
-                   end % max : max - 1;
-    for (uint64_t i = (start % max); i <= loop_end; i++) {
-      if (op == bitmap_op_types_t::ALL_SET) {
-       b_block.set_bit(i);
-      } else {
-       b_block.clear_bit(i);
-      }
-    }
-    auto num_block = num_block_between_blk_ids(start, end);
-    DEBUG("rbm_sync_block_bitmap_by_range: start {}, end {}, \
-         loop_end {}, num_block {}",
-         start, end, loop_end, num_block);
-
-    bl_bitmap_block.clear();
-    encode(b_block, bl_bitmap_block);
-    if (num_block == 1) {
-      // | front (unaligned) |
-      return write(
-         addr,
-         bl_bitmap_block);
-    } else if (!((end + 1) % max)) {
-      // | front (unaligned) | middle (aligned) |
-      add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block - 1, op);
-      DEBUG("partially aligned write: addr {} length {}",
-           addr, bl_bitmap_block.length());
-      return write(
-         addr,
-         bl_bitmap_block);
-    } else if (num_block > 2) {
-      // | front (unaligned) | middle | end (unaligned) |
-      // fill up the middle
-      add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block - 2, op);
-    }
-
-    auto next_addr = super.start_alloc_area +
-               (end / max_block_by_bitmap_block())
-               * super.block_size;
-    auto bptr = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-    // | front (unaligned) | middle | end (unaligned) | or
-    // | front (unaligned) | end (unaligned) |
-    return device->read(
-      next_addr,
-      bptr
-    ).safe_then(
-      [bptr, bl_bitmap_block, end, op, addr, this, FNAME]() mutable {
-      rbm_bitmap_block_t b_block(super.block_size);
-      bufferlist block;
-      block.append(bptr);
-      decode(b_block, block);
-      auto max = max_block_by_bitmap_block();
-      for (uint64_t i = (end - (end % max)) % max;
-         i <= (end % max); i++) {
-       if (op == bitmap_op_types_t::ALL_SET) {
-         b_block.set_bit(i);
-       } else {
-         b_block.clear_bit(i);
-       }
-      }
-      DEBUG("start {} end {} ", end - (end % max), end);
-      bl_bitmap_block.claim_append(block);
-      return write(
-       addr,
-       bl_bitmap_block);
-    }).handle_error(
-      write_ertr::pass_further{},
-      crimson::ct_error::assert_all{
-       "Invalid error in NVMeManager::rbm_sync_block_bitmap_by_range"
-      }
-    );
-  }).handle_error(
-    write_ertr::pass_further{},
-    crimson::ct_error::assert_all{
-      "Invalid error in NVMeManager::rbm_sync_block_bitmap_by_range"
-    }
-  );
-}
-
-// Currently a no-op that resolves immediately; `t` is not inspected.
-NVMeManager::abort_allocation_ertr::future<> NVMeManager::abort_allocation(
-    Transaction &t)
-{
-  /*
-   * TODO: clear all allocation infos associated with transaction in in-memory allocator
-   */
-  return abort_allocation_ertr::now();
-}
-
-// Currently a no-op that resolves immediately; `t` is not inspected.
-NVMeManager::write_ertr::future<> NVMeManager::complete_allocation(
-    Transaction &t)
-{
-  return write_ertr::now();
-}
-
-NVMeManager::write_ertr::future<> NVMeManager::sync_allocation(
-    std::vector<alloc_delta_t> &alloc_blocks)
-{
-  LOG_PREFIX(NVMeManager::sync_allocation);
-  if (alloc_blocks.empty()) {
-    return write_ertr::now();
-  }
-  return seastar::do_with(move(alloc_blocks),
-    [&, this, FNAME](auto &alloc_blocks) mutable {
-    return crimson::do_for_each(alloc_blocks,
-      [this, FNAME](auto &alloc) {
-      return crimson::do_for_each(alloc.alloc_blk_ranges,
-        [this, &alloc, FNAME](auto &range) -> write_ertr::future<> {
-        DEBUG("range {} ~ {}", range.paddr, range.len);
-       bitmap_op_types_t op =
-         (alloc.op == alloc_delta_t::op_types_t::SET) ?
-         bitmap_op_types_t::ALL_SET :
-         bitmap_op_types_t::ALL_CLEAR;
-       rbm_abs_addr addr = convert_paddr_to_abs_addr(
-         range.paddr);
-       blk_no_t start = addr / super.block_size;
-       blk_no_t end = start +
-         (round_up_to(range.len, super.block_size)) / super.block_size
-          - 1;
-       return rbm_sync_block_bitmap_by_range(
-         start,
-         end,
-         op);
-      });
-    }).safe_then([this, &alloc_blocks, FNAME]() mutable {
-      int alloc_block_count = 0;
-      for (const auto& b : alloc_blocks) {
-       for (auto r : b.alloc_blk_ranges) {
-         if (b.op == alloc_delta_t::op_types_t::SET) {
-           alloc_block_count +=
-             round_up_to(r.len, super.block_size) / super.block_size;
-           DEBUG("complete alloc block: start {} len {} ",
-                 r.paddr, r.len);
-         } else {
-           alloc_block_count -=
-             round_up_to(r.len, super.block_size) / super.block_size;
-           DEBUG("complete alloc block: start {} len {} ",
-                 r.paddr, r.len);
-         }
-       }
-      }
-      DEBUG("complete_alloction: complete to allocate {} blocks",
-           alloc_block_count);
-      super.free_block_count -= alloc_block_count;
-      return write_ertr::now();
-    });
-  });
-}
-
-/*
- * Opens the device at `path`, reads the RBM header located at `paddr` and,
- * if its magic matches, adopts it as `super` and rescans the bitmap blocks
- * (check_bitmap_blocks). A header with a different magic yields enoent.
- */
-NVMeManager::open_ertr::future<> NVMeManager::open(
-    const std::string &path, paddr_t paddr)
-{
-  LOG_PREFIX(NVMeManager::open);
-  DEBUG("open: path{}", path);
-  rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
-  return _open_device(path
-  ).safe_then([this, addr]() {
-    return read_rbm_header(addr).safe_then([&](auto s)
-      -> open_ertr::future<> {
-      // 0xFF is the magic value written by mkfs()
-      if (s.magic != 0xFF) {
-       return crimson::ct_error::enoent::make();
-      }
-      super = s;
-      return check_bitmap_blocks().safe_then([]() {
-       return open_ertr::now();
-         });
-    }).handle_error(
-      open_ertr::pass_further{},
-      crimson::ct_error::assert_all{
-       "Invalid error read_rbm_header in NVMeManager::open"
-      }
-    );
-  });
-}
-
-/*
- * Writes `bptr` at `paddr`.
- *
- * Returns erange when the start address is outside [super.start, super.end]
- * or when the buffer would extend past super.end.
- */
-NVMeManager::write_ertr::future<> NVMeManager::write(
-  paddr_t paddr,
-  bufferptr &bptr)
-{
-  ceph_assert(device);
-  rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
-  // The length is checked against the space left after `addr`, not against
-  // the size of the whole partition; otherwise a write starting near the
-  // end could run past super.end. The addr checks come first, so
-  // super.end - addr cannot underflow.
-  if (addr > super.end || addr < super.start ||
-      bptr.length() > super.end - addr) {
-    return crimson::ct_error::erange::make();
-  }
-  return device->write(
-    addr,
-    bptr);
-}
-
-/*
- * Reads bptr.length() bytes at `paddr` into `bptr`.
- *
- * Returns erange when the start address is outside [super.start, super.end]
- * or when the requested length would extend past super.end.
- */
-NVMeManager::read_ertr::future<> NVMeManager::read(
-  paddr_t paddr,
-  bufferptr &bptr)
-{
-  ceph_assert(device);
-  rbm_abs_addr addr = convert_paddr_to_abs_addr(paddr);
-  // The length is checked against the space left after `addr`, not against
-  // the size of the whole partition; otherwise a read starting near the end
-  // could run past super.end. The addr checks come first, so
-  // super.end - addr cannot underflow.
-  if (addr > super.end || addr < super.start ||
-      bptr.length() > super.end - addr) {
-    return crimson::ct_error::erange::make();
-  }
-  return device->read(
-    addr,
-    bptr);
-}
-
-// Closes the underlying device; asserts that a device is set.
-NVMeManager::close_ertr::future<> NVMeManager::close()
-{
-  ceph_assert(device);
-  return device->close();
-}
-
-// Opens the underlying device at `path` with read/write flags; asserts that
-// a device is set.
-NVMeManager::open_ertr::future<> NVMeManager::_open_device(
-    const std::string path)
-{
-  ceph_assert(device);
-  return device->open(path, seastar::open_flags::rw);
-}
-
-/*
- * write_rbm_header
- *
- * Encodes `super` (with its crc field updated) into one block and writes it
- * at super.start.
- */
-NVMeManager::write_ertr::future<> NVMeManager::write_rbm_header()
-{
-  ceph_assert(device);
-  bufferlist meta_b_header;
-  // the checksum is computed over the header encoded with crc == 0
-  super.crc = 0;
-  encode(super, meta_b_header);
-  // If NVMeDevice supports data protection, CRC for checksum is not required
-  // NVMeDevice is expected to generate and store checksum internally.
-  // CPU overhead for CRC might be saved.
-  if (device->is_data_protection_enabled()) {
-    super.crc = -1;
-  }
-  else {
-    super.crc = meta_b_header.crc32c(-1);
-  }
-
-  bufferlist bl;
-  encode(super, bl);
-  auto iter = bl.begin();
-  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-  // Only bl.length() bytes are filled below; zero the rest so the bytes
-  // following the header on disk are well defined.
-  bp.zero();
-  // ceph_assert instead of assert: the copy below must never run with a
-  // header larger than the block, even when NDEBUG is defined.
-  ceph_assert(bl.length() < super.block_size);
-  iter.copy(bl.length(), bp.c_str());
-
-  return device->write(super.start, bp);
-}
-
-NVMeManager::read_ertr::future<rbm_metadata_header_t> NVMeManager::read_rbm_header(
-    rbm_abs_addr addr)
-{
-  LOG_PREFIX(NVMeManager::read_rbm_header);
-  ceph_assert(device);
-  bufferptr bptr =
-    bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE));
-  bptr.zero();
-  return device->read(
-    addr,
-    bptr
-  ).safe_then([length=bptr.length(), this, bptr, FNAME]()
-    -> read_ertr::future<rbm_metadata_header_t> {
-    bufferlist bl;
-    bl.append(bptr);
-    auto p = bl.cbegin();
-    rbm_metadata_header_t super_block;
-    try {
-      decode(super_block, p);
-    }
-    catch (ceph::buffer::error& e) {
-      DEBUG("read_rbm_header: unable to decode rbm super block {}",
-           e.what());
-      return crimson::ct_error::enoent::make();
-    }
-    checksum_t crc = super_block.crc;
-    bufferlist meta_b_header;
-    super_block.crc = 0;
-    encode(super_block, meta_b_header);
-
-    // Do CRC verification only if data protection is not supported.
-    if (device->is_data_protection_enabled() == false) {
-      if (meta_b_header.crc32c(-1) != crc) {
-        DEBUG("bad crc on super block, expected {} != actual {} ",
-              meta_b_header.crc32c(-1), crc);
-        return crimson::ct_error::input_output_error::make();
-      }
-    }
-    DEBUG("got {} ", super);
-    return read_ertr::future<rbm_metadata_header_t>(
-      read_ertr::ready_future_marker{},
-      super_block
-    );
-  }).handle_error(
-    read_ertr::pass_further{},
-    crimson::ct_error::assert_all{
-      "Invalid error in NVMeManager::read_rbm_header"
-    }
-  );
-}
-
-NVMeManager::check_bitmap_blocks_ertr::future<> NVMeManager::check_bitmap_blocks()
-{
-  LOG_PREFIX(NVMeManager::check_bitmap_blocks);
-  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-  return seastar::do_with(uint64_t(super.start_alloc_area), uint64_t(0), bp,
-    [&, this, FNAME](auto &addr, auto &free_blocks, auto &bp) mutable {
-    return crimson::repeat([&, this, FNAME]() mutable {
-      return device->read(addr, bp
-      ).safe_then(
-       [&bp, &addr, &free_blocks, this, FNAME]() mutable {
-       DEBUG("verify_bitmap_blocks: addr {}", addr);
-       rbm_bitmap_block_t b_block(super.block_size);
-       bufferlist bl_bitmap_block;
-       bl_bitmap_block.append(bp);
-       decode(b_block, bl_bitmap_block);
-       auto max = max_block_by_bitmap_block();
-       for (uint64_t i = 0; i < max; i++) {
-         if (!b_block.is_allocated(i)) {
-           free_blocks++;
-         }
-       }
-       addr += super.block_size;
-       if (addr >= super.start_data_area) {
-         return seastar::stop_iteration::yes;
-       }
-       return seastar::stop_iteration::no;
-      });
-    }).safe_then([&free_blocks, this, FNAME]() {
-      DEBUG("free_blocks: {} ", free_blocks);
-      super.free_block_count = free_blocks;
-      return check_bitmap_blocks_ertr::now();
-    }).handle_error(
-      check_bitmap_blocks_ertr::pass_further{},
-      crimson::ct_error::assert_all{
-        "Invalid error in NVMeManager::find_free_block"
-      }
-    );
-  });
-}
-
-/*
- * Copies the contents of `bl` into a single page-aligned buffer and writes
- * it to the device at absolute address `addr`. No range check against
- * super.start / super.end is performed here.
- */
-NVMeManager::write_ertr::future<> NVMeManager::write(
-  rbm_abs_addr addr,
-  bufferlist &bl)
-{
-  LOG_PREFIX(NVMeManager::write);
-  ceph_assert(device);
-  bufferptr bptr;
-  try {
-    bptr = bufferptr(ceph::buffer::create_page_aligned(bl.length()));
-    auto iter = bl.cbegin();
-    iter.copy(bl.length(), bptr.c_str());
-  } catch (const std::exception &e) {
-    // any failure while building the buffer is treated as fatal
-    DEBUG("write: exception creating aligned buffer {}", e);
-    ceph_assert(0 == "unhandled exception");
-  }
-  return device->write(
-    addr,
-    bptr);
-}
-
-std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header)
-{
-  out << " rbm_metadata_header_t(size=" << header.size
-       << ", block_size=" << header.block_size
-       << ", start=" << header.start
-       << ", end=" << header.end
-       << ", magic=" << header.magic
-       << ", uuid=" << header.uuid
-       << ", free_block_count=" << header.free_block_count
-       << ", alloc_area_size=" << header.alloc_area_size
-       << ", start_alloc_area=" << header.start_alloc_area
-       << ", start_data_area=" << header.start_data_area
-       << ", flag=" << header.flag
-       << ", feature=" << header.feature
-       << ", crc=" << header.crc;
-  return out << ")";
-}
-
-// Streams a one-line, human-readable dump of a bitmap block header.
-std::ostream &operator<<(std::ostream &out,
-    const rbm_bitmap_block_header_t &header)
-{
-  out << " rbm_bitmap_block_header_t(size=" << header.size;
-  out << ", checksum=" << header.checksum;
-  out << ")";
-  return out;
-}
-
-}
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_manager.h b/src/crimson/os/seastore/random_block_manager/nvme_manager.h
deleted file mode 100644 (file)
index 20f0087..0000000
+++ /dev/null
@@ -1,379 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <iosfwd>
-
-#include <boost/intrusive_ptr.hpp>
-#include <boost/smart_ptr/intrusive_ref_counter.hpp>
-#include <seastar/core/future.hh>
-
-#include "include/ceph_assert.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "include/buffer_fwd.h"
-#include "crimson/osd/exceptions.h"
-
-#include "crimson/os/seastore/transaction.h"
-#include "nvmedevice.h"
-#include "crimson/os/seastore/random_block_manager.h"
-
-#include "crimson/common/layout.h"
-#include "include/buffer.h"
-#include "include/uuid.h"
-
-namespace crimson::os::seastore {
-
-constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;
-
-using NVMeBlockDevice = nvme_device::NVMeBlockDevice;
-using NVMeBlockDeviceRef = std::unique_ptr<NVMeBlockDevice>;
-
-enum {
-  // TODO: This allows the device to manage crc on a block by itself
-  RBM_NVME_END_TO_END_PROTECTION = 1,
-  RBM_BITMAP_BLOCK_CRC = 2,
-};
-
-constexpr uint32_t BITS_PER_CHAR = 8;
-inline char BIT_CHAR_MASK(uint64_t nr)
-{
-  return (char(1) << (nr % BITS_PER_CHAR));
-}
-
-struct rbm_metadata_header_t {
-  size_t size = 0;
-  size_t block_size = 0;
-  uint64_t start; // start location of the device
-  uint64_t end;   // end location of the device
-  uint64_t magic; // to indicate randomblock_manager
-  uuid_d uuid;
-  uint64_t free_block_count;
-  uint64_t alloc_area_size; // bitmap
-  uint32_t start_alloc_area; // block number
-  uint32_t start_data_area;
-  uint64_t flag; // reserved
-  uint64_t feature;
-  device_id_t device_id;
-  checksum_t crc;
-
-  DENC(rbm_metadata_header_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.block_size, p);
-    denc(v.start, p);
-    denc(v.end, p);
-    denc(v.magic, p);
-    denc(v.uuid, p);
-    denc(v.free_block_count, p);
-    denc(v.alloc_area_size, p);
-    denc(v.start_alloc_area, p);
-    denc(v.start_data_area, p);
-    denc(v.flag, p);
-    denc(v.feature, p);
-    denc(v.device_id, p);
-
-    denc(v.crc, p);
-    DENC_FINISH(p);
-  }
-
-};
-
-struct rbm_bitmap_block_header_t {
-  uint32_t size;
-  checksum_t checksum;
-  DENC(rbm_bitmap_block_header_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.checksum, p);
-    DENC_FINISH(p);
-  }
-};
-
-std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header);
-std::ostream &operator<<(std::ostream &out, const rbm_bitmap_block_header_t &header);
-
-enum class bitmap_op_types_t : uint8_t {
-  ALL_CLEAR = 1,
-  ALL_SET = 2
-};
-
-struct rbm_bitmap_block_t {
-  rbm_bitmap_block_header_t header;
-  bufferlist buf;
-
-  uint64_t get_size() {
-    return header.size;
-  }
-  void set_crc() {
-    header.checksum = buf.crc32c(-1);
-  }
-
-  bool is_correct_crc() {
-    ceph_assert(buf.length());
-    return buf.crc32c(-1) == header.checksum;
-  }
-
-  void set_bit(uint64_t nr) {
-    ceph_assert(buf.length());
-    char mask = BIT_CHAR_MASK(nr);
-    char *p = buf.c_str() + (nr / BITS_PER_CHAR);
-    *p |= mask;
-  }
-
-  void set_all_bits() {
-    ceph_assert(buf.length());
-    ::memset(buf.c_str(), std::numeric_limits<unsigned char>::max(), buf.length());
-  }
-
-  void set_clear_bits() {
-    ceph_assert(buf.length());
-    ::memset(buf.c_str(), 0, buf.length());
-  }
-
-  void clear_bit(uint64_t nr) {
-    ceph_assert(buf.length());
-    char mask = ~BIT_CHAR_MASK(nr);
-    char *p = buf.c_str() + (nr / BITS_PER_CHAR);
-    *p &= mask;
-  }
-
-  bool is_allocated(uint64_t nr) {
-    ceph_assert(buf.length());
-    char mask = BIT_CHAR_MASK(nr);
-    char *p = buf.c_str() + (nr / BITS_PER_CHAR);
-    return *p & mask;
-  }
-
-  rbm_bitmap_block_t(size_t size) {
-    header.size = size;
-  }
-
-  rbm_bitmap_block_t() = default;
-
-  DENC(rbm_bitmap_block_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.header, p);
-    denc(v.buf, p);
-    DENC_FINISH(p);
-  }
-};
-
-}
-
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::rbm_metadata_header_t
-)
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::rbm_bitmap_block_t
-)
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::rbm_bitmap_block_header_t
-)
-
-namespace crimson::os::seastore {
-
-class NVMeManager final : public RandomBlockManager {
-public:
-  /*
-   * Ondisk layout
-   *
-   * ---------------------------------------------------------------------------
-   * | rbm_metadata_header_t | rbm_bitmap_block_t 1 |  ... |    data blocks    |
-   * ---------------------------------------------------------------------------
-   */
-
-  mkfs_ertr::future<> mkfs(mkfs_config_t) final;
-  read_ertr::future<> read(paddr_t addr, bufferptr &buffer) final;
-  write_ertr::future<> write(paddr_t addr, bufferptr &buf) final;
-  open_ertr::future<> open(const std::string &path, paddr_t start) final;
-  close_ertr::future<> close() final;
-
-  /*
-   * alloc_extent
-   *
-   * The role of this function is to find out free blocks the transaction requires.
-   * To do so, alloc_extent() looks into both in-memory allocator
-   * and freebitmap blocks.
-   * But, in-memory allocator is the future work, and is not implemented yet,
-   * we use freebitmap directly to allocate freeblocks for now.
-   *
-   * Each bit in freebitmap block represents whether a block is allocated or not.
-   *
-   * TODO: multiple allocation
-   *
-   */
-  allocate_ret alloc_extent(
-      Transaction &t, size_t size) final; // allocator, return blocks
-
-  /*
-   * free_extent
-   *
-   * add a range of free blocks to transaction
-   *
-   */
-  abort_allocation_ertr::future<> abort_allocation(Transaction &t) final;
-  write_ertr::future<> complete_allocation(Transaction &t) final;
-
-  open_ertr::future<> _open_device(const std::string path);
-  read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
-  write_ertr::future<> write_rbm_header();
-
-  size_t get_size() const final { return super.size; };
-  size_t get_block_size() const final { return super.block_size; }
-
-  // max block number a block can represent using bitmap
-  uint64_t max_block_by_bitmap_block() {
-    return (super.block_size - ceph::encoded_sizeof_bounded<rbm_bitmap_block_t>()) * 8;
-  }
-
-  uint64_t convert_block_no_to_bitmap_block(blk_no_t block_no)
-  {
-    ceph_assert(super.block_size);
-    return block_no / max_block_by_bitmap_block();
-  }
-
-  /*
-   * convert_bitmap_block_no_to_block_id
-   *
-   * return block id using address where freebitmap is stored and offset
-   */
-  blk_no_t convert_bitmap_block_no_to_block_id(uint64_t offset, rbm_abs_addr addr)
-  {
-    ceph_assert(super.block_size);
-    // freebitmap begins at block 1
-    return (addr / super.block_size - 1) * max_block_by_bitmap_block() + offset;
-  }
-
-  uint64_t get_alloc_area_size() {
-    ceph_assert(super.size);
-    ceph_assert(super.block_size);
-    uint64_t total_block_num = super.size / super.block_size;
-    uint64_t need_blocks = (total_block_num % max_block_by_bitmap_block()) ?
-                 (total_block_num / max_block_by_bitmap_block() + 1) :
-                 (total_block_num / max_block_by_bitmap_block());
-    ceph_assert(need_blocks);
-    return need_blocks * super.block_size;
-  }
-
-  using find_block_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error,
-    crimson::ct_error::enoent>;
-  using find_block_ret = find_block_ertr::future<interval_set<blk_no_t>>;
-  /*
-   * find_free_block
-   *
-   * Try to find free blocks by reading bitmap blocks on the disk sequentially
-   * The free blocks will be added to allocated_blocks in Transaction.
-   * This needs to be improved after in-memory block allocation is introduced.
-   *
-   */
-  find_block_ret find_free_block(Transaction &t, size_t size);
-
-  /*
-   * rbm_sync_block_bitmap
-   *
-   * Write rbm_bitmap_block_t to the device
-   *
-   * @param rbm_bitmap_block_t
-   * @param uint64_t the block number the rbm_bitmap_block_t will be stored
-   *
-   */
-  write_ertr::future<> rbm_sync_block_bitmap(
-      rbm_bitmap_block_t &block, blk_no_t block_no);
-
-  using check_bitmap_blocks_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error,
-    crimson::ct_error::invarg>;
-  check_bitmap_blocks_ertr::future<> check_bitmap_blocks();
-  uint64_t get_free_blocks() const {
-    return super.free_block_count;
-  }
-  /*
-   * We will have mulitple partitions (circularjournals and randbomblockmanagers)
-   * on a device, so start and end location of the device are needed to
-   * support such case.
-   */
-  NVMeManager(NVMeBlockDevice * device, std::string path)
-    : device(device), path(path) {}
-
-  /*
-   * bitmap block area (freebitmap) layout
-   *
-   * -----------------------------------------------------------
-   * | header   1 |   bitmap  1   | header  2 |    bitmap  2   |
-   * -----------------------------------------------------------
-   *  <--       1 block        --> <--     1 block          -->
-   *
-   * 1 block contains both bitmap header and bitmap.
-   * We use this layout as a default layout here.
-   * But, we'll consider to exploit end to end data protection.
-   * If we use the end to end data protection, which is a feature specified in NVMe,
-   * we can avoid any calculation for checksum. The checksum regarding the block
-   * will be managed by the NVMe device.
-   *
-   */
-  mkfs_ertr::future<> initialize_blk_alloc_area();
-  uint64_t get_start_block_alloc_area() {
-    return super.start_alloc_area;
-  }
-
-  void alloc_rbm_bitmap_block_buf(rbm_bitmap_block_t &b_block) {
-    auto bitmap_blk = ceph::bufferptr(buffer::create_page_aligned(
-                       super.block_size -
-                       ceph::encoded_sizeof_bounded<rbm_bitmap_block_t>()));
-    bitmap_blk.zero();
-    b_block.buf.append(bitmap_blk);
-  }
-
-  rbm_abs_addr get_blk_paddr_by_block_no(blk_no_t id) {
-    return (id * super.block_size) + super.start;
-  }
-
-  int num_block_between_blk_ids(blk_no_t start, blk_no_t end) {
-    auto max = max_block_by_bitmap_block();
-    auto block_start = start / max;
-    auto block_end = end / max;
-    return block_end - block_start + 1;
-  }
-
-  write_ertr::future<> rbm_sync_block_bitmap_by_range(
-      blk_no_t start, blk_no_t end, bitmap_op_types_t op);
-  void add_cont_bitmap_blocks_to_buf(
-      bufferlist& buf, int num_block, bitmap_op_types_t op) {
-    rbm_bitmap_block_t b_block(super.block_size);
-    alloc_rbm_bitmap_block_buf(b_block);
-    if (op == bitmap_op_types_t::ALL_SET) {
-      b_block.set_all_bits();
-    } else {
-      b_block.set_clear_bits();
-    }
-    for (int i = 0; i < num_block; i++) {
-      encode(b_block, buf);
-    }
-  }
-
-  write_ertr::future<> write(rbm_abs_addr addr, bufferlist &bl);
-  write_ertr::future<> sync_allocation(
-      std::vector<alloc_delta_t>& alloc_blocks);
-  void add_free_extent(
-      std::vector<alloc_delta_t>& v, rbm_abs_addr from, size_t len);
-
-  device_id_t get_device_id() const final {
-    return super.device_id;
-  }
-
-private:
-  /*
-   * this contains the number of bitmap blocks, free blocks and
-   * rbm specific information
-   */
-  rbm_metadata_header_t super;
-  //FreelistManager free_manager; // TODO: block management
-  NVMeBlockDevice * device;
-  std::string path;
-  int stream_id; // for multi-stream
-};
-using NVMeManagerRef = std::unique_ptr<NVMeManager>;
-
-}
index dafdcdee8ab685fe4e16c07285bd4746686e51fe..8696828f2c7225549f91471ad02e496bf9561cae 100644 (file)
@@ -6,7 +6,7 @@
 #include <random>
 
 #include "crimson/common/log.h"
-#include "crimson/os/seastore/random_block_manager/nvme_manager.h"
+#include "crimson/os/seastore/random_block_manager/block_rb_manager.h"
 #include "crimson/os/seastore/random_block_manager/nvmedevice.h"
 #include "test/crimson/seastore/transaction_manager_test_state.h"
 
@@ -25,7 +25,7 @@ constexpr uint64_t DEFAULT_BLOCK_SIZE = 4096;
 
 struct rbm_test_t :
   public seastar_test_suite_t, TMTestState {
-  std::unique_ptr<NVMeManager> rbm_manager;
+  std::unique_ptr<BlockRBManager> rbm_manager;
   std::unique_ptr<nvme_device::NVMeBlockDevice> device;
 
   struct rbm_transaction {
@@ -54,7 +54,7 @@ struct rbm_test_t :
 
   seastar::future<> set_up_fut() final {
     device.reset(new nvme_device::TestMemory(DEFAULT_TEST_SIZE));
-    rbm_manager.reset(new NVMeManager(device.get(), std::string()));
+    rbm_manager.reset(new BlockRBManager(device.get(), std::string()));
     device_id_t d_id = 1 << (std::numeric_limits<device_id_t>::digits - 1);
     config.start = paddr_t::make_blk_paddr(d_id, 0);
     config.end = paddr_t::make_blk_paddr(d_id, DEFAULT_TEST_SIZE);