]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: add block segment_manager
author Samuel Just <sjust@redhat.com>
Fri, 30 Oct 2020 23:54:46 +0000 (16:54 -0700)
committer Samuel Just <sjust@redhat.com>
Tue, 8 Dec 2020 19:06:49 +0000 (19:06 +0000)
Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/segment_manager.h
src/crimson/os/seastore/segment_manager/block.cc [new file with mode: 0644]
src/crimson/os/seastore/segment_manager/block.h [new file with mode: 0644]

index 3e04972223b314f48a76664d69b0af8af77435f3..8b29adb893a488e15099b3068ec0cc325f1d1978 100644 (file)
@@ -2,6 +2,7 @@ add_library(crimson-seastore STATIC
   cached_extent.cc
   seastore_types.cc
   segment_manager/ephemeral.cc
+  segment_manager/block.cc
   transaction_manager.cc
   journal.cc
   cache.cc
index d49841d304c8f9a8fa8d56c3fb36ab5778701bc0..79b1240c3dc5a7142221b05af60501051dacfed1 100644 (file)
@@ -21,10 +21,10 @@ class Segment : public boost::intrusive_ref_counter<
   boost::thread_unsafe_counter>{
 public:
 
-  enum class segment_state_t {
-    EMPTY,
-    OPEN,
-    CLOSED
+  enum class segment_state_t : uint8_t {
+    EMPTY = 0,
+    OPEN = 1,
+    CLOSED = 2
   };
 
   /**
diff --git a/src/crimson/os/seastore/segment_manager/block.cc b/src/crimson/os/seastore/segment_manager/block.cc
new file mode 100644 (file)
index 0000000..8c8fa71
--- /dev/null
@@ -0,0 +1,401 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include "crimson/common/log.h"
+
+#include "include/buffer.h"
+#include "crimson/os/seastore/segment_manager/block.h"
+
+namespace {
+  // File-local accessor for the seastar logger used by every diagnostic in
+  // this translation unit.  The logger is looked up under the filestore
+  // subsystem.
+  seastar::logger& logger()
+  {
+    return crimson::get_logger(ceph_subsys_filestore);
+  }
+}
+
+
+namespace crimson::os::seastore::segment_manager::block {
+
+static write_ertr::future<> do_write(
+  seastar::file &device,
+  uint64_t offset,
+  bufferptr &bptr)
+{
+  logger().debug(
+    "block: do_write offset {} len {}",
+    offset,
+    bptr.length());
+  return device.dma_write(
+    offset,
+    bptr.c_str(),
+    bptr.length()
+  ).handle_exception([](auto e) -> write_ertr::future<size_t> {
+      logger().error(
+       "do_write: dma_write got error {}",
+       e);
+      return crimson::ct_error::input_output_error::make();
+  }).then([&device, length=bptr.length()](auto result)
+              -> write_ertr::future<> {
+    if (result != length) {
+      return crimson::ct_error::input_output_error::make();
+    }
+    return write_ertr::now();
+  });
+}
+
+static read_ertr::future<> do_read(
+  seastar::file &device,
+  uint64_t offset,
+  bufferptr &bptr)
+{
+  logger().debug(
+    "block: do_read offset {} len {}",
+    offset,
+    bptr.length());
+  return device.dma_read(
+    offset,
+    bptr.c_str(),
+    bptr.length()
+  ).handle_exception([](auto e) -> read_ertr::future<size_t> {
+    logger().error(
+      "do_read: dma_read got error {}",
+      e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([length=bptr.length()](auto result) -> read_ertr::future<> {
+    if (result != length) {
+      return crimson::ct_error::input_output_error::make();
+    }
+    return read_ertr::now();
+  });
+}
+
+/**
+ * Writes the tracker's state buffer (bptr) to device at byte offset
+ * `offset` by delegating to do_write.
+ */
+write_ertr::future<> SegmentStateTracker::write_out(
+  seastar::file &device,
+  uint64_t offset)
+{
+  return do_write(
+    device,
+    offset,
+    bptr);
+}
+
+/**
+ * Fills the tracker's state buffer (bptr) from device at byte offset
+ * `offset` by delegating to do_read, replacing the in-memory contents.
+ *
+ * The return type is spelled read_ertr::future<> so that the definition
+ * matches the declaration in block.h.
+ */
+read_ertr::future<>
+SegmentStateTracker::read_in(
+  seastar::file &device,
+  uint64_t offset)
+{
+  return do_read(
+    device,
+    offset,
+    bptr);
+}
+
+static
+block_sm_superblock_t make_superblock(
+  const BlockSegmentManager::mkfs_config_t &config,
+  const seastar::stat_data &data)
+{
+  logger().debug(
+    "{}: size {}, block_size {}, allocated_size {}, configured_size {}",
+    __func__,
+    data.size,
+    data.block_size,
+    data.allocated_size,
+    config.total_size);
+  size_t size = (data.size == 0) ? config.total_size : data.size;
+  size_t raw_segments = size / config.segment_size;
+  size_t tracker_size = SegmentStateTracker::get_raw_size(
+    raw_segments,
+    data.block_size);
+  size_t segments = (size - tracker_size - data.block_size)
+    / config.segment_size;
+  return block_sm_superblock_t{
+    size,
+    config.segment_size,
+    data.block_size,
+    segments,
+    data.block_size,
+    tracker_size + data.block_size
+  };
+}
+
+using open_device_ret = 
+  BlockSegmentManager::access_ertr::future<
+  std::pair<seastar::file, seastar::stat_data>
+  >;
+static
+open_device_ret open_device(const std::string &in_path, seastar::open_flags mode)
+{
+  return seastar::do_with(
+    in_path,
+    [mode](auto &path) {
+      return seastar::file_stat(path, seastar::follow_symlink::yes
+      ).then([mode, &path](auto stat) mutable {
+       return seastar::open_file_dma(path, mode).then([=](auto file) {
+         logger().debug("open_device: open successful");
+         return std::make_pair(file, stat);
+       });
+      }).handle_exception([](auto e) -> open_device_ret {
+       logger().error(
+         "open_device: got error {}",
+         e);
+       return crimson::ct_error::input_output_error::make();
+      });
+    });
+}
+
+  
+static
+BlockSegmentManager::access_ertr::future<>
+write_superblock(seastar::file &device, block_sm_superblock_t sb)
+{
+  assert(ceph::encoded_sizeof_bounded<block_sm_superblock_t>() <
+        sb.block_size);
+  return seastar::do_with(
+    bufferptr(ceph::buffer::create_page_aligned(sb.block_size)),
+    [=, &device](auto &bp) {
+      bufferlist bl;
+      encode(sb, bl);
+      auto iter = bl.begin();
+      assert(bl.length() < sb.block_size);
+      iter.copy(bl.length(), bp.c_str());
+      logger().debug("write_superblock: doing writeout");
+      return do_write(device, 0, bp);
+    });
+}
+
+static
+BlockSegmentManager::access_ertr::future<block_sm_superblock_t>
+read_superblock(seastar::file &device, seastar::stat_data sd)
+{
+  assert(ceph::encoded_sizeof_bounded<block_sm_superblock_t>() <
+        sd.block_size);
+  return seastar::do_with(
+    bufferptr(ceph::buffer::create_page_aligned(sd.block_size)),
+    [=, &device](auto &bp) {
+      return do_read(
+       device,
+       0,
+       bp
+      ).safe_then([=, &bp] {
+         bufferlist bl;
+         bl.push_back(bp);
+         block_sm_superblock_t ret;
+         auto bliter = bl.cbegin();
+         decode(ret, bliter);
+         return BlockSegmentManager::access_ertr::future<block_sm_superblock_t>(
+           BlockSegmentManager::access_ertr::ready_future_marker{},
+           ret);
+      });
+    });
+}
+
+/// Binds the segment handle to its owning manager and segment id.
+BlockSegment::BlockSegment(
+  BlockSegmentManager &manager,
+  segment_id_t id)
+  : manager(manager),
+    id(id)
+{}
+
+/// The write capacity of a segment is the manager's segment size.
+segment_off_t BlockSegment::get_write_capacity() const
+{
+  return manager.get_segment_size();
+}
+
+/**
+ * Marks this segment CLOSED through the manager.
+ *
+ * The future returned by segment_close() is propagated to the caller, so
+ * the result resolves only once the tracker has been written out and
+ * carries any error from that write.
+ */
+Segment::close_ertr::future<> BlockSegment::close()
+{
+  return manager.segment_close(id);
+}
+
+/**
+ * Validates a write into this segment and forwards it to the manager.
+ *
+ * Fails with invarg when offset lies before the current write pointer or is
+ * not a multiple of the device block size, and with enospc when the write
+ * would run past the end of the segment.  Once accepted, the write pointer
+ * is advanced to offset + bl.length() before the write is issued.
+ */
+Segment::write_ertr::future<> BlockSegment::write(
+  segment_off_t offset, ceph::bufferlist bl)
+{
+  const auto &sb = manager.superblock;
+
+  if (offset < write_pointer) {
+    return crimson::ct_error::invarg::make();
+  }
+  if (offset % sb.block_size != 0) {
+    return crimson::ct_error::invarg::make();
+  }
+  if (offset + bl.length() > sb.segment_size) {
+    return crimson::ct_error::enospc::make();
+  }
+
+  write_pointer = offset + bl.length();
+  return manager.segment_write({id, offset}, bl);
+}
+
+/**
+ * Records segment `id` as CLOSED in the tracker and writes the tracker back
+ * to the device at superblock.tracker_offset.  Requires a tracker to exist.
+ */
+Segment::close_ertr::future<> BlockSegmentManager::segment_close(segment_id_t id)
+{
+  assert(tracker);
+  tracker->set(id, segment_state_t::CLOSED);
+  return tracker->write_out(
+    device,
+    superblock.tracker_offset);
+}
+
+Segment::write_ertr::future<> BlockSegmentManager::segment_write(
+  paddr_t addr,
+  ceph::bufferlist bl,
+  bool ignore_check)
+{
+  assert((bl.length() % superblock.block_size) == 0);
+  logger().debug(
+    "segment_write to segment {} at offset {}, physical offset {}, len {}",
+    addr.segment,
+    addr.offset,
+    get_offset(addr),
+    bl.length());
+
+  
+  // TODO send an iovec and avoid the copy -- bl should have aligned
+  // constituent buffers and they will remain unmodified until the write
+  // completes
+  return seastar::do_with(
+    bufferptr(ceph::buffer::create_page_aligned(bl.length())),
+    [&](auto &bp) {
+      auto iter = bl.cbegin();
+      iter.copy(bl.length(), bp.c_str());
+      return do_write(device, get_offset(addr), bp);
+    });
+}
+
+// Out-of-line destructor; there is nothing to do beyond destroying members.
+BlockSegmentManager::~BlockSegmentManager() = default;
+
+BlockSegmentManager::mount_ret BlockSegmentManager::mount(mount_config_t config)
+{
+  return open_device(
+    config.path, seastar::open_flags::rw | seastar::open_flags::dsync
+  ).safe_then([=](auto p) {
+    device = std::move(p.first);
+    auto sd = p.second;
+    return read_superblock(device, sd);
+  }).safe_then([=](auto sb) {
+    superblock = sb;
+    tracker = std::make_unique<SegmentStateTracker>(
+      superblock.segments,
+      superblock.block_size);
+    return tracker->read_in(
+      device,
+      superblock.tracker_offset
+    ).safe_then([this] {
+      for (segment_id_t i = 0; i < tracker->get_capacity(); ++i) {
+       if (tracker->get(i) == segment_state_t::OPEN) {
+         tracker->set(i, segment_state_t::CLOSED);
+       }
+      }
+      return tracker->write_out(device, superblock.tracker_offset);
+    });
+  });
+}
+
+BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(mkfs_config_t config)
+{
+  return seastar::do_with(
+    seastar::file{},
+    seastar::stat_data{},
+    block_sm_superblock_t{},
+    std::unique_ptr<SegmentStateTracker>(),
+    [=](auto &device, auto &stat, auto &sb, auto &tracker) {
+      return open_device(
+       config.path, seastar::open_flags::rw
+      ).safe_then([&, config](auto p) {
+       device = p.first;
+       stat = p.second;
+       sb = make_superblock(config, stat);
+       return write_superblock(device, sb);
+      }).safe_then([&] {
+       logger().debug("BlockSegmentManager::mkfs: superblock written");
+       tracker.reset(new SegmentStateTracker(sb.segments, sb.block_size));
+       return tracker->write_out(device, sb.tracker_offset);
+      }).finally([&] {
+       return device.close();
+      }).safe_then([] {
+       logger().debug("BlockSegmentManager::mkfs: complete");
+       return mkfs_ertr::now();
+      });
+    });
+}
+
+/// Closes the underlying device file handle.
+BlockSegmentManager::close_ertr::future<>
+BlockSegmentManager::close()
+{
+  return device.close();
+}
+
+SegmentManager::open_ertr::future<SegmentRef> BlockSegmentManager::open(
+  segment_id_t id)
+{
+  if (id >= get_num_segments()) {
+    logger().error("BlockSegmentManager::open: invalid segment {}", id);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (tracker->get(id) != segment_state_t::EMPTY) {
+    logger().error(
+      "BlockSegmentManager::open: invalid segment {} state {}",
+      id,
+      tracker->get(id));
+    return crimson::ct_error::invarg::make();
+  }
+
+  tracker->set(id, segment_state_t::OPEN);
+  return tracker->write_out(device, superblock.tracker_offset
+  ).safe_then([this, id] {
+    return open_ertr::future<SegmentRef>(
+      open_ertr::ready_future_marker{},
+      SegmentRef(new BlockSegment(*this, id)));
+  });
+}
+
+/**
+ * Releases segment `id` so it can be opened again.
+ *
+ * Fails with invarg when id is out of range or the segment is not CLOSED.
+ * Otherwise the segment is marked EMPTY and the tracker is written out.
+ */
+SegmentManager::release_ertr::future<> BlockSegmentManager::release(
+  segment_id_t id)
+{
+  logger().debug("BlockSegmentManager::release: {}", id);
+
+  if (id >= get_num_segments()) {
+    logger().error(
+      "BlockSegmentManager::release: invalid segment {}",
+      id);
+    return crimson::ct_error::invarg::make();
+  }
+
+  const auto state = tracker->get(id);
+  if (state != segment_state_t::CLOSED) {
+    logger().error(
+      "BlockSegmentManager::release: invalid segment {} state {}",
+      id,
+      state);
+    return crimson::ct_error::invarg::make();
+  }
+
+  tracker->set(id, segment_state_t::EMPTY);
+  return tracker->write_out(
+    device,
+    superblock.tracker_offset);
+}
+
+SegmentManager::read_ertr::future<> BlockSegmentManager::read(
+  paddr_t addr,
+  size_t len,
+  ceph::bufferptr &out)
+{
+  if (addr.segment >= get_num_segments()) {
+    logger().error(
+      "BlockSegmentManager::read: invalid segment {}",
+      addr);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (addr.offset + len > superblock.segment_size) {
+    logger().error(
+      "BlockSegmentManager::read: invalid offset {}~{}!",
+      addr,
+      len);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (tracker->get(addr.segment) == segment_state_t::EMPTY) {
+    logger().error(
+      "BlockSegmentManager::read: read on invalid segment {} state {}",
+      addr.segment,
+      tracker->get(addr.segment));
+    return crimson::ct_error::enoent::make();
+  }
+
+  return do_read(
+    device,
+    get_offset(addr),
+    out);
+}
+
+}
diff --git a/src/crimson/os/seastore/segment_manager/block.h b/src/crimson/os/seastore/segment_manager/block.h
new file mode 100644 (file)
index 0000000..0a8c317
--- /dev/null
@@ -0,0 +1,214 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+
+#include <seastar/core/file.hh>
+#include <seastar/core/future.hh>
+#include <seastar/core/reactor.hh>
+
+#include "crimson/common/layout.h"
+
+#include "crimson/os/seastore/segment_manager.h"
+
+namespace crimson::os::seastore::segment_manager::block {
+
+struct block_sm_superblock_t {  // Device layout description; written at device offset 0 by write_superblock().
+  size_t size = 0;              // device size: stat size, or the configured total_size when stat reports 0
+  size_t segment_size = 0;      // size of one segment, taken from the mkfs configuration
+  size_t block_size = 0;        // block size reported by stat for the backing file
+    
+  size_t segments = 0;               // number of segments that fit after the superblock and tracker
+  uint64_t tracker_offset = 0;       // device offset of the segment state tracker
+  uint64_t first_segment_offset = 0; // device offset of segment 0 (see BlockSegmentManager::get_offset)
+    
+  DENC(block_sm_superblock_t, v, p) {  // encodes/decodes the fields in declaration order
+    DENC_START(1, 1, p);
+    denc(v.size, p);
+    denc(v.segment_size, p);
+    denc(v.block_size, p);
+    denc(v.segments, p);
+    denc(v.tracker_offset, p);
+    denc(v.first_segment_offset, p);
+    DENC_FINISH(p);
+  }
+};
+
+using write_ertr = crimson::errorator<  // error set for device writes: only input_output_error
+  crimson::ct_error::input_output_error>;
+using read_ertr = crimson::errorator<   // error set for device reads: only input_output_error
+  crimson::ct_error::input_output_error>;
+
+/**
+ * SegmentStateTracker
+ *
+ * Tracks lifecycle state of each segment using space at the beginning
+ * of the drive.
+ */
+class SegmentStateTracker {
+  using segment_state_t = Segment::segment_state_t;
+
+  bufferptr bptr;
+
+  using L = absl::container_internal::Layout<uint8_t>;
+  const L layout;
+
+public:
+  static size_t get_raw_size(size_t segments, size_t block_size) {
+    return p2roundup(segments, block_size);
+  }
+
+  SegmentStateTracker(size_t segments, size_t block_size)
+    : bptr(ceph::buffer::create_page_aligned(
+            get_raw_size(segments, block_size))),
+      layout(bptr.length())
+  {
+    ::memset(
+      bptr.c_str(),
+      static_cast<char>(segment_state_t::EMPTY),
+      bptr.length());
+  }
+
+  size_t get_size() const {
+    return bptr.length();
+  }
+
+  size_t get_capacity() const {
+    return bptr.length();
+  }
+
+  segment_state_t get(segment_id_t offset) const {
+    assert(offset < get_capacity());
+    return static_cast<segment_state_t>(
+      layout.template Pointer<0>(
+       bptr.c_str())[offset]);
+  }
+
+  void set(segment_id_t offset, segment_state_t state) {
+    assert(offset < get_capacity());
+    layout.template Pointer<0>(bptr.c_str())[offset] =
+      static_cast<uint8_t>(state);
+  }
+
+  write_ertr::future<> write_out(
+    seastar::file &device,
+    uint64_t offset);
+
+  read_ertr::future<> read_in(
+    seastar::file &device,
+    uint64_t offset);
+};
+
+class BlockSegmentManager;
+/**
+ * BlockSegment
+ *
+ * Segment handle handed out by BlockSegmentManager::open().  It tracks the
+ * write pointer locally and forwards writes and close to the manager.
+ */
+class BlockSegment final : public Segment {
+  friend class BlockSegmentManager;
+
+  BlockSegmentManager &manager;    // owning manager; all I/O goes through it
+  const segment_id_t id;           // id of the segment this handle refers to
+  segment_off_t write_pointer = 0; // end offset of the last accepted write
+
+public:
+  BlockSegment(BlockSegmentManager &manager, segment_id_t id);
+  ~BlockSegment() = default;
+
+  segment_id_t get_segment_id() const final {
+    return id;
+  }
+  segment_off_t get_write_capacity() const final;
+  segment_off_t get_write_ptr() const final {
+    return write_pointer;
+  }
+
+  close_ertr::future<> close() final;
+  write_ertr::future<> write(segment_off_t offset, ceph::bufferlist bl) final;
+};
+
+/**
+ * BlockSegmentManager
+ *
+ * Implements SegmentManager on a conventional block device.
+ * SegmentStateTracker uses space at the start of the device to store
+ * state analogous to that of the segments of a zns device.
+ */
+class BlockSegmentManager final : public SegmentManager {  // SegmentManager backed by a file opened via seastar DMA
+public:
+  using access_ertr = crimson::errorator<  // error set shared by mount() and mkfs()
+    crimson::ct_error::input_output_error,
+    crimson::ct_error::permission_denied,
+    crimson::ct_error::enoent>;
+
+
+  struct mount_config_t {
+    std::string path;  // path of the device/file to open
+  };
+  using mount_ertr = access_ertr;
+  using mount_ret = access_ertr::future<>;
+  mount_ret mount(mount_config_t);  // opens the device, loads superblock and tracker, closes segments left OPEN
+
+  struct mkfs_config_t {
+    std::string path;         // path of the device/file to format
+    size_t segment_size = 0;  // size of each segment
+    size_t total_size = 0;    // device size to use when stat reports a size of 0
+  };
+  using mkfs_ertr = access_ertr;
+  using mkfs_ret = mkfs_ertr::future<>;
+  static mkfs_ret mkfs(mkfs_config_t);  // writes a fresh superblock and an all-EMPTY tracker
+  
+  using close_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error
+    >;
+  close_ertr::future<> close();  // closes the device file handle
+
+  BlockSegmentManager() = default;
+  ~BlockSegmentManager();
+
+  open_ertr::future<SegmentRef> open(segment_id_t id) final;  // EMPTY -> OPEN; returns a BlockSegment
+
+  release_ertr::future<> release(segment_id_t id) final;  // CLOSED -> EMPTY
+
+  read_ertr::future<> read(  // reads into out from the physical offset of addr; rejects EMPTY segments
+    paddr_t addr,
+    size_t len,
+    ceph::bufferptr &out) final;
+
+  size_t get_size() const final {  // device size recorded in the superblock
+    return superblock.size;
+  }
+  segment_off_t get_block_size() const {  // block size recorded in the superblock
+    return superblock.block_size;
+  }
+  segment_off_t get_segment_size() const {  // segment size recorded in the superblock
+    return superblock.segment_size;
+  }
+
+  // public so tests can bypass segment interface when simpler
+  Segment::write_ertr::future<> segment_write(
+    paddr_t addr,
+    ceph::bufferlist bl,
+    bool ignore_check=false);  // NOTE(review): ignore_check is not read by the definition in block.cc
+
+private:
+  friend class BlockSegment;
+  using segment_state_t = Segment::segment_state_t;
+
+  
+  std::unique_ptr<SegmentStateTracker> tracker;  // created in mount(); null before that
+  block_sm_superblock_t superblock;              // loaded from the device in mount()
+  seastar::file device;                          // opened in mount(), closed in close()
+
+  size_t get_offset(paddr_t addr) {  // maps (segment, offset) to a byte offset on the device
+    return superblock.first_segment_offset +
+      (addr.segment * superblock.segment_size) +
+      addr.offset;
+  }
+
+  std::vector<segment_state_t> segment_state;  // NOTE(review): appears unused in block.cc -- confirm and remove
+
+  char *buffer = nullptr;  // NOTE(review): appears unused in block.cc -- confirm and remove
+
+  Segment::close_ertr::future<> segment_close(segment_id_t id);  // marks id CLOSED and writes the tracker out
+};
+
+}
+
+WRITE_CLASS_DENC_BOUNDED(
+  crimson::os::seastore::segment_manager::block::block_sm_superblock_t
+)
+