]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: multi-device support
author Xuehan Xu <xxhdx1985126@gmail.com>
Wed, 25 Aug 2021 02:35:27 +0000 (10:35 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Sun, 10 Oct 2021 06:22:10 +0000 (14:22 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
18 files changed:
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/extent_reader.cc
src/crimson/os/seastore/extent_reader.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/seastore.h
src/crimson/os/seastore/seastore_types.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/segment_cleaner.h
src/crimson/os/seastore/segment_manager.h
src/crimson/os/seastore/segment_manager/block.cc
src/crimson/os/seastore/segment_manager/block.h
src/crimson/os/seastore/segment_manager/ephemeral.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/crimson/tools/store_nbd/tm_driver.cc
src/test/crimson/seastore/test_randomblock_manager.cc
src/test/crimson/seastore/transaction_manager_test_state.h
src/vstart.sh

index b4b00ef491913b4f21d213a711bfd6688f3bde6c..d8df40cb892024a3acbea890d965fca344188e27 100644 (file)
@@ -384,6 +384,10 @@ public:
 
   void add_allocator(device_type_t type, ExtentAllocatorRef&& allocator) {
     allocators[type].emplace_back(std::move(allocator));
+    LOG_PREFIX(ExtentPlacementManager::add_allocator);  // logging runs after the append, so the count below includes the new allocator
+    DEBUG("allocators for {}: {}",
+      device_type_to_string(type),
+      allocators[type].size());  // number of allocators now registered for this device type
   }
 
 private:
index c3b21ae57761154bdf165bbce7c0ebaf4b6cfdab..984be2d5198be37fc2107d0da5b6d2354ddc9c62 100644 (file)
@@ -16,6 +16,7 @@ namespace crimson::os::seastore {
 ExtentReader::read_segment_header_ret
 ExtentReader::read_segment_header(segment_id_t segment)
 {
+  auto& segment_manager = *segment_managers[segment.device_id()];
   return segment_manager.read(
     paddr_t{segment, 0},
     segment_manager.get_block_size()
@@ -24,7 +25,7 @@ ExtentReader::read_segment_header(segment_id_t segment)
     crimson::ct_error::assert_all{
       "Invalid error in ExtentReader::read_segment_header"
     }
-  ).safe_then([=](bufferptr bptr) -> read_segment_header_ret {
+  ).safe_then([=, &segment_manager](bufferptr bptr) -> read_segment_header_ret {
     logger().debug("segment {} bptr size {}", segment, bptr.length());
 
     segment_header_t header;
@@ -112,6 +113,8 @@ ExtentReader::scan_valid_records_ret ExtentReader::scan_valid_records(
   size_t budget,
   found_record_handler_t &handler)
 {
+  auto& segment_manager =
+    *segment_managers[cursor.offset.segment.device_id()];
   if (cursor.offset.offset == 0) {
     cursor.offset.offset = segment_manager.get_block_size();
   }
@@ -220,6 +223,7 @@ ExtentReader::read_validate_record_metadata(
   paddr_t start,
   segment_nonce_t nonce)
 {
+  auto& segment_manager = *segment_managers[start.segment.device_id()];
   auto block_size = segment_manager.get_block_size();
   if (start.offset + block_size > (int64_t)segment_manager.get_segment_size()) {
     return read_validate_record_metadata_ret(
@@ -228,7 +232,7 @@ ExtentReader::read_validate_record_metadata(
   }
   return segment_manager.read(start, block_size
   ).safe_then(
-    [=](bufferptr bptr) mutable
+    [=, &segment_manager](bufferptr bptr) mutable
     -> read_validate_record_metadata_ret {
       logger().debug("read_validate_record_metadata: reading {}", start);
       auto block_size = segment_manager.get_block_size();
@@ -308,6 +312,7 @@ ExtentReader::read_validate_data(
   paddr_t record_base,
   const record_header_t &header)
 {
+  auto& segment_manager = *segment_managers[record_base.segment.device_id()];
   return segment_manager.read(
     record_base.add_offset(header.mdlength),
     header.dlength
index 7f0d1ea653b6180ffe7ae2c3659fd81ca2c61df1..32aa1db517d301d729cdf8ebebc4dcee44ccb1ec 100644 (file)
@@ -10,6 +10,7 @@
 namespace crimson::os::seastore {
 
 class SegmentCleaner;
+class TransactionManager;
 
 class ExtentReader {
 public:
@@ -76,8 +77,11 @@ public:
   }
 
 private:
-  SegmentManager& segment_manager;
+  std::vector<SegmentManager*> segment_managers;
 
+  std::vector<SegmentManager*>& get_segment_managers() {  // mutable access to the table that the read paths index by device id; private, so only friends such as TransactionManager can call it
+    return segment_managers;
+  }
   /// read record metadata for record starting at start
   using read_validate_record_metadata_ertr = read_ertr;
   using read_validate_record_metadata_ret =
@@ -105,6 +109,7 @@ private:
   /// validate embedded metadata checksum
   static bool validate_metadata(const bufferlist &bl);
 
+  friend class TransactionManager;
 };
 
 using ExtentReaderRef = std::unique_ptr<ExtentReader>;
index f06aa650d8ee401b1855de56f006106151400887..0a81fd63daf42dc8149ee1cce71fc77a6c41678c 100644 (file)
@@ -33,11 +33,13 @@ using crimson::common::local_conf;
 namespace crimson::os::seastore {
 
 SeaStore::SeaStore(
+  std::string root,
   SegmentManagerRef sm,
   TransactionManagerRef tm,
   CollectionManagerRef cm,
   OnodeManagerRef om)
-  : segment_manager(std::move(sm)),
+  : root(root),
+    segment_manager(std::move(sm)),
     transaction_manager(std::move(tm)),
     collection_manager(std::move(cm)),
     onode_manager(std::move(om))
@@ -90,6 +92,24 @@ seastar::future<> SeaStore::mount()
 {
   return segment_manager->mount(
   ).safe_then([this] {
+    transaction_manager->add_segment_manager(segment_manager.get());
+    auto sec_devices = segment_manager->get_secondary_devices();
+    return crimson::do_for_each(sec_devices, [this](auto& device_entry) {
+      device_id_t id = device_entry.first;
+      magic_t magic = device_entry.second.magic;
+      device_type_t dtype = device_entry.second.dtype;
+      auto sm = std::make_unique<
+       segment_manager::block::BlockSegmentManager>(
+         root + "/block." + device_type_to_string(dtype)
+         + "." + std::to_string(id));
+      return sm->mount().safe_then([this, sm=std::move(sm), magic]() mutable {
+       assert(sm->get_magic() == magic);
+       transaction_manager->add_segment_manager(sm.get());
+       secondaries.emplace_back(std::move(sm));
+       return seastar::now();
+      });
+    });
+  }).safe_then([this] {
     return transaction_manager->mount();
   }).handle_error(
     crimson::ct_error::assert_all{
@@ -101,7 +121,15 @@ seastar::future<> SeaStore::mount()
 seastar::future<> SeaStore::umount()
 {
   return transaction_manager->close(
-  ).handle_error(
+  ).safe_then([this] {
+    return crimson::do_for_each(
+      secondaries,
+      [](auto& sm) -> SegmentManager::close_ertr::future<> {
+      return sm->close();
+    });
+  }).safe_then([this] {
+    return segment_manager->close();
+  }).handle_error(
     crimson::ct_error::assert_all{
       "Invalid error in SeaStore::umount"
     }
@@ -110,11 +138,89 @@ seastar::future<> SeaStore::umount()
 
 SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid)
 {
-  return segment_manager->mkfs(
-    seastore_meta_t{new_osd_fsid}
-  ).safe_then([this] {
+  return seastar::do_with(
+    secondary_device_set_t(),
+    [this, new_osd_fsid](auto& sds) {
+    auto fut = seastar::now();
+    LOG_PREFIX(SeaStore::mkfs);
+    DEBUG("root: {}", root);
+    if (!root.empty()) {
+      fut = seastar::open_directory(root).then(
+       [this, &sds, new_osd_fsid](seastar::file rdir) mutable {
+       std::unique_ptr<seastar::file> root_f =
+         std::make_unique<seastar::file>(std::move(rdir));
+       auto sub = root_f->list_directory(
+         [this, &sds, new_osd_fsid](auto de) mutable
+         -> seastar::future<> {
+         LOG_PREFIX(SeaStore::mkfs);
+         DEBUG("found file: {}", de.name);
+         if (de.name.find("block.") == 0
+             && de.name.length() > 6 /* 6 for "block." */) {
+           std::string entry_name = de.name;
+           auto dtype_end = entry_name.find_first_of('.', 6);
+           device_type_t dtype =
+             string_to_device_type(
+               entry_name.substr(6, dtype_end - 6));
+           if (!dtype) {
+             // invalid device type
+             return seastar::now();
+           }
+           auto id = std::stoi(entry_name.substr(dtype_end + 1));
+           auto sm = std::make_unique<
+             segment_manager::block::BlockSegmentManager
+             >(root + "/" + entry_name);
+           magic_t magic = (magic_t)std::rand();
+           sds.emplace(
+             (device_id_t)id,
+             device_spec_t{
+                 magic,
+                 dtype,
+                 (device_id_t)id});
+           return sm->mkfs(
+             segment_manager_config_t{
+             false,
+             magic,
+             dtype,
+             (device_id_t)id,
+             seastore_meta_t{new_osd_fsid},
+             secondary_device_set_t()}
+           ).safe_then([this, sm=std::move(sm), id]() mutable {
+             LOG_PREFIX(SeaStore::mkfs);
+             DEBUG("mkfs: finished for segment manager {}", id);
+             secondaries.emplace_back(std::move(sm));
+             return seastar::now();
+           }).handle_error(crimson::ct_error::assert_all{"not possible"});
+         }
+         return seastar::now();
+       });
+       return sub.done().then(
+         [root_f=std::move(root_f)] {
+         return seastar::now();
+       });
+      });
+    }
+    return fut.then([this, &sds, new_osd_fsid] {
+      return segment_manager->mkfs(
+       segment_manager_config_t{
+         true,
+         (magic_t)std::rand(),
+         device_type_t::SEGMENTED,
+         0,
+         seastore_meta_t{new_osd_fsid},
+         sds}
+      );
+    }).safe_then([this] {
+      return crimson::do_for_each(secondaries, [this](auto& sec_sm) {
+       return sec_sm->mount().safe_then([this, &sec_sm] {
+         transaction_manager->add_segment_manager(sec_sm.get());
+         return seastar::now();
+       });
+      });
+    });
+  }).safe_then([this] {
     return segment_manager->mount();
   }).safe_then([this] {
+    transaction_manager->add_segment_manager(segment_manager.get());
     return transaction_manager->mkfs();
   }).safe_then([this] {
     return transaction_manager->mount();
@@ -1194,15 +1300,6 @@ std::unique_ptr<SeaStore> make_seastore(
 
   auto epm = std::make_unique<ExtentPlacementManager>(*cache, *lba_manager);
 
-  epm->add_allocator(
-    device_type_t::SEGMENTED,
-    std::make_unique<SegmentedAllocator>(
-      *segment_cleaner,
-      *sm,
-      *lba_manager,
-      *journal,
-      *cache));
-
   journal->set_segment_provider(&*segment_cleaner);
 
   auto tm = std::make_unique<TransactionManager>(
@@ -1211,10 +1308,12 @@ std::unique_ptr<SeaStore> make_seastore(
     std::move(journal),
     std::move(cache),
     std::move(lba_manager),
-    std::move(epm));
+    std::move(epm),
+    scanner_ref);
 
   auto cm = std::make_unique<collection_manager::FlatCollectionManager>(*tm);
   return std::make_unique<SeaStore>(
+    device,
     std::move(sm),
     std::move(tm),
     std::move(cm),
index d9eacf418986e570ef22636df6e0c7c9e854ddd3..f21e2832fab417fa9534fcdb51eaa7108d69ea9e 100644 (file)
@@ -42,6 +42,7 @@ class SeaStore final : public FuturizedStore {
 public:
 
   SeaStore(
+    std::string root,
     SegmentManagerRef sm,
     TransactionManagerRef tm,
     CollectionManagerRef cm,
@@ -263,7 +264,9 @@ private:
     const std::optional<std::string> &_start,
     OMapManager::omap_list_config_t config);
 
+  std::string root;
   SegmentManagerRef segment_manager;
+  std::vector<SegmentManagerRef> secondaries;
   TransactionManagerRef transaction_manager;
   CollectionManagerRef collection_manager;
   OnodeManagerRef onode_manager;
index 9e858243188f77aa96753828f081e94c2327dfd2..26c1946193d33b00e35c9fb0724c390dc980b78e 100644 (file)
@@ -209,4 +209,30 @@ bool can_delay_allocation(device_type_t type) {
   return type <= RANDOM_BLOCK;
 }
 
+device_type_t string_to_device_type(std::string type) {
+  // map a device-type name to its enum value; NONE means "not recognized"
+  device_type_t parsed = device_type_t::NONE;
+  if (type == "segmented") {
+    parsed = device_type_t::SEGMENTED;
+  } else if (type == "random_block") {
+    parsed = device_type_t::RANDOM_BLOCK;
+  } else if (type == "pmem") {
+    parsed = device_type_t::PMEM;
+  }
+  return parsed;
+}
+
+std::string device_type_to_string(device_type_t dtype) {  // name used when building block.<type>.<id> device paths
+  switch (dtype) {
+  case device_type_t::SEGMENTED:
+    return "segmented";
+  case device_type_t::RANDOM_BLOCK:
+    return "random_block";
+  case device_type_t::PMEM:
+    return "pmem";
+  default: ceph_assert(0 == "impossible");  // any other value (including NONE) has no name
+  }
+  return std::string();  // not expected to be reached (the assert above fails first); gives every path a return value
+}
+
 }
index 1ab1a5a88df1e47b86db57d46ecabdf3297c0b48..b3517efcb0e1ccb1689386246fdc73169bff1959 100644 (file)
@@ -369,6 +369,8 @@ enum device_type_t {
 };
 
 bool can_delay_allocation(device_type_t type);
+device_type_t string_to_device_type(std::string type);
+std::string device_type_to_string(device_type_t type);
 
 /* Monotonically increasing identifier for the location of a
  * journal_record.
@@ -1015,3 +1017,40 @@ WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::delta_info_t)
 WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t)
 WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t)
 WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t)
+
+template<>
+struct denc_traits<crimson::os::seastore::device_type_t> {  // denc support so device_type_t can be used with denc() in the structs that carry it
+  static constexpr bool supported = true;
+  static constexpr bool featured = false;
+  static constexpr bool bounded = true;
+  static constexpr bool need_contiguous = false;
+
+  static void bound_encode(
+    const crimson::os::seastore::device_type_t &o,
+    size_t& p,
+    uint64_t f=0) {
+    p += sizeof(crimson::os::seastore::device_type_t);  // encoded size bound is the enum's in-memory size
+  }
+  template<class It>
+  static std::enable_if_t<!is_const_iterator_v<It>>
+  encode(
+    const crimson::os::seastore::device_type_t &o,
+    It& p,
+    uint64_t f=0) {
+    get_pos_add<crimson::os::seastore::device_type_t>(p) = o;  // presumably writes o at p and advances p by sizeof(device_type_t) -- confirm against get_pos_add
+  }
+  template<class It>
+  static std::enable_if_t<is_const_iterator_v<It>>
+  decode(
+    crimson::os::seastore::device_type_t& o,
+    It& p,
+    uint64_t f=0) {
+    o = get_pos_add<crimson::os::seastore::device_type_t>(p);  // mirror of encode for const iterators
+  }
+  static void decode(
+    crimson::os::seastore::device_type_t& o,
+    ceph::buffer::list::const_iterator &p) {
+    p.copy(sizeof(crimson::os::seastore::device_type_t),
+           reinterpret_cast<char*>(&o));  // bufferlist variant: copies sizeof(device_type_t) bytes straight into o
+  }
+};
index 7060a55e0cd23f47ad829f9dd9f79a8ca61425e6..b38b1b65f2e0dcfbd736f1f79e0fc7ef94f2eae4 100644 (file)
@@ -683,7 +683,7 @@ public:
     ExtentReaderRef&& scanner,
     bool detailed = false);
 
-  void mount(SegmentManager &psm, std::vector<SegmentManager*>& sms) {
+  void mount(device_id_t pdevice_id, std::vector<SegmentManager*>& sms) {
     crimson::get_logger(ceph_subsys_seastore).debug(
       "SegmentCleaner::mount: {} segment managers", sms.size());
     init_complete = false;
@@ -691,7 +691,7 @@ public:
     journal_tail_target = journal_seq_t{};
     journal_tail_committed = journal_seq_t{};
     journal_head = journal_seq_t{};
-    journal_device_id = psm.get_device_id();
+    journal_device_id = pdevice_id;
 
     for (auto& sm : sms) {
       if (sm)
index 53a5ff7a514598a8a2f92bec4fcb4d36a3851573..437aa72ad49f38b6adc29ce8240660ebb6d7967a 100644 (file)
 
 namespace crimson::os::seastore {
 
+using magic_t = uint64_t;
+
+struct device_spec_t{  // identity of one device: the value type of secondary_device_set_t
+  magic_t magic;  // compared with the device's own magic when SeaStore mounts a secondary
+  device_type_t dtype;  // device type; SeaStore::mount uses it to build the device path
+  device_id_t id;  // device id; also the key in secondary_device_set_t
+  DENC(device_spec_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.magic, p);
+    denc(v.dtype, p);
+    denc(v.id, p);
+    DENC_FINISH(p);
+  }
+};
+
+using secondary_device_set_t =
+  std::map<device_id_t, device_spec_t>;
+
+struct block_sm_superblock_t {  // superblock of a block segment manager; moved into this shared header and extended with device identity fields
+  size_t size = 0;
+  size_t segment_size = 0;
+  size_t block_size = 0;
+
+  size_t segments = 0;
+  uint64_t tracker_offset = 0;
+  uint64_t first_segment_offset = 0;
+
+  bool major_dev = false;  // SeaStore::mkfs passes true for the primary device and false for secondaries
+  magic_t magic = 0;
+  device_type_t dtype = device_type_t::NONE;
+  device_id_t device_id = 0;
+
+  seastore_meta_t meta;
+
+  secondary_device_set_t secondary_devices;  // only encoded when major_dev is set (see below)
+  DENC(block_sm_superblock_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.size, p);
+    denc(v.segment_size, p);
+    denc(v.block_size, p);
+    denc(v.segments, p);
+    denc(v.tracker_offset, p);
+    denc(v.first_segment_offset, p);
+    denc(v.meta, p);  // note: meta is encoded before the identity fields although it is declared after them
+    denc(v.major_dev, p);
+    denc(v.magic, p);
+    denc(v.dtype, p);
+    denc(v.device_id, p);
+    if (v.major_dev) {  // variable-length tail: the encoded size is no longer a fixed bound
+      denc(v.secondary_devices, p);
+    }
+    DENC_FINISH(p);
+  }
+};
+
+struct segment_manager_config_t {  // mkfs-time parameters; make_superblock copies each field into the superblock
+  bool major_dev = false;  // true for the primary device in SeaStore::mkfs and TMDriver::mkfs
+  magic_t magic = 0;
+  device_type_t dtype = device_type_t::NONE;
+  device_id_t device_id = 0;
+  seastore_meta_t meta;
+  secondary_device_set_t secondary_devices;  // left empty for secondaries; the primary receives the collected set
+};
+
 class Segment : public boost::intrusive_ref_counter<
   Segment,
   boost::thread_unsafe_counter>{
@@ -89,9 +153,14 @@ public:
   using mount_ret = access_ertr::future<>;
   virtual mount_ret mount() = 0;
 
+  using close_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error
+    >;
+  virtual close_ertr::future<> close() = 0;
+
   using mkfs_ertr = access_ertr;
   using mkfs_ret = mkfs_ertr::future<>;
-  virtual mkfs_ret mkfs(seastore_meta_t meta) = 0;
+  virtual mkfs_ret mkfs(segment_manager_config_t meta) = 0;
 
   using open_ertr = crimson::errorator<
     crimson::ct_error::input_output_error,
@@ -137,8 +206,21 @@ public:
 
   virtual device_id_t get_device_id() const = 0;
 
+  virtual secondary_device_set_t& get_secondary_devices() = 0;
+
+  virtual device_spec_t get_device_spec() const = 0;
+
+  virtual magic_t get_magic() const = 0;
+
   virtual ~SegmentManager() {}
 };
 using SegmentManagerRef = std::unique_ptr<SegmentManager>;
 
 }
+
+WRITE_CLASS_DENC(
+  crimson::os::seastore::device_spec_t
+)
+WRITE_CLASS_DENC(
+  crimson::os::seastore::block_sm_superblock_t
+)
index 12e45d88a6bd842d24c96f2b473f1a1faf1dbb6f..5b395fac589b63ef8263c43c81f2f1c96e0e8d3d 100644 (file)
@@ -131,7 +131,7 @@ SegmentStateTracker::read_in(
 
 static
 block_sm_superblock_t make_superblock(
-  seastore_meta_t meta,
+  segment_manager_config_t sm_config,
   const seastar::stat_data &data)
 {
   using crimson::common::get_conf;
@@ -168,7 +168,12 @@ block_sm_superblock_t make_superblock(
     segments,
     data.block_size,
     tracker_size + data.block_size,
-    meta
+    sm_config.major_dev,
+    sm_config.magic,
+    sm_config.dtype,
+    sm_config.device_id,
+    sm_config.meta,
+    std::move(sm_config.secondary_devices)
   };
 }
 
@@ -260,12 +265,12 @@ open_device_ret open_device(
   });
 }
 
-  
+
 static
 BlockSegmentManager::access_ertr::future<>
 write_superblock(seastar::file &device, block_sm_superblock_t sb)
 {
-  assert(ceph::encoded_sizeof_bounded<block_sm_superblock_t>() <
+  assert(ceph::encoded_sizeof<block_sm_superblock_t>(sb) <
         sb.block_size);
   return seastar::do_with(
     bufferptr(ceph::buffer::create_page_aligned(sb.block_size)),
@@ -284,8 +289,6 @@ static
 BlockSegmentManager::access_ertr::future<block_sm_superblock_t>
 read_superblock(seastar::file &device, seastar::stat_data sd)
 {
-  assert(ceph::encoded_sizeof_bounded<block_sm_superblock_t>() <
-        sd.block_size);
   return seastar::do_with(
     bufferptr(ceph::buffer::create_page_aligned(sd.block_size)),
     [=, &device](auto &bp) {
@@ -299,7 +302,13 @@ read_superblock(seastar::file &device, seastar::stat_data sd)
          bl.push_back(bp);
          block_sm_superblock_t ret;
          auto bliter = bl.cbegin();
-         decode(ret, bliter);
+         try {
+           decode(ret, bliter);
+         } catch (...) {
+           ceph_assert(0 == "invalid superblock");
+         }
+         assert(ceph::encoded_sizeof<block_sm_superblock_t>(ret) <
+                sd.block_size);
          return BlockSegmentManager::access_ertr::future<block_sm_superblock_t>(
            BlockSegmentManager::access_ertr::ready_future_marker{},
            ret);
@@ -384,11 +393,11 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount()
   ).safe_then([=](auto p) {
     device = std::move(p.first);
     auto sd = p.second;
-    stats.data_read.increment(
-        ceph::encoded_sizeof_bounded<block_sm_superblock_t>());
     return read_superblock(device, sd);
   }).safe_then([=](auto sb) {
     superblock = sb;
+    stats.data_read.increment(
+        ceph::encoded_sizeof<block_sm_superblock_t>(superblock));
     tracker = std::make_unique<SegmentStateTracker>(
       superblock.segments,
       superblock.block_size);
@@ -405,11 +414,17 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount()
       stats.metadata_write.increment(tracker->get_size());
       return tracker->write_out(device, superblock.tracker_offset);
     });
+  }).safe_then([this] {
+    logger().debug("segment manager {} mounted", get_device_id());
+    register_metrics();
   });
 }
 
-BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(seastore_meta_t meta)
+BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(
+  segment_manager_config_t sm_config)
 {
+  logger().debug("BlockSegmentManager::mkfs: magic={}, dtype={}, id={}",
+    sm_config.magic, sm_config.dtype, sm_config.device_id);
   return seastar::do_with(
     seastar::file{},
     seastar::stat_data{},
@@ -426,12 +441,12 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(seastore_meta_t meta)
 
       return maybe_create.safe_then([this] {
        return open_device(device_path, seastar::open_flags::rw);
-      }).safe_then([&, meta](auto p) {
+      }).safe_then([&, sm_config](auto p) {
        device = p.first;
        stat = p.second;
-       sb = make_superblock(meta, stat);
+       sb = make_superblock(sm_config, stat);
        stats.metadata_write.increment(
-           ceph::encoded_sizeof_bounded<block_sm_superblock_t>());
+           ceph::encoded_sizeof<block_sm_superblock_t>(sb));
        return write_superblock(device, sb);
       }).safe_then([&] {
        logger().debug("BlockSegmentManager::mkfs: superblock written");
@@ -449,6 +464,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(seastore_meta_t meta)
 
 BlockSegmentManager::close_ertr::future<> BlockSegmentManager::close()
 {
+  logger().debug("closing segment manager {}", get_device_id());  // the id is read from the superblock, which is populated by mount()
   metrics.clear();
   return device.close();
 }
@@ -549,8 +565,11 @@ SegmentManager::read_ertr::future<> BlockSegmentManager::read(
 
 void BlockSegmentManager::register_metrics()
 {
+  logger().debug("{} {}", __func__, get_device_id());
   namespace sm = seastar::metrics;
-  // TODO: add label for device_id
+  sm::label label("device_id");
+  std::vector<sm::label_instance> label_instances;
+  label_instances.push_back(label(get_device_id()));
   stats.reset();
   metrics.add_group(
     "segment_manager",
@@ -558,52 +577,62 @@ void BlockSegmentManager::register_metrics()
       sm::make_counter(
         "data_read_num",
         stats.data_read.num,
-        sm::description("total number of data read")
+        sm::description("total number of data read"),
+       label_instances
       ),
       sm::make_counter(
         "data_read_bytes",
         stats.data_read.bytes,
-        sm::description("total bytes of data read")
+        sm::description("total bytes of data read"),
+       label_instances
       ),
       sm::make_counter(
         "data_write_num",
         stats.data_write.num,
-        sm::description("total number of data write")
+        sm::description("total number of data write"),
+       label_instances
       ),
       sm::make_counter(
         "data_write_bytes",
         stats.data_write.bytes,
-        sm::description("total bytes of data write")
+        sm::description("total bytes of data write"),
+       label_instances
       ),
       sm::make_counter(
         "metadata_write_num",
         stats.metadata_write.num,
-        sm::description("total number of metadata write")
+        sm::description("total number of metadata write"),
+       label_instances
       ),
       sm::make_counter(
         "metadata_write_bytes",
         stats.metadata_write.bytes,
-        sm::description("total bytes of metadata write")
+        sm::description("total bytes of metadata write"),
+       label_instances
       ),
       sm::make_counter(
         "opened_segments",
         stats.opened_segments,
-        sm::description("total segments opened")
+        sm::description("total segments opened"),
+       label_instances
       ),
       sm::make_counter(
         "closed_segments",
         stats.closed_segments,
-        sm::description("total segments closed")
+        sm::description("total segments closed"),
+       label_instances
       ),
       sm::make_counter(
         "closed_segments_unused_bytes",
         stats.closed_segments_unused_bytes,
-        sm::description("total unused bytes of closed segments")
+        sm::description("total unused bytes of closed segments"),
+       label_instances
       ),
       sm::make_counter(
         "released_segments",
         stats.released_segments,
-        sm::description("total segments released")
+        sm::description("total segments released"),
+       label_instances
       ),
     }
   );
index 2712243e3b40fdb2b43ec7440fbc7adf4181fa8a..5ce2943f461acad1af9d4de3ac4a2e5e4e81c27b 100644 (file)
 
 namespace crimson::os::seastore::segment_manager::block {
 
-struct block_sm_superblock_t {
-  size_t size = 0;
-  size_t segment_size = 0;
-  size_t block_size = 0;
-    
-  size_t segments = 0;
-  uint64_t tracker_offset = 0;
-  uint64_t first_segment_offset = 0;
-
-  seastore_meta_t meta;
-    
-  DENC(block_sm_superblock_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.segment_size, p);
-    denc(v.block_size, p);
-    denc(v.segments, p);
-    denc(v.tracker_offset, p);
-    denc(v.first_segment_offset, p);
-    denc(v.meta, p);
-    DENC_FINISH(p);
-  }
-};
-
 using write_ertr = crimson::errorator<
   crimson::ct_error::input_output_error>;
 using read_ertr = crimson::errorator<
@@ -134,20 +110,14 @@ class BlockSegmentManager final : public SegmentManager {
 public:
   mount_ret mount() final;
 
-  mkfs_ret mkfs(seastore_meta_t) final;
-  
-  using close_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error
-    >;
+  mkfs_ret mkfs(segment_manager_config_t) final;
+
   close_ertr::future<> close();
 
   BlockSegmentManager(
-    const std::string &path,
-    device_id_t device_id = 0)
-  : device_path(path),
-    device_id(device_id) {
-    register_metrics();
-  }
+    const std::string &path)
+  : device_path(path) {}
+
   ~BlockSegmentManager();
 
   open_ertr::future<SegmentRef> open(segment_id_t id) final;
@@ -170,15 +140,27 @@ public:
   }
 
   device_id_t get_device_id() const final {
-    return device_id;
+    return superblock.device_id;
+  }
+  secondary_device_set_t& get_secondary_devices() final {
+    return superblock.secondary_devices;
   }
-
   // public so tests can bypass segment interface when simpler
   Segment::write_ertr::future<> segment_write(
     paddr_t addr,
     ceph::bufferlist bl,
     bool ignore_check=false);
 
+  device_spec_t get_device_spec() const final {
+    // identity triple (magic, type, id) as stored in the superblock
+    const auto& sb = superblock;
+    return device_spec_t{sb.magic, sb.dtype, sb.device_id};
+  }
+
+  magic_t get_magic() const final {  // magic stored in the superblock; SeaStore::mount compares it with the value recorded by the primary device
+    return superblock.magic;
+  }
+
 private:
   friend class BlockSegment;
   using segment_state_t = Segment::segment_state_t;
@@ -242,8 +224,3 @@ private:
 };
 
 }
-
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::segment_manager::block::block_sm_superblock_t
-)
-
index fd8b28c43e20a54176593796ef728861c00a6dc5..10fd2c6b345cc53e01a7f1bed11f1dd71085f728 100644 (file)
@@ -20,12 +20,18 @@ struct ephemeral_config_t {
   size_t size = 0;
   size_t block_size = 0;
   size_t segment_size = 0;
+  magic_t magic = 0;
+  device_type_t dtype = device_type_t::NONE;
+  device_id_t id = 0;
 };
 
 constexpr ephemeral_config_t DEFAULT_TEST_EPHEMERAL = {
   1 << 30,
   4 << 10,
-  8 << 20
+  8 << 20,
+  0xabcd,
+  device_type_t::SEGMENTED,
+  0
 };
 
 std::ostream &operator<<(std::ostream &, const ephemeral_config_t &);
@@ -65,17 +71,20 @@ class EphemeralSegmentManager final : public SegmentManager {
 
   Segment::close_ertr::future<> segment_close(segment_id_t id);
 
-  device_id_t device_id = 0;
+  secondary_device_set_t sec_device_set;
 
 public:
   EphemeralSegmentManager(
-    ephemeral_config_t config,
-    device_id_t device_id = 0)
-    : config(config), device_id(device_id) {}
+    ephemeral_config_t config)
+    : config(config) {}
   ~EphemeralSegmentManager();
 
+  close_ertr::future<> close() final {  // implements SegmentManager::close; no work is done here
+    return close_ertr::now();
+  }
+
   device_id_t get_device_id() const {
-    return device_id;
+    return config.id;
   }
 
   using init_ertr = crimson::errorator<
@@ -88,7 +97,7 @@ public:
     return mount_ertr::now();
   }
 
-  mkfs_ret mkfs(seastore_meta_t) {
+  mkfs_ret mkfs(segment_manager_config_t) {
     return mkfs_ertr::now();
   }
 
@@ -116,6 +125,18 @@ public:
     return *meta;
   }
 
+  secondary_device_set_t& get_secondary_devices() final {  // returns the member set; nothing in the visible code populates it
+    return sec_device_set;
+  }
+
+  device_spec_t get_device_spec() const final {  // identity comes from the ephemeral config rather than from a superblock
+    return {config.magic, config.dtype, config.id};
+  }
+
+  magic_t get_magic() const final {  // magic configured for this ephemeral device (0xabcd in DEFAULT_TEST_EPHEMERAL)
+    return config.magic;
+  }
+
   void remount();
 
   // public so tests can bypass segment interface when simpler
index 2bb4686bdaaaa954c5cc3c3b122c2035c05a353b..5d52fa83d2c17c149913e1d5b29f5783478f04a2 100644 (file)
@@ -17,13 +17,15 @@ TransactionManager::TransactionManager(
   JournalRef _journal,
   CacheRef _cache,
   LBAManagerRef _lba_manager,
-  ExtentPlacementManagerRef&& epm)
+  ExtentPlacementManagerRef&& epm,
+  ExtentReader& scanner)
   : segment_manager(_segment_manager),
     segment_cleaner(std::move(_segment_cleaner)),
     cache(std::move(_cache)),
     lba_manager(std::move(_lba_manager)),
     journal(std::move(_journal)),
-    epm(std::move(epm))
+    epm(std::move(epm)),
+    scanner(scanner)
 {
   segment_cleaner->set_extent_callback(this);
   journal->set_write_pipeline(&write_pipeline);
@@ -33,7 +35,9 @@ TransactionManager::TransactionManager(
 TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
 {
   LOG_PREFIX(TransactionManager::mkfs);
-  segment_cleaner->mount(segment_manager, segment_managers);
+  segment_cleaner->mount(
+    segment_manager.get_device_id(),
+    scanner.get_segment_managers());
   return journal->open_for_write().safe_then([this, FNAME](auto addr) {
     DEBUG("about to do_with");
     segment_cleaner->init_mkfs(addr);
@@ -64,7 +68,9 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
 {
   LOG_PREFIX(TransactionManager::mount);
   cache->init();
-  segment_cleaner->mount(segment_manager, segment_managers);
+  segment_cleaner->mount(
+    segment_manager.get_device_id(),
+    scanner.get_segment_managers());
   return segment_cleaner->init_segments().safe_then(
     [this](auto&& segments) {
     return journal->replay(
index 80d1459b0e22792c7329b172610c3eb9e8803771..7dc7b654c4168e921f51232974f955bd170bc9a3 100644 (file)
@@ -71,7 +71,8 @@ public:
     JournalRef journal,
     CacheRef cache,
     LBAManagerRef lba_manager,
-    ExtentPlacementManagerRef&& epm);
+    ExtentPlacementManagerRef&& epm,
+    ExtentReader& scanner);
 
   /// Writes initial metadata to disk
   using mkfs_ertr = base_ertr;
@@ -499,17 +500,35 @@ public:
     return segment_cleaner->stat();
   }
 
+  void add_segment_manager(SegmentManager* sm) {  // registers one device: adds it to the extent reader and creates an allocator for it; sm is a raw pointer, callers in SeaStore keep the owning unique_ptr
+    LOG_PREFIX(TransactionManager::add_segment_manager);
+    DEBUG("adding segment manager {}", sm->get_device_id());
+    scanner.add_segment_manager(sm);
+    epm->add_allocator(
+      device_type_t::SEGMENTED,  // NOTE(review): always SEGMENTED, independent of the dtype stored for sm -- confirm this is intended
+      std::make_unique<SegmentedAllocator>(
+       *segment_cleaner,
+       *sm,
+       *lba_manager,
+       *journal,
+       *cache));
+  }
+
   ~TransactionManager();
 
 private:
   friend class Transaction;
 
+  // although there might be multiple devices backing seastore,
+  // only one of them is supposed to hold the journal. This
+  // segment manager is that device
   SegmentManager &segment_manager;
   SegmentCleanerRef segment_cleaner;
   CacheRef cache;
   LBAManagerRef lba_manager;
   JournalRef journal;
   ExtentPlacementManagerRef epm;
+  ExtentReader& scanner;
 
   WritePipeline write_pipeline;
 
index 5e97007a563abca42e314fb8dff93d4c88c408ec..f83801730a7cf5b441142afb2c4ac6f32c06cb2b 100644 (file)
@@ -134,7 +134,8 @@ void TMDriver::init()
     SegmentCleaner::config_t::get_default(),
     std::move(scanner),
     false /* detailed */);
-  segment_cleaner->mount(*segment_manager);
+  std::vector<SegmentManager*> sms;
+  segment_cleaner->mount(segment_manager->get_device_id(), sms);
   auto journal = std::make_unique<Journal>(*segment_manager, scanner_ref);
   auto cache = std::make_unique<Cache>(scanner_ref, segment_manager->get_block_size());
   auto lba_manager = lba_manager::create_lba_manager(*segment_manager, *cache);
@@ -158,7 +159,8 @@ void TMDriver::init()
     std::move(journal),
     std::move(cache),
     std::move(lba_manager),
-    std::move(epm));
+    std::move(epm),
+    scanner_ref);
 }
 
 void TMDriver::clear()
@@ -181,7 +183,13 @@ seastar::future<> TMDriver::mkfs()
   seastore_meta_t meta;
   meta.seastore_id.generate_random();
   return segment_manager->mkfs(
-    std::move(meta)
+    segment_manager_config_t{
+      true,
+      (magic_t)std::rand(),
+      device_type_t::SEGMENTED,
+      0,
+      meta,
+      secondary_device_set_t()}
   ).safe_then([this] {
     logger().debug("");
     return segment_manager->mount();
index f4c2526203fb7ca1a5a650ae6d9a6b79ad181e3f..2973ef487dccc1474f9fdcc585828cfc49eb5249 100644 (file)
@@ -61,6 +61,7 @@ struct rbm_test_t : public  seastar_test_suite_t,
       reader(new ExtentReader()),
       cache(*reader, segment_manager->get_block_size())
   {
+    reader->add_segment_manager(segment_manager.get());
     device = new nvme_device::TestMemory(DEFAULT_TEST_SIZE);
     rbm_manager.reset(new NVMeManager(device, std::string()));
     config.start = 0;
index e12b79eddbb1846754fc0579315d213b3de741a8..00e77b68b784108d33f6e221ae4bd7b99891445c 100644 (file)
@@ -76,7 +76,6 @@ auto get_transaction_manager(
   auto segment_cleaner = std::make_unique<SegmentCleaner>(
     SegmentCleaner::config_t::get_default(),
     std::move(scanner),
-    segment_manager,
     true);
   auto journal = std::make_unique<Journal>(segment_manager, scanner_ref);
   auto cache = std::make_unique<Cache>(scanner_ref, segment_manager.get_block_size());
@@ -101,13 +100,15 @@ auto get_transaction_manager(
     std::move(journal),
     std::move(cache),
     std::move(lba_manager),
-    std::move(epm));
+    std::move(epm),
+    scanner_ref);
 }
 
 auto get_seastore(SegmentManagerRef sm) {
   auto tm = get_transaction_manager(*sm);
   auto cm = std::make_unique<collection_manager::FlatCollectionManager>(*tm);
   return std::make_unique<SeaStore>(
+    "",
     std::move(sm),
     std::move(tm),
     std::move(cm),
@@ -195,6 +196,7 @@ protected:
 class TestSegmentManagerWrapper final : public SegmentManager {
   SegmentManager &sm;
   device_id_t device_id = 0;
+  secondary_device_set_t set;
 public:
   TestSegmentManagerWrapper(
     SegmentManager &sm,
@@ -209,10 +211,25 @@ public:
     return mount_ertr::now(); // we handle this above
   }
 
-  mkfs_ret mkfs(seastore_meta_t c) final {
+  mkfs_ret mkfs(segment_manager_config_t c) final {
     return mkfs_ertr::now(); // we handle this above
   }
 
+  close_ertr::future<> close() final { // pure pass-through to the wrapped SegmentManager `sm`
+    return sm.close(); // the wrapped manager's future is returned as-is
+  }
+
+  secondary_device_set_t& get_secondary_devices() final { // forwards to the wrapped SegmentManager `sm`
+    return sm.get_secondary_devices(); // the wrapper's own `set` member is not consulted here
+  }
+
+  device_spec_t get_device_spec() const final { // forwards to the wrapped SegmentManager `sm`
+    return sm.get_device_spec(); // reports the wrapped manager's spec, not one owned by the wrapper
+  }
+
+  magic_t get_magic() const final { // forwards to the wrapped SegmentManager `sm`
+    return sm.get_magic(); // reports the wrapped manager's magic value
+  }
 
   open_ertr::future<SegmentRef> open(segment_id_t id) final {
     return sm.open(id);
index f66fc90d797d65f601b0f610d4762bb7d33907f5..ef35228e4cc7f6e5711c9c588f791fdc8f610389 100755 (executable)
@@ -180,6 +180,7 @@ with_mgr_restful=false
 filestore_path=
 kstore_path=
 declare -a block_devs
+declare -a secondary_block_devs
 
 VSTART_SEC="client.vstart.sh"
 
@@ -245,6 +246,7 @@ options:
        --no-parallel: dont start all OSDs in parallel
        --jaeger: use jaegertracing for tracing
        --seastore-devs: comma-separated list of blockdevs to use for seastore
+        --seastore-secondary-devs: comma-separated list of secondary blockdevs to use for seastore
 EOF
 
 usage_exit() {
@@ -267,6 +269,21 @@ parse_block_devs() {
     done
 }
 
+parse_secondary_devs() {
+    local opt_name=$1
+    shift
+    local devs=$1
+    shift
+    local dev
+    IFS=',' read -r -a secondary_block_devs <<< "$devs"
+    for dev in "${secondary_block_devs[@]}"; do
+        if [ ! -b $dev ] || [ ! -w $dev ]; then
+            echo "All $opt_name must refer to writable block devices"
+            exit 1
+        fi
+    done
+}
+
 while [ $# -ge 1 ]; do
 case $1 in
     -d | --debug)
@@ -475,6 +492,10 @@ case $1 in
         parse_block_devs --seastore-devs "$2"
         shift
         ;;
+    --seastore-secondary-devs)
+        # pass this option's own name so the validation error names the right flag
+        parse_secondary_devs --seastore-secondary-devs "$2"
+        shift
+        ;;
     --bluestore-spdk)
         [ -z "$2" ] && usage_exit
         IFS=',' read -r -a bluestore_spdk_dev <<< "$2"
@@ -952,6 +973,10 @@ EOF
                     dd if=/dev/zero of=${block_devs[$osd]} bs=1M count=1
                     ln -s ${block_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block
                 fi
+                if [ -n "${secondary_block_devs[$osd]}" ]; then
+                    dd if=/dev/zero of=${secondary_block_devs[$osd]} bs=1M count=1
+                    ln -s ${secondary_block_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block.segmented.1
+                fi
             fi
             if [ "$objectstore" == "bluestore" ]; then
                 wconf <<EOF