git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: fix inconsistent segment allocation/reclaim with multiple devices
author Yingxin Cheng <yingxin.cheng@intel.com>
Thu, 31 Mar 2022 03:43:41 +0000 (11:43 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Wed, 6 Apr 2022 02:48:14 +0000 (10:48 +0800)
The current cleaning mechanism does not distinguish between devices, so we
cannot safely allocate segments from a specific segment manager.

Replace SegmentManager with SegmentManagerGroup in SegmentAllocator and the
related classes, so that segment allocation and reclaim are handled
consistently across all devices.

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
20 files changed:
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/journal.cc
src/crimson/os/seastore/journal.h
src/crimson/os/seastore/journal/segment_allocator.cc
src/crimson/os/seastore/journal/segment_allocator.h
src/crimson/os/seastore/journal/segmented_journal.cc
src/crimson/os/seastore/journal/segmented_journal.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/segment_cleaner.cc
src/crimson/os/seastore/segment_cleaner.h
src/crimson/os/seastore/segment_manager.h
src/crimson/os/seastore/segment_manager_group.cc
src/crimson/os/seastore/segment_manager_group.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/crimson/tools/store_nbd/tm_driver.cc
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_seastore_journal.cc
src/test/crimson/seastore/transaction_manager_test_state.h

index ddba2186228b4afe120cd7309b1935fb13a20960..722ffc00511cbd982d223e4f6980294c3a3cebf7 100644 (file)
@@ -10,19 +10,18 @@ SET_SUBSYS(seastore_journal);
 namespace crimson::os::seastore {
 
 SegmentedAllocator::SegmentedAllocator(
-  SegmentProvider& sp,
-  SegmentManager& sm,
+  SegmentProvider &sp,
   SegmentSeqAllocator &ssa)
-  : cold_writer{"COLD", sp, sm, ssa},
-    rewrite_writer{"REWRITE", sp, sm, ssa}
-{}
+  : cold_writer{"COLD", sp, ssa},
+    rewrite_writer{"REWRITE", sp, ssa}
+{
+}
 
 SegmentedAllocator::Writer::Writer(
   std::string name,
   SegmentProvider& sp,
-  SegmentManager& sm,
   SegmentSeqAllocator &ssa)
-  : segment_allocator(name, segment_type_t::OOL, sp, sm, ssa),
+  : segment_allocator(name, segment_type_t::OOL, sp, ssa),
     record_submitter(crimson::common::get_conf<uint64_t>(
                        "seastore_journal_iodepth_limit"),
                      crimson::common::get_conf<uint64_t>(
index 8865aa9201e5edf31a06d06d6c7f017853087fa2..a1fdf4db7f56aa24444a55ffd161bae4ff2ff0a4 100644 (file)
@@ -76,8 +76,7 @@ class SegmentedAllocator : public ExtentAllocator {
   class Writer : public ExtentOolWriter {
   public:
     Writer(std::string name,
-           SegmentProvider& sp,
-           SegmentManager& sm,
+           SegmentProvider &sp,
            SegmentSeqAllocator &ssa);
     Writer(Writer &&) = default;
 
@@ -113,8 +112,7 @@ class SegmentedAllocator : public ExtentAllocator {
   };
 public:
   SegmentedAllocator(
-    SegmentProvider& sp,
-    SegmentManager& sm,
+    SegmentProvider &sp,
     SegmentSeqAllocator &ssa);
 
   Writer &get_writer(placement_hint_t hint) {
index 2ed4d49739a72e582718a6a7296f7b4f3317f5f1..98dfd7dd85378aa5e90722ab77658128931d64b0 100644 (file)
@@ -7,11 +7,10 @@
 namespace crimson::os::seastore::journal {
 
 JournalRef make_segmented(
-  SegmentManager &sm,
   SegmentManagerGroup &sms,
   SegmentProvider &provider)
 {
-  return std::make_unique<SegmentedJournal>(sm, sms, provider);
+  return std::make_unique<SegmentedJournal>(sms, provider);
 }
 
 }
index b0448d7e9508f63c128d734e5fad69cf8b2e254e..b16a3874ac7adf37f99c4f23af575ad4b9bb2274 100644 (file)
@@ -15,7 +15,6 @@ namespace nvme_device {
 class NVMeBlockDevice;
 }
 
-class SegmentManager;
 class SegmentManagerGroup;
 class SegmentProvider;
 
@@ -92,7 +91,6 @@ using JournalRef = std::unique_ptr<Journal>;
 namespace journal {
 
 JournalRef make_segmented(
-  SegmentManager &sm,
   SegmentManagerGroup &sms,
   SegmentProvider &provider);
 
index a3684d91c82537ba8aaa9e439296d28a12b27775..6b2dabefbc7f0f3a572c12a440707ad39ebb5983 100644 (file)
@@ -16,13 +16,12 @@ SegmentAllocator::SegmentAllocator(
   std::string name,
   segment_type_t type,
   SegmentProvider &sp,
-  SegmentManager &sm,
   SegmentSeqAllocator &ssa)
   : name{name},
     print_name{fmt::format("D?_{}", name)},
     type{type},
     segment_provider{sp},
-    segment_manager{sm},
+    sm_group{*sp.get_segment_manager_group()},
     segment_seq_allocator(ssa)
 {
   ceph_assert(type != segment_type_t::NULL_SEG);
@@ -36,14 +35,14 @@ SegmentAllocator::do_open()
   ceph_assert(!current_segment);
   segment_seq_t new_segment_seq =
     segment_seq_allocator.get_and_inc_next_segment_seq();
-  auto meta = segment_manager.get_meta();
+  auto meta = sm_group.get_meta();
   current_segment_nonce = ceph_crc32c(
     new_segment_seq,
     reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
     sizeof(meta.seastore_id.uuid));
-  auto new_segment_id = segment_provider.get_segment(
-      get_device_id(), new_segment_seq, type);
-  return segment_manager.open(new_segment_id
+  auto new_segment_id = segment_provider.get_segment(new_segment_seq, type);
+  ceph_assert(new_segment_id != NULL_SEG_ID);
+  return sm_group.open(new_segment_id
   ).handle_error(
     open_ertr::pass_further{},
     crimson::ct_error::assert_all{
@@ -67,7 +66,7 @@ SegmentAllocator::do_open()
     INFO("{} writing header to new segment ... -- {}",
          print_name, header);
 
-    auto header_length = segment_manager.get_block_size();
+    auto header_length = get_block_size();
     bufferlist bl;
     encode(header, bl);
     bufferptr bp(ceph::buffer::create_page_aligned(header_length));
@@ -117,9 +116,16 @@ SegmentAllocator::open_ret
 SegmentAllocator::open()
 {
   LOG_PREFIX(SegmentAllocator::open);
-  print_name = fmt::format("D{}_{}",
-                           device_id_printer_t{get_device_id()},
-                           name);
+  auto& device_ids = sm_group.get_device_ids();
+  ceph_assert(device_ids.size());
+  std::ostringstream oss;
+  oss << "D";
+  for (auto& device_id : device_ids) {
+    oss << "_" << device_id_printer_t{device_id};
+  }
+  oss << "_" << name;
+  print_name = oss.str();
+
   INFO("{}", print_name);
   return do_open();
 }
@@ -147,7 +153,7 @@ SegmentAllocator::write(ceph::bufferlist to_write)
   };
   TRACE("{} {}~{}", print_name, write_start_seq, write_length);
   assert(write_length > 0);
-  assert((write_length % segment_manager.get_block_size()) == 0);
+  assert((write_length % get_block_size()) == 0);
   assert(!needs_roll(write_length));
 
   auto write_result = write_result_t{
@@ -230,20 +236,16 @@ SegmentAllocator::close_segment(bool is_rolling)
        current_segment_nonce,
        tail.journal_tail);
 
-  bufferptr bp(
-    ceph::buffer::create_page_aligned(
-      segment_manager.get_block_size()));
+  bufferptr bp(ceph::buffer::create_page_aligned(get_block_size()));
   bp.zero();
   auto iter = bl.cbegin();
   iter.copy(bl.length(), bp.c_str());
   bl.clear();
   bl.append(bp);
 
-  assert(bl.length() ==
-    (size_t)segment_manager.get_rounded_tail_length());
+  assert(bl.length() == sm_group.get_rounded_tail_length());
   return seg_to_close->write(
-    segment_manager.get_segment_size()
-      - segment_manager.get_rounded_tail_length(),
+    sm_group.get_segment_size() - sm_group.get_rounded_tail_length(),
     bl
   ).safe_then([seg_to_close=std::move(seg_to_close)] {
     return seg_to_close->close();
index 0a1b9812fd626696a6d81430025805b36d701134..ebb7ff08430c3af4d5cb2c974e5071d157c32147 100644 (file)
@@ -11,7 +11,7 @@
 #include "include/buffer.h"
 
 #include "crimson/common/errorator.h"
-#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/segment_manager_group.h"
 #include "crimson/os/seastore/segment_seq_allocator.h"
 
 namespace crimson::os::seastore {
@@ -33,29 +33,20 @@ class SegmentAllocator {
   SegmentAllocator(std::string name,
                    segment_type_t type,
                    SegmentProvider &sp,
-                   SegmentManager &sm,
                    SegmentSeqAllocator &ssa);
 
   const std::string& get_name() const {
     return print_name;
   }
 
-  device_id_t get_device_id() const {
-    return segment_manager.get_device_id();
-  }
-
   seastore_off_t get_block_size() const {
-    return segment_manager.get_block_size();
+    return sm_group.get_block_size();
   }
 
   extent_len_t get_max_write_length() const {
-    return segment_manager.get_segment_size() -
-           p2align(ceph::encoded_sizeof_bounded<segment_header_t>(),
-                   size_t(segment_manager.get_block_size()));
-  }
-
-  device_segment_id_t get_num_segments() const {
-    return segment_manager.get_num_segments();
+    return sm_group.get_segment_size() -
+           sm_group.get_rounded_header_length() -
+           sm_group.get_rounded_tail_length();
   }
 
   bool can_write() const {
@@ -80,8 +71,10 @@ class SegmentAllocator {
   // returns true iff the current segment has insufficient space
   bool needs_roll(std::size_t length) const {
     assert(can_write());
-    auto write_capacity = current_segment->get_write_capacity()
-      - segment_manager.get_rounded_tail_length();
+    assert(current_segment->get_write_capacity() ==
+           sm_group.get_segment_size());
+    auto write_capacity = current_segment->get_write_capacity() -
+                          sm_group.get_rounded_tail_length();
     return length + written_to > std::size_t(write_capacity);
   }
 
@@ -125,7 +118,7 @@ class SegmentAllocator {
   std::string print_name;
   const segment_type_t type; // JOURNAL or OOL
   SegmentProvider &segment_provider;
-  SegmentManager &segment_manager;
+  SegmentManagerGroup &sm_group;
   SegmentRef current_segment;
   seastore_off_t written_to;
   SegmentSeqAllocator &segment_seq_allocator;
index 18b29e50fab1e3a628fb21040a80137846a7c226..dc86de3c2006ef257273b6e89635017b645e3999 100644 (file)
@@ -27,7 +27,6 @@ SET_SUBSYS(seastore_journal);
 namespace crimson::os::seastore::journal {
 
 SegmentedJournal::SegmentedJournal(
-  SegmentManager &segment_manager,
   SegmentManagerGroup &sms,
   SegmentProvider &segment_provider)
   : segment_provider(segment_provider),
@@ -36,8 +35,7 @@ SegmentedJournal::SegmentedJournal(
     journal_segment_allocator("JOURNAL",
                               segment_type_t::JOURNAL,
                               segment_provider,
-                              segment_manager,
-                             *segment_seq_allocator),
+                              *segment_seq_allocator),
     record_submitter(crimson::common::get_conf<uint64_t>(
                        "seastore_journal_iodepth_limit"),
                      crimson::common::get_conf<uint64_t>(
@@ -86,10 +84,9 @@ SegmentedJournal::prep_replay_segments(
   std::for_each(
     segments.begin(),
     segments.end(),
-    [this, FNAME](auto &seg)
+    [FNAME](auto &seg)
   {
     if (seg.first != seg.second.physical_segment_id ||
-        seg.first.device_id() != journal_segment_allocator.get_device_id() ||
         seg.second.get_type() != segment_type_t::JOURNAL) {
       ERROR("illegal journal segment for replay -- {}", seg.second);
       ceph_abort();
@@ -248,48 +245,11 @@ SegmentedJournal::replay_segment(
   );
 }
 
-SegmentedJournal::find_journal_segments_ret
-SegmentedJournal::find_journal_segments()
-{
-  return seastar::do_with(
-    find_journal_segments_ret_bare{},
-    [this](auto &ret) -> find_journal_segments_ret {
-      return crimson::do_for_each(
-       boost::counting_iterator<device_segment_id_t>(0),
-       boost::counting_iterator<device_segment_id_t>(
-         journal_segment_allocator.get_num_segments()),
-       [this, &ret](device_segment_id_t d_segment_id) {
-         segment_id_t segment_id{
-           journal_segment_allocator.get_device_id(),
-           d_segment_id};
-         return sms.read_segment_header(
-           segment_id
-         ).safe_then([segment_id, &ret](auto &&header) {
-           if (header.get_type() == segment_type_t::JOURNAL) {
-             ret.emplace_back(std::make_pair(segment_id, std::move(header)));
-           }
-         }).handle_error(
-           crimson::ct_error::enoent::handle([](auto) {
-             return find_journal_segments_ertr::now();
-           }),
-           crimson::ct_error::enodata::handle([](auto) {
-             return find_journal_segments_ertr::now();
-           }),
-           crimson::ct_error::input_output_error::pass_further{}
-         );
-       }).safe_then([&ret]() mutable {
-         return find_journal_segments_ret{
-           find_journal_segments_ertr::ready_future_marker{},
-           std::move(ret)};
-       });
-    });
-}
-
 SegmentedJournal::replay_ret SegmentedJournal::replay(
   delta_handler_t &&delta_handler)
 {
   LOG_PREFIX(Journal::replay);
-  return find_journal_segments(
+  return sms.find_journal_segment_headers(
   ).safe_then([this, FNAME, delta_handler=std::move(delta_handler)]
     (auto &&segment_headers) mutable -> replay_ret {
     INFO("got {} segments", segment_headers.size());
index 973d04fd6f1ecbd6df0fe3b2477110d949ad2ba3..f3a51a4c77462b65b0ed598328e97440fa9df020 100644 (file)
@@ -25,7 +25,6 @@ namespace crimson::os::seastore::journal {
 class SegmentedJournal : public Journal {
 public:
   SegmentedJournal(
-    SegmentManager &segment_manager,
     SegmentManagerGroup& sms,
     SegmentProvider& cleaner);
   ~SegmentedJournal() {}
@@ -59,15 +58,6 @@ private:
   SegmentManagerGroup& sms;
   WritePipeline* write_pipeline = nullptr;
 
-  /// read journal segment headers from sms
-  using find_journal_segments_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error>;
-  using find_journal_segments_ret_bare = std::vector<
-    std::pair<segment_id_t, segment_header_t>>;
-  using find_journal_segments_ret = find_journal_segments_ertr::future<
-    find_journal_segments_ret_bare>;
-  find_journal_segments_ret find_journal_segments();
-
   /// return ordered vector of segments to replay
   using replay_segments_t = std::vector<
     std::pair<journal_seq_t, segment_header_t>>;
index 4771c95f92e9aa52ff9b0e3e345774c9628955e8..6e81ce50f7c9d52c050b603044bdd68b632918cf 100644 (file)
@@ -1653,7 +1653,7 @@ seastar::future<std::unique_ptr<SeaStore>> make_seastore(
   return Device::make_device(
     device
   ).then([&device](DeviceRef device_obj) {
-    auto tm = make_transaction_manager(*device_obj, false /* detailed */);
+    auto tm = make_transaction_manager(false /* detailed */);
     auto cm = std::make_unique<collection_manager::FlatCollectionManager>(*tm);
     return std::make_unique<SeaStore>(
       device,
index 82d164f9184a93072a6e8cf2fe27476f949c31cd..416099ca65952f399ad644a34977c4a24b337dd8 100644 (file)
@@ -238,14 +238,13 @@ void SegmentCleaner::register_metrics()
 }
 
 segment_id_t SegmentCleaner::get_segment(
-    device_id_t device_id,
     segment_seq_t seq,
     segment_type_t type)
 {
   LOG_PREFIX(SegmentCleaner::get_segment);
   assert(seq != NULL_SEG_SEQ);
-  for (auto it = segments.device_begin(device_id);
-       it != segments.device_end(device_id);
+  for (auto it = segments.begin();
+       it != segments.end();
        ++it) {
     auto seg_id = it->first;
     auto& segment_info = it->second;
@@ -255,8 +254,7 @@ segment_id_t SegmentCleaner::get_segment(
       return seg_id;
     }
   }
-  ERROR("(TODO) handle out of space from device {} with segment_seq={}",
-        device_id, segment_seq_printer_t{seq});
+  ERROR("out of space with segment_seq={}", segment_seq_printer_t{seq});
   ceph_abort();
   return NULL_SEG_ID;
 }
@@ -502,8 +500,7 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space()
   });
 }
 
-SegmentCleaner::mount_ret SegmentCleaner::mount(
-  device_id_t pdevice_id)
+SegmentCleaner::mount_ret SegmentCleaner::mount()
 {
   const auto& sms = sm_group->get_segment_managers();
   logger().debug(
@@ -513,7 +510,6 @@ SegmentCleaner::mount_ret SegmentCleaner::mount(
   journal_tail_target = JOURNAL_SEQ_NULL;
   journal_tail_committed = JOURNAL_SEQ_NULL;
   journal_head = JOURNAL_SEQ_NULL;
-  journal_device_id = pdevice_id;
   
   space_tracker.reset(
     detailed ?
@@ -647,11 +643,11 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
          }
          return seastar::now();
        }),
-       [&cursor, header, segment_id, this](auto& handler) {
+       [&cursor, header, this](auto& handler) {
          return sm_group->scan_valid_records(
            cursor,
            header.segment_nonce,
-           segments[segment_id.device_id()]->segment_size,
+           segments.get_segment_size(),
            handler);
        }
       );
index b5a53ba87ff0abd3759b6c15bc99b3f485c27ef6..fa7578cde0ca4f64002369f7ae1dd0c3e9728a02 100644 (file)
@@ -30,13 +30,11 @@ class segment_info_set_t {
     segment_manager_info_t(
       device_id_t device_id,
       device_segment_id_t num_segments,
-      seastore_off_t segment_size,
       seastore_off_t block_size,
       size_t empty_segments,
       size_t size)
       : device_id(device_id),
        num_segments(num_segments),
-       segment_size(segment_size),
        block_size(block_size),
        empty_segments(empty_segments),
        size(size),
@@ -45,7 +43,6 @@ class segment_info_set_t {
 
     device_id_t device_id = 0;
     device_segment_id_t num_segments = 0;
-    seastore_off_t segment_size = 0;
     seastore_off_t block_size = 0;
     size_t empty_segments = 0;
     size_t size = 0;
@@ -120,11 +117,14 @@ public:
     journal_segments = 0;
     avail_bytes = 0;
     opened_segments = 0;
+    segment_size = 0;
   }
 
   void add_segment_manager(SegmentManager& segment_manager)
   {
     device_id_t d_id = segment_manager.get_device_id();
+    auto ssize = segment_manager.get_segment_size();
+    ceph_assert(ssize != 0);
     segments.add_device(
       d_id,
       segment_manager.get_num_segments(),
@@ -132,13 +132,19 @@ public:
     sm_infos[segment_manager.get_device_id()].emplace(
       d_id,
       segment_manager.get_num_segments(),
-      segment_manager.get_segment_size(),
       segment_manager.get_block_size(),
       segment_manager.get_num_segments(),
       segment_manager.get_size());
 
     total_bytes += segment_manager.get_size();
     avail_bytes += segment_manager.get_size();
+
+    // assume all the segment managers share the same settings as follows.
+    if (segment_size == 0) {
+      segment_size = ssize;
+    } else {
+      ceph_assert(segment_size == ssize);
+    }
   }
 
   device_segment_id_t size() const {
@@ -159,13 +165,6 @@ public:
     return segments.end();
   }
 
-  auto device_begin(device_id_t id) {
-    return segments.device_begin(id);
-  }
-  auto device_end(device_id_t id) {
-    return segments.device_end(id);
-  }
-
   // the following methods are used for keeping track of
   // seastore disk space usage
   void segment_opened(segment_id_t segment) {
@@ -174,15 +173,15 @@ public:
     ceph_assert(segments[segment].is_empty());
     // must be opening a new segment
     auto [iter, inserted] = sm_info->open_segment_avails.emplace(
-      segment, sm_info->segment_size);
+      segment, get_segment_size());
     opened_segments++;
     ceph_assert(inserted);
   }
   void segment_emptied(segment_id_t segment) {
     auto& sm_info = sm_infos[segment.device_id()];
     sm_info->empty_segments++;
-    sm_info->avail_bytes += sm_info->segment_size;
-    avail_bytes += sm_info->segment_size;
+    sm_info->avail_bytes += get_segment_size();
+    avail_bytes += get_segment_size();
   }
   void segment_closed(segment_id_t segment) {
     assert(segments.contains(segment));
@@ -199,11 +198,11 @@ public:
       opened_segments--;
     } else {
       ceph_assert(segment_info.is_empty());
-      assert(sm_info->avail_bytes >= (size_t)sm_info->segment_size);
-      assert(avail_bytes >= (size_t)sm_info->segment_size);
+      assert(sm_info->avail_bytes >= (std::size_t)get_segment_size());
+      assert(avail_bytes >= (std::size_t)get_segment_size());
       assert(sm_info->empty_segments > 0);
-      sm_info->avail_bytes -= sm_info->segment_size;
-      avail_bytes -= sm_info->segment_size;
+      sm_info->avail_bytes -= get_segment_size();
+      avail_bytes -= get_segment_size();
       sm_info->empty_segments--;
     }
     segment_info.set_closed();
@@ -219,7 +218,8 @@ public:
        offset);
       return;
     }
-    auto new_avail_bytes = sm_info->segment_size - offset.as_seg_paddr().get_segment_off();
+    auto new_avail_bytes = get_segment_size() -
+                           offset.as_seg_paddr().get_segment_off();
     if (iter->second < new_avail_bytes) {
       crimson::get_logger(ceph_subsys_seastore_cleaner).error(
        "SegmentCleaner::update_segment_avail_bytes:"
@@ -272,6 +272,11 @@ public:
     }
     return num;
   }
+  seastore_off_t get_segment_size() const {
+    assert(segment_size != 0);
+    return segment_size;
+  }
+
 private:
   std::vector<std::optional<segment_manager_info_t>> sm_infos;
   segment_map_t<segment_info_t> segments;
@@ -280,6 +285,7 @@ private:
   size_t total_bytes = 0;
   size_t avail_bytes = 0;
   size_t opened_segments = 0;
+  seastore_off_t segment_size = 0;
 
   friend class SegmentCleaner;
 };
@@ -290,7 +296,7 @@ private:
 class SegmentProvider {
 public:
   virtual segment_id_t get_segment(
-      device_id_t id, segment_seq_t seq, segment_type_t type) = 0;
+      segment_seq_t seq, segment_type_t type) = 0;
 
   virtual void close_segment(segment_id_t) {}
 
@@ -310,6 +316,8 @@ public:
 
   virtual void update_segment_avail_bytes(paddr_t offset) = 0;
 
+  virtual SegmentManagerGroup* get_segment_manager_group() = 0;
+
   virtual ~SegmentProvider() {}
 };
 
@@ -692,8 +700,6 @@ private:
   /// head of journal
   journal_seq_t journal_head;
 
-  device_id_t journal_device_id;
-
   ExtentCallbackInterface *ecb = nullptr;
 
   /// populated if there is an IO blocked on hard limits
@@ -714,10 +720,10 @@ public:
   using mount_ertr = crimson::errorator<
     crimson::ct_error::input_output_error>;
   using mount_ret = mount_ertr::future<>;
-  mount_ret mount(device_id_t pdevice_id);
+  mount_ret mount();
 
   segment_id_t get_segment(
-      device_id_t id, segment_seq_t seq, segment_type_t type) final;
+      segment_seq_t seq, segment_type_t type) final;
 
   void close_segment(segment_id_t segment) final;
 
@@ -754,6 +760,10 @@ public:
     return segments[id].get_type();
   }
 
+  SegmentManagerGroup* get_segment_manager_group() final {
+    return sm_group.get();
+  }
+
   using release_ertr = SegmentManagerGroup::release_ertr;
   release_ertr::future<> maybe_release_segment(Transaction &t);
 
@@ -1053,9 +1063,7 @@ private:
   }
 
   size_t get_bytes_available_current_segment() const {
-    auto& seg_addr = journal_head.offset.as_seg_paddr();
-    auto segment_size =
-      segments[seg_addr.get_segment_id().device_id()]->segment_size;
+    auto segment_size = segments.get_segment_size();
     return segment_size - get_bytes_used_current_segment();
   }
 
@@ -1109,12 +1117,10 @@ private:
     if (journal_head == JOURNAL_SEQ_NULL) {
       // this for calculating journal bytes in the journal
       // replay phase in which journal_head is not set
-      return segments.get_journal_segments() * segments[journal_device_id]->segment_size;
+      return segments.get_journal_segments() * segments.get_segment_size();
     } else {
       assert(journal_head >= journal_tail_committed);
-      auto& seg_addr = journal_head.offset.as_seg_paddr();
-      auto segment_size =
-       segments[seg_addr.get_segment_id().device_id()]->segment_size;
+      auto segment_size = segments.get_segment_size();
       return (journal_head.segment_seq - journal_tail_committed.segment_seq + 1) *
        segment_size;
     }
@@ -1319,7 +1325,6 @@ private:
     assert(s_type != segment_type_t::NULL_SEG);
     segments[segment].type = s_type;
     if (s_type == segment_type_t::JOURNAL) {
-      assert(journal_device_id == segment.device_id());
       segments.new_journal_segment();
     } else {
       assert(s_type == segment_type_t::OOL);
index eb4cc6ccab27eaab7016cee327971d414894a976..44d249dcac9fc8e846398fafe2e5a5d3f363e741 100644 (file)
@@ -173,11 +173,6 @@ public:
     ceph_assert(get_size() % get_segment_size() == 0);
     return ((device_segment_id_t)(get_size() / get_segment_size()));
   }
-  seastore_off_t get_rounded_tail_length() const {
-    return p2roundup(
-      ceph::encoded_sizeof_bounded<segment_tail_t>(),
-      (size_t)get_block_size());
-  }
 
   virtual ~SegmentManager() {}
 
index b691e5100583d27f061c3e19d43cac8741dcb56d..659f5e413a5af1a38cfab95cb4158541a0257931 100644 (file)
@@ -17,9 +17,8 @@ SegmentManagerGroup::read_segment_tail(segment_id_t segment)
   return segment_manager.read(
     paddr_t::make_seg_paddr(
       segment,
-      segment_manager.get_segment_size() -
-        segment_manager.get_rounded_tail_length()),
-    segment_manager.get_rounded_tail_length()
+      segment_manager.get_segment_size() - get_rounded_tail_length()),
+    get_rounded_tail_length()
   ).handle_error(
     read_segment_header_ertr::pass_further{},
     crimson::ct_error::assert_all{
@@ -59,7 +58,7 @@ SegmentManagerGroup::read_segment_header(segment_id_t segment)
   auto& segment_manager = *segment_managers[segment.device_id()];
   return segment_manager.read(
     paddr_t::make_seg_paddr(segment, 0),
-    segment_manager.get_block_size()
+    get_rounded_header_length()
   ).handle_error(
     read_segment_header_ertr::pass_further{},
     crimson::ct_error::assert_all{
@@ -387,4 +386,49 @@ SegmentManagerGroup::consume_next_records(
   });
 }
 
+SegmentManagerGroup::find_journal_segment_headers_ret
+SegmentManagerGroup::find_journal_segment_headers()
+{
+  return seastar::do_with(
+    get_segment_managers(),
+    find_journal_segment_headers_ret_bare{},
+    [this](auto &sms, auto& ret) -> find_journal_segment_headers_ret
+  {
+    return crimson::do_for_each(sms,
+      [this, &ret](SegmentManager *sm)
+    {
+      LOG_PREFIX(SegmentManagerGroup::find_journal_segment_headers);
+      auto device_id = sm->get_device_id();
+      auto num_segments = sm->get_num_segments();
+      INFO("processing {} with {} segments",
+           device_id_printer_t{device_id}, num_segments);
+      return crimson::do_for_each(
+        boost::counting_iterator<device_segment_id_t>(0),
+        boost::counting_iterator<device_segment_id_t>(num_segments),
+        [this, &ret, device_id](device_segment_id_t d_segment_id)
+      {
+        segment_id_t segment_id{device_id, d_segment_id};
+        return read_segment_header(segment_id
+        ).safe_then([segment_id, &ret](auto &&header) {
+          if (header.get_type() == segment_type_t::JOURNAL) {
+            ret.emplace_back(std::make_pair(segment_id, std::move(header)));
+          }
+        }).handle_error(
+          crimson::ct_error::enoent::handle([](auto) {
+            return find_journal_segment_headers_ertr::now();
+          }),
+          crimson::ct_error::enodata::handle([](auto) {
+            return find_journal_segment_headers_ertr::now();
+          }),
+          crimson::ct_error::input_output_error::pass_further{}
+        );
+      });
+    }).safe_then([&ret]() mutable {
+      return find_journal_segment_headers_ret{
+        find_journal_segment_headers_ertr::ready_future_marker{},
+        std::move(ret)};
+    });
+  });
+}
+
 } // namespace crimson::os::seastore
index cc715e470866db369692acc99f883333f823e23b..f2690c3448839a2452533907a9c126f5f0532d57 100644 (file)
@@ -45,6 +45,38 @@ public:
     device_ids.clear();
   }
 
+  /**
+   * get device info
+   *
+   * Assume all segment managers share the same following information.
+   */
+  seastore_off_t get_block_size() const {
+    assert(device_ids.size());
+    return segment_managers[*device_ids.begin()]->get_block_size();
+  }
+
+  seastore_off_t get_segment_size() const {
+    assert(device_ids.size());
+    return segment_managers[*device_ids.begin()]->get_segment_size();
+  }
+
+  const seastore_meta_t &get_meta() const {
+    assert(device_ids.size());
+    return segment_managers[*device_ids.begin()]->get_meta();
+  }
+
+  std::size_t get_rounded_header_length() const {
+    return p2roundup(
+      ceph::encoded_sizeof_bounded<segment_header_t>(),
+      (std::size_t)get_block_size());
+  }
+
+  std::size_t get_rounded_tail_length() const {
+    return p2roundup(
+      ceph::encoded_sizeof_bounded<segment_tail_t>(),
+      (std::size_t)get_block_size());
+  }
+
   using read_segment_header_ertr = crimson::errorator<
     crimson::ct_error::enoent,
     crimson::ct_error::enodata,
@@ -101,6 +133,23 @@ public:
     found_record_handler_t &handler    ///< [in] handler for records
   ); ///< @return used budget
 
+  /*
+   * read journal segment headers
+   */
+  using find_journal_segment_headers_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using find_journal_segment_headers_ret_bare = std::vector<
+    std::pair<segment_id_t, segment_header_t>>;
+  using find_journal_segment_headers_ret = find_journal_segment_headers_ertr::future<
+    find_journal_segment_headers_ret_bare>;
+  find_journal_segment_headers_ret find_journal_segment_headers();
+
+  using open_ertr = SegmentManager::open_ertr;
+  open_ertr::future<SegmentRef> open(segment_id_t id) {
+    assert(has_device(id.device_id()));
+    return segment_managers[id.device_id()]->open(id);
+  }
+
   using release_ertr = SegmentManager::release_ertr;
   release_ertr::future<> release_segment(segment_id_t id) {
     assert(has_device(id.device_id()));
index 768007027233d8d4e4800e532ea79e1d6529787d..5f3b670c864edee23ac8be747db4608f9500dbe1 100644 (file)
@@ -44,7 +44,6 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
   LOG_PREFIX(TransactionManager::mkfs);
   INFO("enter");
   return segment_cleaner->mount(
-    epm->get_primary_device().get_device_id()
   ).safe_then([this] {
     return journal->open_for_write();
   }).safe_then([this](auto addr) {
@@ -84,7 +83,6 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
   INFO("enter");
   cache->init();
   return segment_cleaner->mount(
-    epm->get_primary_device().get_device_id()
   ).safe_then([this] {
     return journal->replay(
       [this](const auto &offsets, const auto &e, auto last_modified) {
@@ -156,10 +154,10 @@ TransactionManager::close_ertr::future<> TransactionManager::close() {
     cache->dump_contents();
     return journal->close();
   }).safe_then([this] {
-    sms.reset();
     return epm->close();
-  }).safe_then([FNAME] {
+  }).safe_then([FNAME, this] {
     INFO("completed");
+    sms.reset();
     return seastar::now();
   });
 }
@@ -541,9 +539,7 @@ TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_liv
 
 TransactionManager::~TransactionManager() {}
 
-TransactionManagerRef make_transaction_manager(
-    Device &device,
-    bool detailed)
+TransactionManagerRef make_transaction_manager(bool detailed)
 {
   auto sms = std::make_unique<SegmentManagerGroup>();
   auto& sms_ref = *sms.get();
@@ -551,10 +547,7 @@ TransactionManagerRef make_transaction_manager(
     SegmentCleaner::config_t::get_default(),
     std::move(sms),
     detailed);
-  ceph_assert(device.get_device_type() == device_type_t::SEGMENTED);
-  auto sm = dynamic_cast<SegmentManager*>(&device);
-  ceph_assert(sm != nullptr);
-  auto journal = journal::make_segmented(*sm, sms_ref, *segment_cleaner);
+  auto journal = journal::make_segmented(sms_ref, *segment_cleaner);
   auto epm = std::make_unique<ExtentPlacementManager>();
   auto cache = std::make_unique<Cache>(*epm);
   auto lba_manager = lba_manager::create_lba_manager(*cache);
index 2e5385025b2052f0c71d0cedd871768cbf721158..76f503b5d7bfe07290373e252c5f5374ecea4a33 100644 (file)
@@ -539,16 +539,15 @@ public:
     SUBDEBUG(seastore_tm, "adding device {}, is_primary={}",
              dev->get_device_id(), is_primary);
     epm->add_device(dev, is_primary);
+    epm->add_allocator(
+      dev->get_device_type(),
+      std::make_unique<SegmentedAllocator>(
+        *segment_cleaner,
+        segment_cleaner->get_ool_segment_seq_allocator()));
 
     ceph_assert(dev->get_device_type() == device_type_t::SEGMENTED);
     auto sm = dynamic_cast<SegmentManager*>(dev);
     ceph_assert(sm != nullptr);
-    epm->add_allocator(
-      dev->get_device_type(),
-      std::make_unique<SegmentedAllocator>(
-       *segment_cleaner,
-       *sm,
-       segment_cleaner->get_ool_segment_seq_allocator()));
     sms.add_segment_manager(sm);
   }
 
@@ -581,8 +580,6 @@ public:
 };
 using TransactionManagerRef = std::unique_ptr<TransactionManager>;
 
-TransactionManagerRef make_transaction_manager(
-    Device &device,
-    bool detailed);
+TransactionManagerRef make_transaction_manager(bool detailed);
 
 }
index 76f4825ec8c7911ce4e470dcac66f139bd2470d8..94a8684a48828795a1ae16f05989d154ff78a3f9 100644 (file)
@@ -131,7 +131,7 @@ seastar::future<bufferlist> TMDriver::read(
 
 void TMDriver::init()
 {
-  tm = make_transaction_manager(*device, false /* detailed */);
+  tm = make_transaction_manager(false /* detailed */);
   tm->add_device(device.get(), true);
 }
 
index 66f405006e995672f434887b19fd0ea019b9a2b0..54d36da45c05596f272688154371a4195f8a6dde 100644 (file)
@@ -39,11 +39,10 @@ struct btree_test_base :
 
   segment_id_t next;
 
-  btree_test_base() = default;
-
   std::map<segment_id_t, segment_seq_t> segment_seqs;
   std::map<segment_id_t, segment_type_t> segment_types;
 
+  btree_test_base() = default;
 
   seastar::lowres_system_clock::time_point get_last_modified(
     segment_id_t id) const final {
@@ -57,30 +56,32 @@ struct btree_test_base :
   void update_segment_avail_bytes(paddr_t offset) final {}
 
   segment_id_t get_segment(
-    device_id_t id,
     segment_seq_t seq,
-    segment_type_t type) final
-  {
+    segment_type_t type
+  ) final {
     auto ret = next;
     next = segment_id_t{
-      next.device_id(),
+      segment_manager->get_device_id(),
       next.device_segment_id() + 1};
     segment_seqs[ret] = seq;
     segment_types[ret] = type;
     return ret;
   }
 
-  segment_seq_t get_seq(segment_id_t id) {
+  journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
+
+  void update_journal_tail_committed(journal_seq_t committed) final {}
+
+  SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
+
+  segment_seq_t get_seq(segment_id_t id) final {
     return segment_seqs[id];
   }
 
-  segment_type_t get_type(segment_id_t id) {
+  segment_type_t get_type(segment_id_t id) final {
     return segment_types[id];
   }
 
-  journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
-  void update_journal_tail_committed(journal_seq_t committed) final {}
-
   virtual void complete_commit(Transaction &t) {}
   seastar::future<> submit_transaction(TransactionRef t)
   {
@@ -100,8 +101,7 @@ struct btree_test_base :
     segment_manager = segment_manager::create_test_ephemeral();
     sms.reset(new SegmentManagerGroup());
     auto& sms_ref = *sms.get();
-    journal = journal::make_segmented(
-      *segment_manager, sms_ref, *this);
+    journal = journal::make_segmented(sms_ref, *this);
     epm.reset(new ExtentPlacementManager());
     cache.reset(new Cache(*epm));
 
index 0daae06f734762bcb38bf75bb7d3f3c55ec0b30a..e9d9b203de83ff1a9e129278695c974230742b5a 100644 (file)
@@ -98,20 +98,25 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
   void update_segment_avail_bytes(paddr_t offset) final {}
 
   segment_id_t get_segment(
-    device_id_t id,
     segment_seq_t seq,
-    segment_type_t type) final
-  {
+    segment_type_t type
+  ) final {
     auto ret = next;
     next = segment_id_t{
-      next.device_id(),
+      segment_manager->get_device_id(),
       next.device_segment_id() + 1};
     segment_seqs[ret] = seq;
     segment_types[ret] = type;
     return ret;
   }
 
-  segment_seq_t get_seq(segment_id_t id) {
+  journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
+
+  void update_journal_tail_committed(journal_seq_t paddr) final {}
+
+  SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
+
+  segment_seq_t get_seq(segment_id_t id) final {
     return segment_seqs[id];
   }
 
@@ -119,15 +124,12 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
     return segment_types[id];
   }
 
-  journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
-  void update_journal_tail_committed(journal_seq_t paddr) final {}
-
   seastar::future<> set_up_fut() final {
     segment_manager = segment_manager::create_test_ephemeral();
     block_size = segment_manager->get_block_size();
     sms.reset(new SegmentManagerGroup());
     next = segment_id_t(segment_manager->get_device_id(), 0);
-    journal = journal::make_segmented(*segment_manager, *sms, *this);
+    journal = journal::make_segmented(*sms, *this);
     journal->set_write_pipeline(&pipeline);
     sms->add_segment_manager(segment_manager.get());
     return segment_manager->init(
@@ -158,7 +160,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
     return journal->close(
     ).safe_then([this, f=std::move(f)]() mutable {
       journal = journal::make_segmented(
-       *segment_manager, *sms, *this);
+       *sms, *this);
       journal->set_write_pipeline(&pipeline);
       return journal->replay(std::forward<T>(std::move(f)));
     }).safe_then([this] {
index e987cd44fcc5312e36b3320ef9df98ce760a4bde..356b55976bbdc4a2b061dc7389c879eab3cf5af0 100644 (file)
@@ -71,7 +71,7 @@ protected:
 };
 
 auto get_seastore(SeaStore::MDStoreRef mdstore, SegmentManagerRef sm) {
-  auto tm = make_transaction_manager(*sm, true);
+  auto tm = make_transaction_manager(true);
   auto cm = std::make_unique<collection_manager::FlatCollectionManager>(*tm);
   return std::make_unique<SeaStore>(
     "",
@@ -92,7 +92,7 @@ protected:
   TMTestState() : EphemeralTestState() {}
 
   virtual void _init() override {
-    tm = make_transaction_manager(*segment_manager, true);
+    tm = make_transaction_manager(true);
     tm->add_device(segment_manager.get(), true);
     segment_cleaner = tm->get_segment_cleaner();
     lba_manager = tm->get_lba_manager();