]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/lba_manager: take lba entry indirection into account when getting...
author Xuehan Xu <xxhdx1985126@gmail.com>
Sat, 6 May 2023 03:01:58 +0000 (03:01 +0000)
committer Matan Breizman <mbreizma@redhat.com>
Thu, 19 Oct 2023 07:13:17 +0000 (07:13 +0000)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
(cherry picked from commit 48023c7f67600cbbccc49f6499f0df3526e1b882)

13 files changed:
src/crimson/os/seastore/backref/btree_backref_manager.cc
src/crimson/os/seastore/backref/btree_backref_manager.h
src/crimson/os/seastore/btree/btree_range_pin.h
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc

index c3ac906f2f9a1f3fe4192b3ba44ee17aab194873..30ff4554074eaecd084efcc8aaf2961f67cecd3d 100644 (file)
@@ -114,10 +114,9 @@ BtreeBackrefManager::get_mapping(
       } else {
        TRACET("{} got {}, {}",
               c.trans, offset, iter.get_key(), iter.get_val());
-       auto e = iter.get_pin(c);
        return get_mapping_ret(
          interruptible::ready_future_marker{},
-         std::move(e));
+         iter.get_pin(c));
       }
     });
   });
@@ -151,7 +150,7 @@ BtreeBackrefManager::get_mappings(
          TRACET("{}~{} got {}, {}, repeat ...",
                 c.trans, offset, end, pos.get_key(), pos.get_val());
          ceph_assert((pos.get_key().add_offset(pos.get_val().len)) > offset);
-         ret.push_back(pos.get_pin(c));
+         ret.emplace_back(pos.get_pin(c));
          return BackrefBtree::iterate_repeat_ret_inner(
            interruptible::ready_future_marker{},
            seastar::stop_iteration::no);
@@ -248,7 +247,8 @@ BtreeBackrefManager::new_mapping(
          });
        });
     }).si_then([c](auto &&state) {
-      return state.ret->get_pin(c);
+      return new_mapping_iertr::make_ready_future<BackrefMappingRef>(
+       state.ret->get_pin(c));
     });
 }
 
index 48ef4d83191719fd44bb6b566c1a59f45b5e01ee..952e78b65189a584eaee06b244eb8d7bac30aa0d 100644 (file)
@@ -34,6 +34,13 @@ public:
   extent_types_t get_type() const final {
     return type;
   }
+
+protected:
+  std::unique_ptr<BtreeNodeMapping<paddr_t, laddr_t>> _duplicate(
+    op_context_t<paddr_t> ctx) const final {
+    return std::unique_ptr<BtreeNodeMapping<paddr_t, laddr_t>>(
+      new BtreeBackrefMapping(ctx));
+  }
 };
 
 using BackrefBtree = FixedKVBtree<
index c753a1c3b035c34b8e232277953f8404f1f5be6b..68188e9ff55022b91df5fd528b7120a51e871d04 100644 (file)
@@ -21,21 +21,6 @@ struct op_context_t {
 
 constexpr uint16_t MAX_FIXEDKVBTREE_DEPTH = 8;
 
-template <typename T>
-struct min_max_t {};
-
-template <>
-struct min_max_t<laddr_t> {
-  static constexpr laddr_t max = L_ADDR_MAX;
-  static constexpr laddr_t min = L_ADDR_MIN;
-};
-
-template <>
-struct min_max_t<paddr_t> {
-  static constexpr paddr_t max = P_ADDR_MAX;
-  static constexpr paddr_t min = P_ADDR_MIN;
-};
-
 template <typename bound_t>
 struct fixed_kv_node_meta_t {
   bound_t begin = min_max_t<bound_t>::min;
@@ -117,7 +102,7 @@ struct fixed_kv_node_meta_le_t {
 
 template <typename key_t, typename val_t>
 class BtreeNodeMapping : public PhysicalNodeMapping<key_t, val_t> {
-
+protected:
   op_context_t<key_t> ctx;
   /**
    * parent
@@ -128,10 +113,15 @@ class BtreeNodeMapping : public PhysicalNodeMapping<key_t, val_t> {
   CachedExtentRef parent;
 
   pladdr_t value;
-  extent_len_t len;
+  extent_len_t len = 0;
   fixed_kv_node_meta_t<key_t> range;
   uint16_t pos = std::numeric_limits<uint16_t>::max();
 
+  virtual std::unique_ptr<BtreeNodeMapping> _duplicate(op_context_t<key_t>) const = 0;
+  fixed_kv_node_meta_t<key_t> _get_pin_range() const {
+    return range;
+  }
+
 public:
   using val_type = val_t;
   BtreeNodeMapping(op_context_t<key_t> ctx) : ctx(ctx) {}
@@ -142,12 +132,12 @@ public:
     uint16_t pos,
     pladdr_t value,
     extent_len_t len,
-    fixed_kv_node_meta_t<key_t> &&meta)
+    fixed_kv_node_meta_t<key_t> meta)
     : ctx(ctx),
       parent(parent),
       value(value),
       len(len),
-      range(std::move(meta)),
+      range(meta),
       pos(pos)
   {
     if (!parent->is_pending()) {
@@ -190,13 +180,12 @@ public:
     }
   }
 
-  key_t get_key() const final {
+  key_t get_key() const override {
     return range.begin;
   }
 
   PhysicalNodeMappingRef<key_t, val_t> duplicate() const final {
-    auto ret = std::unique_ptr<BtreeNodeMapping<key_t, val_t>>(
-      new BtreeNodeMapping<key_t, val_t>(ctx));
+    auto ret = _duplicate(ctx);
     ret->range = range;
     ret->value = value;
     ret->parent = parent;
index 87a957e3d223e7cd9b3473e5c1e32bb4910e40e2..2970d04408506905ac36363c6769448a9248d8fb 100644 (file)
@@ -218,8 +218,7 @@ public:
       return leaf.pos == 0;
     }
 
-    PhysicalNodeMappingRef<node_key_t, typename pin_t::val_type>
-    get_pin(op_context_t<node_key_t> ctx) const {
+    std::unique_ptr<pin_t> get_pin(op_context_t<node_key_t> ctx) const {
       assert(!is_end());
       auto val = get_val();
       auto key = get_key();
index 26efa453dd75e0b74b0a5a759ccd221b6f2bef50..99d140ff53f14ebcb12832eca9194bae182b88c0 100644 (file)
@@ -1019,6 +1019,15 @@ public:
   virtual bool has_been_invalidated() const = 0;
   virtual CachedExtentRef get_parent() const = 0;
   virtual uint16_t get_pos() const = 0;
+  // An lba pin may be indirect, see comments in lba_manager/btree/btree_lba_manager.h
+  virtual bool is_indirect() const { return false; }
+  virtual key_t get_intermediate_key() const { return min_max_t<key_t>::null; }
+  virtual key_t get_intermediate_base() const { return min_max_t<key_t>::null; }
+  virtual extent_len_t get_intermediate_length() const { return 0; }
+  // The start offset of the pin, must be 0 if the pin is not indirect
+  virtual extent_len_t get_intermediate_offset() const {
+    return std::numeric_limits<extent_len_t>::max();
+  }
 
   virtual get_child_ret_t<LogicalCachedExtent>
   get_logical_extent(Transaction &t) = 0;
@@ -1186,6 +1195,12 @@ public:
     laddr = nladdr;
   }
 
+  void maybe_set_intermediate_laddr(LBAMapping &mapping) {  // by reference: callers pass *pin
+    laddr = mapping.is_indirect()
+      ? mapping.get_intermediate_key()  // indirect pin: take the intermediate key
+      : mapping.get_key();              // direct pin: take the pin's own key
+  }
+
   void apply_delta_and_adjust_crc(
     paddr_t base, const ceph::bufferlist &bl) final {
     apply_delta(bl);
index 6275d4dbbf5cac732c78961e4b45d532cf206ce8..21933672e7efabdee5d1705fc98c97bd0f28308c 100644 (file)
@@ -39,6 +39,8 @@ public:
    * Fetches mappings for laddr_t in range [offset, offset + len)
    *
    * Future will not resolve until all pins have resolved (set_paddr called)
+   * For indirect lba mappings, get_mappings will always retrieve the original
+   * lba value.
    */
   using get_mappings_iertr = base_iertr;
   using get_mappings_ret = get_mappings_iertr::future<lba_pin_list_t>;
@@ -50,6 +52,8 @@ public:
    * Fetches mappings for a list of laddr_t in range [offset, offset + len)
    *
    * Future will not resolve until all pins have resolved (set_paddr called)
+   * For indirect lba mappings, get_mappings will always retrieve the original
+   * lba value.
    */
   virtual get_mappings_ret get_mappings(
     Transaction &t,
@@ -59,6 +63,8 @@ public:
    * Fetches the mapping for laddr_t
    *
    * Future will not resolve until the pin has resolved (set_paddr called)
+   * For indirect lba mappings, get_mapping will always retrieve the original
+   * lba value.
    */
   using get_mapping_iertr = base_iertr::extend<
     crimson::ct_error::enoent>;
@@ -88,7 +94,8 @@ public:
     laddr_t hint,
     extent_len_t len,
     laddr_t intermediate_key,
-    paddr_t actual_addr) = 0;
+    paddr_t actual_addr,
+    laddr_t intermediate_base) = 0;
 
   virtual alloc_extent_ret reserve_region(
     Transaction &t,
@@ -97,7 +104,7 @@ public:
 
   struct ref_update_result_t {
     unsigned refcount = 0;
-    paddr_t addr;
+    pladdr_t addr;
     extent_len_t length = 0;
   };
   using ref_iertr = base_iertr::extend<
index c1bfc25dd06632625df0d8a13c373e75aee3faed..9a4809d6a99270b2b91246acbbb6d9cc315458fc 100644 (file)
@@ -127,29 +127,94 @@ BtreeLBAManager::get_mappings(
   return with_btree_state<LBABtree, lba_pin_list_t>(
     cache,
     c,
-    [c, offset, length, FNAME](auto &btree, auto &ret) {
-      return LBABtree::iterate_repeat(
-       c,
-       btree.upper_bound_right(c, offset),
-       [&ret, offset, length, c, FNAME](auto &pos) {
-         if (pos.is_end() || pos.get_key() >= (offset + length)) {
-           TRACET("{}~{} done with {} results",
-                  c.trans, offset, length, ret.size());
-           return typename LBABtree::iterate_repeat_ret_inner(
+    [c, offset, length, FNAME, this](auto &btree, auto &ret) {
+      return seastar::do_with(
+       std::list<BtreeLBAMappingRef>(),
+       [offset, length, c, FNAME, this, &ret, &btree](auto &pin_list) {
+       return LBABtree::iterate_repeat(
+         c,
+         btree.upper_bound_right(c, offset),
+         [&pin_list, offset, length, c, FNAME](auto &pos) {
+           if (pos.is_end() || pos.get_key() >= (offset + length)) {
+             TRACET("{}~{} done with {} results",
+                    c.trans, offset, length, pin_list.size());
+             return LBABtree::iterate_repeat_ret_inner(
+               interruptible::ready_future_marker{},
+               seastar::stop_iteration::yes);
+           }
+           TRACET("{}~{} got {}, {}, repeat ...",
+                  c.trans, offset, length, pos.get_key(), pos.get_val());
+           ceph_assert((pos.get_key() + pos.get_val().len) > offset);
+           pin_list.push_back(pos.get_pin(c));
+           return LBABtree::iterate_repeat_ret_inner(
              interruptible::ready_future_marker{},
-             seastar::stop_iteration::yes);
-         }
-         TRACET("{}~{} got {}, {}, repeat ...",
-                c.trans, offset, length, pos.get_key(), pos.get_val());
-         ceph_assert((pos.get_key() + pos.get_val().len) > offset);
-         ret.push_back(pos.get_pin(c));
-         return typename LBABtree::iterate_repeat_ret_inner(
-           interruptible::ready_future_marker{},
-           seastar::stop_iteration::no);
+             seastar::stop_iteration::no);
+         }).si_then([this, &ret, c, &pin_list] {
+           return _get_original_mappings(c, pin_list
+           ).si_then([&ret](auto _ret) {
+             ret = std::move(_ret);
+           });
+         });
        });
     });
 }
 
+BtreeLBAManager::_get_original_mappings_ret
+BtreeLBAManager::_get_original_mappings(
+  op_context_t<laddr_t> c,
+  std::list<BtreeLBAMappingRef> &pin_list)
+{
+  return seastar::do_with(
+    lba_pin_list_t(),
+    [this, c, &pin_list](auto &ret) {
+    return trans_intr::do_for_each(
+      pin_list,
+      [this, c, &ret](auto &pin) {
+       LOG_PREFIX(BtreeLBAManager::get_mappings);
+       if (pin->get_raw_val().is_paddr()) {
+         ret.emplace_back(std::move(pin));
+         return get_mappings_iertr::now();
+       }
+       TRACET(
+         "getting original mapping for indirect mapping {}~{}",
+         c.trans, pin->get_key(), pin->get_length());
+       return this->get_mappings(
+         c.trans, pin->get_raw_val().get_laddr(), pin->get_length()
+       ).si_then([&pin, &ret, c](auto new_pin_list) {
+         LOG_PREFIX(BtreeLBAManager::get_mappings);
+         assert(new_pin_list.size() == 1);
+         auto &new_pin = new_pin_list.front();
+         auto intermediate_key = pin->get_raw_val().get_laddr();
+         assert(!new_pin->is_indirect());
+         assert(new_pin->get_key() <= intermediate_key);
+         assert(new_pin->get_key() + new_pin->get_length() >=
+         intermediate_key + pin->get_length());
+
+         TRACET("Got mapping {}~{} for indirect mapping {}~{}, "
+           "intermediate_key {}",
+           c.trans,
+           new_pin->get_key(), new_pin->get_length(),
+           pin->get_key(), pin->get_length(),
+           pin->get_raw_val().get_laddr());
+         auto &btree_new_pin = static_cast<BtreeLBAMapping&>(*new_pin);
+         btree_new_pin.set_key_for_indirect(
+           pin->get_key(),
+           pin->get_length(),
+           pin->get_raw_val().get_laddr());
+         ret.emplace_back(std::move(new_pin));
+         return seastar::now();
+       }).handle_error_interruptible(
+         crimson::ct_error::input_output_error::pass_further{},
+         crimson::ct_error::assert_all("unexpected enoent")
+       );
+      }
+    ).si_then([&ret] {
+      return std::move(ret);
+    });
+  });
+}
+
+
 BtreeLBAManager::get_mappings_ret
 BtreeLBAManager::get_mappings(
   Transaction &t,
@@ -181,14 +246,27 @@ BtreeLBAManager::get_mapping(
 {
   LOG_PREFIX(BtreeLBAManager::get_mapping);
   TRACET("{}", t, offset);
+  return _get_mapping(t, offset
+  ).si_then([](auto pin) {
+    return get_mapping_iertr::make_ready_future<LBAMappingRef>(std::move(pin));
+  });
+}
+
+BtreeLBAManager::_get_mapping_ret
+BtreeLBAManager::_get_mapping(
+  Transaction &t,
+  laddr_t offset)
+{
+  LOG_PREFIX(BtreeLBAManager::_get_mapping);
+  TRACET("{}", t, offset);
   auto c = get_context(t);
-  return with_btree_ret<LBABtree, LBAMappingRef>(
+  return with_btree_ret<LBABtree, BtreeLBAMappingRef>(
     cache,
     c,
-    [FNAME, c, offset](auto &btree) {
+    [FNAME, c, offset, this](auto &btree) {
       return btree.lower_bound(
        c, offset
-      ).si_then([FNAME, offset, c](auto iter) -> get_mapping_ret {
+      ).si_then([FNAME, offset, c](auto iter) -> _get_mapping_ret {
        if (iter.is_end() || iter.get_key() != offset) {
          ERRORT("laddr={} doesn't exist", c.trans, offset);
          return crimson::ct_error::enoent::make();
@@ -196,10 +274,28 @@ BtreeLBAManager::get_mapping(
          TRACET("{} got {}, {}",
                 c.trans, offset, iter.get_key(), iter.get_val());
          auto e = iter.get_pin(c);
-         return get_mapping_ret(
+         return _get_mapping_ret(
            interruptible::ready_future_marker{},
            std::move(e));
        }
+      }).si_then([this, c](auto pin) -> _get_mapping_ret {
+       if (pin->get_raw_val().is_laddr()) {
+         return seastar::do_with(
+           std::move(pin),
+           [this, c](auto &pin) {
+           return _get_mapping(
+             c.trans, pin->get_raw_val().get_laddr()
+           ).si_then([&pin](auto new_pin) {
+             ceph_assert(pin->get_length() == new_pin->get_length());
+             new_pin->set_key_for_indirect(
+               pin->get_key(),
+               pin->get_length());
+             return new_pin;
+           });
+         });
+       } else {
+         return get_mapping_iertr::make_ready_future<BtreeLBAMappingRef>(std::move(pin));
+       }
       });
     });
 }
@@ -211,6 +307,7 @@ BtreeLBAManager::_alloc_extent(
   extent_len_t len,
   pladdr_t addr,
   paddr_t actual_addr,
+  laddr_t intermediate_base,
   LogicalCachedExtent* nextent)
 {
   struct state_t {
@@ -287,11 +384,12 @@ BtreeLBAManager::_alloc_extent(
            state.ret = iter;
          });
        });
-    }).si_then([c, actual_addr, addr](auto &&state) {
+    }).si_then([c, actual_addr, addr, intermediate_base](auto &&state) {
       auto ret_pin = state.ret->get_pin(c);
       if (actual_addr != P_ADDR_NULL) {
        ceph_assert(addr.is_laddr());
        ret_pin->set_paddr(actual_addr);
+       ret_pin->set_intermediate_base(intermediate_base);
       } else {
        ceph_assert(addr.is_paddr());
       }
@@ -398,8 +496,8 @@ BtreeLBAManager::scan_mappings(
              seastar::stop_iteration::yes);
          }
          ceph_assert((pos.get_key() + pos.get_val().len) > begin);
-         f(pos.get_key(), pos.get_val().pladdr, pos.get_val().len);
-         return typename LBABtree::iterate_repeat_ret_inner(
+         f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len);
+         return LBABtree::iterate_repeat_ret_inner(
            interruptible::ready_future_marker{},
            seastar::stop_iteration::no);
        });
index 396b024ec62fdddd747abaa325c1c50d7dfed5af..b37709ee6b159432da445abde810ccdc92185c08 100644 (file)
 namespace crimson::os::seastore::lba_manager::btree {
 
 class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
+// To support cloning, there are two kinds of lba mappings:
+//     1. physical lba mapping: the pladdr in the value of which is the paddr of
+//        the corresponding extent;
+//     2. indirect lba mapping: the pladdr in the value of which is an laddr pointing
+//        to the physical lba mapping that's pointing to the actual paddr of the
+//        extent being searched;
+//
+// Accordingly, BtreeLBAMapping may also work under two modes: indirect or direct
+//     1. BtreeLBAMappings that come from querying an indirect lba mapping in the lba tree
+//        are indirect;
+//     2. BtreeLBAMappings that come from querying a physical lba mapping in the lba tree
+//        are direct.
+//
+// For direct BtreeLBAMappings, there are two important fields:
+//      1. key: the laddr of the lba mapping being queried;
+//      2. paddr: the paddr recorded in the value of the lba mapping being queried.
+// For indirect BtreeLBAMappings, BtreeLBAMapping has five important fields:
+//     1. key: the laddr key of the lba entry being queried;
+//     2. intermediate_key: the laddr within the scope of the physical lba mapping
+//        that the current indirect lba mapping points to; although an indirect mapping
+//        points to the start of the physical lba mapping, it may change to other
+//        laddr after remap
+//     3. intermediate_base: the laddr key of the physical lba mapping, intermediate_key
+//        and intermediate_base should be the same when doing cloning
+//     4. intermediate_offset: intermediate_key - intermediate_base
+//     5. paddr: the paddr recorded in the physical lba mapping pointed to by the
+//        indirect lba mapping being queried;
+//
+// NOTE THAT, for direct BtreeLBAMappings, their intermediate_keys are the same as
+// their keys.
 public:
   BtreeLBAMapping(op_context_t<laddr_t> ctx)
     : BtreeNodeMapping(ctx) {}
@@ -34,17 +64,112 @@ public:
     CachedExtentRef parent,
     uint16_t pos,
     lba_map_val_t &val,
-    lba_node_meta_t &&meta)
+    lba_node_meta_t meta)
     : BtreeNodeMapping(
        c,
        parent,
        pos,
-       val.pladdr,
+       val.pladdr.is_paddr() ? val.pladdr.get_paddr() : P_ADDR_NULL,
        val.len,
-       std::forward<lba_node_meta_t>(meta))
+       meta),
+      key(meta.begin),
+      indirect(val.pladdr.is_laddr() ? true : false),
+      intermediate_key(indirect ? val.pladdr.get_laddr() : L_ADDR_NULL),
+      intermediate_length(indirect ? val.len : 0),
+      raw_val(val.pladdr),
+      map_val(val)
   {}
+
+  lba_map_val_t get_map_val() const {
+    return map_val;
+  }
+
+  bool is_indirect() const final {
+    return indirect;
+  }
+
+  void set_key_for_indirect(
+    laddr_t new_key,
+    extent_len_t length,
+    laddr_t interkey = L_ADDR_NULL)
+  {
+    turn_indirect(interkey);
+    key = new_key;
+    intermediate_length = len;
+    len = length;
+  }
+
+  laddr_t get_key() const final {
+    return key;
+  }
+
+  pladdr_t get_raw_val() const {
+    return raw_val;
+  }
+
+  void set_paddr(paddr_t addr) {
+    value = addr;
+  }
+
+  laddr_t get_intermediate_key() const final {
+    assert(is_indirect());
+    assert(intermediate_key != L_ADDR_NULL);
+    return intermediate_key;
+  }
+
+  laddr_t get_intermediate_base() const final {
+    assert(is_indirect());
+    assert(intermediate_base != L_ADDR_NULL);
+    return intermediate_base;
+  }
+
+  extent_len_t get_intermediate_offset() const final {
+    assert(intermediate_key >= intermediate_base);
+    assert((intermediate_key == L_ADDR_NULL)
+      == (intermediate_base == L_ADDR_NULL));
+    return intermediate_key - intermediate_base;
+  }
+
+  extent_len_t get_intermediate_length() const final {
+    assert(is_indirect());
+    assert(intermediate_length);
+    return intermediate_length;
+  }
+
+  void set_intermediate_base(laddr_t base) {
+    intermediate_base = base;
+  }
+
+protected:
+  std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate(
+    op_context_t<laddr_t> ctx) const final {
+    auto pin = std::unique_ptr<BtreeLBAMapping>(new BtreeLBAMapping(ctx));
+    pin->key = key;
+    pin->intermediate_base = intermediate_base;
+    pin->intermediate_key = intermediate_key;
+    pin->indirect = indirect;
+    pin->raw_val = raw_val;
+    pin->map_val = map_val;
+    return pin;
+  }
+private:
+  void turn_indirect(laddr_t interkey) {
+    assert(value.is_paddr());
+    intermediate_base = key;
+    intermediate_key = (interkey == L_ADDR_NULL ? key : interkey);
+    indirect = true;
+  }
+  laddr_t key = L_ADDR_NULL;
+  bool indirect = false;
+  laddr_t intermediate_key = L_ADDR_NULL;
+  laddr_t intermediate_base = L_ADDR_NULL;
+  extent_len_t intermediate_length = 0;
+  pladdr_t raw_val;
+  lba_map_val_t map_val;
 };
 
+using BtreeLBAMappingRef = std::unique_ptr<BtreeLBAMapping>;
+
 using LBABtree = FixedKVBtree<
   laddr_t, lba_map_val_t, LBAInternalNode,
   LBALeafNode, BtreeLBAMapping, LBA_BLOCK_SIZE, true>;
@@ -94,7 +219,14 @@ public:
     laddr_t hint,
     extent_len_t len)
   {
-    return _alloc_extent(t, hint, len, P_ADDR_ZERO, P_ADDR_NULL, nullptr);
+    return _alloc_extent(
+      t,
+      hint,
+      len,
+      P_ADDR_ZERO,
+      P_ADDR_NULL,
+      L_ADDR_NULL,
+      nullptr);
   }
 
   alloc_extent_ret clone_extent(
@@ -102,9 +234,17 @@ public:
     laddr_t hint,
     extent_len_t len,
     laddr_t intermediate_key,
-    paddr_t actual_addr)
+    paddr_t actual_addr,
+    laddr_t intermediate_base)
   {
-    return _alloc_extent(t, hint, len, intermediate_key, actual_addr, nullptr);
+    return _alloc_extent(
+      t,
+      hint,
+      len,
+      intermediate_key,
+      actual_addr,
+      intermediate_base,
+      nullptr);
   }
 
   alloc_extent_ret alloc_extent(
@@ -114,8 +254,14 @@ public:
     paddr_t addr,
     LogicalCachedExtent &ext) final
   {
-    assert(ext);
-    return _alloc_extent(t, hint, len, addr, P_ADDR_NULL, &ext);
+    return _alloc_extent(
+      t,
+      hint,
+      len,
+      addr,
+      P_ADDR_NULL,
+      L_ADDR_NULL,
+      &ext);
   }
 
   ref_ret decref_extent(
@@ -216,7 +362,19 @@ private:
     extent_len_t len,
     pladdr_t addr,
     paddr_t actual_addr,
+    laddr_t intermediate_base,
     LogicalCachedExtent*);
+
+  using _get_mapping_ret = get_mapping_iertr::future<BtreeLBAMappingRef>;
+  _get_mapping_ret _get_mapping(
+    Transaction &t,
+    laddr_t offset);
+
+  using _get_original_mappings_ret = get_mappings_ret;
+  _get_original_mappings_ret _get_original_mappings(
+    op_context_t<laddr_t> c,
+    std::list<BtreeLBAMappingRef> &pin_list);
+
 };
 using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
 
index 9ac406f3eb7529a35ccbd0d1dbea0036735200c6..5d1336e1aae8ebb9504ab241d741510b65db7fa1 100644 (file)
@@ -1353,17 +1353,21 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
                      current = end;
                      return seastar::now();
                    } else {
+                     auto key = pin->get_key();
+                     bool is_indirect = pin->is_indirect();
                      return ctx.tm.read_pin<ObjectDataBlock>(
                        ctx.t,
                        std::move(pin)
-                     ).si_then([&ret, &current, end](auto extent) {
+                     ).si_then([&ret, &current, end, key, is_indirect](auto extent) {
                        ceph_assert(
-                         (extent->get_laddr() + extent->get_length()) >= end);
+                         is_indirect
+                           ? (key + extent->get_length()) >= end
+                           : (extent->get_laddr() + extent->get_length()) >= end);
                        ceph_assert(end > current);
                        ret.append(
                          bufferptr(
                            extent->get_bptr(),
-                           current - extent->get_laddr(),
+                           current - (is_indirect ? key : extent->get_laddr()),
                            end - current));
                        current = end;
                        return seastar::now();
index 17438ec0479da98bd634b5e874b24d88df211581..0b4ad853687fe650de0b1186fdc65efae331c6a9 100644 (file)
@@ -1115,6 +1115,23 @@ struct __attribute((packed)) pladdr_le_t {
   }
 };
 
+template <typename T>
+struct min_max_t {};
+
+template <>
+struct min_max_t<laddr_t> {
+  static constexpr laddr_t max = L_ADDR_MAX;
+  static constexpr laddr_t min = L_ADDR_MIN;
+  static constexpr laddr_t null = L_ADDR_NULL;
+};
+
+template <>
+struct min_max_t<paddr_t> {
+  static constexpr paddr_t max = P_ADDR_MAX;
+  static constexpr paddr_t min = P_ADDR_MIN;
+  static constexpr paddr_t null = P_ADDR_NULL;
+};
+
 // logical offset, see LBAManager, TransactionManager
 using extent_len_t = uint32_t;
 constexpr extent_len_t EXTENT_LEN_MAX =
index ff3a39809139c0b5171acf871aeaee055b6f4918..a66cc7c11151061639dcfbca7e971a1c22c0df44 100644 (file)
@@ -237,9 +237,11 @@ TransactionManager::ref_ret TransactionManager::dec_ref(
   ).si_then([this, FNAME, offset, &t](auto result) -> ref_ret {
     DEBUGT("extent refcount is decremented to {} -- {}~{}, {}",
            t, result.refcount, offset, result.length, result.addr);
-    if (result.refcount == 0 && !result.addr.is_zero()) {
+    if (result.refcount == 0 &&
+        (result.addr.is_paddr() &&
+         !result.addr.get_paddr().is_zero())) {
       return cache->retire_extent_addr(
-       t, result.addr, result.length
+       t, result.addr.get_paddr(), result.length
       ).si_then([] {
        return ref_ret(
          interruptible::ready_future_marker{},
index 10cc6f0e7ced5f7addc9c75237f072df84eed762..8d5ca1c567cff0dfff14ed86ea21eb591ce1d98d 100644 (file)
@@ -178,7 +178,15 @@ public:
   {
     auto v = pin->get_logical_extent(t);
     if (v.has_child()) {
-      return v.get_child_fut().safe_then([](auto extent) {
+      return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) {
+#ifndef NDEBUG
+        auto lextent = extent->template cast<LogicalCachedExtent>();
+        auto pin_laddr = pin->get_key();
+        if (pin->is_indirect()) {
+          pin_laddr = pin->get_intermediate_key();
+        }
+        assert(lextent->get_laddr() == pin_laddr);
+#endif
        return extent->template cast<T>();
       });
     } else {
@@ -453,7 +461,8 @@ public:
       hint,
       mapping.get_length(),
       clone_offset,
-      mapping.get_val()
+      mapping.get_val(),
+      clone_offset
     ).si_then([this, &t, clone_offset](auto pin) {
       return inc_ref(t, clone_offset
       ).si_then([pin=std::move(pin)](auto) mutable {
@@ -726,7 +735,7 @@ private:
        assert(!pin->has_been_invalidated());
        assert(pin->get_parent());
        pin->link_child(&extent);
-       extent.set_laddr(pin->get_key());
+       extent.maybe_set_intermediate_laddr(*pin);
       }
     ).si_then([FNAME, &t](auto ref) mutable -> ret {
       SUBTRACET(seastore_tm, "got extent -- {}", t, *ref);
@@ -767,7 +776,7 @@ private:
        assert(pin->get_parent());
        assert(!pin->get_parent()->is_pending());
        pin->link_child(&lextent);
-       lextent.set_laddr(pin->get_key());
+       lextent.maybe_set_intermediate_laddr(*pin);
       }
     ).si_then([FNAME, &t](auto ref) {
       SUBTRACET(seastore_tm, "got extent -- {}", t, *ref);
index 95b165fab29fbe9937519ea7771418f277b6523c..082de1199567936503405b612844e890f9f5c209 100644 (file)
@@ -472,7 +472,8 @@ struct btree_lba_manager_test : btree_test_base {
        ).si_then([this, &t, target](auto result) {
          EXPECT_EQ(result.refcount, target->second.refcount);
          if (result.refcount == 0) {
-           return cache->retire_extent_addr(t, result.addr, result.length);
+           return cache->retire_extent_addr(
+             t, result.addr.get_paddr(), result.length);
          }
          return Cache::retire_extent_iertr::now();
        });