git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/lba_manager: add rewrite_extent
author Samuel Just <sjust@redhat.com>
Tue, 11 Aug 2020 17:47:28 +0000 (10:47 -0700)
committer Samuel Just <sjust@redhat.com>
Fri, 25 Sep 2020 19:51:41 +0000 (12:51 -0700)
Adds support for writing out an extent to a new location and updating
lba mappings.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc
src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h

index 48ff8a36f07b0ec1b2765a27d66e7b3f900a3931..649c511c1e7d70f4ea066cac9b4c6704e51fc9df 100644 (file)
@@ -115,6 +115,36 @@ void Cache::replace_extent(CachedExtentRef next, CachedExtentRef prev)
   }
 }
 
+CachedExtentRef Cache::alloc_new_extent_by_type(
+  Transaction &t,       ///< [in, out] current transaction
+  extent_types_t type,  ///< [in] type tag
+  segment_off_t length  ///< [in] length
+)
+{
+  switch (type) {
+  case extent_types_t::ROOT:
+    assert(0 == "ROOT is never directly alloc'd");
+    return CachedExtentRef();
+  case extent_types_t::LADDR_INTERNAL:
+    return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length);
+  case extent_types_t::LADDR_LEAF:
+    return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length);
+  case extent_types_t::ONODE_BLOCK:
+    return alloc_new_extent<OnodeBlock>(t, length);
+  case extent_types_t::TEST_BLOCK:
+    return alloc_new_extent<TestBlock>(t, length);
+  case extent_types_t::TEST_BLOCK_PHYSICAL:
+    return alloc_new_extent<TestBlockPhysical>(t, length);
+  case extent_types_t::NONE: {
+    ceph_assert(0 == "NONE is an invalid extent type");
+    return CachedExtentRef();
+  }
+  default:
+    ceph_assert(0 == "impossible");
+    return CachedExtentRef();
+  }
+}
+
 CachedExtentRef Cache::duplicate_for_write(
   Transaction &t,
   CachedExtentRef i) {
index d73433d4833ae9df20621b41b55b103b2ec98561..72c98e65a9d50ef97f4919cae91f308599e95e32 100644 (file)
@@ -244,6 +244,17 @@ public:
     return ret;
   }
 
+  /**
+   * alloc_new_extent_by_type
+   *
+   * Allocates a fresh extent of the given type.  addr will be relative until commit.
+   */
+  CachedExtentRef alloc_new_extent_by_type(
+    Transaction &t,       ///< [in, out] current transaction
+    extent_types_t type,  ///< [in] type tag
+    segment_off_t length  ///< [in] length
+    );
+
   /**
    * Allocates mutable buffer from extent_set on offset~len
    *
@@ -354,6 +365,31 @@ public:
       });
   }
 
+  /**
+   * update_extent_from_transaction
+   *
+   * Returns extent as seen by t: t.root (if set) for ROOT, else extent after
+   * t.get_extent() on its paddr.  A null ref is returned if t reports RETIRED.
+   */
+  CachedExtentRef update_extent_from_transaction(
+    Transaction &t,
+    CachedExtentRef extent) {
+    if (extent->get_type() == extent_types_t::ROOT) {
+      if (t.root) { // t carries its own root: prefer it over the passed one
+       return t.root;
+      } else {
+       return extent;
+      }
+    } else {
+      auto result = t.get_extent(extent->get_paddr(), &extent); // NOTE(review): presumably may overwrite extent -- confirm
+      if (result == Transaction::get_extent_ret::RETIRED) {
+       return CachedExtentRef();
+      } else {
+       return extent;
+      }
+    }
+  }
+
   /**
    * print
    *
index d9c68ef0c392d6672a7f3748fcc261f46eff9c1d..fa897a27779085cafa06dbb7203c4b4804380caa 100644 (file)
@@ -135,6 +135,20 @@ public:
     Transaction &t,
     CachedExtentRef e) = 0;
 
+
+  /**
+   * rewrite_extent
+   *
+   * Rewrites extent to a new location within the passed transaction.
+   */
+  using rewrite_extent_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using rewrite_extent_ret = rewrite_extent_ertr::future<>;
+  virtual rewrite_extent_ret rewrite_extent(
+    Transaction &t,               ///< [in, out] transaction receiving the rewrite
+    CachedExtentRef extent) = 0;  // extent: the extent to rewrite
+
+
   virtual void add_pin(LBAPin &pin) = 0;
 
   virtual ~LBAManager() {}
index 7d6ae234e8cc67f4b68393310a676c9d5a345a1e..edc55a0c441679fef0bd0dbaf1d4493c8d9d8a53 100644 (file)
@@ -293,6 +293,82 @@ BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
     });
 }
 
+BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
+  Transaction &t,
+  CachedExtentRef extent)
+{ // Logical extents and lba nodes are copied into a fresh extent; anything else is a no-op.
+  if (extent->is_logical()) {
+    auto lextent = extent->cast<LogicalCachedExtent>();
+    cache.retire_extent(t, extent); // retire the old copy in t before allocating its replacement
+    auto nlextent = cache.alloc_new_extent_by_type(
+      t,
+      lextent->get_type(),
+      lextent->get_length())->cast<LogicalCachedExtent>();
+    lextent->get_bptr().copy_out(
+      0,
+      lextent->get_length(),
+      nlextent->get_bptr().c_str());
+    nlextent->set_laddr(lextent->get_laddr());
+    nlextent->set_pin(lextent->get_pin().duplicate());
+
+    logger().debug(
+      "{}: rewriting {} into {}",
+      __func__,
+      *lextent,
+      *nlextent);
+
+    return update_mapping( // repoint the laddr mapping from the old paddr to the new one
+      t,
+      lextent->get_laddr(),
+      [prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()](
+       const lba_map_val_t &in) {
+       lba_map_val_t ret = in;
+       ceph_assert(in.paddr == prev_addr);
+       ret.paddr = addr;
+       return ret;
+      }).safe_then([nlextent](auto e) {}).handle_error(
+       rewrite_extent_ertr::pass_further{},
+        /* ENOENT in particular should be impossible */
+       crimson::ct_error::assert_all{}
+      );
+  } else if (is_lba_node(*extent)) {
+    auto lba_extent = extent->cast<LBANode>();
+    cache.retire_extent(t, extent);
+    auto nlba_extent = cache.alloc_new_extent_by_type(
+      t,
+      lba_extent->get_type(),
+      lba_extent->get_length())->cast<LBANode>();
+    lba_extent->get_bptr().copy_out(
+      0,
+      lba_extent->get_length(),
+      nlba_extent->get_bptr().c_str());
+    nlba_extent->pin.set_range(nlba_extent->get_node_meta());
+
+    /* This is a bit underhanded.  Any relative addrs here must necessarily
+     * be record relative as we are rewriting a dirty extent.  Thus, we
+     * are using resolve_relative_addrs with a (likely negative) block
+     * relative offset to correct them to block-relative offsets adjusted
+     * for our new transaction location.
+     *
+     * Upon commit, these now block relative addresses will be interpreted
+     * against the real final address.
+     */
+    nlba_extent->resolve_relative_addrs(
+      make_record_relative_paddr(0) - nlba_extent->get_paddr());
+
+    return update_internal_mapping( // repoint whatever references this node (root block or parent)
+      t,
+      nlba_extent->get_node_meta().depth,
+      nlba_extent->get_node_meta().begin,
+      nlba_extent->get_paddr()).safe_then(
+       [](auto) {},
+       rewrite_extent_ertr::pass_further {},
+       crimson::ct_error::assert_all{});
+  } else { // neither logical nor an lba node: nothing is rewritten here
+    return rewrite_extent_ertr::now();
+  }
+}
+
 BtreeLBAManager::BtreeLBAManager(
   SegmentManager &segment_manager,
   Cache &cache)
@@ -373,4 +449,48 @@ BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping(
   });
 }
 
+BtreeLBAManager::update_internal_mapping_ret
+BtreeLBAManager::update_internal_mapping(
+  Transaction &t,
+  depth_t depth,
+  laddr_t laddr,
+  paddr_t paddr)
+{ // A node at root depth is referenced by the root block; deeper nodes by an lba node above them.
+  return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) {
+    if (depth == croot->get_lba_root().lba_depth) {
+      logger().debug(
+       "update_internal_mapping: updating lba root to: {}->{}",
+       laddr,
+       paddr);
+      { // swap croot for the writable copy before modifying it
+       auto mut_croot = cache.duplicate_for_write(t, croot);
+       croot = mut_croot->cast<RootBlock>();
+      }
+      ceph_assert(laddr == 0); // the root node's range is required to begin at 0
+      auto old_paddr = croot->get_lba_root().lba_root_addr;
+      croot->get_lba_root().lba_root_addr = paddr;
+      return update_internal_mapping_ret( // resolves immediately with the previous root address
+       update_internal_mapping_ertr::ready_future_marker{},
+       old_paddr);
+    } else {
+      logger().debug(
+       "update_internal_mapping: updating lba node at depth {} to: {}->{}",
+       depth,
+       laddr,
+       paddr);
+      return get_lba_btree_extent( // load the root lba node and let it locate the entry
+       get_context(t),
+       croot->get_lba_root().lba_depth,
+       croot->get_lba_root().lba_root_addr,
+       paddr_t()).safe_then([=, &t](LBANodeRef broot) {
+         return broot->mutate_internal_address(
+           get_context(t),
+           depth,
+           laddr,
+           paddr);
+       });
+    }
+  });
+}
+
 }
index 720f3ae4e79ec5a44e341154f6f5170bfbefe250..1dec91c9c15dfc7b667123e4761181198526e621 100644 (file)
@@ -87,6 +87,10 @@ public:
     Transaction &t,
     CachedExtentRef e) final;
 
+  rewrite_extent_ret rewrite_extent( // implements LBAManager::rewrite_extent
+    Transaction &t,
+    CachedExtentRef extent) final;
+
   void add_pin(LBAPin &pin) final {
     pin_set.add_pin(reinterpret_cast<BtreeLBAPin*>(&pin)->pin);
   }
@@ -151,6 +155,14 @@ private:
     Transaction &t,
     laddr_t addr,
     update_func_t &&f);
+
+  using update_internal_mapping_ertr = LBANode::mutate_internal_address_ertr;
+  using update_internal_mapping_ret = LBANode::mutate_internal_address_ret;
+  update_internal_mapping_ret update_internal_mapping(
+    Transaction &t,  ///< [in, out] current transaction
+    depth_t depth,   ///< [in] depth of the lba node being repointed
+    laddr_t laddr,   ///< [in] first laddr covered by that node
+    paddr_t paddr);  ///< [in] new paddr; the future yields the previous one
 };
 using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
 
index 86b7c8c5b619e767d16086b66dc09a1fd2827f35..852e989818c2d6a81e9a1b52e50ce152bc8c9d67 100644 (file)
@@ -138,6 +138,25 @@ struct LBANode : CachedExtent {
     laddr_t laddr,
     mutate_func_t &&f) = 0;
 
+  /**
+   * mutate_internal_address
+   *
+   * Looks up the internal node mapping at laddr, depth and
+   * updates the mapping to paddr.  Returns the previous paddr
+   * (for debugging purposes).
+   */
+  using mutate_internal_address_ertr = crimson::errorator<
+    crimson::ct_error::enoent,            ///< mapping does not exist
+    crimson::ct_error::input_output_error
+    >;
+  using mutate_internal_address_ret = mutate_internal_address_ertr::future<
+    paddr_t>;
+  virtual mutate_internal_address_ret mutate_internal_address(
+    op_context_t c,
+    depth_t depth,       ///< [in] depth of the node whose address changes
+    laddr_t laddr,       ///< [in] key of that node's entry in its parent
+    paddr_t paddr) = 0;  // paddr: the new address to record
+
   /**
    * make_split_children
    *
@@ -201,7 +220,6 @@ struct LBANode : CachedExtent {
     resolve_relative_addrs(get_paddr());
   }
 
-protected:
   virtual void resolve_relative_addrs(paddr_t base) = 0;
 };
 using LBANodeRef = LBANode::LBANodeRef;
index 40ebcedd65d09499587632d6b6bb7cc802c12fed..07a7b4f2e9ab87fe43dbe7a12d0d5218f201d0dc 100644 (file)
@@ -39,12 +39,11 @@ LBAInternalNode::lookup_ret LBAInternalNode::lookup(
   }
   assert(meta.begin <= addr);
   assert(meta.end > addr);
-  auto [begin, end] = bound(addr, 0);
-  assert(begin == end + 1);
+  auto iter = lower_bound(addr);
   return get_lba_btree_extent(
     c,
-    meta.depth,
-    begin->get_val(),
+    meta.depth - 1,
+    iter->get_val(),
     get_paddr()).safe_then([c, addr, depth](auto child) {
       return child->lookup(c, addr, depth);
     });
@@ -131,6 +130,54 @@ LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping(
   });
 }
 
+LBAInternalNode::mutate_internal_address_ret LBAInternalNode::mutate_internal_address(
+  op_context_t c,
+  depth_t depth,
+  laddr_t laddr,
+  paddr_t paddr)
+{
+  if (get_meta().depth == (depth + 1)) { // this node is directly above the target depth
+    if (!is_pending()) { // redo the call on the copy returned by duplicate_for_write
+      return c.cache.duplicate_for_write(c.trans, this)->cast<LBAInternalNode>(
+      )->mutate_internal_address(
+       c,
+       depth,
+       laddr,
+       paddr);
+    }
+    auto iter = get_containing_child(laddr);
+    if (iter->get_key() != laddr) { // the entry must be keyed exactly at laddr
+      return crimson::ct_error::enoent::make();
+    }
+
+    auto old_paddr = iter->get_val(); // captured before the update so it can be returned
+
+    journal_update(
+      iter,
+      maybe_generate_relative(paddr),
+      maybe_get_delta_buffer());
+
+    return mutate_internal_address_ret(
+      mutate_internal_address_ertr::ready_future_marker{},
+      old_paddr
+    );
+  } else { // not directly above the target: forward to the child containing laddr
+    auto iter = get_containing_child(laddr);
+    return get_lba_btree_extent(
+      c,
+      get_meta().depth - 1,
+      iter->get_val(),
+      get_paddr()
+    ).safe_then([=](auto node) {
+      return node->mutate_internal_address(
+       c,
+       depth,
+       laddr,
+       paddr);
+    });
+  }
+}
+
 LBAInternalNode::find_hole_ret LBAInternalNode::find_hole(
   op_context_t c,
   laddr_t min,
@@ -434,6 +481,18 @@ LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping(
   }
 }
 
+LBALeafNode::mutate_internal_address_ret LBALeafNode::mutate_internal_address(
+  op_context_t c,
+  depth_t depth,
+  laddr_t laddr,
+  paddr_t paddr)
+{ // Always asserts; the return below only satisfies the signature.
+  ceph_assert(0 == "Impossible");
+  return mutate_internal_address_ret(
+    mutate_internal_address_ertr::ready_future_marker{},
+    paddr);
+}
+
 LBALeafNode::find_hole_ret LBALeafNode::find_hole(
   op_context_t c,
   laddr_t min,
index f6167b7e58450ae2afbd28f8da63e29a9701a20c..dbdf62e35b19857be7df0331009b31b4ac3a3ae1 100644 (file)
@@ -109,6 +109,12 @@ struct LBAInternalNode
     laddr_t laddr,
     mutate_func_t &&f) final;
 
+  mutate_internal_address_ret mutate_internal_address(
+    op_context_t c,
+    depth_t depth,
+    laddr_t laddr,
+    paddr_t paddr) final;  // updates its own entry when at depth + 1, else forwards to a child
+
   find_hole_ret find_hole(
     op_context_t c,
     laddr_t min,
@@ -348,6 +354,12 @@ struct LBALeafNode
     laddr_t laddr,
     mutate_func_t &&f) final;
 
+  mutate_internal_address_ret mutate_internal_address(
+    op_context_t c,
+    depth_t depth,
+    laddr_t laddr,
+    paddr_t paddr) final;  // leaf override: the implementation asserts unconditionally
+
   find_hole_ret find_hole(
     op_context_t c,
     laddr_t min,