git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson: seastore add OP_ZERO support
author chunmei-liu <chunmei.liu@intel.com>
Sat, 2 Apr 2022 03:39:15 +0000 (20:39 -0700)
committer chunmei-liu <chunmei.liu@intel.com>
Wed, 13 Apr 2022 00:48:31 +0000 (17:48 -0700)
Signed-off-by: chunmei-liu <chunmei.liu@intel.com>
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/object_data_handler.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/seastore.h

index e59ad3dee7e0e24a99f3f0f02cdf21f77c841ed5..fb50b633e8720f845bf3fc3dd896a06e0bc741d2 100644 (file)
@@ -369,6 +369,90 @@ extent_to_write_list_t get_buffers(laddr_t offset, bufferlist &bl)
   return ret;
 };
 
+ObjectDataHandler::write_ret ObjectDataHandler::zerowrite( // builds the extents for the zeroed range, then removes _pins and inserts them
+  context_t ctx,
+  laddr_t _offset,
+  extent_len_t _len,
+  lba_pin_list_t &&_pins)
+{
+  return seastar::do_with(
+    _offset, // becomes `offset`; lowered below by the length of the head bytes
+    _offset + _len, // becomes `end`, the exclusive end of the range to zero
+    std::move(_pins),
+    extent_to_write_list_t(), // `to_write`: extents handed to do_insertions at the end
+    bufferlist(), // `head_bl`: bytes written back in front of the zeros
+    bufferlist(), // `end_bl`: bytes written back behind the zeros
+    [ctx](laddr_t &offset, laddr_t &end, auto &pins, auto &to_write,
+      auto &head_bl, auto &end_bl) {
+    LOG_PREFIX(ObjectDataHandler::zerowrite);
+    DEBUGT("zerowrite: {}~{}",
+           ctx.t,
+           offset,
+           end); // note: the second value logged is the end address, not a length
+    ceph_assert(pins.size() >= 1);
+    auto pin_begin = pins.front()->get_key();
+    ceph_assert(pin_begin <= offset); // the first pin must start at or before the range
+    auto pin_end = pins.back()->get_key() + pins.back()->get_length();
+    ceph_assert(pin_end >= end); // the last pin must reach at least the end of the range
+    return split_pin_left( // presumably splits the first pin at offset -- confirm against split_pin_left
+      ctx,
+      pins.front(),
+      offset
+    ).si_then([ctx, pin_begin, &offset, &end, &pins, &to_write, &head_bl]
+      (auto p) {
+      auto &[left_extent, headptr] = p; // each part is tested for presence before use
+      if (left_extent) {
+        ceph_assert(left_extent->addr == pin_begin);
+        to_write.push_front(std::move(*left_extent)); // left remainder goes first in to_write
+      }
+      if (headptr) {
+        head_bl.append(*headptr);
+        offset -= headptr->length(); // move offset back so that head_bl starts at offset
+        assert_aligned(offset);
+      }
+      return split_pin_right( // presumably splits the last pin at end -- confirm against split_pin_right
+        ctx,
+        pins.back(),
+        end);
+    }).si_then([ctx, pin_end, &offset, &end, &pins, &to_write, &head_bl, &end_bl]
+      (auto p) {
+      auto &[right_extent, tailptr] = p;
+      if (tailptr) {
+        end_bl.append(*tailptr);
+        assert_aligned(end - pins.back()->get_key() + end_bl.length()); // zeros inside the last pin plus the tail bytes must be aligned
+      }
+      if (pins.front() == pins.back()) { // first and last pin are the same entry
+        bufferptr newbpt = bufferptr(ceph::buffer::create(end - // presumably a zero-filled buffer (fill value 0) -- confirm
+          (offset + head_bl.length()) , 0)); // length: from the end of head_bl up to end
+        bufferlist newbl;
+        newbl.append(head_bl);
+        newbl.append(newbpt);
+        newbl.append(end_bl);
+        head_bl.swap(newbl); // head_bl now holds head bytes + zeros + tail bytes
+        to_write.splice(to_write.end(), get_buffers(offset, head_bl));
+      } else { // NOTE(review): nothing is queued between the end of head_bl and pins.back()->get_key() -- confirm this is intended
+        to_write.splice(to_write.end(), get_buffers(offset, head_bl)); // head bytes only
+        bufferptr newbpt = bufferptr(ceph::buffer::create(end -
+         pins.back()->get_key(), 0)); // length: from the last pin's key up to end
+        bufferlist newbl;
+        newbl.append(newbpt);
+        newbl.append(end_bl);
+        end_bl.swap(newbl); // end_bl now holds zeros + tail bytes
+        to_write.splice(to_write.end(), get_buffers(pins.back()->get_key(), end_bl));
+      }
+      if (right_extent) {
+        ceph_assert((right_extent->addr  + right_extent->len) == pin_end);
+        to_write.push_back(std::move(*right_extent)); // right remainder goes last in to_write
+      }
+      return write_iertr::now();
+    }).si_then([ctx, &pins] {
+      return do_removals(ctx, pins); // removals run before the insertions below
+    }).si_then([ctx, &to_write] {
+      return do_insertions(ctx, to_write);
+    });
+  });
+}
+
 ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
   context_t ctx,
   laddr_t _offset,
@@ -436,6 +520,39 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
     });
 }
 
+ObjectDataHandler::zero_ret ObjectDataHandler::zero( // prepares the reservation, looks up the pins for the range, then calls zerowrite
+  context_t ctx,
+  objaddr_t offset, // passed on as the start of the range to zero
+  extent_len_t len, // passed on as the length of the range to zero
+{
+  return with_object_data(
+    ctx,
+    [this, ctx, offset, len](auto &object_data) {
+      LOG_PREFIX(ObjectDataHandler::zero);
+      DEBUGT("zero to {}~{}, object_data: {}~{}, is_null {}",
+             ctx.t,
+             offset,
+             len,
+             object_data.get_reserved_data_base(),
+             object_data.get_reserved_data_len(),
+             object_data.is_null());
+      return prepare_data_reservation(
+       ctx,
+       object_data,
+       p2roundup(offset + len, ctx.tm.get_block_size()) // end of the range, rounded up to the block size
+      ).si_then([this, ctx, offset, len, &object_data] {
+       auto logical_offset = object_data.get_reserved_data_base() + offset; // reserved base plus the object offset
+       return ctx.tm.get_pins( // fetch the pins for logical_offset and len
+         ctx.t,
+         logical_offset,
+         len
+       ).si_then([this, ctx, logical_offset, len](auto pins) {
+         return zerowrite(ctx, logical_offset, len, std::move(pins)); // zerowrite asserts that the pins cover the whole range
+       });
+      });
+    });
+}
+
 ObjectDataHandler::write_ret ObjectDataHandler::write(
   context_t ctx,
   objaddr_t offset,
index dd91f343623b33eaeb45cd75d3bf22abe5557645..031ddd510eabca0f43c20d38278b58ee5df9aa87 100644 (file)
@@ -66,6 +66,13 @@ public:
     objaddr_t offset,
     const bufferlist &bl);
 
+  using zero_iertr = base_iertr;
+  using zero_ret = zero_iertr::future<>; // future carrying no value
+  zero_ret zero( // Zeroes data in [offset, offset + len)
+    context_t ctx,
+    objaddr_t offset,
+    extent_len_t len);
+
   /// Reads data in [offset, offset + len)
   using read_iertr = base_iertr;
   using read_ret = read_iertr::future<bufferlist>;
@@ -103,6 +110,14 @@ private:
     lba_pin_list_t &&pins ///< [in] set of pins overlapping above region
   );
 
+  /// Zeroes region [offset, offset + len)
+  write_ret zerowrite(
+    context_t ctx,        ///< [in] ctx
+    laddr_t offset,       ///< [in] zero offset
+    extent_len_t len,     ///< [in] len to zero
+    lba_pin_list_t &&pins ///< [in] set of pins overlapping above region
+  );
+
   /// Ensures object_data reserved region is prepared
   write_ret prepare_data_reservation(
     context_t ctx,
index 424c0ea8ee9f01ac0c07c219904513d7a2279ed2..ad05bf56cb65e5e7e7619a7e1a0accfdfbc09b42 100644 (file)
@@ -1167,6 +1167,12 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
       i.decode_bl(hint);
       return tm_iertr::now();
     }
+    case Transaction::OP_ZERO:
+    {
+      objaddr_t off = op->off;
+      extent_len_t len = op->len;
+      return _zero(ctx, get_onode(op->oid), off, len);
+    }
     default:
       ERROR("bad op {}", static_cast<unsigned>(op->op));
       return crimson::ct_error::input_output_error::make();
@@ -1225,6 +1231,33 @@ SeaStore::tm_ret SeaStore::_write(
     });
 }
 
+SeaStore::tm_ret SeaStore::_zero( // grows the onode size if needed, then runs ObjectDataHandler::zero on the range
+  internal_context_t &ctx,
+  OnodeRef &onode,
+  objaddr_t offset,
+  extent_len_t len)
+{
+  LOG_PREFIX(SeaStore::_zero);
+  DEBUGT("onode={} {}~{}", *ctx.transaction, *onode, offset, len);
+  if (offset + len >= max_object_size) { // NOTE(review): `>=` also rejects a range ending exactly at max_object_size -- confirm intended
+    return crimson::ct_error::input_output_error::make();
+  }
+  auto &object_size = onode->get_mutable_layout(*ctx.transaction).size;
+  object_size = std::max<uint64_t>(offset + len, object_size); // never shrinks; the size is updated before the zero operation itself runs
+  return seastar::do_with(
+    ObjectDataHandler(max_object_size), // handler instance is passed to the lambda by reference
+    [=, &ctx, &onode](auto &objhandler) { // offset and len are copied; ctx and onode are captured by reference
+      return objhandler.zero(
+        ObjectDataHandler::context_t{
+          *transaction_manager,
+          *ctx.transaction,
+          *onode,
+        },
+        offset,
+        len);
+  });
+}
+
 SeaStore::omap_set_kvs_ret
 SeaStore::_omap_set_kvs(
   OnodeRef &onode,
index d75b07a06defbcd775cb34743528b225543e005b..f618a746a06c8842f7798179f2342ae6e7f8e027 100644 (file)
@@ -339,6 +339,10 @@ private:
     uint64_t offset, size_t len,
     ceph::bufferlist &&bl,
     uint32_t fadvise_flags);
+  tm_ret _zero( // handler for Transaction::OP_ZERO
+    internal_context_t &ctx,
+    OnodeRef &onode,
+    objaddr_t offset, extent_len_t len); // range to zero: start offset and length
   tm_ret _omap_set_values(
     internal_context_t &ctx,
     OnodeRef &onode,