]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: Keep track of modified_ranges 49594/head
authorMatan Breizman <mbreizma@redhat.com>
Sun, 1 Jan 2023 11:41:34 +0000 (11:41 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Thu, 4 Jan 2024 15:43:57 +0000 (15:43 +0000)
* `modifies_ranges` interval_set is added to osd_op_params_t
* keep track of modified_ranges while executing relevant ops
* Add `osd_op_params` parameter to `PGBackend::remove()`.

Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/osd/ops_executer.cc
src/crimson/osd/osd_operations/osdop_params.h
src/crimson/osd/pg_backend.cc
src/crimson/osd/pg_backend.h

index d57b668c5c7014c1adce9f396659aa21a71863ed..034fdde716935a9b42dfff784f8f75232f7ee206 100644 (file)
@@ -674,7 +674,16 @@ OpsExecuter::do_execute_op(OSDOp& osd_op)
       whiteout = true;
     }
     return do_write_op([this, whiteout](auto& backend, auto& os, auto& txn) {
-      return backend.remove(os, txn, delta_stats, whiteout);
+      int num_bytes = 0;
+      // Calculate num_bytes to be removed
+      if (obc->obs.oi.soid.is_snap()) {
+        ceph_assert(obc->ssc->snapset.clone_overlap.count(obc->obs.oi.soid.snap));
+        num_bytes = obc->ssc->snapset.get_clone_bytes(obc->obs.oi.soid.snap);
+      } else {
+        num_bytes = obc->obs.oi.size;
+      }
+      return backend.remove(os, txn, *osd_op_params,
+                            delta_stats, whiteout, num_bytes);
     });
   }
   case CEPH_OSD_OP_CALL:
@@ -961,7 +970,17 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
   osd_op_params->at_version.version++;
   encode(cloned_snaps, cloning_ctx->log_entry.snaps);
 
-  // TODO: update most recent clone_overlap and usage stats
+  // update most recent clone_overlap and usage stats
+  assert(cloning_ctx->new_snapset.clones.size() > 0);
+  // In classic, we check for evicted clones before
+  // adjusting the clone_overlap.
+  // This check is redundant here since `clone_obc`
+  // was just created (See prepare_clone()).
+  interval_set<uint64_t> &newest_overlap =
+    cloning_ctx->new_snapset.clone_overlap.rbegin()->second;
+  osd_op_params->modified_ranges.intersection_of(newest_overlap);
+  delta_stats.num_bytes += osd_op_params->modified_ranges.size();
+  newest_overlap.subtract(osd_op_params->modified_ranges);
   return cloning_ctx;
 }
 
index c7b81e765d9d764bb7ee741acab1634b6adbc5f2..0f842f185f41489fcf4e33f8c1dd46d0a37f72a4 100644 (file)
@@ -17,6 +17,7 @@ struct osd_op_params_t {
   version_t user_at_version = 0;
   bool user_modify = false;
   ObjectCleanRegions clean_regions;
-
+  interval_set<uint64_t> modified_ranges;
+  //TODO: Move delta_stats to osd_op_params_t
   osd_op_params_t() = default;
 };
index a15b6f4bef88ded46fe53c64874722f8f2b94dd6..8c68301530b46027bb814d243183da2b7b92ee77 100644 (file)
@@ -506,7 +506,9 @@ PGBackend::write_iertr::future<> PGBackend::_writefull(
       coll->get_cid(), ghobject_t{os.oi.soid}, 0, bl.length(),
       bl, flags);
     update_size_and_usage(
-      delta_stats, os.oi, 0,
+      delta_stats,
+      osd_op_params.modified_ranges,
+      os.oi, 0,
       bl.length(), true);
     osd_op_params.clean_regions.mark_data_region_dirty(
       0,
@@ -543,7 +545,9 @@ PGBackend::write_iertr::future<> PGBackend::_truncate(
       coll->get_cid(),
       ghobject_t{os.oi.soid}, offset);
     if (os.oi.size > offset) {
-      // TODO: modified_ranges.union_of(trim);
+      interval_set<uint64_t> trim;
+      trim.insert(offset, os.oi.size - offset);
+      osd_op_params.modified_ranges.union_of(trim);
       osd_op_params.clean_regions.mark_data_region_dirty(
         offset,
        os.oi.size - offset);
@@ -581,9 +585,19 @@ bool PGBackend::maybe_create_new_object(
 }
 
 void PGBackend::update_size_and_usage(object_stat_sum_t& delta_stats,
+  interval_set<uint64_t>& modified,
   object_info_t& oi, uint64_t offset,
   uint64_t length, bool write_full)
 {
+  interval_set<uint64_t> ch;
+  if (write_full) {
+    if (oi.size) {
+      ch.insert(0, oi.size);
+    } else if (length) {
+      ch.insert(offset, length);
+    }
+    modified.union_of(ch);
+  }
   if (write_full ||
       (offset + length > oi.size && length)) {
     uint64_t new_size = offset + length;
@@ -681,12 +695,14 @@ PGBackend::write_iertr::future<> PGBackend::write(
                    ghobject_t{os.oi.soid}, op.extent.truncate_size);
       if (op.extent.truncate_size != os.oi.size) {
         os.oi.size = length;
-        if (op.extent.truncate_size > os.oi.size) {
-          osd_op_params.clean_regions.mark_data_region_dirty(os.oi.size,
-              op.extent.truncate_size - os.oi.size);
-        } else {
-          osd_op_params.clean_regions.mark_data_region_dirty(op.extent.truncate_size,
-              os.oi.size - op.extent.truncate_size);
+        if (op.extent.truncate_size < os.oi.size) {
+          interval_set<uint64_t> trim;
+          trim.insert(op.extent.truncate_size,
+            os.oi.size - op.extent.truncate_size);
+          osd_op_params.modified_ranges.union_of(trim);
+          osd_op_params.clean_regions.mark_data_region_dirty(
+            op.extent.truncate_size, os.oi.size - op.extent.truncate_size);
+          os.oi.clear_data_digest();
         }
       }
       truncate_update_size_and_usage(delta_stats, os.oi, op.extent.truncate_size);
@@ -705,10 +721,12 @@ PGBackend::write_iertr::future<> PGBackend::write(
   } else {
     txn.write(coll->get_cid(), ghobject_t{os.oi.soid},
              offset, length, std::move(buf), op.flags);
-    update_size_and_usage(delta_stats, os.oi, offset, length);
+    update_size_and_usage(delta_stats, osd_op_params.modified_ranges,
+                          os.oi, offset, length);
   }
   osd_op_params.clean_regions.mark_data_region_dirty(op.extent.offset,
                                                     op.extent.length);
+  logger().debug("{} clean_regions modified", __func__);
 
   return seastar::now();
 }
@@ -738,7 +756,8 @@ PGBackend::interruptible_future<> PGBackend::write_same(
   txn.write(coll->get_cid(), ghobject_t{os.oi.soid},
             op.writesame.offset, len,
             std::move(repeated_indata), op.flags);
-  update_size_and_usage(delta_stats, os.oi, op.writesame.offset, len);
+  update_size_and_usage(delta_stats, osd_op_params.modified_ranges,
+                        os.oi, op.writesame.offset, len);
   osd_op_params.clean_regions.mark_data_region_dirty(op.writesame.offset, len);
   return seastar::now();
 }
@@ -788,7 +807,7 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback(
   target_coid.snap = snapid;
   return obc_loader.with_clone_obc_only<RWState::RWWRITE>(
     head, target_coid,
-    [this, &os, &txn, &delta_stats, &osd_op_params]
+    [this, &os, &txn, &delta_stats, &osd_op_params, &snapid]
     (auto, auto resolved_obc) {
     if (resolved_obc->obs.oi.soid.is_head()) {
       // no-op: The resolved oid returned the head object
@@ -824,9 +843,24 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback(
     osd_op_params.clean_regions.mark_data_region_dirty(0,
       std::max(os.oi.size, resolved_obc->obs.oi.size));
     osd_op_params.clean_regions.mark_omap_dirty();
-    // TODO: 3) Calculate clone_overlaps by following overlaps
-    //          forward from rollback snapshot
-    //          https://tracker.ceph.com/issues/58263
+
+    // 3) Calculate clone_overlaps by following overlaps
+    const auto& clone_overlap =
+      resolved_obc->ssc->snapset.clone_overlap;
+    auto iter = clone_overlap.lower_bound(snapid);
+    ceph_assert(iter != clone_overlap.end());
+    interval_set<uint64_t> overlaps = iter->second;
+    for (const auto&i: clone_overlap) {
+      overlaps.intersection_of(i.second);
+    }
+
+    if (os.oi.size > 0) {
+      interval_set<uint64_t> modified;
+      modified.insert(0, os.oi.size);
+      overlaps.intersection_of(modified);
+      modified.subtract(overlaps);
+      osd_op_params.modified_ranges.union_of(modified);
+    }
     return rollback_iertr::now();
   }).safe_then_interruptible([] {
     logger().debug("PGBackend::rollback succefully");
@@ -835,12 +869,13 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback(
     // if there's no snapshot, we delete the object;
     // otherwise, do nothing.
     crimson::ct_error::enoent::handle(
-    [this, &os, &snapid, &txn, &delta_stats, &snapc, &ss] {
+    [this, &os, &snapid, &txn, &delta_stats, &snapc, &ss, &osd_op_params] {
       logger().debug("PGBackend::rollback: deleting head on {}"
                      " with snap_id of {}"
                      " because got ENOENT|whiteout on obc lookup",
                      os.oi.soid, snapid);
-      return remove(os, txn, delta_stats, should_whiteout(ss, snapc));
+      return remove(os, txn, osd_op_params, delta_stats,
+                    should_whiteout(ss, snapc), os.oi.size);
     }),
     rollback_ertr::pass_further{},
     crimson::ct_error::assert_all{"unexpected error in rollback"}
@@ -863,8 +898,9 @@ PGBackend::append_ierrorator::future<> PGBackend::append(
     txn.write(coll->get_cid(), ghobject_t{os.oi.soid},
               os.oi.size /* offset */, op.extent.length,
               std::move(osd_op.indata), op.flags);
-    update_size_and_usage(delta_stats, os.oi, os.oi.size,
-      op.extent.length);
+    update_size_and_usage(delta_stats,
+                          osd_op_params.modified_ranges,
+                          os.oi, os.oi.size, op.extent.length);
     osd_op_params.clean_regions.mark_data_region_dirty(os.oi.size,
                                                        op.extent.length);
   }
@@ -921,7 +957,9 @@ PGBackend::write_iertr::future<> PGBackend::zero(
            ghobject_t{os.oi.soid},
            op.extent.offset,
            op.extent.length);
-  // TODO: modified_ranges.union_of(zeroed);
+  interval_set<uint64_t> ch;
+  ch.insert(op.extent.offset, op.extent.length);
+  osd_op_params.modified_ranges.union_of(ch);
   osd_op_params.clean_regions.mark_data_region_dirty(op.extent.offset,
                                                     op.extent.length);
   delta_stats.num_wr++;
@@ -975,7 +1013,10 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn)
 
 PGBackend::remove_iertr::future<>
 PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn,
-  object_stat_sum_t& delta_stats, bool whiteout)
+  osd_op_params_t& osd_op_params,
+  object_stat_sum_t& delta_stats,
+  bool whiteout,
+  int num_bytes)
 {
   if (!os.exists) {
     return crimson::ct_error::enoent::make();
@@ -991,17 +1032,28 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn,
   }
   txn.remove(coll->get_cid(),
             ghobject_t{os.oi.soid, ghobject_t::NO_GEN, shard});
-  delta_stats.num_bytes -= os.oi.size;
 
   if (os.oi.is_omap()) {
     os.oi.clear_flag(object_info_t::FLAG_OMAP);
     delta_stats.num_objects_omap--;
   }
 
+  if (os.oi.size > 0) {
+    interval_set<uint64_t> ch;
+    ch.insert(0, os.oi.size);
+    osd_op_params.modified_ranges.union_of(ch);
+    osd_op_params.clean_regions.mark_data_region_dirty(0, os.oi.size);
+  }
+
+  osd_op_params.clean_regions.mark_omap_dirty();
+  delta_stats.num_wr++;
+  // num_bytes of the removed clone or head object
+  delta_stats.num_bytes -= num_bytes;
   os.oi.size = 0;
   os.oi.new_object();
 
-  // todo: clone_overlap
+  // todo: update watchers
+
   if (whiteout) {
     logger().debug("{} setting whiteout on {} ",__func__, os.oi.soid);
     os.oi.set_flag(object_info_t::FLAG_WHITEOUT);
@@ -1010,12 +1062,17 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn,
                ghobject_t{os.oi.soid, ghobject_t::NO_GEN, shard});
     return seastar::now();
   }
-  // todo: update watchers
+
+  // delete the head
+  delta_stats.num_objects--;
+  if (os.oi.soid.is_snap()) {
+    delta_stats.num_object_clones--;
+  }
   if (os.oi.is_whiteout()) {
+    logger().debug("{} deleting whiteout on {}", __func__, os.oi.soid);
     os.oi.clear_flag(object_info_t::FLAG_WHITEOUT);
     delta_stats.num_whiteouts--;
   }
-  delta_stats.num_objects--;
   os.exists = false;
   return seastar::now();
 }
index aa26b2cd2c1edc81060fd0b784fb1d1d92c206df..981a983075bc90ae7df64240bb769449faa8a5e3 100644 (file)
@@ -149,8 +149,10 @@ public:
   remove_iertr::future<> remove(
     ObjectState& os,
     ceph::os::Transaction& txn,
+    osd_op_params_t& osd_op_params,
     object_stat_sum_t& delta_stats,
-    bool whiteout);
+    bool whiteout,
+    int num_bytes);
   interruptible_future<> remove(
     ObjectState& os,
     ceph::os::Transaction& txn);
@@ -432,6 +434,7 @@ private:
     ceph::os::Transaction& txn,
     object_stat_sum_t& delta_stats);
   void update_size_and_usage(object_stat_sum_t& delta_stats,
+    interval_set<uint64_t>& modified,
     object_info_t& oi, uint64_t offset,
     uint64_t length, bool write_full = false);
   void truncate_update_size_and_usage(