From: Matan Breizman Date: Sun, 1 Jan 2023 11:41:34 +0000 (+0000) Subject: crimson/osd: Keep track of modified_ranges X-Git-Tag: v19.1.0~570^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b5df21f55606d58077333df42bc3af98fa1a3d3c;p=ceph.git crimson/osd: Keep track of modified_ranges * `modified_ranges` interval_set is added to osd_op_params_t * keep track of modified_ranges while executing relevant ops * Add `osd_op_params` parameter to `PGBackend::remove()`. Signed-off-by: Matan Breizman --- diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index d57b668c5c701..034fdde716935 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -674,7 +674,16 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) whiteout = true; } return do_write_op([this, whiteout](auto& backend, auto& os, auto& txn) { - return backend.remove(os, txn, delta_stats, whiteout); + int num_bytes = 0; + // Calculate num_bytes to be removed + if (obc->obs.oi.soid.is_snap()) { + ceph_assert(obc->ssc->snapset.clone_overlap.count(obc->obs.oi.soid.snap)); + num_bytes = obc->ssc->snapset.get_clone_bytes(obc->obs.oi.soid.snap); + } else { + num_bytes = obc->obs.oi.size; + } + return backend.remove(os, txn, *osd_op_params, + delta_stats, whiteout, num_bytes); }); } case CEPH_OSD_OP_CALL: @@ -961,7 +970,17 @@ std::unique_ptr OpsExecuter::execute_clone( osd_op_params->at_version.version++; encode(cloned_snaps, cloning_ctx->log_entry.snaps); - // TODO: update most recent clone_overlap and usage stats + // update most recent clone_overlap and usage stats + assert(cloning_ctx->new_snapset.clones.size() > 0); + // In classic, we check for evicted clones before + // adjusting the clone_overlap. + // This check is redundant here since `clone_obc` + // was just created (See prepare_clone()). 
+ interval_set &newest_overlap = + cloning_ctx->new_snapset.clone_overlap.rbegin()->second; + osd_op_params->modified_ranges.intersection_of(newest_overlap); + delta_stats.num_bytes += osd_op_params->modified_ranges.size(); + newest_overlap.subtract(osd_op_params->modified_ranges); return cloning_ctx; } diff --git a/src/crimson/osd/osd_operations/osdop_params.h b/src/crimson/osd/osd_operations/osdop_params.h index c7b81e765d9d7..0f842f185f414 100644 --- a/src/crimson/osd/osd_operations/osdop_params.h +++ b/src/crimson/osd/osd_operations/osdop_params.h @@ -17,6 +17,7 @@ struct osd_op_params_t { version_t user_at_version = 0; bool user_modify = false; ObjectCleanRegions clean_regions; - + interval_set modified_ranges; + //TODO: Move delta_stats to osd_op_params_t osd_op_params_t() = default; }; diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc index a15b6f4bef88d..8c68301530b46 100644 --- a/src/crimson/osd/pg_backend.cc +++ b/src/crimson/osd/pg_backend.cc @@ -506,7 +506,9 @@ PGBackend::write_iertr::future<> PGBackend::_writefull( coll->get_cid(), ghobject_t{os.oi.soid}, 0, bl.length(), bl, flags); update_size_and_usage( - delta_stats, os.oi, 0, + delta_stats, + osd_op_params.modified_ranges, + os.oi, 0, bl.length(), true); osd_op_params.clean_regions.mark_data_region_dirty( 0, @@ -543,7 +545,9 @@ PGBackend::write_iertr::future<> PGBackend::_truncate( coll->get_cid(), ghobject_t{os.oi.soid}, offset); if (os.oi.size > offset) { - // TODO: modified_ranges.union_of(trim); + interval_set trim; + trim.insert(offset, os.oi.size - offset); + osd_op_params.modified_ranges.union_of(trim); osd_op_params.clean_regions.mark_data_region_dirty( offset, os.oi.size - offset); @@ -581,9 +585,19 @@ bool PGBackend::maybe_create_new_object( } void PGBackend::update_size_and_usage(object_stat_sum_t& delta_stats, + interval_set& modified, object_info_t& oi, uint64_t offset, uint64_t length, bool write_full) { + interval_set ch; + if (write_full) { + if (oi.size) { 
+ ch.insert(0, oi.size); + } else if (length) { + ch.insert(offset, length); + } + modified.union_of(ch); + } if (write_full || (offset + length > oi.size && length)) { uint64_t new_size = offset + length; @@ -681,12 +695,14 @@ PGBackend::write_iertr::future<> PGBackend::write( ghobject_t{os.oi.soid}, op.extent.truncate_size); if (op.extent.truncate_size != os.oi.size) { os.oi.size = length; - if (op.extent.truncate_size > os.oi.size) { - osd_op_params.clean_regions.mark_data_region_dirty(os.oi.size, - op.extent.truncate_size - os.oi.size); - } else { - osd_op_params.clean_regions.mark_data_region_dirty(op.extent.truncate_size, - os.oi.size - op.extent.truncate_size); + if (op.extent.truncate_size < os.oi.size) { + interval_set trim; + trim.insert(op.extent.truncate_size, + os.oi.size - op.extent.truncate_size); + osd_op_params.modified_ranges.union_of(trim); + osd_op_params.clean_regions.mark_data_region_dirty( + op.extent.truncate_size, os.oi.size - op.extent.truncate_size); + os.oi.clear_data_digest(); } } truncate_update_size_and_usage(delta_stats, os.oi, op.extent.truncate_size); @@ -705,10 +721,12 @@ PGBackend::write_iertr::future<> PGBackend::write( } else { txn.write(coll->get_cid(), ghobject_t{os.oi.soid}, offset, length, std::move(buf), op.flags); - update_size_and_usage(delta_stats, os.oi, offset, length); + update_size_and_usage(delta_stats, osd_op_params.modified_ranges, + os.oi, offset, length); } osd_op_params.clean_regions.mark_data_region_dirty(op.extent.offset, op.extent.length); + logger().debug("{} clean_regions modified", __func__); return seastar::now(); } @@ -738,7 +756,8 @@ PGBackend::interruptible_future<> PGBackend::write_same( txn.write(coll->get_cid(), ghobject_t{os.oi.soid}, op.writesame.offset, len, std::move(repeated_indata), op.flags); - update_size_and_usage(delta_stats, os.oi, op.writesame.offset, len); + update_size_and_usage(delta_stats, osd_op_params.modified_ranges, + os.oi, op.writesame.offset, len); 
osd_op_params.clean_regions.mark_data_region_dirty(op.writesame.offset, len); return seastar::now(); } @@ -788,7 +807,7 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback( target_coid.snap = snapid; return obc_loader.with_clone_obc_only( head, target_coid, - [this, &os, &txn, &delta_stats, &osd_op_params] + [this, &os, &txn, &delta_stats, &osd_op_params, &snapid] (auto, auto resolved_obc) { if (resolved_obc->obs.oi.soid.is_head()) { // no-op: The resolved oid returned the head object @@ -824,9 +843,24 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback( osd_op_params.clean_regions.mark_data_region_dirty(0, std::max(os.oi.size, resolved_obc->obs.oi.size)); osd_op_params.clean_regions.mark_omap_dirty(); - // TODO: 3) Calculate clone_overlaps by following overlaps - // forward from rollback snapshot - // https://tracker.ceph.com/issues/58263 + + // 3) Calculate clone_overlaps by following overlaps + const auto& clone_overlap = + resolved_obc->ssc->snapset.clone_overlap; + auto iter = clone_overlap.lower_bound(snapid); + ceph_assert(iter != clone_overlap.end()); + interval_set overlaps = iter->second; + for (const auto&i: clone_overlap) { + overlaps.intersection_of(i.second); + } + + if (os.oi.size > 0) { + interval_set modified; + modified.insert(0, os.oi.size); + overlaps.intersection_of(modified); + modified.subtract(overlaps); + osd_op_params.modified_ranges.union_of(modified); + } return rollback_iertr::now(); }).safe_then_interruptible([] { logger().debug("PGBackend::rollback succefully"); @@ -835,12 +869,13 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback( // if there's no snapshot, we delete the object; // otherwise, do nothing. 
crimson::ct_error::enoent::handle( - [this, &os, &snapid, &txn, &delta_stats, &snapc, &ss] { + [this, &os, &snapid, &txn, &delta_stats, &snapc, &ss, &osd_op_params] { logger().debug("PGBackend::rollback: deleting head on {}" " with snap_id of {}" " because got ENOENT|whiteout on obc lookup", os.oi.soid, snapid); - return remove(os, txn, delta_stats, should_whiteout(ss, snapc)); + return remove(os, txn, osd_op_params, delta_stats, + should_whiteout(ss, snapc), os.oi.size); }), rollback_ertr::pass_further{}, crimson::ct_error::assert_all{"unexpected error in rollback"} @@ -863,8 +898,9 @@ PGBackend::append_ierrorator::future<> PGBackend::append( txn.write(coll->get_cid(), ghobject_t{os.oi.soid}, os.oi.size /* offset */, op.extent.length, std::move(osd_op.indata), op.flags); - update_size_and_usage(delta_stats, os.oi, os.oi.size, - op.extent.length); + update_size_and_usage(delta_stats, + osd_op_params.modified_ranges, + os.oi, os.oi.size, op.extent.length); osd_op_params.clean_regions.mark_data_region_dirty(os.oi.size, op.extent.length); } @@ -921,7 +957,9 @@ PGBackend::write_iertr::future<> PGBackend::zero( ghobject_t{os.oi.soid}, op.extent.offset, op.extent.length); - // TODO: modified_ranges.union_of(zeroed); + interval_set ch; + ch.insert(op.extent.offset, op.extent.length); + osd_op_params.modified_ranges.union_of(ch); osd_op_params.clean_regions.mark_data_region_dirty(op.extent.offset, op.extent.length); delta_stats.num_wr++; @@ -975,7 +1013,10 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn) PGBackend::remove_iertr::future<> PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn, - object_stat_sum_t& delta_stats, bool whiteout) + osd_op_params_t& osd_op_params, + object_stat_sum_t& delta_stats, + bool whiteout, + int num_bytes) { if (!os.exists) { return crimson::ct_error::enoent::make(); @@ -991,17 +1032,28 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn, } txn.remove(coll->get_cid(), ghobject_t{os.oi.soid, 
ghobject_t::NO_GEN, shard}); - delta_stats.num_bytes -= os.oi.size; if (os.oi.is_omap()) { os.oi.clear_flag(object_info_t::FLAG_OMAP); delta_stats.num_objects_omap--; } + if (os.oi.size > 0) { + interval_set ch; + ch.insert(0, os.oi.size); + osd_op_params.modified_ranges.union_of(ch); + osd_op_params.clean_regions.mark_data_region_dirty(0, os.oi.size); + } + + osd_op_params.clean_regions.mark_omap_dirty(); + delta_stats.num_wr++; + // num_bytes of the removed clone or head object + delta_stats.num_bytes -= num_bytes; os.oi.size = 0; os.oi.new_object(); - // todo: clone_overlap + // todo: update watchers + if (whiteout) { logger().debug("{} setting whiteout on {} ",__func__, os.oi.soid); os.oi.set_flag(object_info_t::FLAG_WHITEOUT); @@ -1010,12 +1062,17 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn, ghobject_t{os.oi.soid, ghobject_t::NO_GEN, shard}); return seastar::now(); } - // todo: update watchers + + // delete the head + delta_stats.num_objects--; + if (os.oi.soid.is_snap()) { + delta_stats.num_object_clones--; + } if (os.oi.is_whiteout()) { + logger().debug("{} deleting whiteout on {}", __func__, os.oi.soid); os.oi.clear_flag(object_info_t::FLAG_WHITEOUT); delta_stats.num_whiteouts--; } - delta_stats.num_objects--; os.exists = false; return seastar::now(); } diff --git a/src/crimson/osd/pg_backend.h b/src/crimson/osd/pg_backend.h index aa26b2cd2c1ed..981a983075bc9 100644 --- a/src/crimson/osd/pg_backend.h +++ b/src/crimson/osd/pg_backend.h @@ -149,8 +149,10 @@ public: remove_iertr::future<> remove( ObjectState& os, ceph::os::Transaction& txn, + osd_op_params_t& osd_op_params, object_stat_sum_t& delta_stats, - bool whiteout); + bool whiteout, + int num_bytes); interruptible_future<> remove( ObjectState& os, ceph::os::Transaction& txn); @@ -432,6 +434,7 @@ private: ceph::os::Transaction& txn, object_stat_sum_t& delta_stats); void update_size_and_usage(object_stat_sum_t& delta_stats, + interval_set& modified, object_info_t& oi, uint64_t offset, 
uint64_t length, bool write_full = false); void truncate_update_size_and_usage(