From 9bb170104446bfea0ad87b34244f3a3d47962fcc Mon Sep 17 00:00:00 2001 From: Oguzhan Ozmen Date: Thu, 31 Jul 2025 22:15:24 +0000 Subject: [PATCH] RGW: multi object delete op; skip olh update for all deletes but the last one Fixes: https://tracker.ceph.com/issues/72375 Signed-off-by: Oguzhan Ozmen --- src/rgw/driver/rados/rgw_rados.cc | 41 +++++++++++------ src/rgw/driver/rados/rgw_rados.h | 9 ++-- src/rgw/driver/rados/rgw_sal_rados.cc | 4 +- src/rgw/rgw_op.cc | 63 ++++++++++++++++++++++++--- src/rgw/rgw_op.h | 8 +++- src/rgw/rgw_sal.h | 1 + 6 files changed, 100 insertions(+), 26 deletions(-) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 4ab7fe06e94..f2f9d9ca5cd 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -6379,7 +6379,8 @@ struct tombstone_entry { int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvider* dpp, bool log_op, - const bool force) + const bool force, + const bool skip_olh_obj_update) { RGWRados *store = target->get_store(); const rgw_obj& src_obj = target->get_obj(); @@ -6443,7 +6444,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, r = store->set_olh(dpp, target->get_ctx(), target->get_bucket_info(), marker, true, &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, - y, params.zones_trace, add_log); + y, params.zones_trace, add_log, skip_olh_obj_update); if (r < 0) { return r; } @@ -6490,7 +6491,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, r = store->unlink_obj_instance( dpp, target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, y, params.bilog_flags, - params.null_verid, params.zones_trace, add_log, force); + params.null_verid, params.zones_trace, add_log, force, skip_olh_obj_update); if (r < 0) { return r; } @@ -6683,7 +6684,8 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, const real_time& expiration_time, rgw_zone_set *zones_trace, bool log_op, - const bool 
force) // force removal even if head object is broken + const bool force, // force removal even if head object is broken + const bool skip_olh_obj_update) // true for all deletes (except the last one) initiated by a multi-object delete op { RGWRados::Object del_target(this, bucket_info, obj_ctx, obj); RGWRados::Object::Delete del_op(&del_target); @@ -6695,7 +6697,7 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, del_op.params.zones_trace = zones_trace; del_op.params.null_verid = null_verid; - return del_op.delete_obj(y, dpp, log_op, force); + return del_op.delete_obj(y, dpp, log_op, force, skip_olh_obj_update); } int RGWRados::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, optional_yield y) @@ -9194,7 +9196,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, rgw_obj obj_instance(bucket, key); int ret = delete_obj(dpp, obj_ctx, bucket_info, obj_instance, 0, y, null_verid, RGW_BILOG_FLAG_VERSIONED_OP, - ceph::real_time(), zones_trace, log_op, force); + ceph::real_time(), zones_trace, log_op, force, true /* skip_olh_obj_update */); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, 0) << "ERROR: delete_obj() returned " << ret << " obj_instance=" << obj_instance << dendl; return ret; @@ -9388,9 +9390,11 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. - int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, log_data_change); - if (r < 0 && r != -ECANCELED) { - ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + if (! 
skip_olh_obj_update) { + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, log_data_change); + if (r < 0 && r != -ECANCELED) { + ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + } } return ret; } @@ -9404,6 +9408,7 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, // exit early if we're skipping the olh update and just updating the index if (skip_olh_obj_update) { + ldpp_dout(dpp, 20) << "skip update_olh() target_obj=" << olh_obj << dendl; return 0; } @@ -9429,7 +9434,8 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider* dpp, bool null_verid, rgw_zone_set* zones_trace, bool log_op, - const bool force) + const bool force, + const bool skip_olh_obj_update) { string op_tag; @@ -9487,10 +9493,12 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider* dpp, // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. - int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, - zones_trace, null_verid, log_op, force); - if (r < 0 && r != -ECANCELED) { - ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + if (! 
skip_olh_obj_update) { + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, + zones_trace, null_verid, log_op, force); + if (r < 0 && r != -ECANCELED) { + ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + } } return ret; } // if error in bucket_index_unlink_instance call @@ -9502,6 +9510,11 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider* dpp, return -EIO; } + if (skip_olh_obj_update) { + ldpp_dout(dpp, 20) << "skip update_olh() target_obj=" << olh_obj << dendl; + return 0; + } + ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, null_verid, log_op, force); if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */ diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index 04e70d8ed8e..709ab610634 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -906,7 +906,9 @@ public: int delete_obj(optional_yield y, const DoutPrefixProvider* dpp, bool log_op, - const bool force); // if head object missing, do a best effort + const bool force, // if head object missing, do a best effort + const bool skip_olh_obj_update // true for all deletes (except the last one) initiated by a multi-object delete op + ); }; // struct RGWRados::Object::Delete struct Stat { @@ -1328,7 +1330,8 @@ int restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, const ceph::real_time& expiration_time = ceph::real_time(), rgw_zone_set *zones_trace = nullptr, bool log_op = true, - const bool force = false); // if head object missing, do a best effort + const bool force = false, // if head object missing, do a best effort + const bool skip_olh_obj_update = false); // true for all deletes (except the last one) initiated by a multi-object delete op int delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, optional_yield y); @@ -1475,7 +1478,7 @@ int restore_obj_from_cloud(RGWLCCloudTierCtx& 
tier_ctx, uint64_t olh_epoch, optional_yield y, uint16_t bilog_flags, bool null_verid, rgw_zone_set *zones_trace = nullptr, - bool log_op = true, const bool force = false); + bool log_op = true, const bool force = false, const bool skip_olh_obj_update = false); void check_pending_olh_entries(const DoutPrefixProvider *dpp, std::map& pending_entries, std::map *rm_pending_entries); int remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::map& pending_attrs, optional_yield y); diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index b8c1d7c26d2..cb23502e34b 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -3602,7 +3602,7 @@ int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, option parent_op.params.check_objv = params.objv_tracker->version_for_check(); } - int ret = parent_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP); + int ret = parent_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP, flags & FLAG_SKIP_UPDATE_OLH); if (ret < 0) { return ret; } @@ -3633,7 +3633,7 @@ int RadosObject::delete_object(const DoutPrefixProvider* dpp, } // convert flags to bool params - return del_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP); + return del_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP, flags & FLAG_SKIP_UPDATE_OLH); } // RadosObject::delete_object int RadosObject::copy_object(const ACLOwner& owner, diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 41ca7ad2c43..2117171ca52 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -7529,7 +7529,9 @@ void RGWDeleteMultiObj::write_ops_log_entry(rgw_log_entry& entry) const { entry.delete_multi_obj_meta.objects = std::move(ops_log_entries); } -void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object, optional_yield y) +void 
RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object, + optional_yield y, + const bool skip_olh_obj_update) { const string& key = object.get_key(); const string& instance = object.get_version_id(); @@ -7620,11 +7622,12 @@ void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object del_op->params.if_match = object.get_if_match(); del_op->params.size_match = object.get_size_match(); - op_ret = del_op->delete_obj(dpp, y, rgw::sal::FLAG_LOG_OP); + op_ret = del_op->delete_obj(dpp, y, + rgw::sal::FLAG_LOG_OP | (skip_olh_obj_update ? rgw::sal::FLAG_SKIP_UPDATE_OLH : 0)); if (op_ret == -ENOENT) { op_ret = 0; } - + if (auto ret = rgw::bucketlogging::log_record(driver, rgw::bucketlogging::LoggingType::Any, obj.get(), s, canonical_name(), etag, obj_size, this, y, true, false); ret < 0) { // don't reply with an error in case of failed delete logging ldpp_dout(this, 5) << "WARNING: multi DELETE operation ignores bucket logging failure: " << ret << dendl; @@ -7642,9 +7645,46 @@ void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object send_partial_response(o, del_op->result.delete_marker, del_op->result.version_id, op_ret); } -void RGWDeleteMultiObj::handle_objects(const std::vector<RGWMultiDelObject>& objects, - uint32_t max_aio, - boost::asio::yield_context yield) +void RGWDeleteMultiObj::handle_versioned_objects(const std::vector<RGWMultiDelObject>& objects, + uint32_t max_aio, + boost::asio::yield_context yield) +{ + auto group = ceph::async::spawn_throttle{yield, max_aio}; + std::map<std::string, std::vector<RGWMultiDelObject>> grouped_objects; + + // group objects by their keys + for (const auto& object : objects) { + const std::string& key = object.get_key(); + grouped_objects[key].push_back(object); + } + + // for each group of objects, handle all but the last object and skip update_olh + for (const auto& [_, objects] : grouped_objects) { + for (size_t i = 0; i + 1 < objects.size(); ++i) { // skip the last element + group.spawn([this, &objects, i] (boost::asio::yield_context yield) { +
handle_individual_object(objects[i], yield, true /* skip_olh_obj_update */); + }); + + rgw_flush_formatter(s, s->formatter); + } + } + group.wait(); + + // Now handle the last object of each group with update_olh + for (const auto& [_, objects] : grouped_objects) { + const auto& object = objects.back(); + group.spawn([this, &object] (boost::asio::yield_context yield) { + handle_individual_object(object, yield); + }); + + rgw_flush_formatter(s, s->formatter); + } + group.wait(); +} + +void RGWDeleteMultiObj::handle_non_versioned_objects(const std::vector<RGWMultiDelObject>& objects, + uint32_t max_aio, + boost::asio::yield_context yield) { auto group = ceph::async::spawn_throttle{yield, max_aio}; @@ -7658,6 +7698,17 @@ void RGWDeleteMultiObj::handle_objects(const std::vector<RGWMultiDelObject>& obj group.wait(); } +void RGWDeleteMultiObj::handle_objects(const std::vector<RGWMultiDelObject>& objects, + uint32_t max_aio, + boost::asio::yield_context yield) +{ + if (bucket->versioned()) { + handle_versioned_objects(objects, max_aio, yield); + } else { + handle_non_versioned_objects(objects, max_aio, yield); + } +} + void RGWDeleteMultiObj::execute(optional_yield y) { const char* buf = data.c_str(); diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 35d7fb637ee..bae735da11b 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -2176,8 +2176,14 @@ class RGWDeleteMultiObj : public RGWOp { * Handles the deletion of an individual object and uses * set_partial_response to record the outcome.
*/ - void handle_individual_object(const RGWMultiDelObject& object, optional_yield y); + void handle_individual_object(const RGWMultiDelObject& object, + optional_yield y, + const bool skip_olh_obj_update = false); + void handle_versioned_objects(const std::vector<RGWMultiDelObject>& objects, + uint32_t max_aio, boost::asio::yield_context yield); + void handle_non_versioned_objects(const std::vector<RGWMultiDelObject>& objects, + uint32_t max_aio, boost::asio::yield_context yield); void handle_objects(const std::vector<RGWMultiDelObject>& objects, uint32_t max_aio, boost::asio::yield_context yield); diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 3f25a774137..bc53d1de84d 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -163,6 +163,7 @@ static constexpr uint32_t FLAG_PREVENT_VERSIONING = 0x0002; // if cannot do all elements of op, do as much as possible (e.g., // delete object where head object is missing) static constexpr uint32_t FLAG_FORCE_OP = 0x0004; +static constexpr uint32_t FLAG_SKIP_UPDATE_OLH = 0x0008; enum class RGWRestoreStatus : uint8_t { None = 0, -- 2.39.5