From: Oguzhan Ozmen Date: Thu, 31 Jul 2025 22:15:24 +0000 (+0000) Subject: RGW: multi object delete op; skip olh update for all deletes but the last one X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1fd14e9ca0ca94fa121fc355b12d11d0429be07e;p=ceph-ci.git RGW: multi object delete op; skip olh update for all deletes but the last one Fixes: https://tracker.ceph.com/issues/72375 Resolves: rhbz#2387764 Signed-off-by: Oguzhan Ozmen (cherry picked from commit 9bb170104446bfea0ad87b34244f3a3d47962fcc) Signed-off-by: Matt Benjamin --- diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 4e05775df2b..f211e7a35fa 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -6431,7 +6431,8 @@ struct tombstone_entry { int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvider* dpp, bool log_op, - const bool force) + const bool force, + const bool skip_olh_obj_update) { RGWRados *store = target->get_store(); const rgw_obj& src_obj = target->get_obj(); @@ -6495,7 +6496,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, r = store->set_olh(dpp, target->get_ctx(), target->get_bucket_info(), marker, true, &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, - y, params.zones_trace, add_log); + y, params.zones_trace, add_log, skip_olh_obj_update); if (r < 0) { return r; } @@ -6542,7 +6543,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, r = store->unlink_obj_instance( dpp, target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, y, params.bilog_flags, - params.null_verid, params.zones_trace, add_log, force); + params.null_verid, params.zones_trace, add_log, force, skip_olh_obj_update); if (r < 0) { return r; } @@ -6735,7 +6736,8 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, const real_time& expiration_time, rgw_zone_set *zones_trace, bool log_op, - const bool force) // force removal even if head object is broken + const bool force, // force removal even if head object is broken + const bool skip_olh_obj_update) // true for all deletes (except the last one) initiated by a multi-object delete op { RGWRados::Object del_target(this, bucket_info, obj_ctx, obj); RGWRados::Object::Delete del_op(&del_target); @@ -6747,7 +6749,7 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, del_op.params.zones_trace = zones_trace; del_op.params.null_verid = null_verid; - return del_op.delete_obj(y, dpp, log_op, force); + return del_op.delete_obj(y, dpp, log_op, force, skip_olh_obj_update); } int RGWRados::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, optional_yield y) @@ -9246,7 +9248,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, rgw_obj obj_instance(bucket, key); int ret = delete_obj(dpp, obj_ctx, bucket_info, obj_instance, 0, y, null_verid, RGW_BILOG_FLAG_VERSIONED_OP, - ceph::real_time(), zones_trace, log_op, force); + ceph::real_time(), zones_trace, log_op, force, true /* skip_olh_obj_update */); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, 0) << "ERROR: delete_obj() returned " << ret << " obj_instance=" << obj_instance << dendl; return ret; @@ -9440,9 +9442,11 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. - int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, log_data_change); - if (r < 0 && r != -ECANCELED) { - ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + if (! skip_olh_obj_update) { + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, log_data_change); + if (r < 0 && r != -ECANCELED) { + ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + } } return ret; } @@ -9456,6 +9460,7 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, // exit early if we're skipping the olh update and just updating the index if (skip_olh_obj_update) { + ldpp_dout(dpp, 20) << "skip update_olh() target_obj=" << olh_obj << dendl; return 0; } @@ -9481,7 +9486,8 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider* dpp, bool null_verid, rgw_zone_set* zones_trace, bool log_op, - const bool force) + const bool force, + const bool skip_olh_obj_update) { string op_tag; @@ -9539,10 +9545,12 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider* dpp, // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. - int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, - zones_trace, null_verid, log_op, force); - if (r < 0 && r != -ECANCELED) { - ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + if (! skip_olh_obj_update) { + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, + zones_trace, null_verid, log_op, force); + if (r < 0 && r != -ECANCELED) { + ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; + } } return ret; } // if error in bucket_index_unlink_instance call @@ -9554,6 +9562,11 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider* dpp, return -EIO; } + if (skip_olh_obj_update) { + ldpp_dout(dpp, 20) << "skip update_olh() target_obj=" << olh_obj << dendl; + return 0; + } + ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, null_verid, log_op, force); if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */ diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index de671b787d2..813542b268c 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -906,7 +906,9 @@ public: int delete_obj(optional_yield y, const DoutPrefixProvider* dpp, bool log_op, - const bool force); // if head object missing, do a best effort + const bool force, // if head object missing, do a best effort + const bool skip_olh_obj_update // true for all deletes (except the last one) initiated by a multi-object delete op + ); }; // struct RGWRados::Object::Delete struct Stat { @@ -1330,7 +1332,8 @@ int restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, const ceph::real_time& expiration_time = ceph::real_time(), rgw_zone_set *zones_trace = nullptr, bool log_op = true, - const bool force = false); // if head object missing, do a best effort + const bool force = false, // if head object missing, do a best effort + const bool skip_olh_obj_update = false); // true for all deletes (except the last one) initiated by a multi-object delete op int delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, optional_yield y); @@ -1477,7 +1480,7 @@ int restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, uint64_t olh_epoch, optional_yield y, uint16_t bilog_flags, bool null_verid, rgw_zone_set *zones_trace = nullptr, - bool log_op = true, const bool force = false); + bool log_op = true, const bool force = false, const bool skip_olh_obj_update = false); void check_pending_olh_entries(const DoutPrefixProvider *dpp, std::map& pending_entries, std::map *rm_pending_entries); int remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::map& pending_attrs, optional_yield y); diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index ae01f2da28c..ab8f019a1ae 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -3580,7 +3580,7 @@ int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, option parent_op.params.check_objv = params.objv_tracker->version_for_check(); } - int ret = parent_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP); + int ret = parent_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP, flags & FLAG_SKIP_UPDATE_OLH); if (ret < 0) { return ret; } @@ -3611,7 +3611,7 @@ int RadosObject::delete_object(const DoutPrefixProvider* dpp, } // convert flags to bool params - return del_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP); + return del_op.delete_obj(y, dpp, flags & FLAG_LOG_OP, flags & FLAG_FORCE_OP, flags & FLAG_SKIP_UPDATE_OLH); } // RadosObject::delete_object int RadosObject::copy_object(const ACLOwner& owner, diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index e3d325b9b37..93c5d17934e 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -7819,7 +7819,9 @@ void RGWDeleteMultiObj::write_ops_log_entry(rgw_log_entry& entry) const { entry.delete_multi_obj_meta.objects = std::move(ops_log_entries); } -void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object, optional_yield y) +void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object, + optional_yield y, + const bool skip_olh_obj_update) { const string& key = object.get_key(); const string& instance = object.get_version_id(); @@ -7910,11 +7912,12 @@ void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object del_op->params.if_match = object.get_if_match(); del_op->params.size_match = object.get_size_match(); - op_ret = del_op->delete_obj(dpp, y, rgw::sal::FLAG_LOG_OP); + op_ret = del_op->delete_obj(dpp, y, + rgw::sal::FLAG_LOG_OP | (skip_olh_obj_update ? rgw::sal::FLAG_SKIP_UPDATE_OLH : 0)); if (op_ret == -ENOENT) { op_ret = 0; } - + if (auto ret = rgw::bucketlogging::log_record(driver, rgw::bucketlogging::LoggingType::Any, obj.get(), s, canonical_name(), etag, obj_size, this, y, true, false); ret < 0) { // don't reply with an error in case of failed delete logging ldpp_dout(this, 5) << "WARNING: multi DELETE operation ignores bucket logging failure: " << ret << dendl; @@ -7932,6 +7935,70 @@ void RGWDeleteMultiObj::handle_individual_object(const RGWMultiDelObject& object send_partial_response(o, del_op->result.delete_marker, del_op->result.version_id, op_ret); } +void RGWDeleteMultiObj::handle_versioned_objects(const std::vector& objects, + uint32_t max_aio, + boost::asio::yield_context yield) +{ + auto group = ceph::async::spawn_throttle{yield, max_aio}; + std::map> grouped_objects; + + // group objects by their keys + for (const auto& object : objects) { + const std::string& key = object.get_key(); + grouped_objects[key].push_back(object); + } + + // for each group of objects, handle all but the last object and skip update_olh + for (const auto& [_, objects] : grouped_objects) { + for (size_t i = 0; i + 1 < objects.size(); ++i) { // skip the last element + group.spawn([this, &objects, i] (boost::asio::yield_context yield) { + handle_individual_object(objects[i], yield, true /* skip_olh_obj_update */); + }); + + rgw_flush_formatter(s, s->formatter); + } + } + group.wait(); + + // Now handle the last object of each group with update_olh + for (const auto& [_, objects] : grouped_objects) { + const auto& object = objects.back(); + group.spawn([this, &object] (boost::asio::yield_context yield) { + handle_individual_object(object, yield); + }); + + rgw_flush_formatter(s, s->formatter); + } + group.wait(); +} + +void RGWDeleteMultiObj::handle_non_versioned_objects(const std::vector& objects, + uint32_t max_aio, + boost::asio::yield_context yield) +{ + auto group = ceph::async::spawn_throttle{yield, max_aio}; + + for (const auto& object : objects) { + group.spawn([this, &object] (boost::asio::yield_context yield) { + handle_individual_object(object, yield); + }); + + rgw_flush_formatter(s, s->formatter); + } + group.wait(); +} + +void RGWDeleteMultiObj::handle_objects(const std::vector& objects, + uint32_t max_aio, + boost::asio::yield_context yield) +{ + if (bucket->versioned()) { + handle_versioned_objects(objects, max_aio, yield); + } else { + handle_non_versioned_objects(objects, max_aio, yield); + } +} + void RGWDeleteMultiObj::execute(optional_yield y) { const char* buf = data.c_str(); diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index a6ef42089e2..ba2f1a7663f 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -2117,7 +2117,16 @@ class RGWDeleteMultiObj : public RGWOp { * Handles the deletion of an individual object and uses * set_partial_response to record the outcome. */ - void handle_individual_object(const RGWMultiDelObject& object, optional_yield y); + void handle_individual_object(const RGWMultiDelObject& object, + optional_yield y, + const bool skip_olh_obj_update = false); + + void handle_versioned_objects(const std::vector& objects, + uint32_t max_aio, boost::asio::yield_context yield); + void handle_non_versioned_objects(const std::vector& objects, + uint32_t max_aio, boost::asio::yield_context yield); + void handle_objects(const std::vector& objects, + uint32_t max_aio, boost::asio::yield_context yield); protected: std::vector ops_log_entries; diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index d1abf17779c..90e3eba27ff 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -163,6 +163,7 @@ static constexpr uint32_t FLAG_PREVENT_VERSIONING = 0x0002; // if cannot do all elements of op, do as much as possible (e.g., // delete object where head object is missing) static constexpr uint32_t FLAG_FORCE_OP = 0x0004; +static constexpr uint32_t FLAG_SKIP_UPDATE_OLH = 0x0008; enum class RGWRestoreStatus : uint8_t { None = 0,