From: Jiffin Tony Thottan Date: Thu, 26 Sep 2024 12:03:05 +0000 (+0530) Subject: rgw/cloudtier : handle multisite sync for cloud objects X-Git-Tag: testing/wip-pdonnell-testing-20250226.141530-debug~7^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=9efa1b97b0e693428ef52608317f289e73f565eb;p=ceph-ci.git rgw/cloudtier : handle multisite sync for cloud objects The cloud objects (restored/transitioned) are not handled properly in multisite scenarios. We cannot rely on mtime alone for this use case when syncing the object across sites. Hence an xattr known as internal_mtime is introduced and given weight during multisite sync. Signed-off-by: Jiffin Tony Thottan --- diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h index 98dc77aaf05..19f66bf1986 100644 --- a/src/rgw/driver/motr/rgw_sal_motr.h +++ b/src/rgw/driver/motr/rgw_sal_motr.h @@ -1199,6 +1199,19 @@ struct obj_time_weight { mtime = state->mtime; zone_short_id = state->zone_short_id; pg_ver = state->pg_ver; + bufferlist bl; + if (state->get_attr(RGW_ATTR_INTERNAL_MTIME, bl)) { + try { + auto iter = bl.cbegin(); + real_time internal_mtime; + decode(internal_mtime, iter); + if (internal_mtime > mtime) { + mtime = internal_mtime; + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: couldn't decode RGW_ATTR_INTERNAL_MTIME" << dendl; + } + } } }; diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index c5da03b4142..1e3ada948fe 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -4050,6 +4050,18 @@ struct obj_time_weight { mtime = state->mtime; zone_short_id = state->zone_short_id; pg_ver = state->pg_ver; + bufferlist bl; + if (state->get_attr(RGW_ATTR_INTERNAL_MTIME, bl)) { + try { + auto iter = bl.cbegin(); + real_time internal_mtime; + decode(internal_mtime, iter); + if (internal_mtime > mtime) { + mtime = internal_mtime; + } + } catch (buffer::error& err) { + } + } } }; @@ -4394,8 +4406,7 
@@ int RGWRados::fetch_remote_obj(RGWObjectCtx& dest_obj_ctx, static constexpr int NUM_ENPOINT_IOERROR_RETRIES = 20; for (int tries = 0; tries < NUM_ENPOINT_IOERROR_RETRIES; tries++) { ret = conn->get_obj(rctx.dpp, user_id, info, src_obj, pmod, unmod_ptr, - dest_mtime_weight.zone_short_id, dest_mtime_weight.pg_ver, - prepend_meta, get_op, rgwx_stat, + dest_mtime_weight.zone_short_id, dest_mtime_weight.pg_ver, prepend_meta, get_op, rgwx_stat, sync_manifest, skip_decrypt, &dst_zone_trace, sync_cloudtiered, true, &cb, &in_stream_req); @@ -5301,7 +5312,8 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, if (ret < 0) { return ret; } - + //set same old mtime as that of transition time + set_mtime = mtime; if (cb.get_data_len() != accounted_size) { ret = -EIO; ldpp_dout(dpp, -1) << "ERROR: object truncated during fetching, expected " @@ -5319,7 +5331,7 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, { bufferlist bl; encode(restore_time, bl); - attrs[RGW_ATTR_RESTORE_TIME] = std::move(bl); + attrs[RGW_ATTR_RESTORE_TIME] = attrs[RGW_ATTR_INTERNAL_MTIME] = std::move(bl); } real_time delete_at = real_time(); @@ -5356,9 +5368,6 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, attrs[RGW_ATTR_CLOUDTIER_STORAGE_CLASS] = std::move(bl); ldpp_dout(dpp, 20) << "Setting RGW_ATTR_CLOUDTIER_STORAGE_CLASS: " << tier_ctx.storage_class << dendl; } - //set same old mtime as that of transition time - set_mtime = mtime; - // set tier-config only for temp restored objects, as // permanent copies will be treated as regular objects { @@ -5377,8 +5386,6 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, attrs[RGW_ATTR_RESTORE_TYPE] = std::move(bl); ldpp_dout(dpp, 20) << "Permanent restore, object:" << dest_obj << dendl; } - //set mtime to now() - set_mtime = real_clock::now(); } { @@ -7200,6 +7207,7 @@ int RGWRados::Object::Read::prepare(optional_yield y, const DoutPrefixProvider * } } } + if (conds.if_match || 
conds.if_nomatch) { r = get_attr(dpp, RGW_ATTR_ETAG, etag, y); if (r < 0) diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index ea2572b4121..b648741d6e7 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -3154,7 +3154,12 @@ int RadosObject::handle_obj_expiry(const DoutPrefixProvider* dpp, optional_yield bufferlist bl; bl.append(tier_config.name); attrs[RGW_ATTR_STORAGE_CLASS] = bl; - + { + ceph::real_time deletion_time = real_clock::now(); + bufferlist bl; + encode(deletion_time, bl); + attrs[RGW_ATTR_INTERNAL_MTIME] = std::move(bl); + } const req_context rctx{dpp, y, nullptr}; return obj_op.write_meta(0, 0, attrs, rctx, head_obj->get_trace()); } catch (const buffer::end_of_buffer&) { @@ -3207,6 +3212,7 @@ int RadosObject::write_cloud_tier(const DoutPrefixProvider* dpp, obj_op.meta.user_data = NULL; obj_op.meta.zones_trace = NULL; obj_op.meta.olh_epoch = olh_epoch; + obj_op.meta.set_mtime = head_obj->get_mtime(); RGWObjManifest *pmanifest; RGWObjManifest manifest; @@ -3231,13 +3237,19 @@ int RadosObject::write_cloud_tier(const DoutPrefixProvider* dpp, bl.append(tier->get_storage_class()); attrs[RGW_ATTR_STORAGE_CLASS] = bl; + ceph::real_time transition_time = real_clock::now(); + { + bufferlist bl; + encode(transition_time, bl); + attrs[RGW_ATTR_TRANSITION_TIME] = attrs[RGW_ATTR_INTERNAL_MTIME] = std::move(bl); + } + attrs.erase(RGW_ATTR_ID_TAG); attrs.erase(RGW_ATTR_TAIL_TAG); // erase restore attrs attrs.erase(RGW_ATTR_RESTORE_STATUS); attrs.erase(RGW_ATTR_RESTORE_TYPE); - attrs.erase(RGW_ATTR_RESTORE_TIME); attrs.erase(RGW_ATTR_RESTORE_EXPIRY_DATE); attrs.erase(RGW_ATTR_CLOUDTIER_STORAGE_CLASS); diff --git a/src/rgw/radosgw-admin/radosgw-admin.cc b/src/rgw/radosgw-admin/radosgw-admin.cc index 2c14e61506a..85503381273 100644 --- a/src/rgw/radosgw-admin/radosgw-admin.cc +++ b/src/rgw/radosgw-admin/radosgw-admin.cc @@ -8878,6 +8878,8 @@ next: decode(rs, bl); 
formatter->dump_string("RestoreStatus", rgw::sal::rgw_restore_status_dump(rs)); handled = true; + } else if (iter->first == RGW_ATTR_TRANSITION_TIME) { + handled = decode_dump("transition_time", bl, formatter.get()); } if (!handled) @@ -8895,6 +8897,8 @@ next: decode_dump("user.rgw.replicated-at", bl, formatter.get()); } else if (iter->first == RGW_ATTR_RESTORE_TIME) { decode_dump("user.rgw.restore-at", bl, formatter.get()); + } else if (iter->first == RGW_ATTR_INTERNAL_MTIME) { + decode_dump("user.rgw.rgw-internal-mtime", bl, formatter.get()); } else { dump_string(iter->first.c_str(), iter->second, formatter.get()); } diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 88f5f7a9c52..51e4f385ea4 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -125,6 +125,7 @@ using ceph::crypto::MD5; #define RGW_ATTR_RESTORE_TYPE RGW_ATTR_PREFIX "restore-type" #define RGW_ATTR_RESTORE_TIME RGW_ATTR_PREFIX "restored-at" #define RGW_ATTR_RESTORE_EXPIRY_DATE RGW_ATTR_PREFIX "restore-expiry-date" +#define RGW_ATTR_TRANSITION_TIME RGW_ATTR_PREFIX "transition-at" #define RGW_ATTR_TEMPURL_KEY1 RGW_ATTR_META_PREFIX "temp-url-key" #define RGW_ATTR_TEMPURL_KEY2 RGW_ATTR_META_PREFIX "temp-url-key-2" @@ -190,6 +191,8 @@ using ceph::crypto::MD5; #define RGW_ATTR_BUCKET_NOTIFICATION RGW_ATTR_PREFIX "bucket-notification" +#define RGW_ATTR_INTERNAL_MTIME RGW_ATTR_PREFIX "rgw-internal-mtime" + enum class RGWFormat : int8_t { BAD_FORMAT = -1, PLAIN = 0, diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 0095395237c..64ad3be0eed 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -1036,7 +1036,7 @@ int handle_cloudtier_obj(req_state* s, const DoutPrefixProvider *dpp, rgw::sal:: op_ret = get_system_versioning_params(s, &epoch, NULL); ldpp_dout(dpp, 20) << "getting versioning params tier placement handle cloud tier" << op_ret << dendl; if (op_ret < 0) { - ldpp_dout(dpp, 20) << "failed to get versioning params, op_ret = " << op_ret << dendl; + ldpp_dout(dpp, 20) << 
"failed to get versioning params, op_ret = " << op_ret << dendl; s->err.message = "failed to restore object"; return op_ret; }