From: Soumya Koduri
Date: Wed, 17 Mar 2021 21:12:54 +0000 (+0530)
Subject: rgw/CloudTransition: Handle versioned objects
X-Git-Tag: v17.1.0~411^2~3
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=cf241166900c9102feacacdcf0ff4e2aaf89142c;p=ceph-ci.git

rgw/CloudTransition: Handle versioned objects

For versioned and locked objects, semantics similar to those of
LifecycleExpiration are applied, as described below.

If bucket versioning is enabled and the object transitioned to the cloud is the
- current version: irrespective of the value of the config option
  "retain_object", the object is not deleted; instead a delete marker is
  created on the source RGW server.
- noncurrent version: it is deleted or retained based on the value of the
  config option "retain_object".

If the object is locked and is the
- current version: it is transitioned to the cloud, after which it is made
  noncurrent and a delete marker is created.
- noncurrent version: the transition is skipped.

Also contains misc rebase fixes and cleanup:
* Rename the config option to "retain_head_object" to reflect that it keeps
  the head object after the transition to the cloud, when enabled.

Signed-off-by: Soumya Koduri
---

diff --git a/doc/radosgw/cloud-transition.rst b/doc/radosgw/cloud-transition.rst
index 663ceba7aa8..85f98357984 100644
--- a/doc/radosgw/cloud-transition.rst
+++ b/doc/radosgw/cloud-transition.rst
@@ -27,7 +27,7 @@ Cloud Storage Class Configuration
               "target_storage_class": ,
               "multipart_sync_threshold": {object_size},
               "multipart_min_part_size": {part_size},
-              "retain_object": 
+              "retain_head_object": 
       }
@@ -83,9 +83,10 @@ For example: ``target_path = rgwx-archive-${zonegroup}/``
   A string that defines the target storage class to which the object transitions to. If not specified, object is transitioned to STANDARD storage class.
-* ``retain_object`` (true | false)
+* ``retain_head_object`` (true | false)
   If true, retains the metadata of the object transitioned to cloud. If false (default), the object is deleted post transition.
+This option is ignored for current versioned objects. For more details, refer to section "Versioned Objects" below.
 S3 Specific Configurables:
@@ -148,7 +149,7 @@ For example:
                 "val": {
                     "tier_type": "cloud-s3",
                     "storage_class": "CLOUDTIER",
-                    "retain_object": "false",
+                    "retain_head_object": "false",
                     "s3": {
                         "endpoint": "",
                         "access_key": "",
@@ -192,7 +193,7 @@ For example:
                                access_key=,secret=, \
                                multipart_sync_threshold=44432, \
                                multipart_min_part_size=44432, \
-                               retain_object=true
+                               retain_head_object=true
 Nested values can be accessed using period. For example:
@@ -279,13 +280,6 @@ The cloud storage class once configured can then be used like any other storage
 Since the transition is unidirectional, while configuring S3 lifecycle rules, the cloud storage class should be specified last among all the storage classes the object transitions to. Subsequent rules (if any) do not apply post transition to the cloud.
-To avoid object names collision across various buckets, source bucket name is prepended to the target object name. If the object is versioned, object versionid is appended to the end.
- -Below is the sample object name format: -:: - - s3:////(:) - Due to API limitations there is no way to preserve original object modification time and ETag but they get stored as metadata attributes on the destination objects, as shown below: :: @@ -296,7 +290,7 @@ Due to API limitations there is no way to preserve original object modification x-amz-meta-rgwx-source-mtime: 1608546349.757100363 x-amz-meta-rgwx-versioned-epoch: 0 -By default, post transition, the source object gets deleted. But it is possible to retain its metadata but with updated values (like storage-class and object-size) by setting config option 'retain_object' to true. However GET on those objects shall still fail with 'InvalidObjectState' error. +By default, post transition, the source object gets deleted. But it is possible to retain its metadata but with updated values (like storage-class and object-size) by setting config option 'retain_head_object' to true. However GET on those objects shall still fail with 'InvalidObjectState' error. For example, :: @@ -318,6 +312,23 @@ For example, download: 's3://bucket/lc.txt' -> 'lc_restore.txt' [1 of 1] ERROR: S3 error: 403 (InvalidObjectState) +To avoid object names collision across various buckets, source bucket name is prepended to the target object name. If the object is versioned, object versionid is appended to the end. + +Below is the sample object name format: +:: + + s3:////(-) + + +Versioned Objects +~~~~~~~~~~~~~~~~~ + +For versioned and locked objects, similar semantics as that of LifecycleExpiration are applied as stated below. + +* If the object is current, post transitioning to cloud, it is made noncurrent with delete marker created. + +* If the object is noncurrent and is locked, its transition is skipped. + Future Work ----------- diff --git a/src/rgw/rgw_cr_rest.cc b/src/rgw/rgw_cr_rest.cc index ded47b3bfd4..56688e3e640 100644 --- a/src/rgw/rgw_cr_rest.cc +++ b/src/rgw/rgw_cr_rest.cc @@ -382,13 +382,13 @@ int RGWStreamWriteCR::operate() { ldout(cct, 0) << "ERROR: fail to read object data, ret = " << ret << dendl; return set_cr_error(ret); } - read_len = ret; - if (retcode < 0) { ldout(cct, 20) << __func__ << ": read_op.read() retcode=" << retcode << dendl; return set_cr_error(ret); } + read_len = bl.length(); + if (bl.length() == 0) { break; } diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc index b234e17165d..7d18d1f2495 100644 --- a/src/rgw/rgw_json_enc.cc +++ b/src/rgw/rgw_json_enc.cc @@ -1453,7 +1453,7 @@ void RGWZoneGroupPlacementTier::dump(Formatter *f) const { encode_json("tier_type", tier_type, f); encode_json("storage_class", storage_class, f); - encode_json("retain_object", retain_object, f); + encode_json("retain_head_object", retain_head_object, f); if (tier_type == "cloud-s3") { encode_json("s3", t.s3, f); @@ -1478,7 +1478,7 @@ void RGWZoneGroupPlacementTier::decode_json(JSONObj *obj) { JSONDecoder::decode_json("tier_type", tier_type, obj); JSONDecoder::decode_json("storage_class", storage_class, obj); - JSONDecoder::decode_json("retain_object", retain_object, obj); + JSONDecoder::decode_json("retain_head_object", retain_head_object, obj); if (tier_type == "cloud-s3") { JSONDecoder::decode_json("s3", t.s3, obj); diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index e00c88a78ea..7015b1f34aa 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -1278,8 +1278,13 @@ public: /* If bucket is versioned, create delete_marker for current version */ - ret = remove_expired_obj(oc.dpp, oc, - !(oc.o.is_current() && 
oc.bucket->versioned())); + if (oc.bucket->versioned() && oc.o.is_current() && !oc.o.is_delete_marker()) { + ret = remove_expired_obj(oc.dpp, oc, false); + ldpp_dout(oc.dpp, 20) << "delete_tier_obj Object(key:" << oc.o.key << ") current & not delete_marker" << "s versioned_epoch: " << oc.o.versioned_epoch << "flags: " << oc.o.flags << dendl; + } else { + ret = remove_expired_obj(oc.dpp, oc, true); + ldpp_dout(oc.dpp, 20) << "delete_tier_obj Object(key:" << oc.o.key << ") not current " << "s versioned_epoch: " << oc.o.versioned_epoch << "flags: " << oc.o.flags << dendl; + } return ret; } @@ -1364,6 +1369,8 @@ public: obj_op->params.attrs = &attrs; + r = obj_op->prepare(null_yield); + r = obj_op->write_meta(oc.dpp, tier_ctx.o.meta.size, 0, null_yield); if (r < 0) { return r; @@ -1384,8 +1391,8 @@ public: const RGWZoneGroup& zonegroup = oc.store->get_zone()->get_zonegroup(); bool delete_object; - /* Option 'retain_object' is not applicable for CurrentVersionTransition */ - delete_object = (!oc.tier.retain_object || + /* If CurrentVersion object, remove it & create delete marker */ + delete_object = (!oc.tier.retain_head_object || (oc.o.is_current() && oc.bucket->versioned())); if (bucket_name.empty()) { @@ -1426,17 +1433,16 @@ public: ldpp_dout(oc.dpp, 0) << "ERROR: failed in RGWCloudCheckCR() ret=" << ret << dendl; } - if (!al_tiered) { - ldout(tier_ctx.cct, 20) << "is already tiered false" << dendl; - ret = crs.run(new RGWLCCloudTierCR(tier_ctx)); + if (al_tiered) { + ldout(tier_ctx.cct, 20) << "Object (" << oc.o.key << ") is already tiered" << dendl; + http_manager.stop(); + return 0; } else { - ldout(tier_ctx.cct, 20) << "is already tiered true" << dendl; + ret = crs.run(new RGWLCCloudTierCR(tier_ctx)); } - http_manager.stop(); if (ret < 0) { ldpp_dout(oc.dpp, 0) << "ERROR: failed in RGWCloudTierCR() ret=" << ret << dendl; - return ret; } if (delete_object) { @@ -1452,6 +1458,7 @@ public: return ret; } } + http_manager.stop(); return 0; } @@ -1470,17 +1477,24 @@ public: if (!r && oc.tier.tier_type == "cloud-s3") { ldpp_dout(oc.dpp, 20) << "Found cloud s3 tier: " << target_placement.storage_class << dendl; + if (oc.o.meta.category == RGWObjCategory::CloudTiered) { + /* Skip objects which are already cloud tiered. */ + ldpp_dout(oc.dpp, 20) << "Object(key:" << oc.o.key << ") is already cloud tiered to cloud-s3 tier: " << oc.o.meta.storage_class << dendl; + return 0; + } if (!oc.o.is_current() && !pass_object_lock_check(oc.store, oc.obj.get(), oc.rctx, oc.dpp)) { /* Skip objects which has object lock enabled. */ - ldpp_dout(oc.dpp, 10) << "Object(key:" << oc.o.key << ") is locked Skipping transition to cloud-s3 tier: " << target_placement.storage_class << dendl; + ldpp_dout(oc.dpp, 10) << "Object(key:" << oc.o.key << ") is locked. Skipping transition to cloud-s3 tier: " << target_placement.storage_class << dendl; + return 0; } r = transition_obj_to_cloud(oc); if (r < 0) { - ldpp_dout(oc.dpp, 0) << "ERROR: failed to transition obj to cloud (r=" << r << ")" + ldpp_dout(oc.dpp, 0) << "ERROR: failed to transition obj(key:" << oc.o.key << ") to cloud (r=" << r << ")" << dendl; + return r; } } else { if (!oc.store->get_zone()->get_params(). 
diff --git a/src/rgw/rgw_lc_tier.cc b/src/rgw/rgw_lc_tier.cc index a57c9291a6d..805604438d8 100644 --- a/src/rgw/rgw_lc_tier.cc +++ b/src/rgw/rgw_lc_tier.cc @@ -40,7 +40,7 @@ static string get_key_oid(const rgw_obj_key& key) string oid = key.name; if (!key.instance.empty() && !key.have_null_instance()) { - oid += string(":") + key.instance; + oid += string("-") + key.instance; } return oid; } @@ -531,16 +531,29 @@ class RGWLCStreamObjToCloudPlainCR : public RGWCoroutine { tier_ctx.acl_mappings, tier_ctx.target_storage_class); - rgw_bucket target_bucket; string target_obj_name; + RGWBucketInfo b; + int reterr = 0; - target_bucket.name = tier_ctx.target_bucket_name; + b.bucket.name = tier_ctx.target_bucket_name; target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + - (*tier_ctx.obj)->get_name() + get_key_instance((*tier_ctx.obj)->get_key()); - - tier_ctx.store->get_bucket(tier_ctx.dpp, nullptr, target_bucket, &dest_bucket, null_yield); + (*tier_ctx.obj)->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance((*tier_ctx.obj)->get_key()); + } + reterr = tier_ctx.store->get_bucket(nullptr, b, &dest_bucket); + if (reterr < 0) { + ldout(tier_ctx.cct, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , reterr = " << reterr << dendl; + return reterr; + } + dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + if (!dest_obj) { + ldout(tier_ctx.cct, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; + return -1; + } + rgw::sal::RGWObject *o = static_cast(dest_obj.get()); @@ -588,19 +601,33 @@ class RGWLCStreamObjToCloudMultipartPartCR : public RGWCoroutine { upload_id(_upload_id), part_info(_part_info), petag(_petag) {} int operate() override { + ldout(cct, 0) << "In CloudMultipartPartCR XXXXXXXXXXXXXXXXXXX" << dendl; rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, tier_ctx.target_storage_class); - rgw_bucket target_bucket; string target_obj_name; off_t end; + RGWBucketInfo b; + int reterr = 0; - target_bucket.name = tier_ctx.target_bucket_name; + b.bucket.name = tier_ctx.target_bucket_name; target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + - (*tier_ctx.obj)->get_name() + get_key_instance((*tier_ctx.obj)->get_key()); + (*tier_ctx.obj)->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance((*tier_ctx.obj)->get_key()); + } - tier_ctx.store->get_bucket(tier_ctx.dpp, nullptr, target_bucket, &dest_bucket, null_yield); + reterr = tier_ctx.store->get_bucket(nullptr, b, &dest_bucket); + if (reterr < 0) { + ldout(tier_ctx.cct, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , reterr = " << reterr << dendl; + return reterr; + } + dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + if (!dest_obj) { + ldout(tier_ctx.cct, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; + return -1; + } reenter(this) { // tier_ctx.obj.set_atomic(&tier_ctx.rctx); -- might need when updated to zipper SAL @@ -915,11 +942,10 @@ class RGWLCStreamObjToCloudMultipartCR : public RGWCoroutine { rgw_rest_obj rest_obj; rgw_lc_multipart_upload_info status; + std::shared_ptr in_crf; map new_attrs; - rgw_lc_multipart_part_info *pcur_part_info{nullptr}; - int ret_err{0}; rgw_raw_obj status_obj; @@ -943,11 +969,15 @@ class RGWLCStreamObjToCloudMultipartCR : public RGWCoroutine { string target_obj_name; target_obj_name 
= tier_ctx.bucket_info.bucket.name + "/" + - (*tier_ctx.obj)->get_name() + get_key_instance((*tier_ctx.obj)->get_key()); + (*tier_ctx.obj)->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance((*tier_ctx.obj)->get_key()); + } rgw_obj dest_obj(target_bucket, target_obj_name); - std::shared_ptr in_crf; rgw_rest_obj rest_obj; + reenter(this) { + status_obj = rgw_raw_obj(tier_ctx.store->get_zone()->get_params().log_pool, "lc_multipart_" + (*tier_ctx.obj)->get_oid()); @@ -968,8 +998,6 @@ class RGWLCStreamObjToCloudMultipartCR : public RGWCoroutine { } } - reenter(this) { - if (ret_err >= 0) { /* check here that mtime and size did not change */ if (status.mtime != obj_properties.mtime || status.obj_size != obj_size || @@ -1007,19 +1035,18 @@ class RGWLCStreamObjToCloudMultipartCR : public RGWCoroutine { status.part_size = std::max(min_conf_size, min_part_size); status.num_parts = (obj_size + status.part_size - 1) / status.part_size; status.cur_part = 1; + status.cur_ofs = 0; } for (; (uint32_t)status.cur_part <= status.num_parts; ++status.cur_part) { - ldout(tier_ctx.cct, 20) << "status.cur_part = "<get_name() + get_key_instance((*tier_ctx.obj)->get_key()); + (*tier_ctx.obj)->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance((*tier_ctx.obj)->get_key()); + } std::unique_ptr dest_bucket; std::unique_ptr dest_obj; - tier_ctx.store->get_bucket(tier_ctx.dpp, nullptr, target_bucket, &dest_bucket, null_yield); - - dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + reterr = tier_ctx.store->get_bucket(nullptr, b, &dest_bucket); + if (reterr < 0) { + ldout(tier_ctx.cct, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , reterr = " << reterr << dendl; + return reterr; + } + + dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + if (!dest_obj) { + ldout(tier_ctx.cct, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; + return -1; + } std::shared_ptr get_crf; get_crf.reset(new RGWLCStreamGetCRF((CephContext *)(tier_ctx.cct), get_env(), this, @@ -1141,7 +1180,7 @@ int RGWLCCloudTierCR::operate() { tier_ctx.target_bucket_name, nullptr, bl, &out_bl)); } if (retcode < 0 ) { - ldout(tier_ctx.cct, 0) << "ERROR: failed to create target bucket: " << tier_ctx.target_bucket_name << dendl; + ldout(tier_ctx.cct, 0) << "ERROR: failed to create target bucket: " << tier_ctx.target_bucket_name << ", retcode:" << retcode << dendl; return set_cr_error(retcode); } if (out_bl.length() > 0) { diff --git a/src/rgw/rgw_zone.cc b/src/rgw/rgw_zone.cc index 5e28704e91e..3c288d4285d 100644 --- a/src/rgw/rgw_zone.cc +++ b/src/rgw/rgw_zone.cc @@ -2099,12 +2099,12 @@ int RGWZoneGroupPlacementTier::update_params(const JSONFormattable& config) { int r = -1; - if (config.exists("retain_object")) { - string s = config["retain_object"]; + if (config.exists("retain_head_object")) { + string s = config["retain_head_object"]; if (s == "true") { - retain_object = true; + retain_head_object = true; } else { - retain_object = false; + retain_head_object = false; } } @@ -2179,8 +2179,8 @@ int RGWZoneGroupPlacementTierS3::update_params(const JSONFormattable& config) } int RGWZoneGroupPlacementTier::clear_params(const JSONFormattable& config) { - if (config.exists("retain_object")) { - retain_object = false; + if (config.exists("retain_head_object")) { + retain_head_object = false; } if (tier_type == "cloud-s3") { diff --git a/src/rgw/rgw_zone.h b/src/rgw/rgw_zone.h index 
ae527403538..2376f8bdb7a 100644 --- a/src/rgw/rgw_zone.h +++ b/src/rgw/rgw_zone.h @@ -782,7 +782,7 @@ WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTierS3) struct RGWZoneGroupPlacementTier { std::string tier_type; std::string storage_class; - bool retain_object = false; + bool retain_head_object = false; struct _tier { RGWZoneGroupPlacementTierS3 s3; @@ -795,7 +795,7 @@ struct RGWZoneGroupPlacementTier { ENCODE_START(1, 1, bl); encode(tier_type, bl); encode(storage_class, bl); - encode(retain_object, bl); + encode(retain_head_object, bl); if (tier_type == "cloud-s3") { encode(t.s3, bl); } @@ -806,7 +806,7 @@ struct RGWZoneGroupPlacementTier { DECODE_START(1, bl); decode(tier_type, bl); decode(storage_class, bl); - decode(retain_object, bl); + decode(retain_head_object, bl); if (tier_type == "cloud-s3") { decode(t.s3, bl); }
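
The versioned-object handling described in the commit message and in the new "Versioned Objects" doc section reduces to a small decision table. Below is a minimal standalone C++ sketch of that table; the ObjectState struct, the Action enum and decide_post_transition() are illustrative names invented for this example and are not part of the RGW code:
::

    #include <iostream>

    // Illustrative stand-ins only; the real lifecycle code works on RGW's
    // own bucket/object types.
    struct ObjectState {
      bool bucket_versioned;    // bucket versioning enabled
      bool is_current;          // object is the current version
      bool is_locked;           // object lock / retention applies to this version
      bool retain_head_object;  // value of the tier config option
    };

    enum class Action {
      SkipTransition,           // leave the object untouched
      TransitionAndMarkDelete,  // transition, then create a delete marker
      TransitionAndDelete,      // transition, then delete the source object
      TransitionAndRetainHead,  // transition, keep a head stub with updated metadata
    };

    // Hypothetical helper capturing the decision table described above.
    Action decide_post_transition(const ObjectState& o) {
      if (o.is_locked && !o.is_current) {
        // Locked noncurrent versions are skipped entirely.
        return Action::SkipTransition;
      }
      if (o.bucket_versioned && o.is_current) {
        // Current versions become noncurrent via a delete marker,
        // regardless of retain_head_object.
        return Action::TransitionAndMarkDelete;
      }
      // Noncurrent (or unversioned) objects honor retain_head_object.
      return o.retain_head_object ? Action::TransitionAndRetainHead
                                  : Action::TransitionAndDelete;
    }

    int main() {
      ObjectState current{true, true, false, false};
      ObjectState noncurrent{true, false, false, true};
      std::cout << (decide_post_transition(current) == Action::TransitionAndMarkDelete) << "\n";
      std::cout << (decide_post_transition(noncurrent) == Action::TransitionAndRetainHead) << "\n";
      return 0;
    }

In the patch itself this logic is spread across delete_tier_obj() and transition_obj_to_cloud() in rgw_lc.cc, which additionally have to create the delete marker and rewrite the head object's metadata.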
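
The patch also changes how the destination object name is formed on the cloud endpoint: the source bucket name is prepended to avoid name collisions across buckets, and the version id is appended (now separated by '-' rather than ':') only for noncurrent versions. A small sketch of that naming rule, again with an illustrative ObjKey type and a hypothetical build_target_obj_name() helper:
::

    #include <iostream>
    #include <string>

    // Illustrative stand-in for a (possibly versioned) object key.
    struct ObjKey {
      std::string name;
      std::string instance;  // version id; empty or "null" when not versioned
    };

    // "<source-bucket>/<object-name>" for current versions,
    // "<source-bucket>/<object-name>-<version-id>" for noncurrent versions.
    std::string build_target_obj_name(const std::string& src_bucket,
                                      const ObjKey& key,
                                      bool is_current) {
      std::string oid = src_bucket + "/" + key.name;
      if (!is_current && !key.instance.empty() && key.instance != "null") {
        oid += "-" + key.instance;
      }
      return oid;
    }

    int main() {
      ObjKey k{"lc.txt", "v2FqT9x"};  // hypothetical version id
      std::cout << build_target_obj_name("bucket", k, true) << "\n";   // bucket/lc.txt
      std::cout << build_target_obj_name("bucket", k, false) << "\n";  // bucket/lc.txt-v2FqT9x
      return 0;
    }

The resulting name is then placed under the configured target_path / target bucket on the remote endpoint.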
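
The rgw_cr_rest.cc hunk fixes RGWStreamWriteCR so that the number of bytes forwarded downstream is taken from the buffer that was actually filled (bl.length()) rather than from the read call's return value, and only after the read's retcode has been checked. The sketch below shows the same pattern with generic std::function based reader/writer stand-ins; forward_stream() is a hypothetical helper, not an RGW class:
::

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Illustrative reader: fills 'buf' with up to 'max' bytes, returns <0 on error.
    using Reader = std::function<int(std::vector<char>& buf, size_t max)>;
    using Writer = std::function<int(const std::vector<char>& buf, size_t len)>;

    // The length forwarded downstream is taken from the buffer that was
    // actually filled, after the error code has been checked.
    int forward_stream(const Reader& read, const Writer& write, size_t chunk) {
      while (true) {
        std::vector<char> buf;
        int retcode = read(buf, chunk);
        if (retcode < 0) {
          return retcode;             // propagate read errors before touching buf
        }
        size_t read_len = buf.size(); // bytes really read, not the return code
        if (read_len == 0) {
          break;                      // end of object
        }
        int r = write(buf, read_len);
        if (r < 0) {
          return r;
        }
      }
      return 0;
    }

    int main() {
      std::string src = "hello cloud tier";
      std::string dst;
      size_t pos = 0;
      Reader rd = [&](std::vector<char>& buf, size_t max) {
        size_t n = std::min(max, src.size() - pos);
        buf.assign(src.begin() + pos, src.begin() + pos + n);
        pos += n;
        return 0;
      };
      Writer wr = [&](const std::vector<char>& buf, size_t len) {
        dst.append(buf.data(), len);
        return 0;
      };
      forward_stream(rd, wr, 4);
      std::cout << dst << "\n";  // hello cloud tier
      return 0;
    }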
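
For large objects the multipart path picks a part size that is the larger of the configured minimum and the tier's minimum part size, derives the part count by rounding up, and (with this patch) also resets the current offset when a fresh upload is started. The arithmetic is sketched below with a hypothetical part_ranges() helper, not taken from the RGW sources:
::

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct PartRange {
      uint64_t ofs;  // first byte of the part
      uint64_t len;  // number of bytes in the part
    };

    // part_size = max(configured minimum, tier minimum),
    // num_parts = ceil(obj_size / part_size), offsets start at 0.
    std::vector<PartRange> part_ranges(uint64_t obj_size,
                                       uint64_t min_conf_size,
                                       uint64_t min_part_size) {
      const uint64_t part_size = std::max(min_conf_size, min_part_size);
      const uint64_t num_parts = (obj_size + part_size - 1) / part_size;
      std::vector<PartRange> parts;
      uint64_t ofs = 0;
      for (uint64_t i = 0; i < num_parts; ++i) {
        const uint64_t len = std::min(part_size, obj_size - ofs);
        parts.push_back({ofs, len});
        ofs += len;
      }
      return parts;
    }

    int main() {
      // e.g. a 100000-byte object with multipart_min_part_size=44432
      for (const auto& p : part_ranges(100000, 44432, 44432)) {
        std::cout << "ofs=" << p.ofs << " len=" << p.len << "\n";
      }
      // prints three parts: 44432, 44432 and 11136 bytes
      return 0;
    }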