From 92cf7989a535be65ab858ebdeb1a3af118543aaf Mon Sep 17 00:00:00 2001
From: Anthony D'Atri
Date: Thu, 3 Apr 2025 13:58:49 -0400
Subject: [PATCH] doc/radosgw: Improve cloud-restore and cloud-transition

Signed-off-by: Anthony D'Atri
(cherry picked from commit 6a6807470850ea972c5c3786be3735d26875b221)
---
 doc/radosgw/cloud-restore.rst    | 298 +++++++++++++++++++++++++++++++
 doc/radosgw/cloud-transition.rst | 220 ++++++++++++++---------
 2 files changed, 431 insertions(+), 87 deletions(-)
 create mode 100644 doc/radosgw/cloud-restore.rst

diff --git a/doc/radosgw/cloud-restore.rst b/doc/radosgw/cloud-restore.rst
new file mode 100644
index 0000000000000..701439d950fce
--- /dev/null
+++ b/doc/radosgw/cloud-restore.rst
@@ -0,0 +1,298 @@
+===============
+ Cloud Restore
+===============
+
+The :doc:`cloud-transition` feature makes it possible to transition objects to a remote
+cloud service. The ``cloud-restore`` feature described below enables restoration
+of those transitioned objects from the remote S3 endpoints into the local
+RGW deployment.
+
+This feature currently enables the restoration of objects transitioned to
+S3-compatible cloud services. In order to facilitate this,
+the ``retain_head_object`` option should be set to ``true``
+in the ``tier-config`` when configuring the storage class.
+
+Objects can be restored using the `S3 RestoreObject <https://docs.aws.amazon.com/AmazonS3/latest/API/API_RestoreObject.html>`__
+API. The restored copies will be retained within RGW only for the number
+of ``days`` specified. However, if ``days`` is not provided, the restored copies
+are considered permanent and will be treated as regular objects.
+In addition, by enabling the ``allow_read_through`` option,
+the `S3 GetObject <https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html>`__
+API can be used to restore the object temporarily.
+ + +Cloud Storage Class Tier Configuration +-------------------------------------- + +The `tier configuration ` +of the cloud storage class configured for data transition is used to restore +objects as well: + +``` + { + "access_key": , + "secret": ,` + "endpoint": , + "region": , + "host_style": , + "acls": [ { "type": , + "source_id": , + "dest_id": } ... ], + "target_path": , + "target_storage_class": , + "multipart_sync_threshold": {object_size}, + "multipart_min_part_size": {part_size}, + "retain_head_object": + } +``` + +The below options have been added to the tier configuration to facilitate object restoration. + +* ``restore_storage_class`` (string) + +The storage class to which object data is to be restored. Default value is ``STANDARD``. + + +read-through specific Configurables: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ``allow_read_through`` (``true`` | ``false``) + +If true, enables ``read-through``. Objects can then be restored using the ``S3 GetObject`` API. + +* ``read_through_restore_days`` (integer) + +The duration for which objects restored via ``read-through`` are retained. +Default value is 1 day. + +For example: + +``` + # radosgw-admin zonegroup placement modify --rgw-zonegroup default \ + --placement-id default-placement \ + --storage-class CLOUDTIER \ + --tier-config=endpoint=http://XX.XX.XX.XX:YY,\ + access_key=,secret=, \ + retain_head_object=true, \ + restore_storage_class=COLDTIER, \ + allow_read_through=true, \ + read_through_restore_days=10 +``` + + +S3 Glacier specific Configurables: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To restore objects archived in an S3 Glacier or Tape cloud storage class, the +data must first be restored to the cloud service before being read and +downloaded into RGW. To enable this process, ensure the storage class +is configured with ``--tier-type=cloud-s3-glacier``. 
Additionally, +the following configurables should be set accordingly: + +* ``glacier_restore_days`` (integer) + +The duration for which the objects are to be restored on the remote cloud service. + +* ``glacier_restore_tier_type`` (``Standard`` | ``Expedited``) + +The type of retrieval within the cloud service, which may represent different +pricing. Supported options are ``Standard`` and ``Expedited``. + + +For example: + +``` + # radosgw-admin zonegroup placement add --rgw-zonegroup=default \ + --placement-id=default-placement \ + --storage-class=CLOUDTIER-GLACIER --tier-type=cloud-s3-glacier + + # radosgw-admin zonegroup placement modify --rgw-zonegroup default \ + --placement-id default-placement \ + --storage-class CLOUDTIER \ + --tier-config=endpoint=http://XX.XX.XX.XX:YY,\ + access_key=XXXXX,secret=YYYYY, \ + retain_head_object=true, \ + target_storage_class=Glacier, \ + ............ + ............ + restore_storage_class=COLDTIER, \ + glacier_restore_days=2, \ + glacier_restore_tier_type=Expedited + + + [ + { + "key": "default-placement", + "val": { + "name": "default-placement", + "tags": [], + "storage_classes": [ + "CLOUDTIER-GLACIER", + "STANDARD" + ], + "tier_targets": [ + { + "key": "CLOUDTIER-GLACIER", + "val": { + "tier_type": "cloud-s3-glacier", + "storage_class": "CLOUDTIER-GLACIER", + "retain_head_object": "true", + "s3": { + "endpoint": http://XX.XX.XX.XX:YY, + "access_key": "XXXXX", + "secret": "YYYYY", + "host_style": "path", + "target_storage_class": "Glacier", + ....... + ....... 
+          },
+          "allow_read_through": true,
+          "read_through_restore_days": 10,
+          "restore_storage_class": "COLDTIER",
+          "s3-glacier": {
+            "glacier_restore_days": 2,
+            "glacier_restore_tier_type": "Expedited"
+          }
+        }
+      }
+    ]
+   }
+  }
+ ]
+```
+
+
+Examples of Restoring Objects
+-----------------------------
+
+Using the S3 RestoreObject CLI
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `S3 restore-object <https://docs.aws.amazon.com/cli/latest/reference/s3api/restore-object.html>`__
+CLI supports these options:
+
+```
+ $ aws s3api restore-object
+     --bucket <bucket-name>
+     --key <object-key>
+     [--version-id <version-id>]
+     --restore-request (structure) {
+         Days=<integer>
+     }
+```
+
+Note: ``Days`` is optional and if not provided, the object is restored permanently.
+
+Example 1:
+
+```
+ $ aws s3api restore-object --bucket bucket1 --key doc1.rtf
+     [--version-id 3sL4kqtJlcpXroDTDmJ+rmSpXd3dIbrHY+MTRCxf3vjVBH40Nr8X8gdRQBpUMLUo]
+     --restore-request Days=10
+     ....
+```
+
+This will restore the object ``doc1.rtf`` at an optional version,
+for the duration of 10 days.
+
+Example 2:
+
+```
+ $ aws s3api restore-object --bucket bucket1 --key doc2.rtf --restore-request {} ....
+```
+
+
+This will restore the object ``doc2.rtf`` permanently and it will be treated as a regular object.
+
+
+Using the S3 GetObject CLI
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ensure that the ``allow_read_through`` tier-config option is enabled.
+
+Example 3:
+
+```
+ $ aws s3api get-object --bucket bucket1 --key doc3.rtf ....
+```
+
+This will restore the object ``doc3.rtf`` for ``read_through_restore_days`` days.
+
+Note: The above CLI command may time out if object restoration takes too long.
+You can verify the restore status before reissuing the command.
+
+
+Verifying the restoration status
+--------------------------------
+Verify the status of the restoration by issuing
+an `S3 HeadObject <https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadObject.html>`__
+request. The response includes the ``x-amz-restore`` header if object restoration
+is in progress or a copy of it is already restored.
+
+Example:
+
+```
+ $ aws s3api head-object --key doc1.rtf --bucket bucket1 ....
+```
+
+The ``radosgw-admin`` CLI can be used to check restoration status and other
+details.
+
+Example:
+
+```
+ $ radosgw-admin object stat --bucket bucket1 --object doc1.rtf
+```
+
+
+Restored Object Properties
+--------------------------
+
+Storage
+~~~~~~~
+Objects are restored to the storage class configured via ``restore_storage_class``
+in the tier-config. However, as
+per `restoring archived objects <https://docs.aws.amazon.com/AmazonS3/latest/userguide/restoring-objects.html>`__,
+the storage class of restored objects should remain unchanged. Therefore, for
+temporary copies, the ``x-amz-storage-class`` will continue to reflect the
+original cloud-tier storage class.
+
+
+mtime
+~~~~~
+The ``mtime`` of the transitioned and restored objects should remain unchanged.
+
+
+Lifecycle
+~~~~~~~~~
+``Temporary`` copies are not subject to transition to the cloud. However, as is the
+case with cloud-transitioned objects, they can be deleted via regular LC (Life Cycle)
+expiration rules or an external S3 ``delete`` request.
+
+``Permanent`` copies are treated as regular objects and are subject to applicable LC
+policies.
+
+
+Replication
+~~~~~~~~~~~
+``Temporary`` copies are not replicated and will be retained only by the zone
+on which the restore request is initiated.
+
+``Permanent`` copies are replicated like other regular objects.
+
+
+Versioned Objects
+~~~~~~~~~~~~~~~~~
+For versioned objects, if an object has been cloud-transitioned, it is in a
+non-current state. After a restore, the same non-current object will be
+updated with the downloaded data, and its ``HEAD`` object will be modified accordingly.
+ + + +Future Work +----------- + +* Admin Ops + +* Notifications + diff --git a/doc/radosgw/cloud-transition.rst b/doc/radosgw/cloud-transition.rst index c00ad790b1709..a84b648d02091 100644 --- a/doc/radosgw/cloud-transition.rst +++ b/doc/radosgw/cloud-transition.rst @@ -1,20 +1,41 @@ -================ -Cloud Transition -================ +================== + Cloud Transition +================== -This feature enables data transition to a remote cloud service as part of `Lifecycle Configuration `__ via :ref:`storage_classes`. The transition is unidirectional; data cannot be transitioned back from the remote zone. The goal of this feature is to enable data transition to multiple cloud providers. The currently supported cloud providers are those that are compatible with AWS (S3). +This feature enables transitioning S3 objects to a remote cloud service as part +of `` +via :ref:`storage_classes`. The transition is unidirectional: data cannot be +transitioned back from the remote zone. The goal of this feature is to enable +data transition to multiple cloud providers. The currently supported cloud +providers are those that are compatible with AWS (S3). -Special storage class of tier type ``cloud-s3`` is used to configure the remote cloud S3 object store service to which the data needs to be transitioned. These are defined in terms of zonegroup placement targets and unlike regular storage classes, do not need a data pool. +A special storage class of tier type ``cloud-s3`` or ``cloud-s3-glacier`` +is used to configure the remote cloud S3 object store service to which +data is transitioned. These are defined in terms of zonegroup placement +targets and, unlike regular storage classes, do not need a data pool. -User credentials for the remote cloud object store service need to be configured. Note that source ACLs will not -be preserved. It is possible to map permissions of specific source users to specific destination users. 
+User credentials for the remote cloud object store service must be +configured. Note that source ACLs will not be preserved. It is possible +to map permissions of specific source users to specific destination users. -Cloud Storage Class Configuration ---------------------------------- +Cloud Storage Class Tier Type +----------------------------- -:: +* ``tier-type`` (string) +The type of remote cloud service that will be used to transition objects. +The below tier types are supported: + +* ``cloud-s3`` : Regular S3 compatible object store service + +* ``cloud-s3-glacier`` : S3 Glacier or Tape storage services + + +Cloud Storage Class Tier Configuration +-------------------------------------- + +``` { "access_key": , "secret": , @@ -30,14 +51,14 @@ Cloud Storage Class Configuration "multipart_min_part_size": {part_size}, "retain_head_object": } - +``` Cloud Transition Specific Configurables: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ``access_key`` (string) -The remote cloud S3 access key that will be used for a specific connection. +The remote cloud S3 access key. * ``secret`` (string) @@ -45,7 +66,7 @@ The secret key for the remote cloud S3 service. * ``endpoint`` (string) -URL of remote cloud S3 service endpoint. +URL of remote cloud S3 service. * ``region`` (string) @@ -53,7 +74,7 @@ The remote cloud S3 service region name. * ``host_style`` (path | virtual) -Type of host style to be used when accessing remote cloud S3 endpoint (default: ``path``). +Type of host style to be used when accessing the remote cloud S3 service (default: ``path``). * ``acls`` (array) @@ -62,12 +83,13 @@ Contains a list of ``acl_mappings``. * ``acl_mapping`` (container) Each ``acl_mapping`` structure contains ``type``, ``source_id``, and ``dest_id``. These -will define the ACL mutation that will be done on each object. An ACL mutation allows converting source -user id to a destination id. +define the ACL mutation to be done on each object. 
An ACL mutation makes it possible to +convert a source userid to a destination userid. * ``type`` (id | email | uri) -ACL type: ``id`` defines user id, ``email`` defines user by email, and ``uri`` defines user by ``uri`` (group). +ACL type: ``id`` defines userid, ``email`` defines user by email, +and ``uri`` defines user by ``uri`` (group). * ``source_id`` (string) @@ -75,38 +97,42 @@ ID of user in the source zone. * ``dest_id`` (string) -ID of user in the destination. +ID of user on the destination. * ``target_path`` (string) -A string that defines how the target path is created. The target path specifies a prefix to which -the source 'bucket-name/object-name' is appended. If not specified the target_path created is "rgwx-${zonegroup}-${storage-class}-cloud-bucket". +A string that defines how the target path is constructed. The target path +specifies a prefix to which the source bucket-name/object-name is appended. +If not specified the ``target_path`` created is ``rgwx-${zonegroup}-${storage-class}-cloud-bucket``. For example: ``target_path = rgwx-archive-${zonegroup}/`` * ``target_storage_class`` (string) -A string that defines the target storage class to which the object transitions to. If not specified, object is transitioned to STANDARD storage class. +A string that defines the target storage class to which the object transitions. +If not specified, the object is transitioned to the ``STANDARD`` storage class. * ``retain_head_object`` (true | false) -If true, retains the metadata of the object transitioned to cloud. If false (default), the object is deleted post transition. -This option is ignored for current versioned objects. For more details, refer to section "Versioned Objects" below. +If ``true``, the metadata of the object transitioned to the cloud service is retained. +If ``false`` (default), the object is deleted after the transition. +This option is ignored for current-versioned objects. 
For more details, +refer to the "Versioned Objects" section below. S3 Specific Configurables: ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Currently cloud transition will only work with backends that are compatible with AWS S3. There are -a few configurables that can be used to tweak its behavior when accessing these cloud services: - -:: +Currently, cloud transition will work only with backends that are compatible with +AWS S3. There are a few configurables that can be used to tweak behavior +when accessing cloud services: +``` { "multipart_sync_threshold": {object_size}, "multipart_min_part_size": {part_size} } - +``` * ``multipart_sync_threshold`` (integer) @@ -114,7 +140,7 @@ Objects this size or larger will be transitioned to the cloud using multipart up * ``multipart_min_part_size`` (integer) -Minimum parts size to use when transitioning objects using multipart upload. +Minimum part size to use when transitioning objects using multipart upload. How to Configure @@ -122,24 +148,23 @@ How to Configure See :ref:`adding_a_storage_class` for how to configure storage-class for a zonegroup. The cloud transition requires a creation of a special storage class with tier type defined as ``cloud-s3`` -.. note:: If you have not done any previous `Multisite Configuration`_, +.. note:: If you have not performed previous `Multisite Configuration`_, a ``default`` zone and zonegroup are created for you, and changes to the zone/zonegroup will not take effect until the Ceph Object - Gateways are restarted. If you have created a realm for multisite, + Gateways (RGW daemons) are restarted. If you have created a realm for multisite, the zone/zonegroup changes will take effect once the changes are committed with ``radosgw-admin period update --commit``. 
-:: - +``` # radosgw-admin zonegroup placement add --rgw-zonegroup={zone-group-name} \ --placement-id={placement-id} \ --storage-class={storage-class-name} \ --tier-type=cloud-s3 +``` For example: -:: - +``` # radosgw-admin zonegroup placement add --rgw-zonegroup=default \ --placement-id=default-placement \ --storage-class=CLOUDTIER --tier-type=cloud-s3 @@ -177,27 +202,27 @@ For example: } } ] +``` +.. note:: Once a storage class + of ``--tier-type=cloud-s3`` or ``--tier-type=cloud-s3-glacier``, + is created it cannot be later modified to any other storage class type. -.. note:: Once a storage class is created of ``--tier-type=cloud-s3``, it cannot be later modified to any other storage class type. - -The tier configuration can be then done using the following command - -:: +The tier configuration can be then performed using the following command: +``` # radosgw-admin zonegroup placement modify --rgw-zonegroup={zone-group-name} \ --placement-id={placement-id} \ --storage-class={storage-class-name} \ --tier-config={key}={val}[,{key}={val}] +``` -The ``key`` in the configuration specifies the config variable that needs to be updated, and +The ``key`` in the configuration specifies the config variable to be updated, and the ``val`` specifies its new value. - For example: -:: - +``` # radosgw-admin zonegroup placement modify --rgw-zonegroup default \ --placement-id default-placement \ --storage-class CLOUDTIER \ @@ -206,38 +231,37 @@ For example: multipart_sync_threshold=44432, \ multipart_min_part_size=44432, \ retain_head_object=true +``` -Nested values can be accessed using period. For example: - -:: +Nested values can be accessed using periods. 
For example: +``` # radosgw-admin zonegroup placement modify --rgw-zonegroup={zone-group-name} \ --placement-id={placement-id} \ --storage-class={storage-class-name} \ --tier-config=acls.source_id=${source-id}, \ acls.dest_id=${dest-id} +``` +Configuration array entries can be accessed by specifying the specific entry to +be referenced enclosed in square brackets, and adding a new array entry can be +performed with an empty array `[]`. +For example, creating a new ``acl`` array entry: - -Configuration array entries can be accessed by specifying the specific entry to be referenced enclosed -in square brackets, and adding new array entry can be done by using `[]`. -For example, creating a new acl array entry: - -:: - +``` # radosgw-admin zonegroup placement modify --rgw-zonegroup={zone-group-name} \ --placement-id={placement-id} \ --storage-class={storage-class-name} \ --tier-config=acls[].source_id=${source-id}, \ acls[${source-id}].dest_id=${dest-id}, \ acls[${source-id}].type=email +``` -An entry can be removed by using ``--tier-config-rm={key}``. - -For example, +An entry can be removed by supplying ``--tier-config-rm={key}``. 
-:: +For example: +``` # radosgw-admin zonegroup placement modify --rgw-zonegroup default \ --placement-id default-placement \ --storage-class CLOUDTIER \ @@ -247,19 +271,19 @@ For example, --placement-id default-placement \ --storage-class CLOUDTIER \ --tier-config-rm=target_path +``` -The storage class can be removed using the following command - -:: +The storage class can be removed using the following command: +``` # radosgw-admin zonegroup placement rm --rgw-zonegroup={zone-group-name} \ --placement-id={placement-id} \ --storage-class={storage-class-name} +``` For example, -:: - +``` # radosgw-admin zonegroup placement rm --rgw-zonegroup default \ --placement-id default-placement \ --storage-class CLOUDTIER @@ -275,50 +299,63 @@ For example, } } ] +``` -Object modification & Limitations +Object Modification and Limitations ---------------------------------- -The cloud storage class once configured can then be used like any other storage class in the bucket lifecycle rules. For example, - -:: +The cloud storage class, once configured, can be used like any other storage +class when defining bucket lifecycle (LC) rules. For example, +``` CLOUDTIER .... .... +``` +Since the transition is unidirectional, when configuring S3 +lifecycle rules, the cloud storage class should be specified +last among all the storage classes the object transitions to. +Subsequent rules (if any) do not apply post-transition to the cloud. -Since the transition is unidirectional, while configuring S3 lifecycle rules, the cloud storage class should be specified last among all the storage classes the object transitions to. Subsequent rules (if any) do not apply post transition to the cloud. 
- -Due to API limitations there is no way to preserve original object modification time and ETag but they get stored as metadata attributes on the destination objects, as shown below: - -:: +Due to API limitations, there is no way to preserve the original object +modification time and ETag, which are stored as metadata attributes +on the destination objects, as shown below: +``` x-amz-meta-rgwx-source: rgw x-amz-meta-rgwx-source-etag: ed076287532e86365e841e92bfc50d8c x-amz-meta-rgwx-source-key: lc.txt x-amz-meta-rgwx-source-mtime: 1608546349.757100363 x-amz-meta-rgwx-versioned-epoch: 0 +``` -In order to allow some cloud services detect the source and map the user-defined 'x-amz-meta-' attributes, below two additional new attributes are added to the objects being transitioned +In order to allow cloud services to detect the source and map +user-defined ``x-amz-meta-`` attributes, two additional new +attributes are added to the objects being transitioned: -:: - - x-rgw-cloud : true/false - (set to "true", by default, if the object is being transitioned from RGW) +* ``x-rgw-cloud`` : ``true``/``false`` - x-rgw-cloud-keep-attrs : true/false - (if set to default value "true", the cloud service should map and store all the x-amz-meta-* attributes. If it cannot, then the operation should fail. - if set to "false", the cloud service can ignore such attributes and just store the object data being sent.) + ``true``, by default, if the object is being transitioned from RGW. +* ``x-rgw-cloud-keep-attrs`` : ``true`` / ``false`` -By default, post transition, the source object gets deleted. But it is possible to retain its metadata but with updated values (like storage-class and object-size) by setting config option 'retain_head_object' to true. However GET on those objects shall still fail with 'InvalidObjectState' error. + If set to default ``true``, the cloud service should map and store all + ``the x-amz-meta-*`` attributes. If it cannot, then the operation should fail. 
+ if set to ``false``, the cloud service can ignore such attributes and + just store the object data being sent. -For example, -:: +By default, post-transition, the source object gets deleted. But it is possible +to retain its metadata but with updated values (including ``storage-class`` +and ``object-size``) by setting the config option ``retain_head_object`` +to true. However a ``GET`` operation on such an object will still fail +with an ``InvalidObjectState`` error. + +For example: +``` # s3cmd info s3://bucket/lc.txt s3://bucket/lc.txt (object): File size: 12 @@ -335,14 +372,17 @@ For example, # s3cmd get s3://bucket/lc.txt lc_restore.txt download: 's3://bucket/lc.txt' -> 'lc_restore.txt' [1 of 1] ERROR: S3 error: 403 (InvalidObjectState) +``` -To avoid object names collision across various buckets, source bucket name is prepended to the target object name. If the object is versioned, object versionid is appended to the end. +To avoid object name collisions across buckets, the source bucket name is +prepended to the target object name. If the object is versioned, the object's +``versionid`` is appended. -Below is the sample object name format: -:: +Below is the object name format: +``` s3:////(-) - +``` Versioned Objects ~~~~~~~~~~~~~~~~~ @@ -354,6 +394,12 @@ For versioned and locked objects, similar semantics as that of LifecycleExpirati * If the object is noncurrent and is locked, its transition is skipped. +Restoring Objects +----------------- +The objects transitioned to cloud can now be restored. For more information, refer to +[Restoring Objects from Cloud](https://docs.aws.amazon.com/AmazonS3/latest/dev/cloud-restore.html) + + Future Work ----------- -- 2.39.5