]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw/cloud-transition: add per-bucket target options
author Matthew N. Heler <matthew.heler@hotmail.com>
Sat, 22 Nov 2025 14:12:56 +0000 (08:12 -0600)
committer Matthew N. Heler <matthew.heler@hotmail.com>
Thu, 30 Apr 2026 15:33:38 +0000 (10:33 -0500)
Add per-bucket cloud tier targeting via the new options target_by_bucket
and target_by_bucket_prefix, and use them in the transition and restore
paths to derive the destination bucket name.

Signed-off-by: Matthew N. Heler <matthew.heler@hotmail.com>
doc/radosgw/cloud-transition.rst
src/rgw/driver/rados/rgw_lc_tier.cc
src/rgw/driver/rados/rgw_lc_tier.h
src/rgw/driver/rados/rgw_sal_rados.cc
src/rgw/rgw_zone.cc
src/rgw/rgw_zone_types.h

index 238996442ae39bef57d6605beb34fc219e57aae5..4ae46c19aaa3abd1d6214f857fd754b9c113c871 100644 (file)
@@ -50,6 +50,8 @@ Cloud Storage Class Tier Configuration
                 "dest_id": <dest_id> } ... ],
     "location_constraint": <location-constraint>,
     "target_path": <target_path>,
+    "target_by_bucket": <true | false>,
+    "target_by_bucket_prefix": <template>,
     "target_storage_class": <target-storage-class>,
     "multipart_sync_threshold": {object_size},
     "multipart_min_part_size": {part_size},
@@ -109,13 +111,53 @@ Cloud Transition Specific Configurables
 
   A string that defines how the target path is constructed. The target path
   specifies a prefix to which the source bucket-name/object-name is appended.
-  If not specified the ``target_path`` created is ``rgwx-${zonegroup}-${storage-class}-cloud-bucket``.
+  If not specified and ``target_by_bucket`` is ``false``, the ``target_path``
+  created is ``rgwx-${zonegroup}-${storage_class}-cloud-bucket``.
 
-  For example: ``target_path = rgwx-archive-${zonegroup}/``
+  Supports the template variables ``${zonegroup}`` and ``${storage_class}``.
+
+  The ``target_path`` value is only used when ``target_by_bucket`` is ``false``.
+
+  For example: ``target_path = rgwx-${zonegroup}-archive/``
+
+* ``target_by_bucket`` (boolean)
+
+  When enabled, each source bucket transitions to a dedicated destination
+  bucket rather than sharing a common target. Defaults to ``false`` to
+  preserve the legacy behavior.
+
+* ``target_by_bucket_prefix`` (string)
+
+  Optional template used when ``target_by_bucket`` is true to derive the
+  destination bucket name. Supports the variables ``${zonegroup}``,
+  ``${storage_class}``, ``${bucket}``, ``${tenant}``, and ``${owner}``.
+  The ``${owner}`` variable expands to the bucket owner's identifier
+  (the user ID for user-owned buckets, or the account ID for account-owned
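+  buckets). If unset, the template falls back to the built-in default
+  ``rgwx-${zonegroup}-${storage_class}-${bucket}``. If a custom template
+  does not contain ``${bucket}``, the source bucket name is appended to it,
+  preceded by ``-`` unless the template already ends in ``-`` or ``/``.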
+
+.. note::
+   S3 bucket naming constraints still apply on the destination side
+   (lowercase letters, numbers, hyphens, and periods; 3-63 characters; no
+   slashes). RGW lowercases the derived bucket name but does not otherwise
+   sanitize or truncate it. If you include slashes or invalid characters in
+   a custom template, or the expanded name is too long, bucket creation may
+   still fail on the target cloud.
+
+For example, to enable per-bucket targeting with a custom prefix:
+
+.. prompt:: bash #
+
+   radosgw-admin zonegroup placement modify --rgw-zonegroup default \
+                                              --placement-id default-placement \
+                                              --storage-class CLOUDTIER \
+                                              --tier-config=target_by_bucket=true \
+                                              --tier-config='target_by_bucket_prefix=archive-${owner}-${bucket}'
 
 * ``location_constraint`` (string)
 
-  Specifies the region where the target bucket will be created on the remote S3 endpoint. For AWS, this location needs to be specified only if the region is other than US East (us-east-1).
+  Specifies the region where the target bucket will be created on the remote
+  S3 endpoint. For AWS, specify this only if the region is other than
+  US East (``us-east-1``).
 
 * ``target_storage_class`` (string)
 
index f2fdf8e5c0a181d327e94ab567f5f3c9784d9eb3..8a714f298c580fd1710de9127faf47ed7c19563b 100644 (file)
@@ -97,8 +97,24 @@ static inline string get_key_oid(const rgw_obj_key& key)
 
 static inline string obj_to_aws_path(const rgw_obj& obj)
 {
-  string path = obj.bucket.name + "/" + get_key_oid(obj.key);
-  return path;
+  return obj.bucket.name + "/" + get_key_oid(obj.key);
+}
+
+static inline string make_target_obj_name(const RGWLCCloudTierCtx& tier_ctx)
+{
+  string target_obj_name;
+  if (tier_ctx.target_by_bucket) {
+    // Per-bucket targeting: object key only, no source bucket prefix
+    target_obj_name = tier_ctx.obj->get_name();
+  } else {
+    // Legacy: include source bucket name as prefix
+    target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
+                      tier_ctx.obj->get_name();
+  }
+  if (!tier_ctx.o.is_current()) {
+    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
+  }
+  return target_obj_name;
 }
 
 static int read_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver,
@@ -273,11 +289,7 @@ int rgw_cloud_tier_restore_object(RGWLCCloudTierCtx& tier_ctx,
 
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-                    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   if (!in_progress) { // first time. Send RESTORE req.
 
@@ -340,11 +352,7 @@ int rgw_cloud_tier_get_object(RGWLCCloudTierCtx& tier_ctx, bool head,
 
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-                    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
 
@@ -941,11 +949,7 @@ static int cloud_tier_plain_transfer(RGWLCCloudTierCtx& tier_ctx) {
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
 
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
 
@@ -985,11 +989,7 @@ static int cloud_tier_send_multipart_part(RGWLCCloudTierCtx& tier_ctx,
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
 
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
 
@@ -1344,11 +1344,7 @@ static int cloud_tier_multipart_transfer(RGWLCCloudTierCtx& tier_ctx) {
 
   target_bucket.name = tier_ctx.target_bucket_name;
 
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
   dest_obj.init(target_bucket, target_obj_name);
 
   rgw_pool pool = static_cast<rgw::sal::RadosStore*>(tier_ctx.driver)->svc()->zone->get_zone_params().log_pool;
index 44e2ea3883ab23f44d683a928b0105083b01bf7b..9b77514a1363adc87852f9b5eac06aad38cd27a5 100644 (file)
@@ -39,6 +39,7 @@ struct RGWLCCloudTierCtx {
 
   bool is_multipart_upload{false};
   bool target_bucket_created{true};
+  bool target_by_bucket{false};
 
   optional_yield y;
 
@@ -46,10 +47,12 @@ struct RGWLCCloudTierCtx {
       rgw_bucket_dir_entry& _o, rgw::sal::Driver *_driver,
       RGWBucketInfo &_binfo, rgw::sal::Object *_obj,
       RGWRESTConn& _conn, std::string& _bucket,
-      std::string& _storage_class, optional_yield _y) :
+      std::string& _storage_class, bool _target_by_bucket,
+      optional_yield _y) :
     cct(_cct), dpp(_dpp), o(_o), driver(_driver), bucket_info(_binfo),
     obj(_obj), conn(_conn), target_bucket_name(_bucket),
-    target_storage_class(_storage_class), y(_y) {}
+    target_storage_class(_storage_class),
+    target_by_bucket(_target_by_bucket), y(_y) {}
 };
 
 /* Transition object to cloud endpoint */
index 71350710e31cca94a811d52d2c87daf8a39c7bfd..8e78174741ab0dd0e46576ec78c9384f5309edf8 100644 (file)
@@ -3226,8 +3226,18 @@ int RadosObject::restore_obj_from_cloud(Bucket* bucket,
   RGWAccessKey key = rtier->get_rt().t.s3.key;
   string region = rtier->get_rt().t.s3.region;
   HostStyle host_style = rtier->get_rt().t.s3.host_style;
-  string bucket_name = rtier->get_rt().t.s3.target_path;
   const rgw::sal::ZoneGroup& zonegroup = store->get_zone()->get_zonegroup();
+  // extract owner (user_id or account_id depending on ownership type)
+  std::string owner;
+  if (const auto* acct = std::get_if<rgw_account_id>(&bucket->get_owner()); acct) {
+    owner = *acct;
+  } else if (const auto* user = std::get_if<rgw_user>(&bucket->get_owner()); user) {
+    owner = user->id;
+  }
+  string bucket_name = rtier->get_rt().t.s3.make_target_bucket_name(
+      zonegroup.get_name(),
+      tier->get_storage_class(), bucket->get_name(),
+      bucket->get_tenant(), owner);
   int ret = 0;
 
   auto& attrs = get_attrs();
@@ -3249,12 +3259,6 @@ int RadosObject::restore_obj_from_cloud(Bucket* bucket,
   // update tier_config in case tier params are updated
   tier_config.tier_placement = rtier->get_rt();
 
-  if (bucket_name.empty()) {
-    bucket_name = "rgwx-" + zonegroup.get_name() + "-" + tier->get_storage_class() +
-                    "-cloud-bucket";
-    boost::algorithm::to_lower(bucket_name);
-  }
-
   rgw_bucket_dir_entry ent;
   ent.key.name = get_key().name;
   ent.key.instance = get_key().instance;
@@ -3271,7 +3275,8 @@ int RadosObject::restore_obj_from_cloud(Bucket* bucket,
   // save source cloudtier storage class
   RGWLCCloudTierCtx tier_ctx(cct, dpp, ent, store, bucket->get_info(),
            this, conn, bucket_name,
-           rtier->get_rt().t.s3.target_storage_class, y);
+           rtier->get_rt().t.s3.target_storage_class,
+           rtier->get_rt().t.s3.target_by_bucket, y);
   tier_ctx.acl_mappings = rtier->get_rt().t.s3.acl_mappings;
   tier_ctx.multipart_min_part_size = rtier->get_rt().t.s3.multipart_min_part_size;
   tier_ctx.multipart_sync_threshold = rtier->get_rt().t.s3.multipart_sync_threshold;
@@ -3333,21 +3338,26 @@ int RadosObject::transition_to_cloud(Bucket* bucket,
   RGWAccessKey key = rtier->get_rt().t.s3.key;
   string region = rtier->get_rt().t.s3.region;
   HostStyle host_style = rtier->get_rt().t.s3.host_style;
-  string bucket_name = rtier->get_rt().t.s3.target_path;
   const rgw::sal::ZoneGroup& zonegroup = store->get_zone()->get_zonegroup();
-
-  if (bucket_name.empty()) {
-    bucket_name = "rgwx-" + zonegroup.get_name() + "-" + tier->get_storage_class() +
-                    "-cloud-bucket";
-    boost::algorithm::to_lower(bucket_name);
-  }
+  // extract owner (user_id or account_id depending on ownership type)
+  std::string owner;
+  if (const auto* acct = std::get_if<rgw_account_id>(&bucket->get_owner()); acct) {
+    owner = *acct;
+  } else if (const auto* user = std::get_if<rgw_user>(&bucket->get_owner()); user) {
+    owner = user->id;
+  }
+  string bucket_name = rtier->get_rt().t.s3.make_target_bucket_name(
+      zonegroup.get_name(),
+      tier->get_storage_class(), bucket->get_name(),
+      bucket->get_tenant(), owner);
 
   /* Create RGW REST connection */
   S3RESTConn conn(cct, id, { endpoint }, key, zonegroup.get_id(), region, host_style);
 
   RGWLCCloudTierCtx tier_ctx(cct, dpp, o, store, bucket->get_info(),
                             this, conn, bucket_name,
-                            rtier->get_rt().t.s3.target_storage_class, y);
+                            rtier->get_rt().t.s3.target_storage_class,
+                            rtier->get_rt().t.s3.target_by_bucket, y);
   tier_ctx.acl_mappings = rtier->get_rt().t.s3.acl_mappings;
   tier_ctx.multipart_min_part_size = rtier->get_rt().t.s3.multipart_min_part_size;
   tier_ctx.multipart_sync_threshold = rtier->get_rt().t.s3.multipart_sync_threshold;
index 77a88fd44c7fe61f7c1eedca67ec64dac9741413..8f9e88f303555c8712a4568c9525785bb77b4b41 100644 (file)
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 sts=2 expandtab ft=cpp
 
 #include <optional>
+#include <boost/algorithm/string.hpp>
 
 #include "common/errno.h"
 
@@ -665,11 +666,69 @@ void RGWZoneGroupPlacementTierS3::decode_json(JSONObj *obj)
   JSONDecoder::decode_json("location_constraint", location_constraint, obj);
   JSONDecoder::decode_json("target_storage_class", target_storage_class, obj);
   JSONDecoder::decode_json("target_path", target_path, obj);
+  JSONDecoder::decode_json("target_by_bucket", target_by_bucket, obj);
+  JSONDecoder::decode_json("target_by_bucket_prefix", target_by_bucket_prefix, obj);
   JSONDecoder::decode_json("acl_mappings", acl_mappings, obj);
   JSONDecoder::decode_json("multipart_sync_threshold", multipart_sync_threshold, obj);
   JSONDecoder::decode_json("multipart_min_part_size", multipart_min_part_size, obj);
 }
 
+std::string RGWZoneGroupPlacementTierS3::make_target_bucket_name(
+    const std::string& zonegroup_name,
+    const std::string& storage_class,
+    const std::string& bucket_name,
+    const std::string& tenant,
+    const std::string& owner) const
+{
+  auto substitute = [](std::string& target, const std::string& placeholder,
+                       const std::string& value) {
+    size_t pos = 0;
+    while ((pos = target.find(placeholder, pos)) != std::string::npos) {
+      target.replace(pos, placeholder.size(), value);
+      pos += value.size();
+    }
+  };
+
+  const bool has_custom_target = !target_path.empty();
+  const bool has_custom_bucket_prefix = !target_by_bucket_prefix.empty();
+
+  std::string templ;
+
+  if (target_by_bucket) {
+    if (has_custom_bucket_prefix) {
+      templ = target_by_bucket_prefix;
+    } else {
+      templ = "rgwx-${zonegroup}-${storage_class}-${bucket}";
+    }
+  } else {
+    if (has_custom_target) {
+      templ = target_path;
+    } else {
+      templ = "rgwx-${zonegroup}-${storage_class}-cloud-bucket";
+    }
+  }
+
+  if (target_by_bucket) {
+    const bool has_bucket_token = templ.find("${bucket}") != std::string::npos;
+    substitute(templ, "${bucket}", bucket_name);
+    if (!has_bucket_token) {
+      if (!templ.empty() && templ.back() != '-' && templ.back() != '/') {
+        templ.push_back('-');
+      }
+      templ.append(bucket_name);
+    }
+    substitute(templ, "${tenant}", tenant);
+    substitute(templ, "${owner}", owner);
+  }
+
+  substitute(templ, "${zonegroup}", zonegroup_name);
+  substitute(templ, "${storage_class}", storage_class);
+
+  boost::algorithm::to_lower(templ);
+
+  return templ;
+}
+
 void RGWZoneStorageClass::dump(Formatter *f) const
 {
   if (data_pool) {
@@ -723,6 +782,8 @@ void RGWZoneGroupPlacementTierS3::dump(Formatter *f) const
   encode_json("location_constraint", location_constraint, f);
   encode_json("target_storage_class", target_storage_class, f);
   encode_json("target_path", target_path, f);
+  encode_json("target_by_bucket", target_by_bucket, f);
+  encode_json("target_by_bucket_prefix", target_by_bucket_prefix, f);
   encode_json("acl_mappings", acl_mappings, f);
   encode_json("multipart_sync_threshold", multipart_sync_threshold, f);
   encode_json("multipart_min_part_size", multipart_min_part_size, f);
@@ -2020,6 +2081,19 @@ int RGWZoneGroupPlacementTierS3::update_params(const JSONFormattable& config)
   if (config.exists("target_path")) {
     target_path = config["target_path"];
   }
+  if (config.exists("target_by_bucket")) {
+    string s = config["target_by_bucket"];
+    target_by_bucket = (s == "true");
+  }
+  if (config.exists("target_by_bucket_prefix")) {
+    target_by_bucket_prefix = config["target_by_bucket_prefix"];
+  }
+  if (target_by_bucket) {
+    if (!target_by_bucket_prefix.empty() &&
+        target_by_bucket_prefix.find('/') != std::string::npos) {
+      ldout(g_ceph_context, 1) << "cloud tier target_by_bucket_prefix contains '/', which may be invalid for bucket names" << dendl;
+    }
+  }
   if (config.exists("region")) {
     region = config["region"];
   }
@@ -2087,6 +2161,12 @@ int RGWZoneGroupPlacementTierS3::clear_params(const JSONFormattable& config)
   if (config.exists("target_path")) {
     target_path.clear();
   }
+  if (config.exists("target_by_bucket")) {
+    target_by_bucket = false;
+  }
+  if (config.exists("target_by_bucket_prefix")) {
+    target_by_bucket_prefix.clear();
+  }
   if (config.exists("region")) {
     region.clear();
   }
index bc0457ef1b18c065f076d389d5de5190675dfebe..802e41ca69f4c74b984233531ba3d3697e20187a 100644 (file)
@@ -499,6 +499,8 @@ struct RGWZoneGroupPlacementTierS3 {
 
   /* Should below be bucket/zone specific?? */
   std::string target_path;
+  bool target_by_bucket{false};
+  std::string target_by_bucket_prefix;
   std::map<std::string, RGWTierACLMapping> acl_mappings;
 
   uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE};
@@ -506,9 +508,14 @@ struct RGWZoneGroupPlacementTierS3 {
 
   int update_params(const JSONFormattable& config);
   int clear_params(const JSONFormattable& config);
+  std::string make_target_bucket_name(const std::string& zonegroup_name,
+                                      const std::string& storage_class,
+                                      const std::string& bucket_name,
+                                      const std::string& tenant,
+                                      const std::string& owner = {}) const;
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(2, 1, bl);
+    ENCODE_START(3, 1, bl);
     encode(endpoint, bl);
     encode(key, bl);
     encode(region, bl);
@@ -519,11 +526,13 @@ struct RGWZoneGroupPlacementTierS3 {
     encode(multipart_sync_threshold, bl);
     encode(multipart_min_part_size, bl);
     encode(location_constraint, bl);
+    encode(target_by_bucket, bl);
+    encode(target_by_bucket_prefix, bl);
     ENCODE_FINISH(bl);
   }
 
   void decode(bufferlist::const_iterator& bl) {
-    DECODE_START(2, bl);
+    DECODE_START(3, bl);
     decode(endpoint, bl);
     decode(key, bl);
     decode(region, bl);
@@ -541,6 +550,10 @@ struct RGWZoneGroupPlacementTierS3 {
     if (struct_v >= 2) {
       decode(location_constraint, bl);
     }
+    if (struct_v >= 3) {
+      decode(target_by_bucket, bl);
+      decode(target_by_bucket_prefix, bl);
+    }
 
     DECODE_FINISH(bl);
   }