git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw/cloud-transition: add per-bucket target options
author Matthew N. Heler <matthew.heler@hotmail.com>
Sat, 22 Nov 2025 14:12:56 +0000 (08:12 -0600)
committer Matthew N. Heler <matthew.heler@hotmail.com>
Mon, 16 Feb 2026 13:18:59 +0000 (07:18 -0600)
Add per-bucket cloud tier targeting via the new options target_by_bucket
and target_by_bucket_prefix, and use them in transition/restore to
derive the destination bucket name.

Signed-off-by: Matthew N. Heler <matthew.heler@hotmail.com>
doc/radosgw/cloud-transition.rst
src/rgw/driver/rados/rgw_lc_tier.cc
src/rgw/driver/rados/rgw_lc_tier.h
src/rgw/driver/rados/rgw_sal_rados.cc
src/rgw/rgw_zone.cc
src/rgw/rgw_zone_types.h

index 665d4c1bb3fd5ddd860ab5c32d2fe537127ecdb5..4ae50ccc0b8d8bfa4a3f0c7c486ab0f732571220 100644 (file)
@@ -50,6 +50,8 @@ Cloud Storage Class Tier Configuration
                 "dest_id": <dest_id> } ... ],
     "location_constraint": <location-constraint>,
     "target_path": <target_path>,
+    "target_by_bucket": <true | false>,
+    "target_by_bucket_prefix": <template>,
     "target_storage_class": <target-storage-class>,
     "multipart_sync_threshold": {object_size},
     "multipart_min_part_size": {part_size},
@@ -109,13 +111,53 @@ Cloud Transition Specific Configurables
 
   A string that defines how the target path is constructed. The target path
   specifies a prefix to which the source bucket-name/object-name is appended.
-  If not specified the ``target_path`` created is ``rgwx-${zonegroup}-${storage-class}-cloud-bucket``.
+  If not specified and ``target_by_bucket`` is ``false``, the ``target_path``
+  created is ``rgwx-${zonegroup}-${storage_class}-cloud-bucket``.
 
-  For example: ``target_path = rgwx-archive-${zonegroup}/``
+  Supports the template variables ``${zonegroup}`` and ``${storage_class}``.
+
+  The ``target_path`` value is only used when ``target_by_bucket`` is ``false``.
+
+  For example: ``target_path = rgwx-${zonegroup}-archive/``
+
+* ``target_by_bucket`` (boolean)
+
+  When enabled, each source bucket transitions to a dedicated destination
+  bucket rather than sharing a common target. Defaults to ``false`` to
+  preserve the legacy behavior.
+
+* ``target_by_bucket_prefix`` (string)
+
+  Optional template used when ``target_by_bucket`` is true to derive the
+  destination bucket name. Supports the variables ``${zonegroup}``,
+  ``${storage_class}``, ``${bucket}``, ``${tenant}``, and ``${owner}``.
+  ``${owner}`` expands to the bucket owner's identifier (the user ID for
+  user-owned buckets, or the account ID for account-owned buckets). If the
+  template omits ``${bucket}``, the bucket name is appended to it. If unset,
+  the default ``rgwx-${zonegroup}-${storage_class}-${bucket}`` is used.
+
+.. note::
+   S3 bucket naming constraints still apply on the destination side
+   (lowercase letters, numbers, hyphens and periods; 3-63 characters; no
+   slashes). RGW lowercases the derived bucket name but does not otherwise
+   validate it, so a custom template containing slashes or other invalid
+   characters may still cause bucket creation to fail on the target cloud.
+
+For example, to enable per-bucket targeting with a custom prefix:
+
+.. prompt:: bash #
+
+   radosgw-admin zonegroup placement modify --rgw-zonegroup default \
+                                              --placement-id default-placement \
+                                              --storage-class CLOUDTIER \
+                                              --tier-config=target_by_bucket=true,\
+   target_by_bucket_prefix='archive-${owner}-${bucket}'
 
 * ``location_constraint`` (string)
 
-  Specifies the region where the target bucket will be created on the remote S3 endpoint. For AWS, this location needs to be specified only if the region is other than US East (us-east-1).
+  Specifies the region where the target bucket will be created on the remote
+  S3 endpoint. For AWS, specify this only if the region is other than
+  US East (``us-east-1``).
 
 * ``target_storage_class`` (string)
 
index 15aa5653652ee604d893e0cdbcf4d73e1cdc49ba..046072e56516c6a62626018abc2427a926bec2e0 100644 (file)
@@ -97,8 +97,24 @@ static inline string get_key_oid(const rgw_obj_key& key)
 
 static inline string obj_to_aws_path(const rgw_obj& obj)
 {
-  string path = obj.bucket.name + "/" + get_key_oid(obj.key);
-  return path;
+  return obj.bucket.name + "/" + get_key_oid(obj.key);
+}
+
+static inline string make_target_obj_name(const RGWLCCloudTierCtx& tier_ctx)
+{
+  string target_obj_name;
+  if (tier_ctx.target_by_bucket) {
+    // Per-bucket targeting: object key only, no source bucket prefix
+    target_obj_name = tier_ctx.obj->get_name();
+  } else {
+    // Legacy: include source bucket name as prefix
+    target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
+                      tier_ctx.obj->get_name();
+  }
+  if (!tier_ctx.o.is_current()) {
+    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
+  }
+  return target_obj_name;
 }
 
 static int read_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver,
@@ -271,11 +287,7 @@ int rgw_cloud_tier_restore_object(RGWLCCloudTierCtx& tier_ctx,
 
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-                    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   if (!in_progress) { // first time. Send RESTORE req.
 
@@ -338,11 +350,7 @@ int rgw_cloud_tier_get_object(RGWLCCloudTierCtx& tier_ctx, bool head,
 
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-                    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
 
@@ -920,11 +928,7 @@ static int cloud_tier_plain_transfer(RGWLCCloudTierCtx& tier_ctx) {
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
 
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
 
@@ -964,11 +968,7 @@ static int cloud_tier_send_multipart_part(RGWLCCloudTierCtx& tier_ctx,
   rgw_bucket dest_bucket;
   dest_bucket.name = tier_ctx.target_bucket_name;
 
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
 
   rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
 
@@ -1321,11 +1321,7 @@ static int cloud_tier_multipart_transfer(RGWLCCloudTierCtx& tier_ctx) {
 
   target_bucket.name = tier_ctx.target_bucket_name;
 
-  target_obj_name = tier_ctx.bucket_info.bucket.name + "/" +
-    tier_ctx.obj->get_name();
-  if (!tier_ctx.o.is_current()) {
-    target_obj_name += get_key_instance(tier_ctx.obj->get_key());
-  }
+  target_obj_name = make_target_obj_name(tier_ctx);
   dest_obj.init(target_bucket, target_obj_name);
 
   rgw_pool pool = static_cast<rgw::sal::RadosStore*>(tier_ctx.driver)->svc()->zone->get_zone_params().log_pool;
index b6759d39bd6e36a7112da357675dfe9d902d92e4..0a8012cb13d3cbc8499fdb49dbdf10ba4b8e5ec4 100644 (file)
@@ -39,15 +39,16 @@ struct RGWLCCloudTierCtx {
 
   bool is_multipart_upload{false};
   bool target_bucket_created{true};
+  bool target_by_bucket{false};
 
   RGWLCCloudTierCtx(CephContext* _cct, const DoutPrefixProvider *_dpp,
       rgw_bucket_dir_entry& _o, rgw::sal::Driver *_driver,
       RGWBucketInfo &_binfo, rgw::sal::Object *_obj,
       RGWRESTConn& _conn, std::string& _bucket,
-      std::string& _storage_class) :
+      std::string& _storage_class, bool _target_by_bucket = false) :
     cct(_cct), dpp(_dpp), o(_o), driver(_driver), bucket_info(_binfo),
     obj(_obj), conn(_conn), target_bucket_name(_bucket),
-    target_storage_class(_storage_class) {}
+    target_storage_class(_storage_class), target_by_bucket(_target_by_bucket) {}
 };
 
 /* Transition object to cloud endpoint */
index b193a0a4c5c70262d9028659919117185c3bfdeb..c6f23aa5d56b6d29faeb17c041266a83bdb5359f 100644 (file)
@@ -3082,8 +3082,18 @@ int RadosObject::restore_obj_from_cloud(Bucket* bucket,
   RGWAccessKey key = rtier->get_rt().t.s3.key;
   string region = rtier->get_rt().t.s3.region;
   HostStyle host_style = rtier->get_rt().t.s3.host_style;
-  string bucket_name = rtier->get_rt().t.s3.target_path;
   const rgw::sal::ZoneGroup& zonegroup = store->get_zone()->get_zonegroup();
+  // extract owner (user_id or account_id depending on ownership type)
+  std::string owner;
+  if (const auto* acct = std::get_if<rgw_account_id>(&bucket->get_owner()); acct) {
+    owner = *acct;
+  } else if (const auto* user = std::get_if<rgw_user>(&bucket->get_owner()); user) {
+    owner = user->id;
+  }
+  string bucket_name = rtier->get_rt().t.s3.make_target_bucket_name(
+      zonegroup.get_name(),
+      tier->get_storage_class(), bucket->get_name(),
+      bucket->get_tenant(), owner);
   int ret = 0;
 
   auto& attrs = get_attrs();
@@ -3105,12 +3115,6 @@ int RadosObject::restore_obj_from_cloud(Bucket* bucket,
   // update tier_config in case tier params are updated
   tier_config.tier_placement = rtier->get_rt();
 
-  if (bucket_name.empty()) {
-    bucket_name = "rgwx-" + zonegroup.get_name() + "-" + tier->get_storage_class() +
-                    "-cloud-bucket";
-    boost::algorithm::to_lower(bucket_name);
-  }
-
   rgw_bucket_dir_entry ent;
   ent.key.name = get_key().name;
   ent.key.instance = get_key().instance;
@@ -3127,7 +3131,8 @@ int RadosObject::restore_obj_from_cloud(Bucket* bucket,
   // save source cloudtier storage class
   RGWLCCloudTierCtx tier_ctx(cct, dpp, ent, store, bucket->get_info(),
            this, conn, bucket_name,
-           rtier->get_rt().t.s3.target_storage_class);
+           rtier->get_rt().t.s3.target_storage_class,
+           rtier->get_rt().t.s3.target_by_bucket);
   tier_ctx.acl_mappings = rtier->get_rt().t.s3.acl_mappings;
   tier_ctx.multipart_min_part_size = rtier->get_rt().t.s3.multipart_min_part_size;
   tier_ctx.multipart_sync_threshold = rtier->get_rt().t.s3.multipart_sync_threshold;
@@ -3189,21 +3194,26 @@ int RadosObject::transition_to_cloud(Bucket* bucket,
   RGWAccessKey key = rtier->get_rt().t.s3.key;
   string region = rtier->get_rt().t.s3.region;
   HostStyle host_style = rtier->get_rt().t.s3.host_style;
-  string bucket_name = rtier->get_rt().t.s3.target_path;
   const rgw::sal::ZoneGroup& zonegroup = store->get_zone()->get_zonegroup();
-
-  if (bucket_name.empty()) {
-    bucket_name = "rgwx-" + zonegroup.get_name() + "-" + tier->get_storage_class() +
-                    "-cloud-bucket";
-    boost::algorithm::to_lower(bucket_name);
-  }
+  // extract owner (user_id or account_id depending on ownership type)
+  std::string owner;
+  if (const auto* acct = std::get_if<rgw_account_id>(&bucket->get_owner()); acct) {
+    owner = *acct;
+  } else if (const auto* user = std::get_if<rgw_user>(&bucket->get_owner()); user) {
+    owner = user->id;
+  }
+  string bucket_name = rtier->get_rt().t.s3.make_target_bucket_name(
+      zonegroup.get_name(),
+      tier->get_storage_class(), bucket->get_name(),
+      bucket->get_tenant(), owner);
 
   /* Create RGW REST connection */
   S3RESTConn conn(cct, id, { endpoint }, key, zonegroup.get_id(), region, host_style);
 
   RGWLCCloudTierCtx tier_ctx(cct, dpp, o, store, bucket->get_info(),
                             this, conn, bucket_name,
-                            rtier->get_rt().t.s3.target_storage_class);
+                            rtier->get_rt().t.s3.target_storage_class,
+                            rtier->get_rt().t.s3.target_by_bucket);
   tier_ctx.acl_mappings = rtier->get_rt().t.s3.acl_mappings;
   tier_ctx.multipart_min_part_size = rtier->get_rt().t.s3.multipart_min_part_size;
   tier_ctx.multipart_sync_threshold = rtier->get_rt().t.s3.multipart_sync_threshold;
index 9c5cb513939704e65cb7cc68e861e84fb008f800..703def7547200e7d9f80fc8305ce652d7a4da14c 100644 (file)
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 sts=2 expandtab ft=cpp
 
 #include <optional>
+#include <boost/algorithm/string.hpp>
 
 #include "common/errno.h"
 
@@ -662,11 +663,69 @@ void RGWZoneGroupPlacementTierS3::decode_json(JSONObj *obj)
   JSONDecoder::decode_json("location_constraint", location_constraint, obj);
   JSONDecoder::decode_json("target_storage_class", target_storage_class, obj);
   JSONDecoder::decode_json("target_path", target_path, obj);
+  JSONDecoder::decode_json("target_by_bucket", target_by_bucket, obj);
+  JSONDecoder::decode_json("target_by_bucket_prefix", target_by_bucket_prefix, obj);
   JSONDecoder::decode_json("acl_mappings", acl_mappings, obj);
   JSONDecoder::decode_json("multipart_sync_threshold", multipart_sync_threshold, obj);
   JSONDecoder::decode_json("multipart_min_part_size", multipart_min_part_size, obj);
 }
 
+std::string RGWZoneGroupPlacementTierS3::make_target_bucket_name(
+    const std::string& zonegroup_name,
+    const std::string& storage_class,
+    const std::string& bucket_name,
+    const std::string& tenant,
+    const std::string& owner) const
+{
+  auto substitute = [](std::string& target, const std::string& placeholder,
+                       const std::string& value) {
+    size_t pos = 0;
+    while ((pos = target.find(placeholder, pos)) != std::string::npos) {
+      target.replace(pos, placeholder.size(), value);
+      pos += value.size();
+    }
+  };
+
+  const bool has_custom_target = !target_path.empty();
+  const bool has_custom_bucket_prefix = !target_by_bucket_prefix.empty();
+
+  std::string templ;
+
+  if (target_by_bucket) {
+    if (has_custom_bucket_prefix) {
+      templ = target_by_bucket_prefix;
+    } else {
+      templ = "rgwx-${zonegroup}-${storage_class}-${bucket}";
+    }
+  } else {
+    if (has_custom_target) {
+      templ = target_path;
+    } else {
+      templ = "rgwx-${zonegroup}-${storage_class}-cloud-bucket";
+    }
+  }
+
+  if (target_by_bucket) {
+    const bool has_bucket_token = templ.find("${bucket}") != std::string::npos;
+    substitute(templ, "${bucket}", bucket_name);
+    if (!has_bucket_token) {
+      if (!templ.empty() && templ.back() != '-' && templ.back() != '/') {
+        templ.push_back('-');
+      }
+      templ.append(bucket_name);
+    }
+    substitute(templ, "${tenant}", tenant);
+    substitute(templ, "${owner}", owner);
+  }
+
+  substitute(templ, "${zonegroup}", zonegroup_name);
+  substitute(templ, "${storage_class}", storage_class);
+
+  boost::algorithm::to_lower(templ);
+
+  return templ;
+}
+
 void RGWZoneStorageClass::dump(Formatter *f) const
 {
   if (data_pool) {
@@ -720,6 +779,8 @@ void RGWZoneGroupPlacementTierS3::dump(Formatter *f) const
   encode_json("location_constraint", location_constraint, f);
   encode_json("target_storage_class", target_storage_class, f);
   encode_json("target_path", target_path, f);
+  encode_json("target_by_bucket", target_by_bucket, f);
+  encode_json("target_by_bucket_prefix", target_by_bucket_prefix, f);
   encode_json("acl_mappings", acl_mappings, f);
   encode_json("multipart_sync_threshold", multipart_sync_threshold, f);
   encode_json("multipart_min_part_size", multipart_min_part_size, f);
@@ -1959,6 +2020,19 @@ int RGWZoneGroupPlacementTierS3::update_params(const JSONFormattable& config)
   if (config.exists("target_path")) {
     target_path = config["target_path"];
   }
+  if (config.exists("target_by_bucket")) {
+    string s = config["target_by_bucket"];
+    target_by_bucket = (s == "true");
+  }
+  if (config.exists("target_by_bucket_prefix")) {
+    target_by_bucket_prefix = config["target_by_bucket_prefix"];
+  }
+  if (target_by_bucket) {
+    if (!target_by_bucket_prefix.empty() &&
+        target_by_bucket_prefix.find('/') != std::string::npos) {
+      ldout(g_ceph_context, 1) << "cloud tier target_by_bucket_prefix contains '/', which may be invalid for bucket names" << dendl;
+    }
+  }
   if (config.exists("region")) {
     region = config["region"];
   }
@@ -2026,6 +2100,12 @@ int RGWZoneGroupPlacementTierS3::clear_params(const JSONFormattable& config)
   if (config.exists("target_path")) {
     target_path.clear();
   }
+  if (config.exists("target_by_bucket")) {
+    target_by_bucket = false;
+  }
+  if (config.exists("target_by_bucket_prefix")) {
+    target_by_bucket_prefix.clear();
+  }
   if (config.exists("region")) {
     region.clear();
   }
index bc0457ef1b18c065f076d389d5de5190675dfebe..802e41ca69f4c74b984233531ba3d3697e20187a 100644 (file)
@@ -499,6 +499,8 @@ struct RGWZoneGroupPlacementTierS3 {
 
   /* Should below be bucket/zone specific?? */
   std::string target_path;
+  bool target_by_bucket{false};
+  std::string target_by_bucket_prefix;
   std::map<std::string, RGWTierACLMapping> acl_mappings;
 
   uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE};
@@ -506,9 +508,14 @@ struct RGWZoneGroupPlacementTierS3 {
 
   int update_params(const JSONFormattable& config);
   int clear_params(const JSONFormattable& config);
+  std::string make_target_bucket_name(const std::string& zonegroup_name,
+                                      const std::string& storage_class,
+                                      const std::string& bucket_name,
+                                      const std::string& tenant,
+                                      const std::string& owner = {}) const;
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(2, 1, bl);
+    ENCODE_START(3, 1, bl);
     encode(endpoint, bl);
     encode(key, bl);
     encode(region, bl);
@@ -519,11 +526,13 @@ struct RGWZoneGroupPlacementTierS3 {
     encode(multipart_sync_threshold, bl);
     encode(multipart_min_part_size, bl);
     encode(location_constraint, bl);
+    encode(target_by_bucket, bl);
+    encode(target_by_bucket_prefix, bl);
     ENCODE_FINISH(bl);
   }
 
   void decode(bufferlist::const_iterator& bl) {
-    DECODE_START(2, bl);
+    DECODE_START(3, bl);
     decode(endpoint, bl);
     decode(key, bl);
     decode(region, bl);
@@ -541,6 +550,10 @@ struct RGWZoneGroupPlacementTierS3 {
     if (struct_v >= 2) {
       decode(location_constraint, bl);
     }
+    if (struct_v >= 3) {
+      decode(target_by_bucket, bl);
+      decode(target_by_bucket_prefix, bl);
+    }
 
     DECODE_FINISH(bl);
   }