]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: add RestoreStatus support to object listings 67547/head
author: Matthew N. Heler <matthew.heler@hotmail.com>
Thu, 26 Feb 2026 01:03:56 +0000 (19:03 -0600)
committer: Matthew N. Heler <matthew.heler@hotmail.com>
Tue, 28 Apr 2026 20:33:11 +0000 (15:33 -0500)
S3 clients can request restore status in listing responses through the
x-amz-optional-object-attributes header, but we had no support for it.
This stores the restore state in the bucket index so listings can
include <RestoreStatus> without having to read each object's attrs
individually.

Signed-off-by: Matthew N. Heler <matthew.heler@hotmail.com>
PendingReleaseNotes
src/cls/rgw/cls_rgw_types.cc
src/cls/rgw/cls_rgw_types.h
src/rgw/driver/rados/rgw_rados.cc
src/rgw/driver/rados/rgw_rados.h
src/rgw/rgw_rest_s3.cc
src/rgw/rgw_rest_s3.h

index 8a691de692711653a3eff0ba4d63fa0eb3fadd9e..e8434cf45068535500a847f7ea422e48c77263e7 100644 (file)
@@ -4,6 +4,11 @@
   developers must update their method calls to use these new structs, which ensure
   read/write semantics are correctly applied.
 
+* RGW: S3 ListObjects and ListObjectVersions now support the
+  ``x-amz-optional-object-attributes: RestoreStatus`` request header to include
+  restore status in listing responses. Restore status is stored in the bucket
+  index, so only objects written or restored after this upgrade will populate
+  the field. Existing objects are unaffected.
 * ceph-volume: Raw BlueStore OSD preparation now pre-formats NVMe devices and
   skips the slower BlueStore discard phase, reducing mkfs time on
   very large namespaces.
index 036a63dfe030de60a734d85b76983eef020d236f..cc9f9e3ec43a234a2042231588cb7802c86cb876 100644 (file)
@@ -188,6 +188,8 @@ list<rgw_bucket_dir_entry_meta> rgw_bucket_dir_entry_meta::generate_test_instanc
   m.owner = "owner";
   m.owner_display_name = "display name";
   m.content_type = "content/type";
+  m.restore_status = 2; // CloudRestored
+  m.restore_expiry_date = ceph::real_time{std::chrono::seconds(1234567890)};
   o.push_back(std::move(m));
   o.emplace_back();
   return o;
@@ -209,6 +211,8 @@ void rgw_bucket_dir_entry_meta::dump(Formatter *f) const
   encode_json("accounted_size", accounted_size, f);
   encode_json("user_data", user_data, f);
   encode_json("appendable", appendable, f);
+  encode_json("restore_status", static_cast<int>(restore_status), f);
+  encode_json("restore_expiry_date", restore_expiry_date, f);
 }
 
 void rgw_bucket_dir_entry_meta::decode_json(JSONObj *obj) {
@@ -227,6 +231,10 @@ void rgw_bucket_dir_entry_meta::decode_json(JSONObj *obj) {
   JSONDecoder::decode_json("accounted_size", accounted_size, obj);
   JSONDecoder::decode_json("user_data", user_data, obj);
   JSONDecoder::decode_json("appendable", appendable, obj);
+  int rs_val = 0;
+  JSONDecoder::decode_json("restore_status", rs_val, obj);
+  restore_status = static_cast<uint8_t>(rs_val);
+  JSONDecoder::decode_json("restore_expiry_date", restore_expiry_date, obj);
 }
 
 list<rgw_bucket_dir_entry> rgw_bucket_dir_entry::generate_test_instances()
index 72cdbf3f872f20114c0d14bf8c11d9b43e022861..caea0407061b809fae7ae0179efad5c6a4513555 100644 (file)
@@ -215,9 +215,11 @@ struct rgw_bucket_dir_entry_meta {
   std::string user_data;
   std::string storage_class;
   bool appendable = false;
+  uint8_t restore_status = 0; // maps to RGWRestoreStatus enum
+  ceph::real_time restore_expiry_date; // zero when N/A
 
   void encode(ceph::buffer::list &bl) const {
-    ENCODE_START(7, 3, bl);
+    ENCODE_START(8, 3, bl);
     encode(category, bl);
     encode(size, bl);
     encode(mtime, bl);
@@ -229,11 +231,13 @@ struct rgw_bucket_dir_entry_meta {
     encode(user_data, bl);
     encode(storage_class, bl);
     encode(appendable, bl);
+    encode(restore_status, bl);
+    encode(restore_expiry_date, bl);
     ENCODE_FINISH(bl);
   }
 
   void decode(ceph::buffer::list::const_iterator &bl) {
-    DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, bl);
+    DECODE_START_LEGACY_COMPAT_LEN(8, 3, 3, bl);
     decode(category, bl);
     decode(size, bl);
     decode(mtime, bl);
@@ -252,6 +256,10 @@ struct rgw_bucket_dir_entry_meta {
       decode(storage_class, bl);
     if (struct_v >= 7)
       decode(appendable, bl);
+    if (struct_v >= 8) {
+      decode(restore_status, bl);
+      decode(restore_expiry_date, bl);
+    }
     DECODE_FINISH(bl);
   }
   void dump(ceph::Formatter *f) const;
index c495903d9041981f396fb531eb0b18dce4010be1..8bfe9f60d37fa083fa18a93762c76ac03ac87af9 100644 (file)
@@ -154,6 +154,52 @@ static inline void read_attr(std::map<std::string, bufferlist>& attrs,
   }
 }
 
+/**
+ * Decode restore status and expiry date from an attrs map for the bucket index.
+ * Tries primary first; if a key is missing and fallback is non-null, tries fallback.
+ *
+ * primary             - attrs map consulted first for both keys.
+ * fallback            - optional second attrs map; may be nullptr, in which
+ *                       case only primary is consulted.
+ * restore_status      - output; receives the decoded RGWRestoreStatus cast to
+ *                       uint8_t, or 0 when nothing usable was found.
+ * restore_expiry_date - output; receives the decoded expiry time, or a
+ *                       default-constructed real_time when nothing usable
+ *                       was found.
+ *
+ * Both outputs are reset to those defaults before any lookup, so the caller's
+ * previous values never survive the call.
+ *
+ * The two keys (RGW_ATTR_RESTORE_STATUS and RGW_ATTR_RESTORE_EXPIRY_DATE) are
+ * resolved independently, so one may come from primary and the other from
+ * fallback. The fallback is consulted only when the key is absent from
+ * primary: a key that is present in primary with an empty value is not looked
+ * up in fallback, and the empty buffer is then skipped without decoding.
+ *
+ * Decoding is best-effort: a buffer::error thrown while decoding either value
+ * is caught and ignored, and nothing is reported to the caller.
+ */
+static void decode_restore_index_fields(
+    const rgw::sal::Attrs& primary,
+    const rgw::sal::Attrs* fallback,
+    uint8_t& restore_status,
+    ceph::real_time& restore_expiry_date)
+{
+  restore_status = 0; // default reported when no status attr can be decoded
+  restore_expiry_date = {}; // default-constructed time stands for "no expiry"
+
+  bufferlist rs_bl; // stays empty when neither map holds the status key
+  if (auto it = primary.find(RGW_ATTR_RESTORE_STATUS); it != primary.end()) {
+    rs_bl = it->second;
+  } else if (fallback) {
+    if (auto it2 = fallback->find(RGW_ATTR_RESTORE_STATUS); it2 != fallback->end()) {
+      rs_bl = it2->second;
+    }
+  }
+  if (rs_bl.length()) { // an empty buffer is treated the same as a missing key
+    try {
+      rgw::sal::RGWRestoreStatus rs;
+      auto bl_iter = rs_bl.cbegin();
+      decode(rs, bl_iter);
+      restore_status = static_cast<uint8_t>(rs); // only reached when decode succeeded
+    } catch (buffer::error&) {} // malformed attr: restore_status keeps the 0 set above
+  }
+
+  bufferlist re_bl; // stays empty when neither map holds the expiry key
+  if (auto it = primary.find(RGW_ATTR_RESTORE_EXPIRY_DATE); it != primary.end()) {
+    re_bl = it->second;
+  } else if (fallback) {
+    if (auto it2 = fallback->find(RGW_ATTR_RESTORE_EXPIRY_DATE); it2 != fallback->end()) {
+      re_bl = it2->second;
+    }
+  }
+  if (re_bl.length()) { // an empty buffer is treated the same as a missing key
+    try {
+      auto bl_iter = re_bl.cbegin();
+      decode(restore_expiry_date, bl_iter); // decoded straight into the output argument
+    } catch (buffer::error&) {} // NOTE(review): output after a failed decode depends on the decoder; presumably unchanged - confirm
+  }
+}
+
 rgw_raw_obj rgw_obj_select::get_raw_obj(RGWRados* store) const
 {
   if (!is_raw) {
@@ -3434,6 +3480,12 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
     }
   }
 
+  // extract restore fields for bucket index
+  uint8_t idx_restore_status = 0;
+  ceph::real_time idx_restore_expiry_date;
+  decode_restore_index_fields(attrs, nullptr,
+                              idx_restore_status, idx_restore_expiry_date);
+
   if (!op.size())
     return 0;
 
@@ -3494,7 +3546,8 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
                         meta.set_mtime, etag, content_type,
                         storage_class, meta.owner,
                         meta.category, meta.remove_objs, rctx.y,
-                        meta.user_data, meta.appendable, log_op);
+                        meta.user_data, meta.appendable, log_op,
+                        idx_restore_status, idx_restore_expiry_date);
   tracepoint(rgw_rados, complete_exit, req_id.c_str());
   if (r < 0)
     goto done_cancel;
@@ -4097,6 +4150,12 @@ int RGWRados::reindex_obj(rgw::sal::Driver* driver,
   read_attr(attr_set, RGW_ATTR_OLH_INFO, olh_info_bl, &found_olh_info);
   read_attr(attr_set, RGW_ATTR_APPEND_PART_NUM, part_num_bl, &appendable);
 
+  // extract restore fields for bucket index
+  uint8_t idx_restore_status = 0;
+  ceph::real_time idx_restore_expiry_date;
+  decode_restore_index_fields(attr_set, nullptr,
+                              idx_restore_status, idx_restore_expiry_date);
+
   // check for a pure OLH object and if so exit early
   if (found_olh_info) {
     try {
@@ -4175,7 +4234,10 @@ int RGWRados::reindex_obj(rgw::sal::Driver* driver,
                            nullptr, // remove_objs list
                            y,
                            nullptr, // user data string
-                           appendable);
+                           appendable,
+                           true, // log_op
+                           idx_restore_status,
+                           idx_restore_expiry_date);
   if (ret < 0) {
     ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
       ": update index complete for " << p(head_obj) << " returned: " <<
@@ -4197,6 +4259,8 @@ int RGWRados::reindex_obj(rgw::sal::Driver* driver,
     meta.etag = etag;
     meta.content_type = content_type;
     meta.appendable = appendable;
+    meta.restore_status = idx_restore_status;
+    meta.restore_expiry_date = idx_restore_expiry_date;
 
     ret = link_helper(false, meta, "linking version");
   } // if bucket is versioned
@@ -7615,33 +7679,52 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu
       }
       int64_t poolid = ioctx.get_id();
 
-      // Retain Object category as CloudTiered while restore is in
-      // progress or failed or if its temporarily restored copy
+      /*
+       * Retain Object category as CloudTiered while restore is in
+       * progress or failed or if its temporarily restored copy.
+       * Check new attrs first, fall back to existing attrs for partial updates.
+       */
       RGWObjCategory category = RGWObjCategory::Main;
-      auto r_iter = attrs.find(RGW_ATTR_RESTORE_STATUS);
-      auto t_iter = attrs.find(RGW_ATTR_RESTORE_TYPE);
-      if (r_iter != attrs.end()) {
-        rgw::sal::RGWRestoreStatus st = rgw::sal::RGWRestoreStatus::None;
-        auto iter = r_iter->second.cbegin();
-
+      bufferlist rs_bl, rt_bl;
+      if (auto it = attrs.find(RGW_ATTR_RESTORE_STATUS); it != attrs.end()) {
+        rs_bl = it->second;
+      } else if (auto it2 = state->attrset.find(RGW_ATTR_RESTORE_STATUS);
+                 it2 != state->attrset.end()) {
+        rs_bl = it2->second;
+      }
+      if (rs_bl.length()) {
         try {
           using ceph::decode;
-          decode(st, iter);
+          rgw::sal::RGWRestoreStatus st = rgw::sal::RGWRestoreStatus::None;
+          auto bl_iter = rs_bl.cbegin();
+          decode(st, bl_iter);
 
           if (st != rgw::sal::RGWRestoreStatus::CloudRestored) {
             category = RGWObjCategory::CloudTiered;
           } else { // check if its temporary copy
-            if (t_iter != attrs.end()) {
+            if (auto it = attrs.find(RGW_ATTR_RESTORE_TYPE); it != attrs.end()) {
+              rt_bl = it->second;
+            } else if (auto it2 = state->attrset.find(RGW_ATTR_RESTORE_TYPE);
+                       it2 != state->attrset.end()) {
+              rt_bl = it2->second;
+            }
+            if (rt_bl.length()) {
               rgw::sal::RGWRestoreType rt;
-              decode(rt, t_iter->second);
+              decode(rt, rt_bl);
 
               if (rt == rgw::sal::RGWRestoreType::Temporary) {
                 category = RGWObjCategory::CloudTiered;
                 // temporary restore; set storage-class to cloudtier storage class
-                auto c_iter = attrs.find(RGW_ATTR_CLOUDTIER_STORAGE_CLASS);
-
-                if (c_iter != attrs.end()) {
-                  storage_class = rgw_bl_str(c_iter->second);
+                bufferlist sc_bl;
+                if (auto it = attrs.find(RGW_ATTR_CLOUDTIER_STORAGE_CLASS);
+                    it != attrs.end()) {
+                  sc_bl = it->second;
+                } else if (auto it2 = state->attrset.find(RGW_ATTR_CLOUDTIER_STORAGE_CLASS);
+                           it2 != state->attrset.end()) {
+                  sc_bl = it2->second;
+                }
+                if (sc_bl.length()) {
+                  storage_class = rgw_bl_str(sc_bl);
                 }
               }
             }
@@ -7649,10 +7732,17 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu
         } catch (buffer::error& err) {
         }
       }
+      // extract restore fields for index, with partial-update fallback
+      uint8_t idx_restore_status = 0;
+      ceph::real_time idx_restore_expiry_date;
+      decode_restore_index_fields(attrs, &state->attrset,
+                                  idx_restore_status, idx_restore_expiry_date);
+
            ldpp_dout(dpp, 20) << "Setting obj category:" << category << ", storage_class:" << storage_class << dendl;
       r = index_op.complete(dpp, poolid, epoch, state->size, state->accounted_size,
                             mtime, etag, content_type, storage_class, owner,
-                            category, nullptr, y, nullptr, false, log_op);
+                            category, nullptr, y, nullptr, false, log_op,
+                            idx_restore_status, idx_restore_expiry_date);
     } else {
       int ret = index_op.cancel(dpp, nullptr, y, log_op);
       if (ret < 0) {
@@ -8086,7 +8176,9 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64
                                            optional_yield y,
                                            const string *user_data,
                                             bool appendable,
-                                            bool log_op)
+                                            bool log_op,
+                                           uint8_t restore_status,
+                                           ceph::real_time restore_expiry_date)
 {
   if (blind) {
     return 0;
@@ -8114,6 +8206,8 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64
   ent.meta.owner_display_name = owner.display_name;
   ent.meta.content_type = content_type;
   ent.meta.appendable = appendable;
+  ent.meta.restore_status = restore_status;
+  ent.meta.restore_expiry_date = restore_expiry_date;
 
   bool add_log = log_op && store->svc.zone->need_to_log_data();
 
index d8fe665fcf3e1cf666fc6a82fe2e90714bb4d9b6..8109a84f8eb28c9531e3fe727781422c3af972d3 100644 (file)
@@ -1021,7 +1021,9 @@ public:
                   optional_yield y,
                   const std::string *user_data = nullptr,
                   bool appendable = false,
-                   bool log_op = true);
+                   bool log_op = true,
+                  uint8_t restore_status = 0,
+                  ceph::real_time restore_expiry_date = {});
       int complete_del(const DoutPrefixProvider *dpp,
                        int64_t poolid, uint64_t epoch,
                        ceph::real_time& removed_mtime, /* mtime of removed object */
index 92821498f8b704bf398df844e776275f2a4aa9cb..dc052c2e04aec6a3b73fbc6b9bf7b72d5a310ab0 100644 (file)
@@ -4,6 +4,7 @@
 #include <boost/algorithm/string/case_conv.hpp>
 #include <cstdint>
 #include <errno.h>
+#include <algorithm>
 #include <array>
 #include <string.h>
 #include <string_view>
@@ -1883,6 +1884,15 @@ int RGWListBucket_ObjStore_S3::get_common_params()
      shard_id = s->bucket_instance_shard_id;
     }
   }
+
+  // Parse x-amz-optional-object-attributes header.
+  const char* opt_attrs = s->info.env->get("HTTP_X_AMZ_OPTIONAL_OBJECT_ATTRIBUTES");
+  if (opt_attrs) {
+    auto tokens = ceph::split(opt_attrs, ", ");
+    fetch_restore_status =
+        std::find(tokens.begin(), tokens.end(), "RestoreStatus") != tokens.end();
+  }
+
   return 0;
 }
 
@@ -1918,6 +1928,30 @@ if(!continuation_token_exist) {
 return 0;
 }
 
+/**
+ * Emit <RestoreStatus> XML element for a listing entry.
+ * Only emits for RestoreAlreadyInProgress and CloudRestored states.
+ *
+ * s    - request state; all output is written through s->formatter.
+ * meta - bucket index metadata of the entry being listed; its restore_status
+ *        byte is cast to rgw::sal::RGWRestoreStatus for the state check.
+ *
+ * Output, when the state qualifies, is a "RestoreStatus" section holding:
+ *  - "IsRestoreInProgress": true for RestoreAlreadyInProgress, false for
+ *    CloudRestored.
+ *  - "RestoreExpiryDate": written only when the restore is not in progress
+ *    and meta.restore_expiry_date differs from a default-constructed
+ *    real_time.
+ * For every other state the function returns without writing anything.
+ */
+static void dump_restore_status(req_state* s,
+                                const rgw_bucket_dir_entry_meta& meta)
+{
+  using RGWRestoreStatus = rgw::sal::RGWRestoreStatus;
+  auto status = static_cast<RGWRestoreStatus>(meta.restore_status); // index stores the enum as a raw uint8_t
+
+  if (status != RGWRestoreStatus::RestoreAlreadyInProgress &&
+      status != RGWRestoreStatus::CloudRestored) {
+    return; // any other state: emit no RestoreStatus section at all
+  }
+
+  bool in_progress = (status == RGWRestoreStatus::RestoreAlreadyInProgress);
+  s->formatter->open_object_section("RestoreStatus");
+  s->formatter->dump_bool("IsRestoreInProgress", in_progress);
+  if (!in_progress && meta.restore_expiry_date != ceph::real_time{}) { // skip the date when it was never set
+    dump_time(s, "RestoreExpiryDate", meta.restore_expiry_date);
+  }
+  s->formatter->close_section(); // RestoreStatus
+}
+
 void RGWListBucket_ObjStore_S3::send_common_versioned_response()
 {
   if (!s->bucket_tenant.empty()) {
@@ -1999,6 +2033,9 @@ void RGWListBucket_ObjStore_S3::send_versioned_response()
         s->formatter->dump_int("Size", iter->meta.accounted_size);
         auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
         s->formatter->dump_string("StorageClass", storage_class.c_str());
+        if (fetch_restore_status) {
+          dump_restore_status(s, iter->meta);
+        }
       }
       dump_owner(s, iter->meta.owner, iter->meta.owner_display_name);
       if (iter->meta.appendable) {
@@ -2091,6 +2128,9 @@ void RGWListBucket_ObjStore_S3::send_response()
       s->formatter->dump_int("Size", iter->meta.accounted_size);
       auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
       s->formatter->dump_string("StorageClass", storage_class.c_str());
+      if (fetch_restore_status) {
+        dump_restore_status(s, iter->meta);
+      }
       dump_owner(s, iter->meta.owner, iter->meta.owner_display_name);
       if (s->system_request) {
        s->formatter->dump_string("RgwxTag", iter->tag);
@@ -2166,6 +2206,9 @@ void RGWListBucket_ObjStore_S3v2::send_versioned_response()
         s->formatter->dump_int("Size", iter->meta.accounted_size);
         auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
         s->formatter->dump_string("StorageClass", storage_class.c_str());
+        if (fetch_restore_status) {
+          dump_restore_status(s, iter->meta);
+        }
       }
       if (fetchOwner == true) {
         dump_owner(s, iter->meta.owner, iter->meta.owner_display_name);
@@ -2235,6 +2278,9 @@ void RGWListBucket_ObjStore_S3v2::send_response()
       s->formatter->dump_int("Size", iter->meta.accounted_size);
       auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
       s->formatter->dump_string("StorageClass", storage_class.c_str());
+      if (fetch_restore_status) {
+        dump_restore_status(s, iter->meta);
+      }
       if (fetchOwner == true) {
         dump_owner(s, iter->meta.owner, iter->meta.owner_display_name);
       }
index bd46763fd986b6de00681d35ecec8b80f03313b9..591af05544495288380c67a29062b0574061e643 100644 (file)
@@ -150,6 +150,7 @@ class RGWListBucket_ObjStore_S3 : public RGWListBucket_ObjStore {
 protected:
   bool objs_container;
   bool encode_key {false};
+  bool fetch_restore_status {false};
   int get_common_params();
   void send_common_response();
   void send_common_versioned_response();