]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: address bug where object puts could write to decommissioned shard
authorJ. Eric Ivancich <ivancich@redhat.com>
Fri, 28 Oct 2022 19:14:51 +0000 (15:14 -0400)
committerJ. Eric Ivancich <ivancich@redhat.com>
Tue, 15 Nov 2022 05:56:55 +0000 (00:56 -0500)
Addresses an apparent bug where some object writes that initiate
around the time that resharding completes are written to the old
bucket index shard rather than the new one.

Other write operations to a bucket index shard now also assert their
existance so as not to re-create old bucket index shards.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
src/cls/rgw/cls_rgw_client.cc
src/cls/rgw/cls_rgw_types.cc
src/cls/rgw/cls_rgw_types.h
src/rgw/rgw_rados.cc
src/rgw/rgw_rados.h
src/rgw/rgw_reshard.cc

index da0fd9d0d572ef3f8b2183649ddcaa451411c771..cddd735c5abf708fe5d8a70850b3215b2e8648df 100644 (file)
@@ -1196,6 +1196,7 @@ static bool issue_set_bucket_resharding(librados::IoCtx& io_ctx,
   call.entry = entry;
   encode(call, in);
   librados::ObjectWriteOperation op;
+  op.assert_exists(); // the shard must exist; if not fail rather than recreate
   op.exec(RGW_CLASS, RGW_SET_BUCKET_RESHARDING, in);
   return manager->aio_operate(io_ctx, shard_id, oid, &op);
 }
index 57067625b54393e02bb9f9217d844003fdee5b93..4a982eccba7be48b0cd56c035bcd60ca2e156012 100644 (file)
@@ -764,3 +764,21 @@ void cls_rgw_lc_obj_head::dump(Formatter *f) const
 void cls_rgw_lc_obj_head::generate_test_instances(list<cls_rgw_lc_obj_head*>& ls)
 {
 }
+
+std::ostream& operator<<(std::ostream& out, cls_rgw_reshard_status status) {
+  switch (status) {
+  case cls_rgw_reshard_status::NOT_RESHARDING:
+    out << "NOT_RESHARDING";
+    break;
+  case cls_rgw_reshard_status::IN_PROGRESS:
+    out << "IN_PROGRESS";
+    break;
+  case cls_rgw_reshard_status::DONE:
+    out << "DONE";
+    break;
+  default:
+    out << "UNKNOWN_STATUS";
+  }
+
+  return out;
+}
index 03a3771b6f5f1a5d4b7eeae4b82b9e6e86704faa..5b0155584e27fb6deb89324300c94510b2a6e690 100644 (file)
@@ -749,6 +749,7 @@ enum class cls_rgw_reshard_status : uint8_t {
   IN_PROGRESS     = 1,
   DONE            = 2
 };
+std::ostream& operator<<(std::ostream&, cls_rgw_reshard_status);
 
 inline std::string to_string(const cls_rgw_reshard_status status)
 {
@@ -807,9 +808,16 @@ struct cls_rgw_bucket_instance_entry {
   bool resharding() const {
     return reshard_status != RESHARD_STATUS::NOT_RESHARDING;
   }
+
   bool resharding_in_progress() const {
     return reshard_status == RESHARD_STATUS::IN_PROGRESS;
   }
+
+  friend std::ostream& operator<<(std::ostream& out, const cls_rgw_bucket_instance_entry& v) {
+    out << "cls_rgw_bucket_instance_entry:{ " << v.reshard_status <<
+      ", \"" << v.new_bucket_instance_id << "\", " << v.num_shards << " }";
+    return out;
+  }
 };
 WRITE_CLASS_ENCODER(cls_rgw_bucket_instance_entry)
 
index 1000d93adcaa10a24943211e5c60a9254e8c3adf..4926d2e346281af9f4a97e6cb5b236c68224192f 100644 (file)
@@ -849,6 +849,7 @@ int RGWIndexCompletionThread::process(const DoutPrefixProvider *dpp)
     r = store->guard_reshard(this, &bs, c->obj, bucket_info,
                             [&](RGWRados::BucketShard *bs) -> int {
                               librados::ObjectWriteOperation o;
+                              o.assert_exists(); // bucket index shard must exist
                               cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
                               cls_rgw_bucket_complete_op(o, c->op, c->tag, c->ver, c->key, c->dir_meta, &c->remove_objs,
                                                          c->log_op, c->bilog_op, &c->zones_trace);
@@ -4955,7 +4956,8 @@ int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketI
     return r;
   }
 
-  return CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  r = CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  return r;
 }
 
 int RGWRados::defer_gc(const DoutPrefixProvider *dpp, void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y)
@@ -6065,27 +6067,62 @@ int RGWRados::Bucket::UpdateIndex::guard_reshard(const DoutPrefixProvider *dpp,
       ldpp_dout(dpp, 5) << "failed to get BucketShard object: ret=" << ret << dendl;
       return ret;
     }
+
     r = call(bs);
-    if (r != -ERR_BUSY_RESHARDING) {
+    if (r != -ERR_BUSY_RESHARDING && r != -ENOENT) {
       break;
     }
-    ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
-    string new_bucket_id;
-    r = store->block_while_resharding(bs, &new_bucket_id,
-                                      target->bucket_info, null_yield, dpp);
-    if (r == -ERR_BUSY_RESHARDING) {
-      continue;
-    }
-    if (r < 0) {
-      return r;
+
+    std::string new_bucket_id;
+
+    // different logic depending whether resharding completed or is
+    // underway
+
+    if (r == -ENOENT) { // case where resharding must have completed
+      ldpp_dout(dpp, 0) <<
+       "NOTICE: resharding operation recently completed, invalidating "
+       "old BucketInfo" << dendl;
+
+      r = store->fetch_new_bucket_id(target->bucket_info,
+                                    nullptr,
+                                    new_bucket_id, dpp);
+      if (r == -ENOENT) {
+       // apparently this op raced with a bucket deletion
+       ldpp_dout(dpp, 10) << "WARNING: " << __func__ <<
+         " unable to fetch bucket_id, apparently due to race "
+         "with deletion of bucket: " <<
+         target->bucket_info.bucket.get_key() << dendl;
+       return -ERR_NO_SUCH_BUCKET;
+      } else if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " unable to refresh stale bucket_id after reshard; r=" <<
+         r << dendl;
+       return r;
+      }
+    } else { // must have been resharding at the time
+      ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
+
+      r = store->block_while_resharding(bs, &new_bucket_id,
+                                       target->bucket_info, null_yield, dpp);
+      if (r == -ERR_BUSY_RESHARDING) {
+       continue;
+      }
+      if (r < 0) {
+       return r;
+      }
+
+      ldpp_dout(dpp, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
+      i = 0; /* resharding is finished, make sure we can retry */
     }
-    ldpp_dout(dpp, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
-    i = 0; /* resharding is finished, make sure we can retry */
+
+    // common portion -- finished resharding either way
+
     r = target->update_bucket_id(new_bucket_id, dpp);
     if (r < 0) {
       ldpp_dout(dpp, 0) << "ERROR: update_bucket_id() new_bucket_id=" << new_bucket_id << " returned r=" << r << dendl;
       return r;
     }
+
     invalidate_bs();
   } // for loop
 
@@ -6727,8 +6764,29 @@ int RGWRados::guard_reshard(const DoutPrefixProvider *dpp,
   return 0;
 }
 
+
+int RGWRados::fetch_new_bucket_id(
+  const RGWBucketInfo& curr_bucket_info,
+  RGWBucketInfo* save_bucket_info, // nullptr -> no save
+  std::string& new_bucket_id,
+  const DoutPrefixProvider* dpp)
+{
+  RGWBucketInfo local_bucket_info; // use if save_bucket_info is null
+  RGWBucketInfo* bip = save_bucket_info ? save_bucket_info : &local_bucket_info;
+  *bip = curr_bucket_info; // copy
+
+  int ret = try_refresh_bucket_info(*bip, nullptr, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  new_bucket_id = bip->bucket.bucket_id;
+  return 0;
+} // fetch_new_bucket_id
+
+
 int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
-                                    string *new_bucket_id,
+                                    std::string *new_bucket_id,
                                      const RGWBucketInfo& bucket_info,
                                      optional_yield y,
                                      const DoutPrefixProvider *dpp)
@@ -6736,32 +6794,18 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
   int ret = 0;
   cls_rgw_bucket_instance_entry entry;
 
-  // since we want to run this recovery code from two distinct places,
-  // let's just put it in a lambda so we can easily re-use; if the
-  // lambda successfully fetches a new bucket id, it sets
-  // new_bucket_id and returns 0, otherwise it returns a negative
-  // error code
-  auto fetch_new_bucket_id =
-    [this, &bucket_info, dpp](const std::string& log_tag,
-                        std::string* new_bucket_id) -> int {
-      RGWBucketInfo fresh_bucket_info = bucket_info;
-      int ret = try_refresh_bucket_info(fresh_bucket_info, nullptr, dpp);
-      if (ret < 0) {
-       ldpp_dout(dpp, 0) << __func__ <<
-         " ERROR: failed to refresh bucket info after reshard at " <<
-         log_tag << ": " << cpp_strerror(-ret) << dendl;
-       return ret;
-      }
-      *new_bucket_id = fresh_bucket_info.bucket.bucket_id;
-      return 0;
-    };
-
   constexpr int num_retries = 10;
   for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop
     auto& ref = bs->bucket_obj.get_ref();
     ret = cls_rgw_get_bucket_resharding(ref.pool.ioctx(), ref.obj.oid, &entry);
     if (ret == -ENOENT) {
-      return fetch_new_bucket_id("get_bucket_resharding_failed", new_bucket_id);
+      ret = fetch_new_bucket_id(bucket_info, nullptr, *new_bucket_id, dpp);
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding failed, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
     } else if (ret < 0) {
       ldpp_dout(dpp, 0) << __func__ <<
        " ERROR: failed to get bucket resharding : " << cpp_strerror(-ret) <<
@@ -6770,8 +6814,13 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
     }
 
     if (!entry.resharding_in_progress()) {
-      return fetch_new_bucket_id("get_bucket_resharding_succeeded",
-                                new_bucket_id);
+      ret = fetch_new_bucket_id(bucket_info, nullptr, *new_bucket_id, dpp);
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding succeeded, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
     }
 
     ldpp_dout(dpp, 20) << "NOTICE: reshard still in progress; " <<
@@ -6794,7 +6843,9 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
       std::string bucket_id = b.get_key();
       RGWBucketReshardLock reshard_lock(this->store, bucket_info, true);
       ret = reshard_lock.lock();
-      if (ret < 0) {
+      if (ret == -ENOENT) {
+       continue;
+      } else if (ret < 0) {
        ldpp_dout(dpp, 20) << __func__ <<
          " INFO: failed to take reshard lock for bucket " <<
          bucket_id << "; expected if resharding underway" << dendl;
@@ -6802,14 +6853,21 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
        ldpp_dout(dpp, 10) << __func__ <<
          " INFO: was able to take reshard lock for bucket " <<
          bucket_id << dendl;
+
        ret = RGWBucketReshard::clear_resharding(dpp, this->store, bucket_info);
-       if (ret < 0) {
-         reshard_lock.unlock();
+       reshard_lock.unlock();
+
+       if (ret == -ENOENT) {
+         ldpp_dout(dpp, 5) << __func__ <<
+           " INFO: no need to reset reshard flags; old shards apparently"
+           " removed after successful resharding of bucket " <<
+           bucket_id << dendl;
+         continue;
+       } else if (ret < 0) {
          ldpp_dout(dpp, 0) << __func__ <<
            " ERROR: failed to clear resharding flags for bucket " <<
-           bucket_id << dendl;
+           bucket_id << ", " << cpp_strerror(-ret) << dendl;
        } else {
-         reshard_lock.unlock();
          ldpp_dout(dpp, 5) << __func__ <<
            " INFO: apparently successfully cleared resharding flags for "
            "bucket " << bucket_id << dendl;
@@ -6858,6 +6916,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, const RGWBuck
                      cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
                      auto& ref = bs->bucket_obj.get_ref();
                      librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                      cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                      cls_rgw_bucket_link_olh(op, key, olh_state.olh_tag,
                                               delete_marker, op_tag, meta, olh_epoch,
@@ -6906,6 +6965,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, const
                    [&](BucketShard *bs) -> int {
                      auto& ref = bs->bucket_obj.get_ref();
                      librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                      cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                      cls_rgw_bucket_unlink_instance(op, key, op_tag,
                                                     olh_tag, olh_epoch, svc.zone->get_zone().log_data, zones_trace);
@@ -7051,6 +7111,7 @@ int RGWRados::bucket_index_trim_olh_log(const DoutPrefixProvider *dpp, const RGW
   ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                      [&](BucketShard *pbs) -> int {
                        ObjectWriteOperation op;
+                       op.assert_exists(); // bucket index shard must exist
                        cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                        cls_rgw_trim_olh_log(op, key, ver, olh_tag);
                         return pbs->bucket_obj.operate(dpp, &op, null_yield);
@@ -7080,6 +7141,7 @@ int RGWRados::bucket_index_clear_olh(const DoutPrefixProvider *dpp, const RGWBuc
   int ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                          [&](BucketShard *pbs) -> int {
                            ObjectWriteOperation op;
+                           op.assert_exists(); // bucket index shard must exist
                            auto& ref = pbs->bucket_obj.get_ref();
                            cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                            cls_rgw_clear_olh(op, key, olh_tag);
@@ -8259,6 +8321,8 @@ int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs,
   zones_trace.insert(svc.zone->get_zone().id, bs.bucket.get_key());
 
   ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
   cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
   cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
   cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), svc.zone->get_zone().log_data, bilog_flags, zones_trace);
@@ -8271,6 +8335,8 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify
                                  list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *_zones_trace)
 {
   ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
   rgw_bucket_dir_entry_meta dir_meta;
   dir_meta = ent.meta;
   dir_meta.category = category;
index b3171468991ade357448bc23134b6f873741564c..ecc719b87f9936384ac64f0aeb59ed51f390a7ce 100644 (file)
@@ -1313,6 +1313,10 @@ public:
   int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
   int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
 
+  int fetch_new_bucket_id(const RGWBucketInfo& curr_bucket_info,
+                         RGWBucketInfo* save_bucket_info,
+                         std::string& new_bucket_id,
+                         const DoutPrefixProvider* dpp);
   int guard_reshard(const DoutPrefixProvider *dpp, 
                     BucketShard *bs,
                    const rgw_obj& obj_instance,
index bf2248fc7f90bc1782afd8b3e25c575d70820412..284b69c826da338a8537c904a2883ee67729eb20 100644 (file)
@@ -288,7 +288,8 @@ int RGWBucketReshard::clear_resharding(const DoutPrefixProvider *dpp,
                                        rgw::sal::RGWRadosStore* store,
                                       const RGWBucketInfo& bucket_info)
 {
-  int ret = clear_index_shard_reshard_status(dpp, store, bucket_info);
+  int ret;
+  ret = clear_index_shard_reshard_status(dpp, store, bucket_info);
   if (ret < 0) {
     ldpp_dout(dpp, 0) << "RGWBucketReshard::" << __func__ <<
       " ERROR: error clearing reshard status from index shard " <<
@@ -296,7 +297,9 @@ int RGWBucketReshard::clear_resharding(const DoutPrefixProvider *dpp,
     return ret;
   }
 
+  // default constructed = NOT_RESHARDING
   cls_rgw_bucket_instance_entry instance_entry;
+
   ret = store->getRados()->bucket_set_reshard(dpp, bucket_info, instance_entry);
   if (ret < 0) {
     ldpp_dout(dpp, 0) << "RGWReshard::" << __func__ <<