]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: address bug where object puts could write to decommissioned shard
authorJ. Eric Ivancich <ivancich@redhat.com>
Fri, 28 Oct 2022 19:14:51 +0000 (15:14 -0400)
committerJ. Eric Ivancich <ivancich@redhat.com>
Thu, 9 Feb 2023 19:36:07 +0000 (14:36 -0500)
Addresses an apparent bug where some object writes that initiate
around the time that resharding completes are written to the old
bucket index shard rather than the new one.

Other write operations to a bucket index shard now also assert their
existance so as not to re-create old bucket index shards.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
src/cls/rgw/cls_rgw_client.cc
src/cls/rgw/cls_rgw_types.cc
src/cls/rgw/cls_rgw_types.h
src/rgw/rgw_rados.cc
src/rgw/rgw_rados.h
src/rgw/rgw_reshard.cc

index 4667de8994d5a70d98348ddc497065d9e2d7d22f..32c5559a5d892c12916f4e5d9d5f053cf390fde5 100644 (file)
@@ -1196,6 +1196,7 @@ static bool issue_set_bucket_resharding(librados::IoCtx& io_ctx,
   call.entry = entry;
   encode(call, in);
   librados::ObjectWriteOperation op;
+  op.assert_exists(); // the shard must exist; if not fail rather than recreate
   op.exec(RGW_CLASS, RGW_SET_BUCKET_RESHARDING, in);
   return manager->aio_operate(io_ctx, shard_id, oid, &op);
 }
index 67535841e2fc2831801c7501e6bda191f115f70c..54532c895668e23ad4b1f7d73e129c38d72af7df 100644 (file)
@@ -763,3 +763,21 @@ void cls_rgw_lc_obj_head::dump(Formatter *f) const
 void cls_rgw_lc_obj_head::generate_test_instances(list<cls_rgw_lc_obj_head*>& ls)
 {
 }
+
+std::ostream& operator<<(std::ostream& out, cls_rgw_reshard_status status) {
+  switch (status) {
+  case cls_rgw_reshard_status::NOT_RESHARDING:
+    out << "NOT_RESHARDING";
+    break;
+  case cls_rgw_reshard_status::IN_PROGRESS:
+    out << "IN_PROGRESS";
+    break;
+  case cls_rgw_reshard_status::DONE:
+    out << "DONE";
+    break;
+  default:
+    out << "UNKNOWN_STATUS";
+  }
+
+  return out;
+}
index cdaebe5ce21cda47669ffb6a9826ffca2e07954e..72a0a3362551e2e04e71e3b49f332064b5ecb617 100644 (file)
@@ -775,6 +775,7 @@ enum class cls_rgw_reshard_status : uint8_t {
   IN_PROGRESS     = 1,
   DONE            = 2
 };
+std::ostream& operator<<(std::ostream&, cls_rgw_reshard_status);
 
 inline std::string to_string(const cls_rgw_reshard_status status)
 {
@@ -833,9 +834,16 @@ struct cls_rgw_bucket_instance_entry {
   bool resharding() const {
     return reshard_status != RESHARD_STATUS::NOT_RESHARDING;
   }
+
   bool resharding_in_progress() const {
     return reshard_status == RESHARD_STATUS::IN_PROGRESS;
   }
+
+  friend std::ostream& operator<<(std::ostream& out, const cls_rgw_bucket_instance_entry& v) {
+    out << "cls_rgw_bucket_instance_entry:{ " << v.reshard_status <<
+      ", \"" << v.new_bucket_instance_id << "\", " << v.num_shards << " }";
+    return out;
+  }
 };
 WRITE_CLASS_ENCODER(cls_rgw_bucket_instance_entry)
 
index bc6c34e68bffb563f3e6f7c98567ea332b2899a6..cb8d9b00ee56a7a59d9c3ba1507271a6c9712801 100644 (file)
@@ -865,6 +865,7 @@ int RGWIndexCompletionThread::process(const DoutPrefixProvider *dpp)
     r = store->guard_reshard(this, &bs, c->obj, bucket_info,
                             [&](RGWRados::BucketShard *bs) -> int {
                               librados::ObjectWriteOperation o;
+                              o.assert_exists(); // bucket index shard must exist
                               cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
                               cls_rgw_bucket_complete_op(o, c->op, c->tag, c->ver, c->key, c->dir_meta, &c->remove_objs,
                                                          c->log_op, c->bilog_op, &c->zones_trace);
@@ -5093,7 +5094,14 @@ int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketI
     return r;
   }
 
-  return CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  r = CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+      ": unable to issue set bucket resharding, r=" << r << " (" <<
+      cpp_strerror(-r) << ")" << dendl;
+  }
+
+  return r;
 }
 
 int RGWRados::defer_gc(const DoutPrefixProvider *dpp, void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y)
@@ -6203,27 +6211,62 @@ int RGWRados::Bucket::UpdateIndex::guard_reshard(const DoutPrefixProvider *dpp,
       ldpp_dout(dpp, 5) << "failed to get BucketShard object: ret=" << ret << dendl;
       return ret;
     }
+
     r = call(bs);
-    if (r != -ERR_BUSY_RESHARDING) {
+    if (r != -ERR_BUSY_RESHARDING && r != -ENOENT) {
       break;
     }
-    ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
-    string new_bucket_id;
-    r = store->block_while_resharding(bs, &new_bucket_id,
-                                      target->bucket_info, null_yield, dpp);
-    if (r == -ERR_BUSY_RESHARDING) {
-      continue;
-    }
-    if (r < 0) {
-      return r;
+
+    std::string new_bucket_id;
+
+    // different logic depending whether resharding completed or is
+    // underway
+
+    if (r == -ENOENT) { // case where resharding must have completed
+      ldpp_dout(dpp, 0) <<
+       "NOTICE: resharding operation recently completed, invalidating "
+       "old BucketInfo" << dendl;
+
+      r = store->fetch_new_bucket_id(target->bucket_info,
+                                    nullptr,
+                                    new_bucket_id, dpp);
+      if (r == -ENOENT) {
+       // apparently this op raced with a bucket deletion
+       ldpp_dout(dpp, 10) << "WARNING: " << __func__ <<
+         " unable to fetch bucket_id, apparently due to race "
+         "with deletion of bucket: " <<
+         target->bucket_info.bucket.get_key() << dendl;
+       return -ERR_NO_SUCH_BUCKET;
+      } else if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " unable to refresh stale bucket_id after reshard; r=" <<
+         r << dendl;
+       return r;
+      }
+    } else { // must have been resharding at the time
+      ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
+
+      r = store->block_while_resharding(bs, &new_bucket_id,
+                                       target->bucket_info, null_yield, dpp);
+      if (r == -ERR_BUSY_RESHARDING) {
+       continue;
+      }
+      if (r < 0) {
+       return r;
+      }
+
+      ldpp_dout(dpp, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
+      i = 0; /* resharding is finished, make sure we can retry */
     }
-    ldpp_dout(dpp, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
-    i = 0; /* resharding is finished, make sure we can retry */
+
+    // common portion -- finished resharding either way
+
     r = target->update_bucket_id(new_bucket_id, dpp);
     if (r < 0) {
       ldpp_dout(dpp, 0) << "ERROR: update_bucket_id() new_bucket_id=" << new_bucket_id << " returned r=" << r << dendl;
       return r;
     }
+
     invalidate_bs();
   } // for loop
 
@@ -6847,8 +6890,29 @@ int RGWRados::guard_reshard(const DoutPrefixProvider *dpp,
   return 0;
 }
 
+
+int RGWRados::fetch_new_bucket_id(
+  const RGWBucketInfo& curr_bucket_info,
+  RGWBucketInfo* save_bucket_info, // nullptr -> no save
+  std::string& new_bucket_id,
+  const DoutPrefixProvider* dpp)
+{
+  RGWBucketInfo local_bucket_info; // use if save_bucket_info is null
+  RGWBucketInfo* bip = save_bucket_info ? save_bucket_info : &local_bucket_info;
+  *bip = curr_bucket_info; // copy
+
+  int ret = try_refresh_bucket_info(*bip, nullptr, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  new_bucket_id = bip->bucket.bucket_id;
+  return 0;
+} // fetch_new_bucket_id
+
+
 int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
-                                    string *new_bucket_id,
+                                    std::string *new_bucket_id,
                                      const RGWBucketInfo& bucket_info,
                                      optional_yield y,
                                      const DoutPrefixProvider *dpp)
@@ -6856,32 +6920,18 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
   int ret = 0;
   cls_rgw_bucket_instance_entry entry;
 
-  // since we want to run this recovery code from two distinct places,
-  // let's just put it in a lambda so we can easily re-use; if the
-  // lambda successfully fetches a new bucket id, it sets
-  // new_bucket_id and returns 0, otherwise it returns a negative
-  // error code
-  auto fetch_new_bucket_id =
-    [this, &bucket_info, dpp](const std::string& log_tag,
-                        std::string* new_bucket_id) -> int {
-      RGWBucketInfo fresh_bucket_info = bucket_info;
-      int ret = try_refresh_bucket_info(fresh_bucket_info, nullptr, dpp);
-      if (ret < 0) {
-       ldpp_dout(dpp, 0) << __func__ <<
-         " ERROR: failed to refresh bucket info after reshard at " <<
-         log_tag << ": " << cpp_strerror(-ret) << dendl;
-       return ret;
-      }
-      *new_bucket_id = fresh_bucket_info.bucket.bucket_id;
-      return 0;
-    };
-
   constexpr int num_retries = 10;
   for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop
     auto& ref = bs->bucket_obj.get_ref();
     ret = cls_rgw_get_bucket_resharding(ref.pool.ioctx(), ref.obj.oid, &entry);
     if (ret == -ENOENT) {
-      return fetch_new_bucket_id("get_bucket_resharding_failed", new_bucket_id);
+      ret = fetch_new_bucket_id(bucket_info, nullptr, *new_bucket_id, dpp);
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding failed, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
     } else if (ret < 0) {
       ldpp_dout(dpp, 0) << __func__ <<
        " ERROR: failed to get bucket resharding : " << cpp_strerror(-ret) <<
@@ -6890,8 +6940,13 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
     }
 
     if (!entry.resharding_in_progress()) {
-      return fetch_new_bucket_id("get_bucket_resharding_succeeded",
-                                new_bucket_id);
+      ret = fetch_new_bucket_id(bucket_info, nullptr, *new_bucket_id, dpp);
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding succeeded, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
     }
 
     ldpp_dout(dpp, 20) << "NOTICE: reshard still in progress; " <<
@@ -6914,7 +6969,9 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
       std::string bucket_id = b.get_key();
       RGWBucketReshardLock reshard_lock(this->store, bucket_info, true);
       ret = reshard_lock.lock(dpp);
-      if (ret < 0) {
+      if (ret == -ENOENT) {
+       continue;
+      } else if (ret < 0) {
        ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ <<
          ": failed to take reshard lock for bucket " <<
          bucket_id << "; expected if resharding underway" << dendl;
@@ -6922,14 +6979,21 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
        ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ <<
          ": was able to take reshard lock for bucket " <<
          bucket_id << dendl;
+
        ret = RGWBucketReshard::clear_resharding(dpp, this->store, bucket_info);
-       if (ret < 0) {
-         reshard_lock.unlock();
+       reshard_lock.unlock();
+
+       if (ret == -ENOENT) {
+         ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ <<
+           " INFO: no need to reset reshard flags; old shards apparently"
+           " removed after successful resharding of bucket " <<
+           bucket_id << dendl;
+         continue;
+       } else if (ret < 0) {
          ldpp_dout(dpp, 0) << __PRETTY_FUNCTION__ <<
            " ERROR: failed to clear resharding flags for bucket " <<
-           bucket_id << dendl;
+           bucket_id << ", " << cpp_strerror(-ret) << dendl;
        } else {
-         reshard_lock.unlock();
          ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ <<
            ": apparently successfully cleared resharding flags for "
            "bucket " << bucket_id << dendl;
@@ -6978,6 +7042,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, const RGWBuck
                      cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
                      auto& ref = bs->bucket_obj.get_ref();
                      librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                      cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                      cls_rgw_bucket_link_olh(op, key, olh_state.olh_tag,
                                               delete_marker, op_tag, meta, olh_epoch,
@@ -7026,6 +7091,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, const
                    [&](BucketShard *bs) -> int {
                      auto& ref = bs->bucket_obj.get_ref();
                      librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                      cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                      cls_rgw_bucket_unlink_instance(op, key, op_tag,
                                                     olh_tag, olh_epoch, svc.zone->get_zone().log_data, zones_trace);
@@ -7171,6 +7237,7 @@ int RGWRados::bucket_index_trim_olh_log(const DoutPrefixProvider *dpp, const RGW
   ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                      [&](BucketShard *pbs) -> int {
                        ObjectWriteOperation op;
+                       op.assert_exists(); // bucket index shard must exist
                        cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                        cls_rgw_trim_olh_log(op, key, ver, olh_tag);
                         return pbs->bucket_obj.operate(dpp, &op, null_yield);
@@ -7200,6 +7267,7 @@ int RGWRados::bucket_index_clear_olh(const DoutPrefixProvider *dpp, const RGWBuc
   int ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                          [&](BucketShard *pbs) -> int {
                            ObjectWriteOperation op;
+                           op.assert_exists(); // bucket index shard must exist
                            auto& ref = pbs->bucket_obj.get_ref();
                            cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                            cls_rgw_clear_olh(op, key, olh_tag);
@@ -8380,6 +8448,8 @@ int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs,
   zones_trace.insert(svc.zone->get_zone().id, bs.bucket.get_key());
 
   ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
   cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
   cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
   cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), svc.zone->get_zone().log_data, bilog_flags, zones_trace);
@@ -8392,6 +8462,8 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify
                                  list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *_zones_trace)
 {
   ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
   rgw_bucket_dir_entry_meta dir_meta;
   dir_meta = ent.meta;
   dir_meta.category = category;
index 7ec229fee18d91afa77e07c08f31a1b5be4b8ae4..16f209b891002aaba2560a7f7c19c159b75789e2 100644 (file)
@@ -1268,6 +1268,10 @@ public:
   int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
   int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
 
+  int fetch_new_bucket_id(const RGWBucketInfo& curr_bucket_info,
+                         RGWBucketInfo* save_bucket_info,
+                         std::string& new_bucket_id,
+                         const DoutPrefixProvider* dpp);
   int guard_reshard(const DoutPrefixProvider *dpp,
                     BucketShard *bs,
                    const rgw_obj& obj_instance,
index 532dad7b47fd19c72875e877b43ed6ee3d39329d..66977374559afde13466b9ca6dcc514753c1c2f1 100644 (file)
@@ -290,7 +290,8 @@ int RGWBucketReshard::clear_resharding(const DoutPrefixProvider *dpp,
                                        rgw::sal::RadosStore* store,
                                       const RGWBucketInfo& bucket_info)
 {
-  int ret = clear_index_shard_reshard_status(dpp, store, bucket_info);
+  int ret;
+  ret = clear_index_shard_reshard_status(dpp, store, bucket_info);
   if (ret < 0) {
     ldpp_dout(dpp, 0) << "RGWBucketReshard::" << __func__ <<
       " ERROR: error clearing reshard status from index shard " <<
@@ -298,7 +299,9 @@ int RGWBucketReshard::clear_resharding(const DoutPrefixProvider *dpp,
     return ret;
   }
 
+  // default constructed = NOT_RESHARDING
   cls_rgw_bucket_instance_entry instance_entry;
+
   ret = store->getRados()->bucket_set_reshard(dpp, bucket_info, instance_entry);
   if (ret < 0) {
     ldpp_dout(dpp, 0) << "RGWReshard::" << __func__ <<