rgw: address bug where object puts could write to decommissioned shard

author J. Eric Ivancich <ivancich@redhat.com>

Fri, 28 Oct 2022 19:14:51 +0000 (15:14 -0400)

committer J. Eric Ivancich <ivancich@redhat.com>

Thu, 9 Feb 2023 19:36:07 +0000 (14:36 -0500)
author J. Eric Ivancich <ivancich@redhat.com>
Fri, 28 Oct 2022 19:14:51 +0000 (15:14 -0400)
committer J. Eric Ivancich <ivancich@redhat.com>
Thu, 9 Feb 2023 19:36:07 +0000 (14:36 -0500)
diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc

index 4667de8994d5a70d98348ddc497065d9e2d7d22f..32c5559a5d892c12916f4e5d9d5f053cf390fde5 100644 (file)
--- a/src/cls/rgw/cls_rgw_client.cc
+++ b/src/cls/rgw/cls_rgw_client.cc
@@ -1196,6 +1196,7 @@ static bool issue_set_bucket_resharding(librados::IoCtx& io_ctx,
    call.entry = entry;
    encode(call, in);
    librados::ObjectWriteOperation op;
+  op.assert_exists(); // the shard must exist; if not fail rather than recreate
    op.exec(RGW_CLASS, RGW_SET_BUCKET_RESHARDING, in);
    return manager->aio_operate(io_ctx, shard_id, oid, &op);
  }
diff --git a/src/cls/rgw/cls_rgw_types.cc b/src/cls/rgw/cls_rgw_types.cc

index 67535841e2fc2831801c7501e6bda191f115f70c..54532c895668e23ad4b1f7d73e129c38d72af7df 100644 (file)
--- a/src/cls/rgw/cls_rgw_types.cc
+++ b/src/cls/rgw/cls_rgw_types.cc
@@ -763,3 +763,21 @@ void cls_rgw_lc_obj_head::dump(Formatter *f) const
  void cls_rgw_lc_obj_head::generate_test_instances(list<cls_rgw_lc_obj_head*>& ls)
  {
  }
+
+std::ostream& operator<<(std::ostream& out, cls_rgw_reshard_status status) {
+  switch (status) {
+  case cls_rgw_reshard_status::NOT_RESHARDING:
+    out << "NOT_RESHARDING";
+    break;
+  case cls_rgw_reshard_status::IN_PROGRESS:
+    out << "IN_PROGRESS";
+    break;
+  case cls_rgw_reshard_status::DONE:
+    out << "DONE";
+    break;
+  default:
+    out << "UNKNOWN_STATUS";
+  }
+
+  return out;
+}
diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h

index cdaebe5ce21cda47669ffb6a9826ffca2e07954e..72a0a3362551e2e04e71e3b49f332064b5ecb617 100644 (file)
--- a/src/cls/rgw/cls_rgw_types.h
+++ b/src/cls/rgw/cls_rgw_types.h
@@ -775,6 +775,7 @@ enum class cls_rgw_reshard_status : uint8_t {
    IN_PROGRESS     = 1,
    DONE            = 2
  };
+std::ostream& operator<<(std::ostream&, cls_rgw_reshard_status);
  
  inline std::string to_string(const cls_rgw_reshard_status status)
  {
@@ -833,9 +834,16 @@ struct cls_rgw_bucket_instance_entry {
    bool resharding() const {
      return reshard_status != RESHARD_STATUS::NOT_RESHARDING;
    }
+
    bool resharding_in_progress() const {
      return reshard_status == RESHARD_STATUS::IN_PROGRESS;
    }
+
+  friend std::ostream& operator<<(std::ostream& out, const cls_rgw_bucket_instance_entry& v) {
+    out << "cls_rgw_bucket_instance_entry:{ " << v.reshard_status <<
+      ", \"" << v.new_bucket_instance_id << "\", " << v.num_shards << " }";
+    return out;
+  }
  };
  WRITE_CLASS_ENCODER(cls_rgw_bucket_instance_entry)
  
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc

index bc6c34e68bffb563f3e6f7c98567ea332b2899a6..cb8d9b00ee56a7a59d9c3ba1507271a6c9712801 100644 (file)
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -865,6 +865,7 @@ int RGWIndexCompletionThread::process(const DoutPrefixProvider *dpp)
      r = store->guard_reshard(this, &bs, c->obj, bucket_info,
                              [&](RGWRados::BucketShard *bs) -> int {
                                librados::ObjectWriteOperation o;
+                              o.assert_exists(); // bucket index shard must exist
                                cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
                                cls_rgw_bucket_complete_op(o, c->op, c->tag, c->ver, c->key, c->dir_meta, &c->remove_objs,
                                                           c->log_op, c->bilog_op, &c->zones_trace);
@@ -5093,7 +5094,14 @@ int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketI
      return r;
    }
  
-  return CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  r = CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+      ": unable to issue set bucket resharding, r=" << r << " (" <<
+      cpp_strerror(-r) << ")" << dendl;
+  }
+
+  return r;
  }
  
  int RGWRados::defer_gc(const DoutPrefixProvider *dpp, void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y)
@@ -6203,27 +6211,62 @@ int RGWRados::Bucket::UpdateIndex::guard_reshard(const DoutPrefixProvider *dpp,
        ldpp_dout(dpp, 5) << "failed to get BucketShard object: ret=" << ret << dendl;
        return ret;
      }
+
      r = call(bs);
-    if (r != -ERR_BUSY_RESHARDING) {
+    if (r != -ERR_BUSY_RESHARDING && r != -ENOENT) {
        break;
      }
-    ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
-    string new_bucket_id;
-    r = store->block_while_resharding(bs, &new_bucket_id,
-                                      target->bucket_info, null_yield, dpp);
-    if (r == -ERR_BUSY_RESHARDING) {
-      continue;
-    }
-    if (r < 0) {
-      return r;
+
+    std::string new_bucket_id;
+
+    // different logic depending whether resharding completed or is
+    // underway
+
+    if (r == -ENOENT) { // case where resharding must have completed
+      ldpp_dout(dpp, 0) <<
+       "NOTICE: resharding operation recently completed, invalidating "
+       "old BucketInfo" << dendl;
+
+      r = store->fetch_new_bucket_id(target->bucket_info,
+                                    nullptr,
+                                    new_bucket_id, dpp);
+      if (r == -ENOENT) {
+       // apparently this op raced with a bucket deletion
+       ldpp_dout(dpp, 10) << "WARNING: " << __func__ <<
+         " unable to fetch bucket_id, apparently due to race "
+         "with deletion of bucket: " <<
+         target->bucket_info.bucket.get_key() << dendl;
+       return -ERR_NO_SUCH_BUCKET;
+      } else if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " unable to refresh stale bucket_id after reshard; r=" <<
+         r << dendl;
+       return r;
+      }
+    } else { // must have been resharding at the time
+      ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
+
+      r = store->block_while_resharding(bs, &new_bucket_id,
+                                       target->bucket_info, null_yield, dpp);
+      if (r == -ERR_BUSY_RESHARDING) {
+       continue;
+      }
+      if (r < 0) {
+       return r;
+      }
+
+      ldpp_dout(dpp, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
+      i = 0; /* resharding is finished, make sure we can retry */
      }
-    ldpp_dout(dpp, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
-    i = 0; /* resharding is finished, make sure we can retry */
+
+    // common portion -- finished resharding either way
+
      r = target->update_bucket_id(new_bucket_id, dpp);
      if (r < 0) {
        ldpp_dout(dpp, 0) << "ERROR: update_bucket_id() new_bucket_id=" << new_bucket_id << " returned r=" << r << dendl;
        return r;
      }
+
      invalidate_bs();
    } // for loop
  
@@ -6847,8 +6890,29 @@ int RGWRados::guard_reshard(const DoutPrefixProvider *dpp,
    return 0;
  }
  
+
+int RGWRados::fetch_new_bucket_id(
+  const RGWBucketInfo& curr_bucket_info,
+  RGWBucketInfo* save_bucket_info, // nullptr -> no save
+  std::string& new_bucket_id,
+  const DoutPrefixProvider* dpp)
+{
+  RGWBucketInfo local_bucket_info; // use if save_bucket_info is null
+  RGWBucketInfo* bip = save_bucket_info ? save_bucket_info : &local_bucket_info;
+  *bip = curr_bucket_info; // copy
+
+  int ret = try_refresh_bucket_info(*bip, nullptr, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  new_bucket_id = bip->bucket.bucket_id;
+  return 0;
+} // fetch_new_bucket_id
+
+
  int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
-                                    string *new_bucket_id,
+                                    std::string *new_bucket_id,
                                       const RGWBucketInfo& bucket_info,
                                       optional_yield y,
                                       const DoutPrefixProvider *dpp)
@@ -6856,32 +6920,18 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
    int ret = 0;
    cls_rgw_bucket_instance_entry entry;
  
-  // since we want to run this recovery code from two distinct places,
-  // let's just put it in a lambda so we can easily re-use; if the
-  // lambda successfully fetches a new bucket id, it sets
-  // new_bucket_id and returns 0, otherwise it returns a negative
-  // error code
-  auto fetch_new_bucket_id =
-    [this, &bucket_info, dpp](const std::string& log_tag,
-                        std::string* new_bucket_id) -> int {
-      RGWBucketInfo fresh_bucket_info = bucket_info;
-      int ret = try_refresh_bucket_info(fresh_bucket_info, nullptr, dpp);
-      if (ret < 0) {
-       ldpp_dout(dpp, 0) << __func__ <<
-         " ERROR: failed to refresh bucket info after reshard at " <<
-         log_tag << ": " << cpp_strerror(-ret) << dendl;
-       return ret;
-      }
-      *new_bucket_id = fresh_bucket_info.bucket.bucket_id;
-      return 0;
-    };
-
    constexpr int num_retries = 10;
    for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop
      auto& ref = bs->bucket_obj.get_ref();
      ret = cls_rgw_get_bucket_resharding(ref.pool.ioctx(), ref.obj.oid, &entry);
      if (ret == -ENOENT) {
-      return fetch_new_bucket_id("get_bucket_resharding_failed", new_bucket_id);
+      ret = fetch_new_bucket_id(bucket_info, nullptr, *new_bucket_id, dpp);
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding failed, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
      } else if (ret < 0) {
        ldpp_dout(dpp, 0) << __func__ <<
         " ERROR: failed to get bucket resharding : " << cpp_strerror(-ret) <<
@@ -6890,8 +6940,13 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
      }
  
      if (!entry.resharding_in_progress()) {
-      return fetch_new_bucket_id("get_bucket_resharding_succeeded",
-                                new_bucket_id);
+      ret = fetch_new_bucket_id(bucket_info, nullptr, *new_bucket_id, dpp);
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding succeeded, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
      }
  
      ldpp_dout(dpp, 20) << "NOTICE: reshard still in progress; " <<
@@ -6914,7 +6969,9 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
        std::string bucket_id = b.get_key();
        RGWBucketReshardLock reshard_lock(this->store, bucket_info, true);
        ret = reshard_lock.lock(dpp);
-      if (ret < 0) {
+      if (ret == -ENOENT) {
+       continue;
+      } else if (ret < 0) {
         ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ <<
           ": failed to take reshard lock for bucket " <<
           bucket_id << "; expected if resharding underway" << dendl;
@@ -6922,14 +6979,21 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
         ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ <<
           ": was able to take reshard lock for bucket " <<
           bucket_id << dendl;
+
         ret = RGWBucketReshard::clear_resharding(dpp, this->store, bucket_info);
-       if (ret < 0) {
-         reshard_lock.unlock();
+       reshard_lock.unlock();
+
+       if (ret == -ENOENT) {
+         ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ <<
+           " INFO: no need to reset reshard flags; old shards apparently"
+           " removed after successful resharding of bucket " <<
+           bucket_id << dendl;
+         continue;
+       } else if (ret < 0) {
           ldpp_dout(dpp, 0) << __PRETTY_FUNCTION__ <<
             " ERROR: failed to clear resharding flags for bucket " <<
-           bucket_id << dendl;
+           bucket_id << ", " << cpp_strerror(-ret) << dendl;
         } else {
-         reshard_lock.unlock();
           ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ <<
             ": apparently successfully cleared resharding flags for "
             "bucket " << bucket_id << dendl;
@@ -6978,6 +7042,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, const RGWBuck
                       cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
                       auto& ref = bs->bucket_obj.get_ref();
                       librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                       cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                       cls_rgw_bucket_link_olh(op, key, olh_state.olh_tag,
                                                delete_marker, op_tag, meta, olh_epoch,
@@ -7026,6 +7091,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, const
                     [&](BucketShard *bs) -> int {
                       auto& ref = bs->bucket_obj.get_ref();
                       librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                       cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                       cls_rgw_bucket_unlink_instance(op, key, op_tag,
                                                      olh_tag, olh_epoch, svc.zone->get_zone().log_data, zones_trace);
@@ -7171,6 +7237,7 @@ int RGWRados::bucket_index_trim_olh_log(const DoutPrefixProvider *dpp, const RGW
    ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                       [&](BucketShard *pbs) -> int {
                         ObjectWriteOperation op;
+                       op.assert_exists(); // bucket index shard must exist
                         cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                         cls_rgw_trim_olh_log(op, key, ver, olh_tag);
                          return pbs->bucket_obj.operate(dpp, &op, null_yield);
@@ -7200,6 +7267,7 @@ int RGWRados::bucket_index_clear_olh(const DoutPrefixProvider *dpp, const RGWBuc
    int ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                           [&](BucketShard *pbs) -> int {
                             ObjectWriteOperation op;
+                           op.assert_exists(); // bucket index shard must exist
                             auto& ref = pbs->bucket_obj.get_ref();
                             cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                             cls_rgw_clear_olh(op, key, olh_tag);
@@ -8380,6 +8448,8 @@ int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs,
    zones_trace.insert(svc.zone->get_zone().id, bs.bucket.get_key());
  
    ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
    cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
    cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
    cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), svc.zone->get_zone().log_data, bilog_flags, zones_trace);
@@ -8392,6 +8462,8 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify
                                   list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *_zones_trace)
  {
    ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
    rgw_bucket_dir_entry_meta dir_meta;
    dir_meta = ent.meta;
    dir_meta.category = category;
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h

index 7ec229fee18d91afa77e07c08f31a1b5be4b8ae4..16f209b891002aaba2560a7f7c19c159b75789e2 100644 (file)
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1268,6 +1268,10 @@ public:
    int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
    int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
  
+  int fetch_new_bucket_id(const RGWBucketInfo& curr_bucket_info,
+                         RGWBucketInfo* save_bucket_info,
+                         std::string& new_bucket_id,
+                         const DoutPrefixProvider* dpp);
    int guard_reshard(const DoutPrefixProvider *dpp,
                      BucketShard *bs,
                     const rgw_obj& obj_instance,
diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc

index 532dad7b47fd19c72875e877b43ed6ee3d39329d..66977374559afde13466b9ca6dcc514753c1c2f1 100644 (file)
--- a/src/rgw/rgw_reshard.cc
+++ b/src/rgw/rgw_reshard.cc
@@ -290,7 +290,8 @@ int RGWBucketReshard::clear_resharding(const DoutPrefixProvider *dpp,
                                         rgw::sal::RadosStore* store,
                                        const RGWBucketInfo& bucket_info)
  {
-  int ret = clear_index_shard_reshard_status(dpp, store, bucket_info);
+  int ret;
+  ret = clear_index_shard_reshard_status(dpp, store, bucket_info);
    if (ret < 0) {
      ldpp_dout(dpp, 0) << "RGWBucketReshard::" << __func__ <<
        " ERROR: error clearing reshard status from index shard " <<
@@ -298,7 +299,9 @@ int RGWBucketReshard::clear_resharding(const DoutPrefixProvider *dpp,
      return ret;
    }
  
+  // default constructed = NOT_RESHARDING
    cls_rgw_bucket_instance_entry instance_entry;
+
    ret = store->getRados()->bucket_set_reshard(dpp, bucket_info, instance_entry);
    if (ret < 0) {
      ldpp_dout(dpp, 0) << "RGWReshard::" << __func__ <<
author	J. Eric Ivancich <ivancich@redhat.com>
	Fri, 28 Oct 2022 19:14:51 +0000 (15:14 -0400)
committer	J. Eric Ivancich <ivancich@redhat.com>
	Thu, 9 Feb 2023 19:36:07 +0000 (14:36 -0500)
src/cls/rgw/cls_rgw_client.cc		patch \| blob \| history
src/cls/rgw/cls_rgw_types.cc		patch \| blob \| history
src/cls/rgw/cls_rgw_types.h		patch \| blob \| history
src/rgw/rgw_rados.cc		patch \| blob \| history
src/rgw/rgw_rados.h		patch \| blob \| history
src/rgw/rgw_reshard.cc		patch \| blob \| history