]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: address bug where object puts could write to decommissioned shard 48899/head
authorJ. Eric Ivancich <ivancich@redhat.com>
Fri, 28 Oct 2022 19:14:51 +0000 (15:14 -0400)
committerJ. Eric Ivancich <ivancich@redhat.com>
Mon, 28 Nov 2022 21:31:59 +0000 (16:31 -0500)
Addresses an apparent bug where some object writes that initiate
around the time that resharding completes are written to the old
bucket index shard rather than the new one.

Other write operations to a bucket index shard now also assert their
existance so as not to re-create old bucket index shards.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
src/cls/rgw/cls_rgw_client.cc
src/cls/rgw/cls_rgw_types.cc
src/cls/rgw/cls_rgw_types.h
src/rgw/rgw_rados.cc

index 72195378b91cc26f4d6c65995a7f8cd2f1045b80..4b544b3b20af856a3edc6a4aeed99a273e74c0a8 100644 (file)
@@ -1196,6 +1196,7 @@ static bool issue_set_bucket_resharding(librados::IoCtx& io_ctx,
   call.entry = entry;
   encode(call, in);
   librados::ObjectWriteOperation op;
+  op.assert_exists(); // the shard must exist; if not fail rather than recreate
   op.exec(RGW_CLASS, RGW_SET_BUCKET_RESHARDING, in);
   return manager->aio_operate(io_ctx, shard_id, oid, &op);
 }
index 4b7d533d1ff5788a2633b50bc183b6a19c53fce3..14e0d988466af9455071bf4a9767357d5b57ef81 100644 (file)
@@ -858,3 +858,21 @@ void cls_rgw_lc_obj_head::dump(Formatter *f) const
 void cls_rgw_lc_obj_head::generate_test_instances(list<cls_rgw_lc_obj_head*>& ls)
 {
 }
+
+std::ostream& operator<<(std::ostream& out, cls_rgw_reshard_status status) {
+  switch (status) {
+  case cls_rgw_reshard_status::NOT_RESHARDING:
+    out << "NOT_RESHARDING";
+    break;
+  case cls_rgw_reshard_status::IN_PROGRESS:
+    out << "IN_PROGRESS";
+    break;
+  case cls_rgw_reshard_status::DONE:
+    out << "DONE";
+    break;
+  default:
+    out << "UNKNOWN_STATUS";
+  }
+
+  return out;
+}
index fdf4b9427a94da788fb7fff49627e470fa3c608c..ae1d6c3752ef0add908004e3a18610229f4a0505 100644 (file)
@@ -786,6 +786,7 @@ enum class cls_rgw_reshard_status : uint8_t {
   IN_PROGRESS     = 1,
   DONE            = 2
 };
+std::ostream& operator<<(std::ostream&, cls_rgw_reshard_status);
 
 inline std::string to_string(const cls_rgw_reshard_status status)
 {
@@ -845,9 +846,15 @@ struct cls_rgw_bucket_instance_entry {
   bool resharding() const {
     return reshard_status != RESHARD_STATUS::NOT_RESHARDING;
   }
+
   bool resharding_in_progress() const {
     return reshard_status == RESHARD_STATUS::IN_PROGRESS;
   }
+
+  friend std::ostream& operator<<(std::ostream& out, const cls_rgw_bucket_instance_entry& v) {
+    out << "instance entry reshard status: " << v.reshard_status;
+    return out;
+  }
 };
 WRITE_CLASS_ENCODER(cls_rgw_bucket_instance_entry)
 
index 63992e9bc1ee400f0442028d92e38d554f123c09..03950e271581f7820cd1cc8ba1eaf2ac8c054afb 100644 (file)
@@ -934,6 +934,7 @@ void RGWIndexCompletionManager::process()
                                 "BACKTRACE: " << __func__ << ": " << ClibBackTrace(1) << dendl_bitx;
 
                               librados::ObjectWriteOperation o;
+                              o.assert_exists();
                               cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
                               cls_rgw_bucket_complete_op(o, c->op, c->tag, c->ver, c->key, c->dir_meta, &c->remove_objs,
                                                          c->log_op, c->bilog_op, &c->zones_trace);
@@ -5076,10 +5077,19 @@ int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketI
 
   int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr);
   if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+      ": unable to open bucket index, r=" << r << " (" <<
+      cpp_strerror(-r) << ")" << dendl;
     return r;
   }
 
-  return CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  r = CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+      ": unable to issue set bucket resharding, r=" << r << " (" <<
+      cpp_strerror(-r) << ")" << dendl;
+  }
+  return r;
 }
 
 int RGWRados::defer_gc(const DoutPrefixProvider *dpp, RGWObjectCtx* rctx, RGWBucketInfo& bucket_info, rgw::sal::Object* obj, optional_yield y)
@@ -6219,23 +6229,29 @@ int RGWRados::Bucket::UpdateIndex::guard_reshard(const DoutPrefixProvider *dpp,
         obj_instance.key << ". ret=" << ret << dendl;
       return ret;
     }
+
     r = call(bs);
     if (r != -ERR_BUSY_RESHARDING) {
       break;
     }
-    ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking. obj=" << 
+
+    ldpp_dout(dpp, 10) <<
+      "NOTICE: resharding operation on bucket index detected, blocking. obj=" << 
       obj_instance.key << dendl;
+
     r = store->block_while_resharding(bs, obj_instance, target->bucket_info, null_yield, dpp);
     if (r == -ERR_BUSY_RESHARDING) {
-      ldpp_dout(dpp, 0) << "ERROR: block_while_resharding() still busy. obj=" <<
+      ldpp_dout(dpp, 10) << __func__ <<
+       " NOTICE: block_while_resharding() still busy. obj=" <<
         obj_instance.key << dendl;
       continue;
-    }
-    if (r < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: block_while_resharding() failed. obj=" << 
-        obj_instance.key << ". ret=" << r << dendl;
+    } else if (r < 0) {
+      ldpp_dout(dpp, 0) << __func__ <<
+       " ERROR: block_while_resharding() failed. obj=" <<
+        obj_instance.key << ". ret=" << cpp_strerror(-r) << dendl;
       return r;
     }
+
     ldpp_dout(dpp, 20) << "reshard completion identified. obj=" << obj_instance.key << dendl;
     i = 0; /* resharding is finished, make sure we can retry */
     invalidate_bs();
@@ -6243,7 +6259,7 @@ int RGWRados::Bucket::UpdateIndex::guard_reshard(const DoutPrefixProvider *dpp,
 
   if (r < 0) {
     ldpp_dout(dpp, 0) << "ERROR: bucket shard callback failed. obj=" << 
-      obj_instance.key << ". ret=" << r << dendl;
+      obj_instance.key << ". ret=" << cpp_strerror(-r) << dendl;
     return r;
   }
 
@@ -6838,29 +6854,43 @@ int RGWRados::guard_reshard(const DoutPrefixProvider *dpp,
       ldpp_dout(dpp, 5) << "bs.init() returned ret=" << r << dendl;
       return r;
     }
+
     r = call(bs);
     if (r != -ERR_BUSY_RESHARDING) {
       break;
     }
-    ldpp_dout(dpp, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
+
+    ldpp_dout(dpp, 10) <<
+      "NOTICE: resharding operation on bucket index detected, blocking. obj=" <<
+      obj_instance.key << dendl;
+
     r = block_while_resharding(bs, obj_instance, bucket_info, null_yield, dpp);
     if (r == -ERR_BUSY_RESHARDING) {
+      ldpp_dout(dpp, 10) << __func__ <<
+       " NOTICE: block_while_resharding() still busy. obj=" <<
+        obj_instance.key << dendl;
       continue;
-    }
-    if (r < 0) {
+    } else if (r < 0) {
+      ldpp_dout(dpp, 0) << __func__ <<
+       " ERROR: block_while_resharding() failed. obj=" <<
+        obj_instance.key << ". ret=" << cpp_strerror(-r) << dendl;
       return r;
     }
+
     ldpp_dout(dpp, 20) << "reshard completion identified" << dendl;
     i = 0; /* resharding is finished, make sure we can retry */
   } // for loop
 
   if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: bucket shard callback failed. obj=" << 
+      obj_instance.key << ". ret=" << cpp_strerror(-r) << dendl;
     return r;
   }
 
   return 0;
 }
 
+
 int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
                                      const rgw_obj& obj_instance,
                                      RGWBucketInfo& bucket_info,
@@ -6881,34 +6911,43 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
   // error code
   auto fetch_new_bucket_info =
     [this, bs, &obj_instance, &bucket_info, &bucket_attrs, &y, dpp](const std::string& log_tag) -> int {
-      int ret = get_bucket_info(&svc, bs->bucket.tenant, bs->bucket.name,
-           bucket_info, nullptr, y, dpp, &bucket_attrs);
-      if (ret < 0) {
-       ldpp_dout(dpp, 0) << __func__ <<
-         " ERROR: failed to refresh bucket info after reshard at " <<
-         log_tag << ": " << cpp_strerror(-ret) << dendl;
-       return ret;
-      }
-      ret = bs->init(dpp, bucket_info, obj_instance);
-      if (ret < 0) {
-       ldpp_dout(dpp, 0) << __func__ <<
-         " ERROR: failed to refresh bucket shard generation after reshard at " <<
-         log_tag << ": " << cpp_strerror(-ret) << dendl;
-       return ret;
-      }
-      const auto gen = bucket_info.layout.logs.empty() ? -1 : bucket_info.layout.logs.back().gen;
-      ldpp_dout(dpp, 20) << __func__ << 
-        " INFO: refreshed bucket info after reshard at " <<
-       log_tag << ". new shard_id=" << bs->shard_id << ". gen=" << gen << dendl;
-      return 0;
-    };
+    int ret = get_bucket_info(&svc, bs->bucket.tenant, bs->bucket.name,
+                             bucket_info, nullptr, y, dpp, &bucket_attrs);
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << __func__ <<
+       " ERROR: failed to refresh bucket info after reshard at " <<
+       log_tag << ": " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+
+    ret = bs->init(dpp, bucket_info, obj_instance);
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << __func__ <<
+       " ERROR: failed to refresh bucket shard generation after reshard at " <<
+       log_tag << ": " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+
+    const auto gen = bucket_info.layout.logs.empty() ? -1 : bucket_info.layout.logs.back().gen;
+    ldpp_dout(dpp, 20) << __func__ <<
+      " INFO: refreshed bucket info after reshard at " <<
+      log_tag << ". new shard_id=" << bs->shard_id << ". gen=" << gen << dendl;
+
+    return 0;
+  }; // lambda fetch_new_bucket_info
 
   constexpr int num_retries = 10;
   for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop
     auto& ref = bs->bucket_obj.get_ref();
     ret = cls_rgw_get_bucket_resharding(ref.pool.ioctx(), ref.obj.oid, &entry);
     if (ret == -ENOENT) {
-      return fetch_new_bucket_info("get_bucket_resharding_failed");
+      ret = fetch_new_bucket_info("get_bucket_resharding_failed");
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding failed, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
     } else if (ret < 0) {
       ldpp_dout(dpp, 0) << __func__ <<
        " ERROR: failed to get bucket resharding : " << cpp_strerror(-ret) <<
@@ -6917,7 +6956,13 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
     }
 
     if (!entry.resharding_in_progress()) {
-      return fetch_new_bucket_info("get_bucket_resharding_succeeded");
+      ret = fetch_new_bucket_info("get_bucket_resharding_succeeded");
+      if (ret < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+         " failed to refresh bucket info after reshard when get bucket "
+         "resharding succeeded, error: " << cpp_strerror(-ret) << dendl;
+       return ret;
+      }
     }
 
     ldpp_dout(dpp, 20) << __func__ << " NOTICE: reshard still in progress; " <<
@@ -6940,7 +6985,9 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
       std::string bucket_id = b.get_key();
       RGWBucketReshardLock reshard_lock(this->store, bucket_info, true);
       ret = reshard_lock.lock(dpp);
-      if (ret < 0) {
+      if (ret == -ENOENT) {
+       continue;
+      } else if (ret < 0) {
        ldpp_dout(dpp, 20) << __func__ <<
          " ERROR: failed to take reshard lock for bucket " <<
          bucket_id << "; expected if resharding underway" << dendl;
@@ -6959,14 +7006,21 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
            bucket_id << dendl;
           continue; // try again
         }
+
        ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, bucket_attrs, dpp);
-       if (ret < 0) {
-         reshard_lock.unlock();
+       reshard_lock.unlock();
+       if (ret == -ENOENT) {
+         ldpp_dout(dpp, 5) << __func__ <<
+           " INFO: no need to reset reshard flags; old shards apparently"
+           " removed after successful resharding of bucket " <<
+           bucket_id << dendl;
+         continue; // immediately test again
+       } else if (ret < 0) {
          ldpp_dout(dpp, 0) << __func__ <<
            " ERROR: failed to clear resharding flags for bucket " <<
-           bucket_id << dendl;
+           bucket_id << ", " << cpp_strerror(-ret) << dendl;
+         // wait and then test again
        } else {
-         reshard_lock.unlock();
          ldpp_dout(dpp, 5) << __func__ <<
            " INFO: apparently successfully cleared resharding flags for "
            "bucket " << bucket_id << dendl;
@@ -7015,6 +7069,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo
                      cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
                      auto& ref = bs->bucket_obj.get_ref();
                      librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                      cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                      cls_rgw_bucket_link_olh(op, key, olh_state.olh_tag,
                                               delete_marker, op_tag, meta, olh_epoch,
@@ -7063,6 +7118,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp,
                    [&](BucketShard *bs) -> int {
                      auto& ref = bs->bucket_obj.get_ref();
                      librados::ObjectWriteOperation op;
+                     op.assert_exists(); // bucket index shard must exist
                      cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                      cls_rgw_bucket_unlink_instance(op, key, op_tag,
                                                     olh_tag, olh_epoch, svc.zone->get_zone().log_data, zones_trace);
@@ -7211,6 +7267,7 @@ int RGWRados::bucket_index_trim_olh_log(const DoutPrefixProvider *dpp,
   ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                      [&](BucketShard *pbs) -> int {
                        ObjectWriteOperation op;
+                       op.assert_exists(); // bucket index shard must exist
                        cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                        cls_rgw_trim_olh_log(op, key, ver, olh_tag);
                         return pbs->bucket_obj.operate(dpp, &op, null_yield);
@@ -7243,6 +7300,7 @@ int RGWRados::bucket_index_clear_olh(const DoutPrefixProvider *dpp,
   int ret = guard_reshard(dpp, &bs, obj_instance, bucket_info,
                          [&](BucketShard *pbs) -> int {
                            ObjectWriteOperation op;
+                           op.assert_exists(); // bucket index shard must exist
                            auto& ref = pbs->bucket_obj.get_ref();
                            cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
                            cls_rgw_clear_olh(op, key, olh_tag);
@@ -8432,6 +8490,8 @@ int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs,
   zones_trace.insert(svc.zone->get_zone().id, bs.bucket.get_key());
 
   ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
   cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
   cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
   cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), svc.zone->get_zone().log_data, bilog_flags, zones_trace);
@@ -8452,6 +8512,8 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify
   ldout_bitx_c(bitx, cct, 25) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl_bitx;
 
   ObjectWriteOperation o;
+  o.assert_exists(); // bucket index shard must exist
+
   rgw_bucket_dir_entry_meta dir_meta;
   dir_meta = ent.meta;
   dir_meta.category = category;