rgw: prevent reshard from creating too many log generations

author Casey Bodley <cbodley@redhat.com>

Tue, 28 Sep 2021 14:45:27 +0000 (10:45 -0400)

committer Adam C. Emerson <aemerson@redhat.com>

Tue, 1 Feb 2022 22:50:16 +0000 (17:50 -0500)
author Casey Bodley <cbodley@redhat.com>
Tue, 28 Sep 2021 14:45:27 +0000 (10:45 -0400)
committer Adam C. Emerson <aemerson@redhat.com>
Tue, 1 Feb 2022 22:50:16 +0000 (17:50 -0500)
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc

index 4994f67d143d748e20871ad5165e2eac791aa114..07812f43f20ca53ea702930a0ffad82e12c571e8 100644 (file)
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -7673,6 +7673,17 @@ next:
        return ret;
      }
  
+    auto zone_svc = static_cast<rgw::sal::RadosStore*>(store)->svc()->zone;
+    if (!RGWBucketReshard::can_reshard(bucket->get_info(), zone_svc) &&
+        !yes_i_really_mean_it) {
+      std::cerr << "Bucket '" << bucket->get_name() << "' already has too many "
+          "log generations (" << bucket->get_info().layout.logs.size() << ") "
+          "from previous reshards that peer zones haven't finished syncing. "
+          "Resharding is not recommended until the old generations sync, but "
+          "you can force a reshard with --yes-i-really-mean-it." << std::endl;
+      return -EINVAL;
+    }
+
      RGWBucketReshard br(static_cast<rgw::sal::RadosStore*>(store), bucket->get_info(), nullptr /* no callback */);
  
  #define DEFAULT_RESHARD_MAX_ENTRIES 1000
diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc

index 672b4b9c8e2c15b425c25e68e9dfc88c53cff185..d2de54271a0fe87b351a9a97e0fa7085ae92d42f 100644 (file)
--- a/src/rgw/rgw_reshard.cc
+++ b/src/rgw/rgw_reshard.cc
@@ -816,6 +816,13 @@ int RGWBucketReshard::execute(int num_shards,
    return 0;
  } // execute
  
+bool RGWBucketReshard::can_reshard(const RGWBucketInfo& bucket, // bucket whose layout.logs history is inspected
+                                   const RGWSI_Zone* zone_svc) // dereferenced unconditionally below, so it must be non-null
+{ // returns true when a reshard of this bucket is allowed to proceed
+  return !zone_svc->need_to_log_data() || // the limit is skipped when the zone service reports no need to log data
+      bucket.layout.logs.size() < max_bilog_history; // otherwise the number of log generations must still be below the cap
+}
+
  
  RGWReshard::RGWReshard(rgw::sal::RadosStore* _store, bool _verbose, ostream *_out,
                         Formatter *_formatter) :
@@ -1052,6 +1059,13 @@ int RGWReshard::process_entry(const cls_rgw_reshard_entry& entry,
      return 0;
    }
  
+  if (!RGWBucketReshard::can_reshard(bucket_info, store->svc()->zone)) {
+    ldpp_dout(dpp, 1) << "Bucket " << bucket_info.bucket << " is not "
+        "eligible for resharding until peer zones finish syncing one "
+        "or more of its old log generations" << dendl;
+    return remove(dpp, entry);
+  }
+
    RGWBucketReshard br(store, bucket_info, nullptr);
  
    ReshardFaultInjector f; // no fault injected
diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h

index a8613d66467a37b267d68a5a79d7b7c5651d7a3a..09c89bb973006099c3362f7bda96c2b1c3934f6e 100644 (file)
--- a/src/rgw/rgw_reshard.h
+++ b/src/rgw/rgw_reshard.h
@@ -163,6 +163,14 @@ public:
  
      return final_num_shards;
    }
+
+  // for multisite, the RGWBucketInfo keeps a history of old log generations
+  // until all peers are done with them. prevent this log history from growing
+  // too large by refusing to reshard the bucket until the old logs get trimmed
+  static constexpr size_t max_bilog_history = 4; // can_reshard() requires layout.logs.size() to be strictly below this
+
+  static bool can_reshard(const RGWBucketInfo& bucket, // true if zone_svc does not need to log data, or the bucket's log history is under max_bilog_history
+                          const RGWSI_Zone* zone_svc); // zone service queried via need_to_log_data(); must be non-null
  }; // RGWBucketReshard
author	Casey Bodley <cbodley@redhat.com>
	Tue, 28 Sep 2021 14:45:27 +0000 (10:45 -0400)
committer	Adam C. Emerson <aemerson@redhat.com>
	Tue, 1 Feb 2022 22:50:16 +0000 (17:50 -0500)
src/rgw/rgw_admin.cc		patch | blob | history
src/rgw/rgw_reshard.cc		patch | blob | history
src/rgw/rgw_reshard.h		patch | blob | history