From 15c687f8b5fe8f7844f701276430dab25f876a7a Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Fri, 25 Mar 2022 17:14:05 -0400 Subject: [PATCH] rgw multisite: resharding scales up shard counts 4x faster in multisite reshard, we need to keep the old index shards around until other zones finish syncing from them. we don't want to allow a bunch of reshards in a row, because we have to duplicate that many sets of index objects. so we impose a limit of 4 bilog generations (or 3 reshards), and refuse to reshard again until bilog trimming catches up and trims the oldest generation. under a sustained write workload, a bucket can fill quickly and need successive reshards. if we have a limit of 3, we should make them count! so instead of doubling the shard count at each step, multiply by 8 when we're in a multisite configuration Signed-off-by: Casey Bodley --- src/rgw/rgw_quota.cc | 14 +++++++++++--- src/rgw/rgw_quota.h | 5 +++-- src/rgw/rgw_rados.cc | 6 +++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc index 1bc3bc099ae65..fe4c021af34f2 100644 --- a/src/rgw/rgw_quota.cc +++ b/src/rgw/rgw_quota.cc @@ -963,15 +963,23 @@ public: user_stats_cache.adjust_stats(user, bucket, obj_delta, added_bytes, removed_bytes); } - void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, uint64_t num_shards, - uint64_t num_objs, bool& need_resharding, uint32_t *suggested_num_shards) override + void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, + uint64_t num_shards, uint64_t num_objs, bool is_multisite, + bool& need_resharding, uint32_t *suggested_num_shards) override { if (num_objs > num_shards * max_objs_per_shard) { ldpp_dout(dpp, 0) << __func__ << ": resharding needed: stats.num_objects=" << num_objs << " shard max_objects=" << max_objs_per_shard * num_shards << dendl; need_resharding = true; if (suggested_num_shards) { - *suggested_num_shards = num_objs * 2 
/ max_objs_per_shard; + uint32_t obj_multiplier = 2; + if (is_multisite) { + // if we're maintaining bilogs for multisite, reshards are significantly + // more expensive. scale up the shard count much faster to minimize the + // number of reshard events during a write workload + obj_multiplier = 8; + } + *suggested_num_shards = num_objs * obj_multiplier / max_objs_per_shard; } } else { need_resharding = false; diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h index 61bd25261aa92..92f6f03520dc7 100644 --- a/src/rgw/rgw_quota.h +++ b/src/rgw/rgw_quota.h @@ -100,8 +100,9 @@ public: RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t num_objs, uint64_t size, optional_yield y) = 0; - virtual void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, uint64_t num_shards, - uint64_t num_objs, bool& need_resharding, uint32_t *suggested_num_shards) = 0; + virtual void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, + uint64_t num_shards, uint64_t num_objs, bool is_multisite, + bool& need_resharding, uint32_t *suggested_num_shards) = 0; virtual void update_stats(const rgw_user& bucket_owner, rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) = 0; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 259f44ff5a2af..d3a2ad3b951fe 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -9377,8 +9377,12 @@ int RGWRados::check_bucket_shards(const RGWBucketInfo& bucket_info, const uint64_t max_objs_per_shard = cct->_conf.get_val("rgw_max_objs_per_shard"); + // TODO: consider per-bucket sync policy here? + const bool is_multisite = svc.zone->get_zone().log_data; + quota_handler->check_bucket_shards(dpp, max_objs_per_shard, num_source_shards, - num_objs, need_resharding, &suggested_num_shards); + num_objs, is_multisite, need_resharding, + &suggested_num_shards); if (! need_resharding) { return 0; } -- 2.39.5