From: Casey Bodley Date: Fri, 25 Mar 2022 21:14:05 +0000 (-0400) Subject: rgw multisite: resharding scales up shard counts 4x faster X-Git-Tag: v18.0.0~787^2~26 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f3d5a52a81bb0115fe440209b08772a70f1c1b81;p=ceph.git rgw multisite: resharding scales up shard counts 4x faster in multisite reshard, we need to keep the old index shards around until other zones finish syncing from them. we don't want to allow a bunch of reshards in a row, because we have to duplicate that many sets of index objects. so we impose a limit of 4 bilog generations (or 3 reshards), and refuse to reshard again until bilog trimming catches up and trims the oldest generation. under a sustained write workload, a bucket can fill quickly and need successive reshards. if we have a limit of 3, we should make them count! so rather than doubling the shard count at each step, multiply by 8 when we're in a multisite configuration Signed-off-by: Casey Bodley --- diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc index 2d2cf13f2530..74c2d5ea8c33 100644 --- a/src/rgw/rgw_quota.cc +++ b/src/rgw/rgw_quota.cc @@ -962,15 +962,23 @@ public: user_stats_cache.adjust_stats(user, bucket, obj_delta, added_bytes, removed_bytes); } - void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, uint64_t num_shards, - uint64_t num_objs, bool& need_resharding, uint32_t *suggested_num_shards) override + void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, + uint64_t num_shards, uint64_t num_objs, bool is_multisite, + bool& need_resharding, uint32_t *suggested_num_shards) override { if (num_objs > num_shards * max_objs_per_shard) { ldpp_dout(dpp, 0) << __func__ << ": resharding needed: stats.num_objects=" << num_objs << " shard max_objects=" << max_objs_per_shard * num_shards << dendl; need_resharding = true; if (suggested_num_shards) { - *suggested_num_shards = num_objs * 2 / 
max_objs_per_shard; + uint32_t obj_multiplier = 2; + if (is_multisite) { + // if we're maintaining bilogs for multisite, reshards are significantly + // more expensive. scale up the shard count much faster to minimize the + // number of reshard events during a write workload + obj_multiplier = 8; + } + *suggested_num_shards = num_objs * obj_multiplier / max_objs_per_shard; } } else { need_resharding = false; diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h index c29bac936d07..2a63dab49053 100644 --- a/src/rgw/rgw_quota.h +++ b/src/rgw/rgw_quota.h @@ -107,8 +107,9 @@ public: RGWQuota& quota, uint64_t num_objs, uint64_t size, optional_yield y) = 0; - virtual void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, uint64_t num_shards, - uint64_t num_objs, bool& need_resharding, uint32_t *suggested_num_shards) = 0; + virtual void check_bucket_shards(const DoutPrefixProvider *dpp, uint64_t max_objs_per_shard, + uint64_t num_shards, uint64_t num_objs, bool is_multisite, + bool& need_resharding, uint32_t *suggested_num_shards) = 0; virtual void update_stats(const rgw_user& bucket_owner, rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) = 0; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 9189bec7c22b..a77adf72e3ba 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -9408,8 +9408,12 @@ int RGWRados::check_bucket_shards(const RGWBucketInfo& bucket_info, const uint64_t max_objs_per_shard = cct->_conf.get_val("rgw_max_objs_per_shard"); + // TODO: consider per-bucket sync policy here? + const bool is_multisite = svc.zone->get_zone().log_data; + quota_handler->check_bucket_shards(dpp, max_objs_per_shard, num_source_shards, - num_objs, need_resharding, &suggested_num_shards); + num_objs, is_multisite, need_resharding, + &suggested_num_shards); if (! need_resharding) { return 0; }