if (shards) {
layout.current_index.layout.normal.num_shards = *shards;
+ layout.current_index.layout.normal.min_num_shards = *shards;
} else if (cct->_conf->rgw_override_bucket_index_max_shards > 0) {
layout.current_index.layout.normal.num_shards =
cct->_conf->rgw_override_bucket_index_max_shards;
void RGWRados::calculate_preferred_shards(const DoutPrefixProvider* dpp,
const uint64_t num_objs,
const uint32_t num_source_shards,
+ const uint32_t min_layout_shards,
bool& need_resharding,
uint32_t* suggested_num_shards)
{
RGWBucketReshard::calculate_preferred_shards(dpp,
max_dynamic_shards,
+ min_layout_shards,
max_objs_per_shard,
is_multisite,
num_objs,
uint32_t suggested_num_shards = 0;
const uint32_t num_source_shards =
rgw::current_num_shards(bucket_info.layout);
+ const uint32_t min_layout_shards =
+ rgw::current_min_layout_shards(bucket_info.layout);
- calculate_preferred_shards(dpp, num_objs, num_source_shards,
+ calculate_preferred_shards(dpp, num_objs,
+ num_source_shards, min_layout_shards,
need_resharding, &suggested_num_shards);
if (! need_resharding) {
return 0;
void calculate_preferred_shards(const DoutPrefixProvider* dpp,
const uint64_t num_objs,
const uint32_t current_shard_count,
+ const uint32_t min_layout_shards,
bool& need_resharding,
uint32_t* suggested_num_shard_count = nullptr);
};
-uint32_t RGWBucketReshard::get_prime_shard_count(
- uint32_t shard_count,
- uint32_t max_dynamic_shards,
- uint32_t min_dynamic_shards)
-{
+// Rounds shard_count up to a prime via get_prime_shards_greater_or_equal().
+// Despite the name, the result is never smaller than shard_count. No
+// min/max clamping happens here; callers apply their own bounds.
+uint32_t RGWBucketReshard::nearest_prime(uint32_t shard_count) {
uint32_t prime_shard_count =
get_prime_shards_greater_or_equal(shard_count);
// if no prime greater than or equal to shard_count is available (a
// zero result from the lookup), just use what was passed in
- if (! prime_shard_count) {
- prime_shard_count = shard_count;
- }
-
- // keep within min/max bounds
- return std::min(max_dynamic_shards,
- std::max(min_dynamic_shards, prime_shard_count));
+ return prime_shard_count ? prime_shard_count : shard_count;
}
void RGWBucketReshard::calculate_preferred_shards(
const DoutPrefixProvider* dpp,
const uint32_t max_dynamic_shards,
+ const uint32_t min_layout_shards,
const uint64_t max_objs_per_shard,
const bool is_multisite,
const uint64_t num_objs,
}
if (prefer_prime) {
- calculated_num_shards = get_prime_shard_count(
- calculated_num_shards, max_dynamic_shards, min_dynamic_shards);
+ calculated_num_shards = nearest_prime(calculated_num_shards);
}
+ calculated_num_shards =
+ std::min(max_dynamic_shards,
+ std::max({ calculated_num_shards, min_dynamic_shards, min_layout_shards }));
+
ldpp_dout(dpp, 20) << __func__ << ": reshard " << verb <<
" suggested; current average (objects/shard) is " <<
float(num_objs) / current_num_shards << ", which is not within " <<
rgw::bucket_index_layout_generation target;
target.layout.type = rgw::BucketIndexType::Normal;
target.layout.normal.num_shards = new_num_shards;
+ target.layout.normal.min_num_shards = current.layout.normal.min_num_shards;
target.gen = current.gen + 1;
if (bucket_info.reshard_status == cls_rgw_reshard_status::IN_PROGRESS) {
// block the client op and complete the resharding
ceph_assert(bucket_info.layout.resharding == rgw::BucketReshardState::InProgress);
ret = reshard_process(current, max_op_entries, target_shards_mgr, verbose_json_out, out,
- formatter, bucket_info.layout.resharding, dpp, y);
+ formatter, bucket_info.layout.resharding, dpp, y);
if (ret < 0) {
ldpp_dout(dpp, 0) << __func__ << ": failed in progress state of reshard ret = " << ret << dendl;
return ret;
ret = store->getRados()->get_bucket_stats(dpp, bucket_info,
bucket_info.layout.current_index,
-1, nullptr, nullptr, stats, nullptr, nullptr);
+ if (ret < 0) {
+ return clean_up("unable to access buckets current stats");
+ }
// determine current number of bucket entries across shards
uint64_t num_entries = 0;
}
const uint32_t current_shard_count =
- rgw::num_shards(bucket_info.get_current_index().layout.normal);
+ rgw::current_num_shards(bucket_info.layout);
+ const uint32_t min_layout_shards =
+ rgw::current_min_layout_shards(bucket_info.layout);
bool needs_resharding { false };
uint32_t suggested_shard_count { 0 };
// needed to perform the calculation before calling
// calculating_preferred_shards() in this class
store->getRados()->calculate_preferred_shards(
- dpp, num_entries, current_shard_count,
+ dpp, num_entries, current_shard_count, min_layout_shards,
needs_resharding, &suggested_shard_count);
// if we no longer need resharding or currently need to expand
}
// all checks passed; we can reshard...
-
RGWBucketReshard br(store, bucket_info, bucket_attrs, nullptr);
ReshardFaultInjector f; // no fault injected
}
}
- // returns a preferred number of shards given a calculated number of
- // shards based on max_dynamic_shards and the list of prime values
- static uint32_t get_prime_shard_count(uint32_t suggested_shards,
- uint32_t max_dynamic_shards,
- uint32_t min_dynamic_shards);
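+ // returns the smallest prime from the prime list that is greater
+ // than or equal to suggested_shards, or suggested_shards itself
+ // when no such prime is available; no min/max bounds are applied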
+ static uint32_t nearest_prime(uint32_t suggested_shards);
static void calculate_preferred_shards(const DoutPrefixProvider* dpp,
const uint32_t max_dynamic_shards,
+ const uint32_t min_layout_shards,
const uint64_t max_objs_per_shard,
const bool is_multisite,
const uint64_t num_objs,
// bucket_index_normal_layout
void encode(const bucket_index_normal_layout& l, bufferlist& bl, uint64_t f)
{
- ENCODE_START(1, 1, bl);
+ // struct version bumped to 2 because min_num_shards is now appended
+ // after hash_type; the second ENCODE_START argument is unchanged
+ ENCODE_START(2, 1, bl);
encode(l.num_shards, bl);
encode(l.hash_type, bl);
+ encode(l.min_num_shards, bl);
ENCODE_FINISH(bl);
}
void decode(bucket_index_normal_layout& l, bufferlist::const_iterator& bl)
{
- DECODE_START(1, bl);
+ DECODE_START(2, bl);
decode(l.num_shards, bl);
decode(l.hash_type, bl);
+ // min_num_shards is only present from struct version 2 onward; for
+ // older encodings l.min_num_shards is not assigned here and keeps
+ // whatever value it already held
+ if (struct_v >= 2) {
+ decode(l.min_num_shards, bl);
+ }
DECODE_FINISH(bl);
}
void encode_json_impl(const char *name, const bucket_index_normal_layout& l, ceph::Formatter *f)
f->open_object_section(name);
encode_json("num_shards", l.num_shards, f);
encode_json("hash_type", l.hash_type, f);
+ encode_json("min_num_shards", l.min_num_shards, f);
f->close_section();
}
+// Populates a bucket_index_normal_layout from its JSON representation.
void decode_json_obj(bucket_index_normal_layout& l, JSONObj *obj)
{
JSONDecoder::decode_json("num_shards", l.num_shards, obj);
JSONDecoder::decode_json("hash_type", l.hash_type, obj);
+
+ // min_num_shards may be absent from the JSON; fall back to 1 in that
+ // case. The default value must be passed *before* obj: an integer
+ // placed after obj is taken as the bool `mandatory` flag, which
+ // makes the field required instead of defaulted.
+ // NOTE(review): relies on JSONDecoder's (name, val, default, obj)
+ // overload -- confirm against ceph_json.h.
+ JSONDecoder::decode_json("min_num_shards", l.min_num_shards, uint32_t{1}, obj);
}
// bucket_index_layout
struct bucket_index_normal_layout {
uint32_t num_shards = 1;
+ // the fewest number of shards this bucket layout allows; the
+ // preferred-shard calculation raises its suggestion to at least
+ // this value, unless the max-dynamic-shards cap is lower
+ uint32_t min_num_shards = 1;
+
BucketHashType hash_type = BucketHashType::Mod;
- friend std::ostream& operator<<(std::ostream& out, const bucket_index_normal_layout& l) {
- out << "num_shards=" << l.num_shards << ", hash_type=" << to_string(l.hash_type);
+ // streams all three fields as comma-separated "name=value" pairs
+ friend std::ostream& operator<<(std::ostream& out,
+ const bucket_index_normal_layout& l) {
+ out << "num_shards=" << l.num_shards << ", min_num_shards=" <<
+ l.min_num_shards << ", hash_type=" << to_string(l.hash_type);
return out;
}
};
+// Shard count of one index generation; forwards to the num_shards
+// overload that accepts index.layout.
inline uint32_t num_shards(const bucket_index_layout_generation& index) {
return num_shards(index.layout);
}
+
+// Shard count of the bucket layout's current index generation.
inline uint32_t current_num_shards(const BucketLayout& layout) {
return num_shards(layout.current_index);
}
+// Minimum shard count recorded in the normal layout of the bucket's
+// current index generation.
+// NOTE(review): reads layout.normal without checking the layout type;
+// confirm the value is meaningful for indexless buckets.
+inline uint32_t current_min_layout_shards(const BucketLayout& layout) {
+ return layout.current_index.layout.normal.min_num_shards;
+}
+// True when the given index generation's layout type is Indexless.
inline bool is_layout_indexless(const bucket_index_layout_generation& layout) {
return layout.layout.type == BucketIndexType::Indexless;
}
#include "rgw_cksum.h"
+
+// this represents the at-rest bucket instance object and is stored as
+// a system object
struct RGWBucketInfo {
rgw_bucket bucket;
rgw_owner owner;
#include <gtest/gtest.h>
-TEST(TestRGWReshard, dynamic_reshard_shard_count)
+TEST(TestRGWReshard, max_prime_shards)
{
// sanity check: other expectations in this file assume the largest
// prime in the list is 1999
ASSERT_EQ(1999u, RGWBucketReshard::get_max_prime_shards()) <<
"initial list has primes up to 1999";
+}
+// Exercises the two prime lookups: the smallest prime >= n and the
+// largest prime <= n.
+TEST(TestRGWReshard, prime_lookups)
+{
ASSERT_EQ(1u, RGWBucketReshard::get_prime_shards_greater_or_equal(1)) <<
"we allow for 1 shard even though it's not prime";
ASSERT_EQ(809u, RGWBucketReshard::get_prime_shards_greater_or_equal(808)) <<
"809 is prime";
ASSERT_EQ(811u, RGWBucketReshard::get_prime_shards_less_or_equal(812)) <<
"811 is prime";
+}
+TEST(TestRGWReshard, nearest_prime)
+{
// nearest_prime rounds up to the next prime in the list and returns
// its argument unchanged once it exceeds the largest listed prime
- ASSERT_EQ(1999u, RGWBucketReshard::get_prime_shard_count(1998, 1999, 11));
- ASSERT_EQ(1999u, RGWBucketReshard::get_prime_shard_count(1999, 1999, 11));
- ASSERT_EQ(1999u, RGWBucketReshard::get_prime_shard_count(2000, 1999, 11));
-
- // tests when max dynamic shards is above end of prime list
- ASSERT_EQ(1999u, RGWBucketReshard::get_prime_shard_count(1998, 3000, 11));
- ASSERT_EQ(1999u, RGWBucketReshard::get_prime_shard_count(1999, 3000, 11));
- ASSERT_EQ(2000u, RGWBucketReshard::get_prime_shard_count(2000, 3000, 11));
- ASSERT_EQ(2001u, RGWBucketReshard::get_prime_shard_count(2001, 3000, 11));
-
- // tests when max dynamic shards is below end of prime list
- ASSERT_EQ(500u, RGWBucketReshard::get_prime_shard_count(1998, 500, 11));
- ASSERT_EQ(500u, RGWBucketReshard::get_prime_shard_count(2001, 500, 11));
-
- // tests when max dynamic shards is below end of prime list
- ASSERT_EQ(499u, RGWBucketReshard::get_prime_shard_count(498, 1999, 499));
- ASSERT_EQ(499u, RGWBucketReshard::get_prime_shard_count(499, 1999, 499));
- ASSERT_EQ(503u, RGWBucketReshard::get_prime_shard_count(500, 1999, 499));
+
+ ASSERT_EQ(239u, RGWBucketReshard::nearest_prime(238));
+ ASSERT_EQ(239u, RGWBucketReshard::nearest_prime(239));
+ ASSERT_EQ(241u, RGWBucketReshard::nearest_prime(240));
+ ASSERT_EQ(241u, RGWBucketReshard::nearest_prime(241));
+ ASSERT_EQ(251u, RGWBucketReshard::nearest_prime(242));
+
+ ASSERT_EQ(1997u, RGWBucketReshard::nearest_prime(1995));
+ ASSERT_EQ(1997u, RGWBucketReshard::nearest_prime(1996));
+ ASSERT_EQ(1997u, RGWBucketReshard::nearest_prime(1997));
+ ASSERT_EQ(1999u, RGWBucketReshard::nearest_prime(1998));
+ ASSERT_EQ(1999u, RGWBucketReshard::nearest_prime(1999));
+ ASSERT_EQ(2000u, RGWBucketReshard::nearest_prime(2000));
+}
+
+// Covers expansion, reduction and the lower bounds applied by
+// RGWBucketReshard::calculate_preferred_shards.
+TEST(TestRGWReshard, calculate_preferred_shards)
+{
+ // Positional arguments used below: dpp, max_dynamic_shards,
+ // min_layout_shards, max_objs_per_shard, is_multisite, num_objs,
+ // then (per the per-case comments) the current shard count, the two
+ // out-parameters and finally the prefer-prime flag.
+ bool needs_resharding { false };
+ uint32_t suggested_shard_count = 0;
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 101, 100000, false, 10000000, 200,
+ needs_resharding, &suggested_shard_count);
+
+ ASSERT_EQ(false, needs_resharding) << "no need to reshard when shards are half-used";
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 101, 100000, false, 20200000, 200,
+ needs_resharding, &suggested_shard_count, false);
+ ASSERT_EQ(true, needs_resharding);
+ ASSERT_EQ(404u, suggested_shard_count) << "number of shards when primes are not preferred";
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 101, 100000, false, 20200000, 200,
+ needs_resharding, &suggested_shard_count, true);
+ ASSERT_EQ(true, needs_resharding);
+ ASSERT_EQ(409u, suggested_shard_count) << "number of shards when primes are preferred";
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 101, 100000, true, 20200000, 200,
+ needs_resharding, &suggested_shard_count, true);
+ ASSERT_EQ(true, needs_resharding);
+ ASSERT_EQ(1619u, suggested_shard_count) <<
+ "number of shards under multisite with primes preferred since "
+ "multisite quadruples number of shards to reduce need to reshard";
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 3, 100000, false, 650000, 700,
+ needs_resharding, &suggested_shard_count, true);
+ // 650,000 objs across 700 shards -> <1000 objs per shard; 650000 /
+ // 50000 = 13
+ ASSERT_EQ(true, needs_resharding);
+ ASSERT_EQ(13u, suggested_shard_count) << "shard reduction without hitting min_layout_shards";
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 3, 100000, false, 350000, 400,
+ needs_resharding, &suggested_shard_count, true);
+ // 350,000 objs across 400 shards -> <1000 objs per shard; 350000 /
+ // 50000 = 7, but hard-coded minimum of 11
+ ASSERT_EQ(true, needs_resharding);
+ ASSERT_EQ(11u, suggested_shard_count) << "shard reduction and hitting hard-coded minimum of 11";
+
+ RGWBucketReshard::calculate_preferred_shards(nullptr, 1999, 51, 100000, false, 650000, 700,
+ needs_resharding, &suggested_shard_count, true);
+ // 650,000 objs across 700 shards -> <1000 objs per shard; 650000 /
+ // 50000 = 13, but bucket min of 51
+ ASSERT_EQ(true, needs_resharding);
+ ASSERT_EQ(51u, suggested_shard_count) << "shard reduction and hitting min_layout_shards";
}