.set_default(true)
.set_description("Enable stats on bucket listing in Swift"),
- Option("rgw_reshard_num_logs", Option::TYPE_INT, Option::LEVEL_DEV)
+ Option("rgw_reshard_num_logs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(16)
- .set_description(""),
+ .set_min(1)
+ .set_description("")
+ .add_service("rgw"),
- Option("rgw_reshard_bucket_lock_duration", Option::TYPE_INT, Option::LEVEL_DEV)
- .set_default(120)
- .set_description(""),
+ Option("rgw_reshard_bucket_lock_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(360)
+ .set_min(30)
+ .set_description("Number of seconds the timeout on the reshard locks (bucket reshard lock and reshard log lock) are set to. As a reshard proceeds these locks can be renewed/extended. If too short, reshards cannot complete and will fail, causing a future reshard attempt. If too long a hung or crashed reshard attempt will keep the bucket locked for an extended period, not allowing RGW to detect the failed reshard attempt and recover.")
+ .add_tag("performance")
+ .add_service("rgw"),
+
+ Option("rgw_reshard_batch_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(64)
+ .set_min(8)
+ .set_description("Number of reshard entries to batch together before sending the operations to the CLS back-end")
+ .add_tag("performance")
+ .add_service("rgw"),
+
+ Option("rgw_reshard_max_aio", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(128)
+ .set_min(16)
+ .set_description("Maximum number of outstanding asynchronous I/O operations to allow at a time during resharding")
+ .add_tag("performance")
+ .add_service("rgw"),
Option("rgw_trust_forwarded_https", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
Option("rgw_reshard_thread_interval", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(10_min)
- .set_description(""),
+ // The floor must sit below the default; otherwise the interval could only
+ // ever be raised. The value is in seconds, per the description below.
+ .set_min(10)
+ .set_description("Number of seconds between processing of reshard log entries"),
Option("rgw_cache_expiry_interval", Option::TYPE_UINT,
Option::LEVEL_ADVANCED)
// vim: ts=8 sw=2 smarttab
#include <limits>
+#include <sstream>
#include "rgw_rados.h"
#include "rgw_bucket.h"
const string bucket_instance_lock_name = "bucket_instance_lock";
-#define RESHARD_SHARD_WINDOW 64
-#define RESHARD_MAX_AIO 128
-
-
class BucketReshardShard {
RGWRados *store;
const RGWBucketInfo& bucket_info;
vector<rgw_cls_bi_entry> entries;
map<uint8_t, rgw_bucket_category_stats> stats;
deque<librados::AioCompletion *>& aio_completions;
+ uint64_t max_aio_completions;
+ uint64_t reshard_shard_batch_size;
int wait_next_completion() {
librados::AioCompletion *c = aio_completions.front();
}
int get_completion(librados::AioCompletion **c) {
- if (aio_completions.size() >= RESHARD_MAX_AIO) {
+ if (aio_completions.size() >= max_aio_completions) {
int ret = wait_next_completion();
if (ret < 0) {
return ret;
{
num_shard = (bucket_info.num_shards > 0 ? _num_shard : -1);
bs.init(bucket_info.bucket, num_shard, nullptr /* no RGWBucketInfo */);
+
+ max_aio_completions =
+ store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_max_aio");
+ reshard_shard_batch_size =
+ store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_batch_size");
}
int get_num_shard() {
target.total_size_rounded += entry_stats.total_size_rounded;
target.actual_size += entry_stats.actual_size;
}
- if (entries.size() >= RESHARD_SHARD_WINDOW) {
+ if (entries.size() >= reshard_shard_batch_size) {
int ret = flush();
if (ret < 0) {
return ret;
ephemeral(_ephemeral),
internal_lock(reshard_lock_name)
{
- const int lock_dur_secs = store->ctx()->_conf->rgw_reshard_bucket_lock_duration;
+ const int lock_dur_secs = store->ctx()->_conf.get_val<uint64_t>(
+ "rgw_reshard_bucket_lock_duration");
duration = std::chrono::seconds(lock_dur_secs);
#define COOKIE_LEN 16
ret = internal_lock.lock_exclusive(&store->reshard_pool_ctx, lock_oid);
}
if (ret < 0) { /* expired or already locked by another processor */
+ std::stringstream error_s;
+ if (-ENOENT == ret) {
+ error_s << "ENOENT (lock expired or never initially locked)";
+ } else {
+ error_s << ret << " (" << cpp_strerror(-ret) << ")";
+ }
ldout(store->ctx(), 5) << __func__ << "(): failed to renew lock on " <<
- lock_oid << " with " << cpp_strerror(-ret) << dendl;
+ lock_oid << " with error " << error_s.str() << dendl;
return ret;
}
internal_lock.set_must_renew(false);
utime_t end = ceph_clock_now();
end -= start;
- int secs = cct->_conf->rgw_reshard_thread_interval;
+ int secs = cct->_conf.get_val<uint64_t>("rgw_reshard_thread_interval");
if (secs <= end.sec())
continue; // next round