struct RGWBucketInfo
{
+ enum BIShardsHashType { // selects how an object key is hashed to a bucket index shard
+ MOD = 0 // shard id is derived from the key hash by modulo (see get_bucket_index_object)
+ };
+
rgw_bucket bucket;
string owner;
uint32_t flags;
// - value of UINT32_T::MAX indicates this is a blind bucket.
uint32_t num_shards;
+ // Represents the bucket index shard hash type.
+ uint8_t bucket_index_shard_hash_type;
+
// Represents the shard number for blind bucket.
const static uint32_t NUM_SHARDS_BLIND_BUCKET;
void encode(bufferlist& bl) const { // writes the fields below into bl, in this order
- ENCODE_START(10, 4, bl);
+ ENCODE_START(11, 4, bl); // first argument (presumably the struct version) goes 10 -> 11 for the new field below
::encode(bucket, bl);
::encode(owner, bl);
::encode(flags, bl);
::encode(has_instance_obj, bl);
::encode(quota, bl);
::encode(num_shards, bl);
+ ::encode(bucket_index_shard_hash_type, bl); // new trailing field; decode() reads it only when struct_v >= 11
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
::decode(quota, bl);
if (struct_v >= 10)
::decode(num_shards, bl);
+ if (struct_v >= 11)
+ ::decode(bucket_index_shard_hash_type, bl);
DECODE_FINISH(bl);
}
void dump(Formatter *f) const;
#define dout_subsys ceph_subsys_rgw
+#define MAX_BUCKET_INDEX_SHARDS_PRIME 7877
+
using namespace std;
static RGWCache<RGWRados> cached_rados_provider;
int ret;
bucket_index_max_shards = cct->_conf->rgw_bucket_index_max_shards;
+ if (bucket_index_max_shards > MAX_BUCKET_INDEX_SHARDS_PRIME) {
+ bucket_index_max_shards = MAX_BUCKET_INDEX_SHARDS_PRIME;
+ ldout(cct, 1) << __func__ << " bucket index max shards is too large, reset to value: "
+ << MAX_BUCKET_INDEX_SHARDS_PRIME << dendl;
+ }
ldout(cct, 20) << __func__ << " bucket index max shards: " << bucket_index_max_shards << dendl;
rados = new Rados();
info.region = region_name;
info.placement_rule = selected_placement_rule;
info.num_shards = bucket_index_max_shards;
+ info.bucket_index_shard_hash_type = RGWBucketInfo::MOD;
if (!creation_time)
time(&info.creation_time);
else
return 0;
}
+/**
+ * Open the bucket index and resolve the name of the index (shard) object
+ * that holds the entry for a given object key.
+ *
+ * bucket      [in]  - bucket whose index is being accessed.
+ * index_ctx   [out] - io context, handed to open_bucket_index_base().
+ * obj_key     [in]  - object key used to select the shard.
+ * bucket_obj  [out] - name of the bucket index object for obj_key.
+ *
+ * Return 0 on success, a negative error code otherwise (including the
+ * error reported by get_bucket_index_object() for an unsupported hash type).
+ */
+int RGWRados::open_bucket_index_shard(rgw_bucket& bucket, librados::IoCtx& index_ctx,
+    const string& obj_key, string *bucket_obj)
+{
+  string bucket_oid_base;
+  int ret = open_bucket_index_base(bucket, index_ctx, bucket_oid_base);
+  if (ret < 0)
+    return ret;
+
+  // The shard count and the hash type are read from the bucket instance info.
+  RGWBucketInfo binfo;
+  ret = get_bucket_instance_info(NULL, bucket, binfo, NULL, NULL);
+  if (ret < 0)
+    return ret;
+
+  // Propagate the result instead of returning 0 unconditionally: when the
+  // hash type is not supported *bucket_obj is left untouched, and reporting
+  // success would let callers operate on an unset object name.
+  return get_bucket_index_object(bucket_oid_base, obj_key, binfo.num_shards,
+      static_cast<RGWBucketInfo::BIShardsHashType>(binfo.bucket_index_shard_hash_type),
+      bucket_obj);
+}
+
static void translate_raw_stats(rgw_bucket_dir_header& header, map<RGWObjCategory, RGWStorageStats>& stats)
{
map<uint8_t, struct rgw_bucket_category_stats>::iterator iter = header.stats.begin();
string& name, string& locator)
{
librados::IoCtx index_ctx;
- string oid;
-
- int r = open_bucket_index(bucket, index_ctx, oid);
- if (r < 0)
- return r;
+ string bucket_obj;
+ int ret = open_bucket_index_shard(bucket, index_ctx, name, &bucket_obj);
+ ldout(cct, 20) << " bucket index object: " << bucket_obj << dendl;
+ if (ret < 0)
+ return ret;
ObjectWriteOperation o;
cls_rgw_bucket_prepare_op(o, op, tag, name, locator, zone_public_config.log_data);
- r = index_ctx.operate(oid, &o);
- return r;
+ ret = index_ctx.operate(bucket_obj, &o);
+ return ret;
}
int RGWRados::cls_obj_complete_op(rgw_bucket& bucket, RGWModifyOp op, string& tag,
list<string> *remove_objs)
{
librados::IoCtx index_ctx;
- string oid;
-
- int r = open_bucket_index(bucket, index_ctx, oid);
- if (r < 0)
- return r;
+ string bucket_obj;
+ int ret = open_bucket_index_shard(bucket, index_ctx, ent.name, &bucket_obj);
+ ldout(cct, 20) << " bucket index object: " << bucket_obj << dendl;
+ if (ret < 0)
+ return ret;
ObjectWriteOperation o;
rgw_bucket_dir_entry_meta dir_meta;
cls_rgw_bucket_complete_op(o, op, tag, ver, ent.name, dir_meta, remove_objs, zone_public_config.log_data);
AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
- r = index_ctx.aio_operate(oid, c, &o);
+ ret = index_ctx.aio_operate(bucket_obj, c, &o);
c->release();
- return r;
+ return ret;
}
int RGWRados::cls_obj_complete_add(rgw_bucket& bucket, string& tag,
}
}
+// Map an object key to the name of the bucket index object that holds it.
+// With num_shards == 0 the base oid is used unchanged; otherwise the result
+// is "<bucket_oid_base>.<shard id>". Returns 0 on success and -ENOTSUP for a
+// hash type this function does not handle (*bucket_obj is then not written).
+int RGWRados::get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
+    uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj)
+{
+  int r = 0;
+  switch (hash_type) {
+    case RGWBucketInfo::MOD:
+      if (!num_shards) {
+        // No sharding: the base oid is the index object itself.
+        (*bucket_obj) = bucket_oid_base;
+      } else {
+        // Hash the key, fold it into the fixed prime range, then into the
+        // configured number of shards.
+        uint32_t sid = ceph_str_hash_linux(obj_key.c_str(), obj_key.size())
+            % MAX_BUCKET_INDEX_SHARDS_PRIME % num_shards;
+        // Format only the ".<sid>" suffix into a fixed-size buffer. This
+        // avoids a variable-length stack array sized by the oid (a compiler
+        // extension in C++) and uses an unsigned conversion for the shard id.
+        char suffix[16];
+        snprintf(suffix, sizeof(suffix), ".%u", static_cast<unsigned>(sid));
+        (*bucket_obj) = bucket_oid_base;
+        bucket_obj->append(suffix);
+      }
+      break;
+    default:
+      r = -ENOTSUP;
+      break;
+  }
+  return r;
+}
+
int RGWRados::process_intent_log(rgw_bucket& bucket, string& oid,
time_t epoch, int flags, bool purge)
{
int open_bucket_index(rgw_bucket& bucket, librados::IoCtx& index_ctx, string& bucket_oid);
int open_bucket_index_base(rgw_bucket& bucket, librados::IoCtx& index_ctx,
string& bucket_oid_base);
+ int open_bucket_index_shard(rgw_bucket& bucket, librados::IoCtx& index_ctx,
+ const string& obj_key, string *bucket_obj);
int open_bucket_index(rgw_bucket& bucket, librados::IoCtx& index_ctx,
vector<string>& bucket_objs);
struct GetObjState {
void get_bucket_index_objects(const string& bucket_oid_base, const uint32_t num_shards,
vector<string>& bucket_objs);
+ /**
+ * Compute the name of the bucket index object (shard) that holds the entry for
+ * the given object key, from the base bucket index object name, the number of
+ * bucket index shards and the hash type.
+ *
+ * bucket_oid_base [in] - bucket object base name.
+ * obj_key [in] - object key.
+ * num_shards [in] - number of bucket index shards.
+ * hash_type [in] - type of hash to find the shard ID.
+ * bucket_obj [out] - the bucket index object for the given object.
+ *
+ * Return 0 on success, a failure code otherwise.
+ */
+ int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
+ uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj);
+
int process_intent_log(rgw_bucket& bucket, string& oid,
time_t epoch, int flags, bool purge);
/**