From 751fd07bec681e62f6ce58a921bc77583d6cb5ab Mon Sep 17 00:00:00 2001
From: Guang Yang
Date: Sat, 16 Aug 2014 09:04:28 +0000
Subject: [PATCH] Adjust rgw bucket prepare/complete OP to work with multiple bucket index shards.

Signed-off-by: Guang Yang (yguang@yahoo-inc.com)
---
Review notes (text between the three-dash line and the diff is not part of
the commit message):
 * RGWBucketInfo::decode() now falls back to MOD for encodings older than
   v11; previously bucket_index_shard_hash_type was left untouched there.
 * open_bucket_index_shard() returns the result of get_bucket_index_object()
   instead of discarding it and returning 0.
 * get_bucket_index_object() formats the shard id with %u into a fixed-size
   suffix buffer instead of a variable-length array with %d; the resulting
   object name ("<base>.<shard id>") is unchanged.
 * cls_obj_prepare_op() / cls_obj_complete_op() log the index object name
   only after open_bucket_index_shard() succeeded.
 * open_bucket_index_shard() gained a header comment in rgw_rados.h.
 * Hunk counts and the diffstat were adjusted for the added lines; the blob
   ids on the "index" lines are those of the original submission.
 * NOTE(review): this copy of the patch had lost its line structure, and
   text in angle brackets appears to be missing (the address after "From:",
   template arguments in context lines such as "map& stats"). Context lines
   are kept as found, with indentation and blank lines re-created on a
   best-effort basis - restore them from the base tree before applying.

 src/rgw/rgw_common.h | 14 +++++++-
 src/rgw/rgw_rados.cc | 78 ++++++++++++++++++++++++++++++++++++--------
 src/rgw/rgw_rados.h  | 27 +++++++++++++++
 3 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 5b3cfbde1edb3..0769f91a037bb 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -725,6 +725,10 @@ enum RGWBucketFlags {
 
 struct RGWBucketInfo
 {
+  enum BIShardsHashType {
+    MOD = 0
+  };
+
   rgw_bucket bucket;
   string owner;
   uint32_t flags;
@@ -742,11 +746,14 @@ struct RGWBucketInfo
   // - value of UINT32_T::MAX indicates this is a blind bucket.
   uint32_t num_shards;
 
+  // Represents the bucket index shard hash type.
+  uint8_t bucket_index_shard_hash_type;
+
   // Represents the shard number for blind bucket.
   const static uint32_t NUM_SHARDS_BLIND_BUCKET;
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(10, 4, bl);
+    ENCODE_START(11, 4, bl);
     ::encode(bucket, bl);
     ::encode(owner, bl);
     ::encode(flags, bl);
@@ -757,6 +764,7 @@ struct RGWBucketInfo
     ::encode(has_instance_obj, bl);
     ::encode(quota, bl);
     ::encode(num_shards, bl);
+    ::encode(bucket_index_shard_hash_type, bl);
     ENCODE_FINISH(bl);
   }
   void decode(bufferlist::iterator& bl) {
@@ -781,6 +789,10 @@ struct RGWBucketInfo
       ::decode(quota, bl);
     if (struct_v >= 10)
       ::decode(num_shards, bl);
+    if (struct_v >= 11)
+      ::decode(bucket_index_shard_hash_type, bl);
+    else
+      bucket_index_shard_hash_type = MOD; // encodings older than v11 lack this field
     DECODE_FINISH(bl);
   }
   void dump(Formatter *f) const;
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 0fd332336f7ae..f53727bb0a331 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -48,6 +48,8 @@ using namespace librados;
 
 #define dout_subsys ceph_subsys_rgw
 
+#define MAX_BUCKET_INDEX_SHARDS_PRIME 7877
+
 using namespace std;
 
 static RGWCache cached_rados_provider;
@@ -1326,6 +1328,11 @@ int RGWRados::init_rados()
   int ret;
 
   bucket_index_max_shards = cct->_conf->rgw_bucket_index_max_shards;
+  if (bucket_index_max_shards > MAX_BUCKET_INDEX_SHARDS_PRIME) {
+    bucket_index_max_shards = MAX_BUCKET_INDEX_SHARDS_PRIME;
+    ldout(cct, 1) << __func__ << " bucket index max shards is too large, reset to value: "
+      << MAX_BUCKET_INDEX_SHARDS_PRIME << dendl;
+  }
   ldout(cct, 20) << __func__ << " bucket index max shards: " << bucket_index_max_shards << dendl;
 
   rados = new Rados();
@@ -2437,6 +2444,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
     info.region = region_name;
     info.placement_rule = selected_placement_rule;
     info.num_shards = bucket_index_max_shards;
+    info.bucket_index_shard_hash_type = RGWBucketInfo::MOD;
     if (!creation_time)
       time(&info.creation_time);
     else
@@ -3803,6 +3811,25 @@ int RGWRados::open_bucket_index(rgw_bucket& bucket, librados::IoCtx& index_ctx,
   return 0;
 }
 
+int RGWRados::open_bucket_index_shard(rgw_bucket& bucket, librados::IoCtx& index_ctx,
+    const string& obj_key, string *bucket_obj)
+{
+  string bucket_oid_base;
+  int ret = open_bucket_index_base(bucket, index_ctx, bucket_oid_base);
+  if (ret < 0)
+    return ret;
+
+  // Get the bucket info
+  RGWBucketInfo binfo;
+  ret = get_bucket_instance_info(NULL, bucket, binfo, NULL, NULL);
+  if (ret < 0)
+    return ret;
+
+  // Propagate the lookup result so an unsupported hash type is not reported as success.
+  return get_bucket_index_object(bucket_oid_base, obj_key, binfo.num_shards,
+      (RGWBucketInfo::BIShardsHashType)binfo.bucket_index_shard_hash_type, bucket_obj);
+}
+
 static void translate_raw_stats(rgw_bucket_dir_header& header, map& stats)
 {
   map::iterator iter = header.stats.begin();
@@ -6090,16 +6117,16 @@ int RGWRados::cls_obj_prepare_op(rgw_bucket& bucket, RGWModifyOp op, string& tag
                                  string& name, string& locator)
 {
   librados::IoCtx index_ctx;
-  string oid;
-
-  int r = open_bucket_index(bucket, index_ctx, oid);
-  if (r < 0)
-    return r;
+  string bucket_obj;
+  int ret = open_bucket_index_shard(bucket, index_ctx, name, &bucket_obj);
+  if (ret < 0)
+    return ret;
+  ldout(cct, 20) << " bucket index object: " << bucket_obj << dendl;
 
   ObjectWriteOperation o;
   cls_rgw_bucket_prepare_op(o, op, tag, name, locator, zone_public_config.log_data);
-  r = index_ctx.operate(oid, &o);
-  return r;
+  ret = index_ctx.operate(bucket_obj, &o);
+  return ret;
 }
 
 int RGWRados::cls_obj_complete_op(rgw_bucket& bucket, RGWModifyOp op, string& tag,
@@ -6108,11 +6135,11 @@ int RGWRados::cls_obj_complete_op(rgw_bucket& bucket, RGWModifyOp op, string& ta
                                   list *remove_objs)
 {
   librados::IoCtx index_ctx;
-  string oid;
-
-  int r = open_bucket_index(bucket, index_ctx, oid);
-  if (r < 0)
-    return r;
+  string bucket_obj;
+  int ret = open_bucket_index_shard(bucket, index_ctx, ent.name, &bucket_obj);
+  if (ret < 0)
+    return ret;
+  ldout(cct, 20) << " bucket index object: " << bucket_obj << dendl;
 
   ObjectWriteOperation o;
   rgw_bucket_dir_entry_meta dir_meta;
@@ -6130,9 +6157,9 @@ int RGWRados::cls_obj_complete_op(rgw_bucket& bucket, RGWModifyOp op, string& ta
   cls_rgw_bucket_complete_op(o, op, tag, ver, ent.name, dir_meta, remove_objs, zone_public_config.log_data);
 
   AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
-  r = index_ctx.aio_operate(oid, c, &o);
+  ret = index_ctx.aio_operate(bucket_obj, c, &o);
   c->release();
-  return r;
+  return ret;
 }
 
 int RGWRados::cls_obj_complete_add(rgw_bucket& bucket, string& tag,
@@ -6767,6 +6794,29 @@ void RGWRados::get_bucket_index_objects(const string& bucket_oid_base,
   }
 }
 
+int RGWRados::get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
+    uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj)
+{
+  int r = 0;
+  switch (hash_type) {
+    case RGWBucketInfo::MOD:
+      if (!num_shards) {
+        // With no sharding, the base bucket oid is the index object itself
+        (*bucket_obj) = bucket_oid_base;
+      } else {
+        uint32_t sid = ceph_str_hash_linux(obj_key.c_str(),
+            obj_key.size()) % MAX_BUCKET_INDEX_SHARDS_PRIME % num_shards;
+        char suffix[16]; // '.' + at most 10 digits of a uint32_t + NUL
+        snprintf(suffix, sizeof(suffix), ".%u", sid);
+        (*bucket_obj) = bucket_oid_base + suffix;
+      }
+      break;
+    default:
+      r = -ENOTSUP;
+  }
+  return r;
+}
+
 int RGWRados::process_intent_log(rgw_bucket& bucket, string& oid,
                                  time_t epoch, int flags, bool purge)
 {
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index e7030ab32a82f..556b4cf685eff 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1262,6 +1262,18 @@ class RGWRados
   int open_bucket_index(rgw_bucket& bucket, librados::IoCtx& index_ctx, string& bucket_oid);
   int open_bucket_index_base(rgw_bucket& bucket, librados::IoCtx& index_ctx,
       string& bucket_oid_base);
+  /**
+   * Open the bucket index and resolve the index shard object for an object key.
+   *
+   * bucket [in] - the bucket whose index is addressed.
+   * index_ctx [out] - IoCtx handed to open_bucket_index_base().
+   * obj_key [in] - object key, hashed to select the shard.
+   * bucket_obj [out] - name of the bucket index object for the key.
+   *
+   * Return 0 on success, a negative error code otherwise.
+   */
+  int open_bucket_index_shard(rgw_bucket& bucket, librados::IoCtx& index_ctx,
+      const string& obj_key, string *bucket_obj);
   int open_bucket_index(rgw_bucket& bucket, librados::IoCtx& index_ctx,
       vector& bucket_objs);
   struct GetObjState {
@@ -1963,6 +1975,21 @@ public:
   void get_bucket_index_objects(const string& bucket_oid_base, const uint32_t num_shards,
       vector& bucket_objs);
 
+  /**
+   * Get the bucket index object with the given base bucket index object and object key,
+   * and the number of bucket index shards.
+   *
+   * bucket_oid_base [in] - bucket object base name.
+   * obj_key [in] - object key.
+   * num_shards [in] - number of bucket index shards.
+   * hash_type [in] - type of hash to find the shard ID.
+   * bucket_obj [out] - the bucket index object for the given object.
+   *
+   * Return 0 on success, a failure code otherwise.
+   */
+  int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
+      uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj);
+
   int process_intent_log(rgw_bucket& bucket, string& oid,
                          time_t epoch, int flags, bool purge);
   /**
-- 
2.39.5