From fc83e197ab85355e385c13f2a64957cad7481298 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 22 Jul 2014 15:30:11 -0700 Subject: [PATCH] rgw: align object chunk size with pool alignment Fixes: #8442 Backport: firefly Data pools might have strict write alignment requirements. Use pool alignment info when setting the max_chunk_size for the write. Signed-off-by: Yehuda Sadeh --- src/rgw/rgw_op.cc | 7 ++- src/rgw/rgw_rados.cc | 131 +++++++++++++++++++++++++++++++++---------- src/rgw/rgw_rados.h | 14 +++-- 3 files changed, 115 insertions(+), 37 deletions(-) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 714349f4f8df3..02d7c504f73a7 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -1381,7 +1381,10 @@ public: int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, void *obj_ctx, string *oid_rand) { - RGWPutObjProcessor::prepare(store, obj_ctx, NULL); + int r = prepare_init(store, obj_ctx, NULL); + if (r < 0) { + return r; + } string oid = obj_str; upload_id = s->info.args.get("uploadId"); @@ -1420,7 +1423,7 @@ int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, void *obj_ctx, string manifest.set_multipart_part_rule(store->ctx()->_conf->rgw_obj_stripe_size, num); - int r = manifest_gen.create_begin(store->ctx(), &manifest, bucket, target_obj); + r = manifest_gen.create_begin(store->ctx(), &manifest, bucket, target_obj); if (r < 0) { return r; } diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index ffe27545a104b..e8b95b3bb4169 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -1044,8 +1044,6 @@ int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **pha } } - uint64_t max_chunk_size = store->get_max_chunk_size(); - pending_data_bl.claim_append(bl); if (pending_data_bl.length() < max_chunk_size) return 0; @@ -1070,17 +1068,30 @@ int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **pha return write_data(bl, write_ofs, phandle, exclusive); } -int RGWPutObjProcessor_Atomic::prepare(RGWRados *store, void *obj_ctx, string *oid_rand) + +int RGWPutObjProcessor_Atomic::prepare_init(RGWRados *store, void *obj_ctx, string *oid_rand) { RGWPutObjProcessor::prepare(store, obj_ctx, oid_rand); - head_obj.init(bucket, obj_str); + int r = store->get_max_chunk_size(bucket, &max_chunk_size); + if (r < 0) { + return r; + } - uint64_t max_chunk_size = store->get_max_chunk_size(); + return 0; +} + +int RGWPutObjProcessor_Atomic::prepare(RGWRados *store, void *obj_ctx, string *oid_rand) +{ + int r = prepare_init(store, obj_ctx, oid_rand); + if (r < 0) { + return r; + } + head_obj.init(bucket, obj_str); manifest.set_trivial_rule(max_chunk_size, store->ctx()->_conf->rgw_obj_stripe_size); - int r = manifest_gen.create_begin(store->ctx(), &manifest, bucket, head_obj); + r = manifest_gen.create_begin(store->ctx(), &manifest, bucket, head_obj); if (r < 0) { return r; } @@ -1201,6 +1212,44 @@ void RGWRadosCtx::set_prefetch_data(rgw_obj& obj) { } } +int RGWRados::get_required_alignment(rgw_bucket& bucket, uint64_t *alignment) +{ + IoCtx ioctx; + int r = open_bucket_data_ctx(bucket, ioctx); + if (r < 0) { + ldout(cct, 0) << "ERROR: open_bucket_data_ctx() returned " << r << dendl; + return r; + } + + *alignment = ioctx.pool_required_alignment(); + return 0; +} + +int RGWRados::get_max_chunk_size(rgw_bucket& bucket, uint64_t *max_chunk_size) +{ + uint64_t alignment; + int r = get_required_alignment(bucket, &alignment); + if (r < 0) { + return r; + } + + uint64_t config_chunk_size = cct->_conf->rgw_max_chunk_size; + + if (alignment == 0) { + *max_chunk_size = config_chunk_size; + return 0; + } + + if (config_chunk_size <= alignment) { + *max_chunk_size = alignment; + return 0; + } + + *max_chunk_size = config_chunk_size - (config_chunk_size % alignment); + + return 0; +} + void RGWRados::finalize() { if (need_watch_notify()) { @@ -1236,8 +1285,6 @@ int RGWRados::init_rados() { int ret; - max_chunk_size = cct->_conf->rgw_max_chunk_size; - rados = new Rados(); if (!rados) return -ENOMEM; @@ -3059,7 +3106,15 @@ int RGWRados::rewrite_obj(const string& bucket_owner, rgw_obj& obj) attrset.erase(RGW_ATTR_ID_TAG); - return copy_obj_data((void *)&rctx, bucket_owner, &handle, end, obj, obj, &mtime, attrset, RGW_OBJ_CATEGORY_MAIN, NULL, NULL); + uint64_t max_chunk_size; + + ret = get_max_chunk_size(obj.bucket, &max_chunk_size); + if (ret < 0) { + ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for bucket " << obj.bucket << dendl; + return ret; + } + + return copy_obj_data((void *)&rctx, bucket_owner, &handle, end, obj, obj, max_chunk_size, &mtime, attrset, RGW_OBJ_CATEGORY_MAIN, NULL, NULL); } /** @@ -3231,24 +3286,6 @@ set_err_state: vector ref_objs; - bool copy_data = !astate->has_manifest; - bool copy_first = false; - if (astate->has_manifest) { - if (!astate->manifest.has_tail()) { - copy_data = true; - } else { - uint64_t head_size = astate->manifest.get_head_size(); - - if (head_size > 0) { - if (head_size > max_chunk_size) // should never happen - copy_data = true; - else - copy_first = true; - } - } - } - - if (remote_dest) { /* dest is in a different region, copy it there */ @@ -3269,8 +3306,35 @@ set_err_state: return ret; return 0; - } else if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */ - return copy_obj_data(ctx, dest_bucket_info.owner, &handle, end, dest_obj, src_obj, mtime, src_attrs, category, ptag, err); + } + + uint64_t max_chunk_size; + + ret = get_max_chunk_size(dest_obj.bucket, &max_chunk_size); + if (ret < 0) { + ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for bucket " << dest_obj.bucket << dendl; + return ret; + } + + bool copy_data = !astate->has_manifest; + bool copy_first = false; + if (astate->has_manifest) { + if (!astate->manifest.has_tail()) { + copy_data = true; + } else { + uint64_t head_size = astate->manifest.get_head_size(); + + if (head_size > 0) { + if (head_size > max_chunk_size) + copy_data = true; + else + copy_first = true; + } + } + } + + if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */ + return copy_obj_data(ctx, dest_bucket_info.owner, &handle, end, dest_obj, src_obj, max_chunk_size, mtime, src_attrs, category, ptag, err); } RGWObjManifest::obj_iterator miter = astate->manifest.obj_begin(); @@ -3380,6 +3444,7 @@ int RGWRados::copy_obj_data(void *ctx, void **handle, off_t end, rgw_obj& dest_obj, rgw_obj& src_obj, + uint64_t max_chunk_size, time_t *mtime, map& attrs, RGWObjCategory category, @@ -4512,6 +4577,8 @@ int RGWRados::get_obj(void *ctx, RGWObjVersionTracker *objv_tracker, void **hand bool merge_bl = false; bufferlist *pbl = &bl; bufferlist read_bl; + uint64_t max_chunk_size; + get_obj_bucket_and_oid_key(obj, bucket, oid, key); @@ -4544,6 +4611,12 @@ int RGWRados::get_obj(void *ctx, RGWObjVersionTracker *objv_tracker, void **hand } } + r = get_max_chunk_size(bucket, &max_chunk_size); + if (r < 0) { + ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for bucket " << bucket << dendl; + goto done_ret; + } + if (len > max_chunk_size) len = max_chunk_size; diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 63c9ac73fdd36..6b93b98f9b2e4 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -621,6 +621,8 @@ class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio uint64_t extra_data_len; bufferlist extra_data_bl; bufferlist pending_data_bl; + uint64_t max_chunk_size; + protected: rgw_bucket bucket; string obj_str; @@ -639,6 +641,8 @@ protected: int complete_parts(); int complete_writing_data(); + int prepare_init(RGWRados *store, void *obj_ctx, string *oid_rand); + public: ~RGWPutObjProcessor_Atomic() {} RGWPutObjProcessor_Atomic(const string& bucket_owner, rgw_bucket& _b, const string& _o, uint64_t _p, const string& _t) : @@ -649,6 +653,7 @@ public: cur_part_id(0), data_ofs(0), extra_data_len(0), + max_chunk_size(0), bucket(_b), obj_str(_o), unique_tag(_t) {} @@ -1269,8 +1274,6 @@ class RGWRados int get_obj_ref(const rgw_obj& obj, rgw_rados_ref *ref, rgw_bucket *bucket, bool ref_system_obj = false); uint64_t max_bucket_id; - uint64_t max_chunk_size; - int get_obj_state(RGWRadosCtx *rctx, rgw_obj& obj, RGWObjState **state, RGWObjVersionTracker *objv_tracker); int append_atomic_test(RGWRadosCtx *rctx, rgw_obj& obj, librados::ObjectOperation& op, RGWObjState **state); @@ -1335,7 +1338,6 @@ public: num_watchers(0), watchers(NULL), watch_handles(NULL), watch_initialized(false), bucket_id_lock("rados_bucket_id"), max_bucket_id(0), - max_chunk_size(0), cct(NULL), rados(NULL), pools_initialized(false), quota_handler(NULL), @@ -1373,9 +1375,8 @@ public: } } - uint64_t get_max_chunk_size() { - return max_chunk_size; - } + int get_required_alignment(rgw_bucket& bucket, uint64_t *alignment); + int get_max_chunk_size(rgw_bucket& bucket, uint64_t *max_chunk_size); int list_raw_objects(rgw_bucket& pool, const string& prefix_filter, int max, RGWListRawObjsCtx& ctx, list& oids, @@ -1612,6 +1613,7 @@ public: void **handle, off_t end, rgw_obj& dest_obj, rgw_obj& src_obj, + uint64_t max_chunk_size, time_t *mtime, map& attrs, RGWObjCategory category, -- 2.39.5