From: Yehuda Sadeh
Date: Mon, 30 Oct 2017 23:48:05 +0000 (-0700)
Subject: rgw: aws sync: configurable multipart threshold, part size
X-Git-Tag: v13.1.0~270^2~67
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b22f687ae41e4ffcea5bdfdbf8707cb552448090;p=ceph.git

rgw: aws sync: configurable multipart threshold, part size

Signed-off-by: Yehuda Sadeh
---

diff --git a/src/rgw/rgw_sync_module_aws.cc b/src/rgw/rgw_sync_module_aws.cc
index 4833b95a11a67..73fb938b2e6dc 100644
--- a/src/rgw/rgw_sync_module_aws.cc
+++ b/src/rgw/rgw_sync_module_aws.cc
@@ -14,6 +14,9 @@
 #define dout_subsys ceph_subsys_rgw
+
+
+#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024)
+
 // TODO: have various bucket naming schemes at a global/user and a bucket level
 static string aws_bucket_name(const RGWBucketInfo& bucket_info, bool user_buckets=false){
@@ -41,6 +44,9 @@ static string obj_to_aws_path(const rgw_obj& obj)
 struct AWSSyncConfig {
   string s3_endpoint;
   RGWAccessKey key;
+
+  uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE};
+  uint64_t multipart_min_part_size{DEFAULT_MULTIPART_SYNC_PART_SIZE};
 };
 struct AWSSyncInstanceEnv {
@@ -591,6 +597,7 @@ public:
 class RGWAWSStreamObjToCloudMultipartCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
+  const AWSSyncConfig& conf;
   RGWRESTConn *source_conn;
   RGWRESTConn *dest_conn;
   rgw_obj src_obj;
@@ -610,6 +617,7 @@ class RGWAWSStreamObjToCloudMultipartCR : public RGWCoroutine {
 public:
   RGWAWSStreamObjToCloudMultipartCR(RGWDataSyncEnv *_sync_env,
+                                    const AWSSyncConfig& _conf,
                                     RGWRESTConn *_source_conn,
                                     const rgw_obj& _src_obj,
                                     RGWRESTConn *_dest_conn,
@@ -617,6 +625,7 @@ public:
                                     uint64_t _obj_size,
                                     const rgw_sync_aws_src_obj_properties& _src_properties) : RGWCoroutine(_sync_env->cct),
                                                    sync_env(_sync_env),
+                                                   conf(_conf),
                                                    source_conn(_source_conn),
                                                    dest_conn(_dest_conn),
                                                    src_obj(_src_obj),
@@ -656,8 +665,9 @@ public:
         status.obj_size = obj_size;
         status.src_properties = src_properties;
-#warning flexible part size needed
-        status.part_size = 5 * 1024 * 1024;
+#define MULTIPART_MAX_PARTS 10000
+        uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS;
+        status.part_size = std::max(conf.multipart_min_part_size, min_part_size);
         status.num_parts = (obj_size + status.part_size - 1) / status.part_size;
         status.cur_part = 1;
       }
@@ -753,8 +763,6 @@ class RGWAWSHandleRemoteObjCBCR: public RGWStatRemoteObjCBCR {
   uint32_t src_zone_short_id{0};
   uint64_t src_pg_ver{0};
-  static constexpr uint32_t multipart_threshold = 8 * 1024 * 1024;
-
 public:
   RGWAWSHandleRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
                             RGWBucketInfo& _bucket_info,
@@ -825,12 +833,12 @@ public:
         src_properties.zone_short_id = src_zone_short_id;
         src_properties.pg_ver = src_pg_ver;
-        if (size < multipart_threshold) {
+        if (size < instance.conf.multipart_sync_threshold) {
           call(new RGWAWSStreamObjToCloudPlainCR(sync_env, source_conn, src_obj, src_properties, instance.conn.get(), dest_obj));
         } else {
-          call(new RGWAWSStreamObjToCloudMultipartCR(sync_env, source_conn, src_obj, instance.conn.get(),
+          call(new RGWAWSStreamObjToCloudMultipartCR(sync_env, instance.conf, source_conn, src_obj, instance.conn.get(),
                                                      dest_obj, size, src_properties));
         }
       }
@@ -943,6 +951,21 @@ public:
   }
 };
+static int conf_to_uint64(CephContext *cct, const map& config, const string& key, uint64_t *pval)
+{
+  auto i = config.find(key);
+  if (i != config.end()) {
+    string err;
+    uint64_t val = strict_strtoll(i->second.c_str(), 10, &err);
+    if (!err.empty()) {
+      ldout(cct, 0) << "ERROR: could not parse configurable value for cloud sync module: " << i->first << ": " << i->second << dendl;
+      return -EINVAL;
+    }
+    *pval = val;
+  }
+  return 0;
+}
+
 int RGWAWSSyncModule::create_instance(CephContext *cct, map& config, RGWSyncModuleInstanceRef *instance){
   AWSSyncConfig conf;
   auto i = config.find("s3_endpoint");
@@ -962,6 +985,20 @@ int RGWAWSSyncModule::create_instance(CephContext *cct, mapreset(new RGWAWSSyncModuleInstance(cct, conf));
   return 0;
 }