From: Pavan Rallabhandi Date: Wed, 8 Apr 2015 13:23:14 +0000 (+0530) Subject: RGW: Make RADOS handles in RGW to be a configurable option X-Git-Tag: v9.0.2~100^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b3c3a24fe9c0fbb2bdbbccb08b40efc1dca207cc;p=ceph.git RGW: Make RADOS handles in RGW to be a configurable option Having a configurable number of RADOS handles results in a significant performance boost for all types of workloads. Each RGW worker thread now picks one RADOS handle from the available set and keeps it for its lifetime. Signed-off-by: Pavan Rallabhandi --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index d9b1dfcf1e9..d69a9df799b 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -992,6 +992,7 @@ OPTION(rgw_op_thread_timeout, OPT_INT, 10*60) OPTION(rgw_op_thread_suicide_timeout, OPT_INT, 0) OPTION(rgw_thread_pool_size, OPT_INT, 100) OPTION(rgw_num_control_oids, OPT_INT, 8) +OPTION(rgw_num_rados_handles, OPT_U32, 1) OPTION(rgw_zone, OPT_STR, "") // zone name OPTION(rgw_zone_root_pool, OPT_STR, ".rgw.root") // pool where zone specific info is stored diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc index c536fa96d16..70f5691d6d4 100644 --- a/src/rgw/rgw_gc.cc +++ b/src/rgw/rgw_gc.cc @@ -186,7 +186,7 @@ int RGWGC::process(int index, int max_secs) if (obj.pool != last_pool) { delete ctx; ctx = new IoCtx; - ret = store->rados->ioctx_create(obj.pool.c_str(), *ctx); + ret = store->get_rados_handle()->ioctx_create(obj.pool.c_str(), *ctx); if (ret < 0) { dout(0) << "ERROR: failed to create ioctx pool=" << obj.pool << dendl; continue; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index d7c312fce7d..e57af444b51 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -1223,7 +1223,7 @@ int RGWRados::unwatch(uint64_t watch_handle) ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << r << dendl; return r; } - r = rados->watch_flush(); + r = 
rados[0]->watch_flush(); if (r < 0) { ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << r << dendl; return r; @@ -1431,23 +1431,50 @@ void RGWRados::finalize() */ int RGWRados::init_rados() { - int ret; + int ret = 0; - rados = new Rados(); - if (!rados) - return -ENOMEM; + num_rados_handles = cct->_conf->rgw_num_rados_handles; - ret = rados->init_with_context(cct); - if (ret < 0) - return ret; + rados = new librados::Rados *[num_rados_handles]; + if (!rados) { + ret = -ENOMEM; + return ret; + } - ret = rados->connect(); - if (ret < 0) - return ret; + for (uint32_t i=0; i < num_rados_handles; i++) { + + rados[i] = new Rados(); + if (!rados[i]) { + ret = -ENOMEM; + goto fail; + } + + ret = rados[i]->init_with_context(cct); + if (ret < 0) { + goto fail; + } + + ret = rados[i]->connect(); + if (ret < 0) { + goto fail; + } + } meta_mgr = new RGWMetadataManager(cct, this); data_log = new RGWDataChangesLog(cct, this); + return ret; + +fail: + for (uint32_t i=0; i < num_rados_handles; i++) { + if (rados[i]) { + delete rados[i]; + } + } + if (rados) { + delete[] rados; + } + return ret; } @@ -1654,15 +1681,16 @@ int RGWRados::open_root_pool_ctx() { const string& pool = zone.domain_root.name; const char *pool_str = pool.c_str(); - int r = rados->ioctx_create(pool_str, root_pool_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(pool_str, root_pool_ctx); if (r == -ENOENT) { - r = rados->pool_create(pool_str); + r = rad->pool_create(pool_str); if (r == -EEXIST) r = 0; if (r < 0) return r; - r = rados->ioctx_create(pool_str, root_pool_ctx); + r = rad->ioctx_create(pool_str, root_pool_ctx); } return r; @@ -1671,15 +1699,16 @@ int RGWRados::open_root_pool_ctx() int RGWRados::open_gc_pool_ctx() { const char *gc_pool = zone.gc_pool.name.c_str(); - int r = rados->ioctx_create(gc_pool, gc_pool_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(gc_pool, gc_pool_ctx); if (r == -ENOENT) { - r = 
rados->pool_create(gc_pool); + r = rad->pool_create(gc_pool); if (r == -EEXIST) r = 0; if (r < 0) return r; - r = rados->ioctx_create(gc_pool, gc_pool_ctx); + r = rad->ioctx_create(gc_pool, gc_pool_ctx); } return r; @@ -1688,15 +1717,16 @@ int RGWRados::open_gc_pool_ctx() int RGWRados::init_watch() { const char *control_pool = zone.control_pool.name.c_str(); - int r = rados->ioctx_create(control_pool, control_pool_ctx); + librados::Rados *rad = rados[0]; + int r = rad->ioctx_create(control_pool, control_pool_ctx); if (r == -ENOENT) { - r = rados->pool_create(control_pool); + r = rad->pool_create(control_pool); if (r == -EEXIST) r = 0; if (r < 0) return r; - r = rados->ioctx_create(control_pool, control_pool_ctx); + r = rad->ioctx_create(control_pool, control_pool_ctx); if (r < 0) return r; } @@ -1752,18 +1782,19 @@ void RGWRados::pick_control_oid(const string& key, string& notify_oid) int RGWRados::open_bucket_pool_ctx(const string& bucket_name, const string& pool, librados::IoCtx& io_ctx) { - int r = rados->ioctx_create(pool.c_str(), io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(pool.c_str(), io_ctx); if (r != -ENOENT) return r; if (!pools_initialized) return r; - r = rados->pool_create(pool.c_str()); + r = rad->pool_create(pool.c_str()); if (r < 0 && r != -EEXIST) return r; - r = rados->ioctx_create(pool.c_str(), io_ctx); + r = rad->ioctx_create(pool.c_str(), io_ctx); return r; } @@ -1853,7 +1884,8 @@ int RGWRados::log_list_init(const string& prefix, RGWAccessHandle *handle) { log_list_state *state = new log_list_state; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, state->io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, state->io_ctx); if (r < 0) { delete state; return r; @@ -1888,7 +1920,8 @@ int RGWRados::log_remove(const string& name) { librados::IoCtx io_ctx; const char *log_pool = zone.log_pool.name.c_str(); - int r = 
rados->ioctx_create(log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, io_ctx); if (r < 0) return r; return io_ctx.remove(name); @@ -1908,7 +1941,8 @@ int RGWRados::log_show_init(const string& name, RGWAccessHandle *handle) { log_show_state *state = new log_show_state; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, state->io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, state->io_ctx); if (r < 0) { delete state; return r; @@ -2128,7 +2162,8 @@ int RGWRados::time_log_add(const string& oid, const utime_t& ut, const string& s librados::IoCtx io_ctx; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, io_ctx); if (r == -ENOENT) { rgw_bucket pool(log_pool); r = create_pool(pool); @@ -2136,7 +2171,7 @@ int RGWRados::time_log_add(const string& oid, const utime_t& ut, const string& s return r; // retry - r = rados->ioctx_create(log_pool, io_ctx); + r = rad->ioctx_create(log_pool, io_ctx); } if (r < 0) return r; @@ -2153,7 +2188,8 @@ int RGWRados::time_log_add(const string& oid, list& entries) librados::IoCtx io_ctx; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, io_ctx); if (r == -ENOENT) { rgw_bucket pool(log_pool); r = create_pool(pool); @@ -2161,7 +2197,7 @@ int RGWRados::time_log_add(const string& oid, list& entries) return r; // retry - r = rados->ioctx_create(log_pool, io_ctx); + r = rad->ioctx_create(log_pool, io_ctx); } if (r < 0) return r; @@ -2182,7 +2218,8 @@ int RGWRados::time_log_list(const string& oid, utime_t& start_time, utime_t& end librados::IoCtx io_ctx; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, io_ctx); + 
librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, io_ctx); if (r < 0) return r; librados::ObjectReadOperation op; @@ -2204,7 +2241,8 @@ int RGWRados::time_log_info(const string& oid, cls_log_header *header) librados::IoCtx io_ctx; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, io_ctx); if (r < 0) return r; librados::ObjectReadOperation op; @@ -2226,7 +2264,8 @@ int RGWRados::time_log_trim(const string& oid, const utime_t& start_time, const librados::IoCtx io_ctx; const char *log_pool = zone.log_pool.name.c_str(); - int r = rados->ioctx_create(log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(log_pool, io_ctx); if (r < 0) return r; @@ -2240,7 +2279,8 @@ int RGWRados::lock_exclusive(rgw_bucket& pool, const string& oid, utime_t& durat const char *pool_name = pool.name.c_str(); - int r = rados->ioctx_create(pool_name, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(pool_name, io_ctx); if (r < 0) return r; @@ -2258,7 +2298,8 @@ int RGWRados::unlock(rgw_bucket& pool, const string& oid, string& zone_id, strin const char *pool_name = pool.name.c_str(); - int r = rados->ioctx_create(pool_name, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(pool_name, io_ctx); if (r < 0) return r; @@ -2479,14 +2520,15 @@ int RGWRados::create_pool(rgw_bucket& bucket) string pool = bucket.index_pool; - ret = rados->pool_create(pool.c_str(), 0); + librados::Rados *rad = get_rados_handle(); + ret = rad->pool_create(pool.c_str(), 0); if (ret == -EEXIST) ret = 0; if (ret < 0) return ret; if (bucket.data_pool != pool) { - ret = rados->pool_create(bucket.data_pool.c_str(), 0); + ret = rad->pool_create(bucket.data_pool.c_str(), 0); if (ret == -EEXIST) ret = 0; if (ret < 0) @@ -2542,7 +2584,8 @@ int 
RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket, const string& pool = zone.domain_root.name; const char *pool_str = pool.c_str(); librados::IoCtx id_io_ctx; - int r = rados->ioctx_create(pool_str, id_io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(pool_str, id_io_ctx); if (r < 0) return r; @@ -2828,7 +2871,8 @@ int RGWRados::update_placement_map() int RGWRados::add_bucket_placement(std::string& new_pool) { - int ret = rados->pool_lookup(new_pool.c_str()); + librados::Rados *rad = get_rados_handle(); + int ret = rad->pool_lookup(new_pool.c_str()); if (ret < 0) // DNE, or something return ret; @@ -2878,11 +2922,12 @@ int RGWRados::create_pools(vector& names, vector& retcodes) vector completions; vector rets; + librados::Rados *rad = get_rados_handle(); for (iter = names.begin(); iter != names.end(); ++iter) { librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion(); completions.push_back(c); string& name = *iter; - int ret = rados->pool_create_async(name.c_str(), c); + int ret = rad->pool_create_async(name.c_str(), c); rets.push_back(ret); } @@ -7156,7 +7201,8 @@ int RGWRados::append_async(rgw_obj& obj, size_t size, bufferlist& bl) if (r < 0) { return r; } - librados::AioCompletion *completion = rados->aio_create_completion(NULL, NULL, NULL); + librados::Rados *rad = get_rados_handle(); + librados::AioCompletion *completion = rad->aio_create_completion(NULL, NULL, NULL); r = ref.ioctx.aio_append(ref.oid, completion, bl, size); completion->release(); @@ -7708,7 +7754,8 @@ int RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info) librados::IoCtx io_ctx; const char *usage_log_pool = zone.usage_log_pool.name.c_str(); - int r = rados->ioctx_create(usage_log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(usage_log_pool, io_ctx); if (r == -ENOENT) { rgw_bucket pool(usage_log_pool); r = create_pool(pool); @@ -7716,7 +7763,7 @@ int 
RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info) return r; // retry - r = rados->ioctx_create(usage_log_pool, io_ctx); + r = rad->ioctx_create(usage_log_pool, io_ctx); } if (r < 0) return r; @@ -7736,7 +7783,8 @@ int RGWRados::cls_obj_usage_log_read(string& oid, string& user, uint64_t start_e *is_truncated = false; const char *usage_log_pool = zone.usage_log_pool.name.c_str(); - int r = rados->ioctx_create(usage_log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(usage_log_pool, io_ctx); if (r < 0) return r; @@ -7751,7 +7799,8 @@ int RGWRados::cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_e librados::IoCtx io_ctx; const char *usage_log_pool = zone.usage_log_pool.name.c_str(); - int r = rados->ioctx_create(usage_log_pool, io_ctx); + librados::Rados *rad = get_rados_handle(); + int r = rad->ioctx_create(usage_log_pool, io_ctx); if (r < 0) return r; @@ -8244,7 +8293,7 @@ string RGWStateLog::get_oid(const string& object) { int RGWStateLog::open_ioctx(librados::IoCtx& ioctx) { string pool_name; store->get_log_pool_name(pool_name); - int r = store->rados->ioctx_create(pool_name.c_str(), ioctx); + int r = store->get_rados_handle()->ioctx_create(pool_name.c_str(), ioctx); if (r < 0) { lderr(store->ctx()) << "ERROR: could not open rados pool" << dendl; return r; @@ -8488,7 +8537,7 @@ int RGWOpStateSingleOp::renew_state() { uint64_t RGWRados::instance_id() { - return rados->get_instance_id(); + return get_rados_handle()->get_instance_id(); } uint64_t RGWRados::next_bucket_id() @@ -8540,3 +8589,30 @@ void RGWStoreManager::close_storage(RGWRados *store) delete store; } +librados::Rados* RGWRados::get_rados_handle() +{ + if (num_rados_handles == 1) { + return rados[0]; + } else { + handle_lock.get_read(); + pthread_t id = pthread_self(); + std::map:: iterator it = rados_map.find(id); + + if (it != rados_map.end()) { + handle_lock.put_read(); + return rados[it->second]; + } else { + 
handle_lock.put_read(); + handle_lock.get_write(); + if (next_rados_handle.read() == num_rados_handles) { + next_rados_handle.set(0); + } + int handle = next_rados_handle.read(); + rados_map[id] = handle; + next_rados_handle.inc(); + handle_lock.put_write(); + return rados[handle]; + } + } +} + diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 45b7f61c30f..d72ac1825fb 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -1236,7 +1236,13 @@ class RGWRados void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist); protected: CephContext *cct; - librados::Rados *rados; + + librados::Rados **rados; + atomic_t next_rados_handle; + uint32_t num_rados_handles; + RWLock handle_lock; + std::map rados_map; + librados::IoCtx gc_pool_ctx; // .rgw.gc bool pools_initialized; @@ -1255,8 +1261,9 @@ public: watch_initialized(false), bucket_id_lock("rados_bucket_id"), bucket_index_max_shards(0), - max_bucket_id(0), - cct(NULL), rados(NULL), + max_bucket_id(0), cct(NULL), + rados(NULL), next_rados_handle(0), + num_rados_handles(0), handle_lock("rados_handle_lock"), pools_initialized(false), quota_handler(NULL), finisher(NULL), @@ -1288,9 +1295,14 @@ public: RGWDataChangesLog *data_log; virtual ~RGWRados() { + for (uint32_t i=0; i < num_rados_handles; i++) { + if (rados[i]) { + rados[i]->shutdown(); + delete rados[i]; + } + } if (rados) { - rados->shutdown(); - delete rados; + delete[] rados; } } @@ -2135,6 +2147,8 @@ public: uint64_t instance_id(); uint64_t next_bucket_id(); + + librados::Rados* get_rados_handle(); }; class RGWStoreManager { diff --git a/src/rgw/rgw_replica_log.cc b/src/rgw/rgw_replica_log.cc index 6d8ed096da6..b56a90b440f 100644 --- a/src/rgw/rgw_replica_log.cc +++ b/src/rgw/rgw_replica_log.cc @@ -37,7 +37,7 @@ RGWReplicaLogger::RGWReplicaLogger(RGWRados *_store) : int RGWReplicaLogger::open_ioctx(librados::IoCtx& ctx, const string& pool) { - int r = store->rados->ioctx_create(pool.c_str(), ctx); + 
int r = store->get_rados_handle()->ioctx_create(pool.c_str(), ctx); if (r == -ENOENT) { rgw_bucket p(pool.c_str()); r = store->create_pool(p); @@ -45,7 +45,7 @@ int RGWReplicaLogger::open_ioctx(librados::IoCtx& ctx, const string& pool) return r; // retry - r = store->rados->ioctx_create(pool.c_str(), ctx); + r = store->get_rados_handle()->ioctx_create(pool.c_str(), ctx); } if (r < 0) { lderr(cct) << "ERROR: could not open rados pool " << pool << dendl;