From: Yehuda Sadeh Date: Tue, 20 Oct 2015 21:57:52 +0000 (-0700) Subject: rgw: multiple init metadata sync locking fixes X-Git-Tag: v10.1.0~354^2~313 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=70a2b935ed6abe3ea104bcd085cf22102180eaa9;p=ceph.git rgw: multiple init metadata sync locking fixes Signed-off-by: Yehuda Sadeh --- diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc index d4caafd069f0..923ad5f50e6b 100644 --- a/src/rgw/rgw_coroutine.cc +++ b/src/rgw/rgw_coroutine.cc @@ -128,8 +128,8 @@ int RGWCoroutinesStack::operate(RGWCoroutinesEnv *_env) error_flag = op->is_error(); if (op->is_done()) { - int op_retcode = op->get_ret_status(); - r = unwind(r); + int op_retcode = r; + r = unwind(op_retcode); op->put(); done_flag = (pos == ops.end()); if (done_flag) { diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc index 2a66fd88527e..1c37dd92ba53 100644 --- a/src/rgw/rgw_cr_rados.cc +++ b/src/rgw/rgw_cr_rados.cc @@ -110,15 +110,14 @@ int RGWSimpleRadosReadAttrsCR::request_complete() int RGWAsyncPutSystemObj::_send_request() { - return store->put_system_obj(NULL, obj, bl.c_str(), bl.length(), exclusive, - NULL, attrs, objv_tracker, mtime); + return store->put_system_obj_data(NULL, obj, bl, -1, exclusive); } RGWAsyncPutSystemObj::RGWAsyncPutSystemObj(RGWAioCompletionNotifier *cn, RGWRados *_store, - RGWObjVersionTracker *_objv_tracker, rgw_obj& _obj, bool _exclusive, - bufferlist& _bl, time_t _mtime) : RGWAsyncRadosRequest(cn), store(_store), - objv_tracker(_objv_tracker), obj(_obj), exclusive(_exclusive), - bl(_bl), mtime(_mtime) + rgw_obj& _obj, bool _exclusive, + bufferlist& _bl) : RGWAsyncRadosRequest(cn), store(_store), + obj(_obj), exclusive(_exclusive), + bl(_bl) { } @@ -156,6 +155,7 @@ int RGWAsyncLockSystemObj::_send_request() utime_t duration(duration_secs, 0); l.set_duration(duration); l.set_cookie(cookie); + l.set_renew(true); return l.lock_exclusive(&ioctx, obj.get_object()); } diff --git 
a/src/rgw/rgw_cr_rados.h b/src/rgw/rgw_cr_rados.h index c61ef1b928e1..327b14f5fe2e 100644 --- a/src/rgw/rgw_cr_rados.h +++ b/src/rgw/rgw_cr_rados.h @@ -83,19 +83,16 @@ public: class RGWAsyncPutSystemObj : public RGWAsyncRadosRequest { RGWRados *store; - RGWObjVersionTracker *objv_tracker; rgw_obj obj; bool exclusive; bufferlist bl; - map<string, bufferlist> attrs; - time_t mtime; protected: int _send_request(); public: RGWAsyncPutSystemObj(RGWAioCompletionNotifier *cn, RGWRados *_store, - RGWObjVersionTracker *_objv_tracker, rgw_obj& _obj, bool _exclusive, - bufferlist& _bl, time_t _mtime = 0); + rgw_obj& _obj, bool _exclusive, + bufferlist& _bl); }; class RGWAsyncPutSystemObjAttrs : public RGWAsyncRadosRequest { @@ -280,7 +277,7 @@ public: int send_request() { rgw_obj obj = rgw_obj(pool, oid); req = new RGWAsyncPutSystemObj(stack->create_completion_notifier(), - store, NULL, obj, false, bl); + store, obj, false, bl); async_rados->queue(req); return 0; } diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 4a64cb3e06b6..1ad451b4b596 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -421,24 +421,15 @@ public: uint32_t lock_duration = 30; call(new RGWSimpleRadosLockCR(async_rados, store, store->get_zone_params().log_pool, mdlog_sync_status_oid, lock_name, cookie, lock_duration)); - if (retcode < 0) { - ldout(cct, 0) << "ERROR: failed to take a lock on " << mdlog_sync_status_oid << dendl; - return set_state(RGWCoroutine_Error, retcode); - } + } + if (retcode < 0) { + ldout(cct, 0) << "ERROR: failed to take a lock on " << mdlog_sync_status_oid << dendl; + return set_state(RGWCoroutine_Error, retcode); } yield { call(new RGWSimpleRadosWriteCR(async_rados, store, store->get_zone_params().log_pool, mdlog_sync_status_oid, status)); } - yield { /* take lock again, we just recreated the object */ - uint32_t lock_duration = 30; - call(new RGWSimpleRadosLockCR(async_rados, store, store->get_zone_params().log_pool, mdlog_sync_status_oid, - lock_name, cookie, lock_duration)); - 
if (retcode < 0) { - ldout(cct, 0) << "ERROR: failed to take a lock on " << mdlog_sync_status_oid << dendl; - return set_state(RGWCoroutine_Error, retcode); - } - } /* fetch current position in logs */ yield { for (int i = 0; i < (int)status.num_shards; i++) { @@ -1138,17 +1129,28 @@ int RGWRemoteMetaLog::run_sync(int num_shards, rgw_meta_sync_status& sync_status RGWObjectCtx obj_ctx(store, NULL); - int r = run(new RGWReadSyncStatusCoroutine(async_rados, store, obj_ctx, &sync_status)); - if (r < 0) { - ldout(store->ctx(), 0) << "ERROR: failed to fetch sync status" << dendl; - return r; - } + int r; + do { + r = run(new RGWReadSyncStatusCoroutine(async_rados, store, obj_ctx, &sync_status)); + if (r < 0 && r != -ENOENT) { + ldout(store->ctx(), 0) << "ERROR: failed to fetch sync status r=" << r << dendl; + return r; + } - switch ((rgw_meta_sync_info::SyncState)sync_status.sync_info.state) { - case rgw_meta_sync_info::StateInit: + if (sync_status.sync_info.state == rgw_meta_sync_info::StateInit) { ldout(store->ctx(), 20) << __func__ << "(): init" << dendl; r = run(new RGWInitSyncStatusCoroutine(async_rados, store, &http_manager, obj_ctx, num_shards)); - /* fall through */ + if (r == -EBUSY) { + continue; + } + if (r < 0) { + ldout(store->ctx(), 0) << "ERROR: failed to init sync status" << dendl; + return r; + } + } + } while (sync_status.sync_info.state == rgw_meta_sync_info::StateInit); + + switch ((rgw_meta_sync_info::SyncState)sync_status.sync_info.state) { case rgw_meta_sync_info::StateBuildingFullSyncMaps: ldout(store->ctx(), 20) << __func__ << "(): building full sync maps" << dendl; r = run(new RGWFetchAllMetaCR(store, &http_manager, async_rados, num_shards));