From: fang.yuxiang Date: Tue, 13 Jun 2017 08:40:16 +0000 (+0800) Subject: rgw: meta sync thread crash at RGWMetaSyncShardCR X-Git-Tag: v11.2.1~10^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d463de9b8ae357a4edbfcf63e65740ecd4845b26;p=ceph.git rgw: meta sync thread crash at RGWMetaSyncShardCR Fixes: http://tracker.ceph.com/issues/20251 Signed-off-by: fang yuxiang <fang.yuxiang@eisoo.com> (cherry picked from commit 45877d38fd9a385b2f8b13e90be94d784898b0b3) Conflicts: src/rgw/rgw_data_sync.cc - kraken RGWContinuousLeaseCR() has slightly different options src/rgw/rgw_sync.cc - kraken RGWContinuousLeaseCR() has slightly different options - added "override" qualifier to ~RGWFetchAllMetaCR() definition --- diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc index be9e25d4751..00030b7095a 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -1060,8 +1060,8 @@ class RGWDataSyncShardCR : public RGWCoroutine { set spawned_keys; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; string status_oid; @@ -1102,7 +1102,6 @@ public: delete marker_tracker; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } if (error_repo) { error_repo->put(); @@ -1150,13 +1149,12 @@ public: string lock_name = "sync_lock"; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid, - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + store->get_zone_params().log_pool, status_oid, + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } int full_sync() { @@ -1328,7 +1326,7 @@ public: set_status() << "num_spawned() > spawn_window"; yield wait_for_child();
int ret; - while (collect(&ret, lease_stack)) { + while (collect(&ret, lease_stack.get())) { if (ret < 0) { ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl; /* we have reported this error */ @@ -2663,12 +2661,12 @@ int RGWRunBucketSyncCoroutine::operate() yield { set_status("acquiring sync lock"); auto store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, - store->get_zone_params().log_pool, - status_oid, "sync_lock", - cct->_conf->rgw_sync_lease_period, - this); - lease_stack = spawn(lease_cr.get(), false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + store->get_zone_params().log_pool, + status_oid, "sync_lock", + cct->_conf->rgw_sync_lease_period, + this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 33e3fe48856..799dd3fb348 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -592,8 +592,8 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine { rgw_meta_sync_info status; vector shards_info; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; public: RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, const rgw_meta_sync_info &status) @@ -604,7 +604,6 @@ public: ~RGWInitSyncStatusCoroutine() { if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } } @@ -616,10 +615,10 @@ public: uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, sync_env->status_oid(), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + store->get_zone_params().log_pool,
sync_env->status_oid(), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -652,7 +651,7 @@ public: } } - drain_all_but_stack(lease_stack); /* the lease cr still needs to run */ + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ yield { set_status("updating sync status"); @@ -737,8 +736,8 @@ class RGWFetchAllMetaCR : public RGWCoroutine { std::unique_ptr entries_index; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; bool lost_lock; bool failed; @@ -752,10 +751,7 @@ public: lost_lock(false), failed(false), markers(_markers) { } - ~RGWFetchAllMetaCR() { - if (lease_cr) { - lease_cr->put(); - } + ~RGWFetchAllMetaCR() override { } void append_section_from_set(set& all_sections, const string& name) { @@ -791,10 +787,11 @@ public: set_status(string("acquiring lock (") + sync_env->status_oid() + ")"); uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store, sync_env->store->get_zone_params().log_pool, sync_env->status_oid(), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store, + sync_env->store->get_zone_params().log_pool, + sync_env->status_oid(), lock_name, + lock_duration, this)); + lease_stack = spawn(lease_cr.get(), false); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -875,7 +872,7 @@ public: } } - drain_all_but_stack(lease_stack); /* the lease cr still needs to run */ + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ yield lease_cr->go_down(); @@ -1267,8 +1264,9 @@ class RGWMetaSyncShardCR : public RGWCoroutine { boost::asio::coroutine incremental_cr;
boost::asio::coroutine full_cr; - RGWContinuousLeaseCR *lease_cr = nullptr; - RGWCoroutinesStack *lease_stack = nullptr; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; + bool lost_lock = false; bool *reset_backoff; @@ -1299,7 +1297,6 @@ public: delete marker_tracker; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } } @@ -1391,15 +1388,11 @@ public: yield { uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; - if (lease_cr) { - lease_cr->put(); - } RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, - sync_env->shard_obj_name(shard_id), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, + sync_env->shard_obj_name(shard_id), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); lost_lock = false; } while (!lease_cr->is_locked()) { @@ -1492,8 +1485,7 @@ public: yield lease_cr->go_down(); - lease_cr->put(); - lease_cr = NULL; + lease_cr.reset(); drain_all(); @@ -1525,11 +1517,10 @@ public: uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, - sync_env->shard_obj_name(shard_id), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, + sync_env->shard_obj_name(shard_id), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); lost_lock = false; } while (!lease_cr->is_locked()) {