From 0930eb07d1c983412e27f7c42836a319a3161c7a Mon Sep 17 00:00:00 2001 From: "fang.yuxiang" <fang.yuxiang@eisoo.com> Date: Tue, 13 Jun 2017 16:40:16 +0800 Subject: [PATCH] rgw: meta sync thread crash at RGWMetaSyncShardCR Fixes: http://tracker.ceph.com/issues/20251 Signed-off-by: fang yuxiang <fang.yuxiang@eisoo.com> (cherry picked from commit 45877d38fd9a385b2f8b13e90be94d784898b0b3) --- src/rgw/rgw_data_sync.cc | 15 ++++------ src/rgw/rgw_sync.cc | 60 ++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 45 deletions(-) diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc index 28e703629c300..fe516fe22afd6 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -1004,8 +1004,8 @@ class RGWDataSyncShardCR : public RGWCoroutine { set spawned_keys; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; string status_oid; @@ -1046,7 +1046,6 @@ public: delete marker_tracker; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } if (error_repo) { error_repo->put(); @@ -1094,13 +1093,11 @@ public: string lock_name = "sync_lock"; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid, - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid, + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } int full_sync() { @@ -1272,7 +1269,7 @@ public: set_status() << "num_spawned() > spawn_window"; yield wait_for_child(); int ret; - while (collect(&ret, lease_stack)) { + while (collect(&ret, lease_stack.get())) { if (ret < 0) { ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl; /* we have reported this error */ 
diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 4617cd9dd05ac..b694f739e10da 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -592,8 +592,8 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine { rgw_meta_sync_info status; vector shards_info; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; public: RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, const rgw_meta_sync_info &status) @@ -604,7 +604,6 @@ public: ~RGWInitSyncStatusCoroutine() { if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } } @@ -616,10 +615,9 @@ public: uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, sync_env->status_oid(), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, sync_env->status_oid(), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -652,7 +650,7 @@ public: } } - drain_all_but_stack(lease_stack); /* the lease cr still needs to run */ + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ yield { set_status("updating sync status"); @@ -737,8 +735,8 @@ class RGWFetchAllMetaCR : public RGWCoroutine { std::unique_ptr entries_index; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; bool lost_lock; bool failed; @@ -753,9 +751,6 @@ public: } ~RGWFetchAllMetaCR() { - if (lease_cr) { - lease_cr->put(); - } } void append_section_from_set(set& all_sections, const string& name) { @@ -791,10 +786,9 @@ public: 
set_status(string("acquiring lock (") + sync_env->status_oid() + ")"); uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store, sync_env->store->get_zone_params().log_pool, sync_env->status_oid(), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store, sync_env->store->get_zone_params().log_pool, sync_env->status_oid(), + lock_name, lock_duration, this)); + lease_stack = spawn(lease_cr.get(), false); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -873,7 +867,7 @@ public: } } - drain_all_but_stack(lease_stack); /* the lease cr still needs to run */ + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ yield lease_cr->go_down(); @@ -1259,8 +1253,9 @@ class RGWMetaSyncShardCR : public RGWCoroutine { boost::asio::coroutine incremental_cr; boost::asio::coroutine full_cr; - RGWContinuousLeaseCR *lease_cr = nullptr; - RGWCoroutinesStack *lease_stack = nullptr; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; + bool lost_lock = false; bool *reset_backoff; @@ -1291,7 +1286,6 @@ public: delete marker_tracker; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } } @@ -1383,15 +1377,11 @@ public: yield { uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; - if (lease_cr) { - lease_cr->put(); - } RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, - sync_env->shard_obj_name(shard_id), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, + sync_env->shard_obj_name(shard_id), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); lost_lock = 
false; } while (!lease_cr->is_locked()) { @@ -1484,8 +1474,7 @@ public: yield lease_cr->go_down(); - lease_cr->put(); - lease_cr = NULL; + lease_cr.reset(); drain_all(); @@ -1517,11 +1506,10 @@ public: uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, - sync_env->shard_obj_name(shard_id), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, pool, + sync_env->shard_obj_name(shard_id), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); lost_lock = false; } while (!lease_cr->is_locked()) { -- 2.39.5