From: fang.yuxiang Date: Tue, 13 Jun 2017 08:40:16 +0000 (+0800) Subject: rgw: meta sync thread crash at RGWMetaSyncShardCR X-Git-Tag: v12.1.0~71^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=45877d38fd9a385b2f8b13e90be94d784898b0b3;p=ceph.git rgw: meta sync thread crash at RGWMetaSyncShardCR Fixes: http://tracker.ceph.com/issues/20251 Signed-off-by: fang yuxiang <fang.yuxiang@eisoo.com> --- diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc index 7769c8dc9de2d..da4db761daa97 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -1084,8 +1084,8 @@ class RGWDataSyncShardCR : public RGWCoroutine { set spawned_keys; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; string status_oid; @@ -1126,7 +1126,6 @@ public: delete marker_tracker; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } if (error_repo) { error_repo->put(); @@ -1174,14 +1173,12 @@ public: string lock_name = "sync_lock"; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, status_oid), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(store->get_zone_params().log_pool, status_oid), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } int full_sync() { @@ -1358,7 +1355,7 @@ public: set_status() << "num_spawned() > spawn_window"; yield wait_for_child(); int ret; - while (collect(&ret, lease_stack)) { + while (collect(&ret, lease_stack.get())) { if (ret < 0) { ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl; /* we have reported this error */ @@ -2701,12 +2698,12 @@ int RGWRunBucketSyncCoroutine::operate() yield { 
set_status("acquiring sync lock"); auto store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, status_oid), - "sync_lock", - cct->_conf->rgw_sync_lease_period, - this); - lease_stack = spawn(lease_cr.get(), false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(store->get_zone_params().log_pool, status_oid), + "sync_lock", + cct->_conf->rgw_sync_lease_period, + this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 7a8a8bec2f12f..99927dbaf0aa5 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -593,8 +593,8 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine { rgw_meta_sync_info status; vector shards_info; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; public: RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, const rgw_meta_sync_info &status) @@ -605,7 +605,6 @@ public: ~RGWInitSyncStatusCoroutine() override { if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } } @@ -617,11 +616,10 @@ public: uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, sync_env->status_oid()), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(store->get_zone_params().log_pool, sync_env->status_oid()), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -655,7 +653,7 @@ public: } } - 
drain_all_but_stack(lease_stack); /* the lease cr still needs to run */ + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ yield { set_status("updating sync status"); @@ -782,8 +780,8 @@ class RGWFetchAllMetaCR : public RGWCoroutine { std::unique_ptr entries_index; - RGWContinuousLeaseCR *lease_cr; - RGWCoroutinesStack *lease_stack; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; bool lost_lock; bool failed; @@ -798,9 +796,6 @@ public: } ~RGWFetchAllMetaCR() override { - if (lease_cr) { - lease_cr->put(); - } } void append_section_from_set(set& all_sections, const string& name) { @@ -836,12 +831,11 @@ public: set_status(string("acquiring lock (") + sync_env->status_oid() + ")"); uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, - sync_env->store, - rgw_raw_obj(sync_env->store->get_zone_params().log_pool, sync_env->status_oid()), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, + sync_env->store, + rgw_raw_obj(sync_env->store->get_zone_params().log_pool, sync_env->status_oid()), + lock_name, lock_duration, this)); + lease_stack = spawn(lease_cr.get(), false); } while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -921,7 +915,7 @@ public: } } - drain_all_but_stack(lease_stack); /* the lease cr still needs to run */ + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ yield lease_cr->go_down(); @@ -1316,8 +1310,9 @@ class RGWMetaSyncShardCR : public RGWCoroutine { boost::asio::coroutine incremental_cr; boost::asio::coroutine full_cr; - RGWContinuousLeaseCR *lease_cr = nullptr; - RGWCoroutinesStack *lease_stack = nullptr; + boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr; + boost::intrusive_ptr<RGWCoroutinesStack> lease_stack; + bool lost_lock = false; bool *reset_backoff; @@ -1350,7 +1345,6 @@ public: delete 
marker_tracker; if (lease_cr) { lease_cr->abort(); - lease_cr->put(); } } @@ -1442,15 +1436,11 @@ public: yield { uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; - if (lease_cr) { - lease_cr->put(); - } RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); lost_lock = false; } while (!lease_cr->is_locked()) { @@ -1547,8 +1537,7 @@ ldout(cct, 0) << __FILE__ << ":" << __LINE__ << ":" << *this << ": num_spawned() yield lease_cr->go_down(); - lease_cr->put(); - lease_cr = NULL; + lease_cr.reset(); drain_all(); @@ -1580,11 +1569,10 @@ ldout(cct, 0) << __FILE__ << ":" << __LINE__ << ":" << *this << ": num_spawned() uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; string lock_name = "sync_lock"; RGWRados *store = sync_env->store; - lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), - lock_name, lock_duration, this); - lease_cr->get(); - lease_stack = spawn(lease_cr, false); + lease_cr.reset( new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); lost_lock = false; } while (!lease_cr->is_locked()) {