From: Casey Bodley Date: Mon, 20 Feb 2017 21:00:01 +0000 (-0500) Subject: rgw: RGWMetaSyncShardControlCR retries with backoff on all error codes X-Git-Tag: v12.0.1~228^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F13546%2Fhead;p=ceph.git rgw: RGWMetaSyncShardControlCR retries with backoff on all error codes RGWBackoffControlCR only treats EBUSY and EAGAIN as 'temporary' error codes, with all other errors being fatal when exit_on_error is set. To RGWMetaSyncShardControlCR, a 'fatal' error means that no further sync is possible on that shard until the gateway restarts. This changes RGWMetaSyncShardControlCR to set exit_on_error to false, so that it will continue to retry with backoff no matter what error code it gets. Fixes: http://tracker.ceph.com/issues/19019 Signed-off-by: Casey Bodley --- diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 3e1b6c10e647..65c213fa690b 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -1578,6 +1578,7 @@ public: ldout(sync_env->cct, 10) << *this << ": failed to fetch more log entries, retcode=" << retcode << dendl; yield lease_cr->go_down(); drain_all(); + *reset_backoff = false; // back off and try again later return retcode; } *reset_backoff = true; /* if we got to this point, all systems function */ @@ -1587,6 +1588,13 @@ public: yield call(new RGWReadMDLogEntriesCR(sync_env, mdlog, shard_id, &max_marker, INCREMENTAL_MAX_ENTRIES, &log_entries, &truncated)); + if (retcode < 0) { + ldout(sync_env->cct, 10) << *this << ": failed to list mdlog entries, retcode=" << retcode << dendl; + yield lease_cr->go_down(); + drain_all(); + *reset_backoff = false; // back off and try again later + return retcode; + } for (log_iter = log_entries.begin(); log_iter != log_entries.end(); ++log_iter) { if (!period_marker.empty() && period_marker < log_iter->id) { done_with_period = true; @@ -1657,12 +1665,13 @@ class RGWMetaSyncShardControlCR : public RGWBackoffControlCR 
rgw_meta_sync_marker sync_marker; const std::string period_marker; + static constexpr bool exit_on_error = false; // retry on all errors public: RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_bucket& _pool, const std::string& period, RGWMetadataLog* mdlog, uint32_t _shard_id, const rgw_meta_sync_marker& _marker, std::string&& period_marker) - : RGWBackoffControlCR(_sync_env->cct, true), sync_env(_sync_env), + : RGWBackoffControlCR(_sync_env->cct, exit_on_error), sync_env(_sync_env), pool(_pool), period(period), mdlog(mdlog), shard_id(_shard_id), sync_marker(_marker), period_marker(std::move(period_marker)) {}