From b04d3cfb33e416846301264c36793409548374b3 Mon Sep 17 00:00:00 2001 From: Shilpa Jagannath Date: Mon, 17 Jul 2023 12:52:07 -0400 Subject: [PATCH] rgw/multisite: In order to sleep between mdlog polling events, we check whether mdlog_marker is unchanged by comparing mdlog_marker with max_marker. But max_marker is exposed to changes from RGWReadMDLogEntriesCR, and a race with mdlog trimming can render max_marker empty, in which case its comparison with mdlog_marker can be incorrect. To fix this, we now save the previous mdlog marker and compare it with the updated mdlog marker. Signed-off-by: Shilpa Jagannath (cherry picked from commit bae10f22e8b008088f0861db2785d8c0ba7ee506) --- src/rgw/driver/rados/rgw_sync.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rgw/driver/rados/rgw_sync.cc b/src/rgw/driver/rados/rgw_sync.cc index a9ea2ecf5491d..583e2d1098e1c 100644 --- a/src/rgw/driver/rados/rgw_sync.cc +++ b/src/rgw/driver/rados/rgw_sync.cc @@ -1481,6 +1481,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine { bool done_with_period = false; int total_entries = 0; + string old_mdlog_marker; RGWSyncTraceNodeRef tn; public: @@ -1832,6 +1833,7 @@ public: if (mdlog_marker <= max_marker || !truncated) { /* we're at the tip, try to bring more entries */ ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " syncing mdlog for shard_id=" << shard_id << dendl; + old_mdlog_marker = mdlog_marker; yield call(new RGWCloneMetaLogCoroutine(sync_env, mdlog, period, shard_id, mdlog_marker, &mdlog_marker)); @@ -1902,7 +1904,8 @@ public: tn->log(10, SSTR(*this << ": done with period")); break; } - if (mdlog_marker == max_marker && can_adjust_marker) { + if (mdlog_marker == old_mdlog_marker && can_adjust_marker) { + tn->log(20, SSTR("mdlog_marker=" << mdlog_marker << " old_mdlog_marker=" << old_mdlog_marker)); tn->unset_flag(RGW_SNS_FLAG_ACTIVE); yield wait(utime_t(cct->_conf->rgw_meta_sync_poll_interval, 
0)); } -- 2.39.5