From: kchheda3 Date: Thu, 18 Sep 2025 20:01:50 +0000 (-0400) Subject: rgw/lc: if the bucket's last lc processing time is less than start time of current... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=541d13a6305bac9255348eeeef61d0c5096bf5bf;p=ceph.git rgw/lc: if the bucket's last LC processing time is less than the start time of the current LC session, then continue processing the bucket for LC even if the status is not in the initialized state. Currently the logic inside expired_session() would consider an LC session valid for almost 2-3 days, so for some bucket where the lc processing POST status update fails, the next LC session would skip the bucket because expired_session() would return false, as it multiplies the num_seconds_day *2. Instead of hardcoding the logic to 2 days, store the start time for each LC session and then compare the bucket update time with the lc_start time; if the bucket's process time is less than the current LC start time, then the bucket can be processed, as the previous session has already expired. Signed-off-by: kchheda3 --- diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index eaae450c966..40f2ac05269 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -204,6 +204,7 @@ void *RGWLC::LCWorker::entry() { std::unique_ptr all_buckets; // empty restriction utime_t start = ceph_clock_now(); if (should_work(start)) { + lc_start_time = time(nullptr); ldpp_dout(dpp, 2) << "life cycle: start worker=" << ix << dendl; int r = lc->process(this, all_buckets, false /* once */); if (r < 0) { @@ -2104,14 +2105,19 @@ int RGWLC::process(LCWorker* worker, return 0; } -bool RGWLC::expired_session(time_t started) -{ +bool RGWLC::expired_session(time_t started, time_t lc_start_time) { if (! cct->_conf->rgwlc_auto_session_clear) { return false; } - - time_t interval = (cct->_conf->rgw_lc_debug_interval > 0) - ? 
cct->_conf->rgw_lc_debug_interval : secs_in_a_day; + // lc_start_time is greater than last time when bucket was updated, then + // session is confirmed expired + if (cct->_conf->rgw_lc_debug_interval <= 0) { + if (lc_start_time > started) { + return true; + } + return false; + } + time_t interval = cct->_conf->rgw_lc_debug_interval; auto now = time(nullptr); @@ -2180,8 +2186,8 @@ int RGWLC::process_bucket(int index, int max_lock_secs, LCWorker* worker, bucket_entry_marker, entry); if (ret >= 0) { if (entry.status == lc_processing) { - if (expired_session(entry.start_time)) { - ldpp_dout(this, 5) << "RGWLC::process_bucket(): STALE lc session found for: " << entry + if (expired_session(entry.start_time, worker->lc_start_time)) { + ldpp_dout(this, 5) << "RGWLC::process_bucket(): STALE lc session found for: " << entry << " index: " << index << " worker ix: " << worker->ix << " (clearing)" << dendl; @@ -2447,7 +2453,7 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, if (!entry.bucket.empty()) { if (entry.status == lc_processing) { - if (expired_session(entry.start_time)) { + if (expired_session(entry.start_time, worker->lc_start_time)) { ldpp_dout(this, 5) << "RGWLC::process(): STALE lc session found for: " << entry << " index: " << index << " worker ix: " << worker->ix diff --git a/src/rgw/rgw_lc.h b/src/rgw/rgw_lc.h index 5a4c05adf8d..250849c783a 100644 --- a/src/rgw/rgw_lc.h +++ b/src/rgw/rgw_lc.h @@ -589,9 +589,9 @@ public: * to cloud. This list is maintained for the duration of each RGWLC::process() * post which it is discarded. 
*/ std::set cloud_targets; + time_t lc_start_time; - public: - + public: using lock_guard = std::lock_guard; using unique_lock = std::unique_lock; @@ -642,7 +642,7 @@ public: int process(int index, int max_lock_secs, LCWorker* worker, bool once); int process_bucket(int index, int max_lock_secs, LCWorker* worker, const std::string& bucket_entry_marker, bool once); - bool expired_session(time_t started); + bool expired_session(time_t started, time_t lc_start_time); time_t thread_stop_at(); int list_lc_progress(std::string& marker, uint32_t max_entries, std::vector&,