From: Patrick Donnelly Date: Tue, 27 Aug 2024 17:50:55 +0000 (-0400) Subject: mds: delay expiry if LogSegment is ahead of committed oft seq X-Git-Tag: v20.0.0~955^2~5 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b2711f655a7c9e45d6cf2b31919d0c20d96d52f8;p=ceph.git mds: delay expiry if LogSegment is ahead of committed oft seq And remove the misplaced conditional in MDLog::_trim_expired_segments. This is necessary as the `flush journal` command gets confused by missing a wait_for_expiry on a LogSegment that is not actually expired. Signed-off-by: Patrick Donnelly --- diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 9fba37da51d03..0f33bacbd3e2b 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -856,8 +856,6 @@ void MDLog::_trim_expired_segments(auto& locker, MDSContext* ctx) ceph_assert(ceph_mutex_is_locked_by_me(submit_mutex)); ceph_assert(locker.owns_lock()); - uint64_t const oft_committed_seq = mds->mdcache->open_file_table.get_committed_log_seq(); - // trim expired segments? 
bool trimmed = false; uint64_t end = 0; @@ -903,12 +901,6 @@ void MDLog::_trim_expired_segments(auto& locker, MDSContext* ctx) break; } - if (!mds_is_shutting_down && ls->seq >= oft_committed_seq) { - dout(10) << __func__ << " defer expire for open file table committedseq " << oft_committed_seq - << " <= " << ls->seq << "/" << ls->offset << dendl; - break; - } - end = seq; dout(10) << __func__ << ": maybe expiring " << *ls << dendl; } diff --git a/src/mds/OpenFileTable.cc b/src/mds/OpenFileTable.cc index 4322b6a8a7d59..811c6aff8ad28 100644 --- a/src/mds/OpenFileTable.cc +++ b/src/mds/OpenFileTable.cc @@ -283,6 +283,14 @@ void OpenFileTable::_commit_finish(int r, uint64_t log_seq, MDSContext *fin) committed_log_seq = log_seq; num_pending_commit--; + { + auto last = waiting_for_commit.upper_bound(log_seq); + for (auto it = waiting_for_commit.begin(); it != last; it++) { + finish_contexts(g_ceph_context, it->second); + } + waiting_for_commit.erase(waiting_for_commit.begin(), last); + } + if (fin) fin->complete(r); } diff --git a/src/mds/OpenFileTable.h b/src/mds/OpenFileTable.h index b18395213f56b..a1b62012f7952 100644 --- a/src/mds/OpenFileTable.h +++ b/src/mds/OpenFileTable.h @@ -50,6 +50,9 @@ public: ceph_assert(!load_done); waiting_for_load.push_back(c); } + void wait_for_commit(uint64_t seq, Context* c) { + waiting_for_commit[seq].push_back(c); + } bool prefetch_inodes(); bool is_prefetched() const { return prefetch_state == DONE; } @@ -149,6 +152,8 @@ protected: std::set destroyed_inos_set; std::unique_ptr logger; + + std::map<uint64_t, std::vector<Context*>> waiting_for_commit; }; #endif diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 3018ea99fa035..b7fc058692a66 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -235,6 +235,14 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o } } + auto const oft_cseq = mds->mdcache->open_file_table.get_committed_log_seq(); + if (!mds->mdlog->is_capped() && seq >= oft_cseq) { + dout(10) << *this << 
".try_to_expire" + << " defer expire for oft_committed_seq (" << oft_cseq + << ") <= seq (" << seq << ")" << dendl; + mds->mdcache->open_file_table.wait_for_commit(seq, gather_bld.new_sub()); + } + ceph_assert(g_conf()->mds_kill_journal_expire_at != 3); std::map> ops_vec_map;