From 871b2e945638396f54532d6c892a56de6f18e78b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 22 Jun 2021 17:03:10 -0400 Subject: [PATCH] mon/LogMonitor: externally log events from when we were out of quorum Previously, we would externally log any item between the LogSummary.version (written every commit, with version-1) and last_committed. That meant that we logged everything only if the mon was in quorum. If we fell out of quorum and then later caught up, we would/could miss items. Instead, explicitly track what version we have externally logged to, and log everything up until that point. Only consult summary.version if we don't have a local marker recorded (e.g., because we just upgraded). Signed-off-by: Sage Weil --- src/mon/LogMonitor.cc | 53 +++++++++++++++++++++++++++++++++++++++++++ src/mon/LogMonitor.h | 2 ++ 2 files changed, 55 insertions(+) diff --git a/src/mon/LogMonitor.cc b/src/mon/LogMonitor.cc index 4773a5edfcb1a..b9737902cd8d0 100644 --- a/src/mon/LogMonitor.cc +++ b/src/mon/LogMonitor.cc @@ -242,6 +242,9 @@ void LogMonitor::update_from_paxos(bool *need_bootstrap) version_t version = get_last_committed(); dout(10) << __func__ << " version " << version << " summary v " << summary.version << dendl; + + log_external_backlog(); + if (version == summary.version) return; ceph_assert(version >= summary.version); @@ -281,6 +284,8 @@ void LogMonitor::update_from_paxos(bool *need_bootstrap) summary.version++; summary.prune(g_conf()->mon_log_max_summary); } + external_log_to = version; + mon.store->write_meta("external_log_to", stringify(external_log_to)); check_subs(); } @@ -373,6 +378,54 @@ void LogMonitor::log_external_close_fds() channel_fds.clear(); } +/// catch external logs up to summary.version +void LogMonitor::log_external_backlog() +{ + if (!external_log_to) { + std::string cur_str; + int r = mon.store->read_meta("external_log_to", &cur_str); + if (r == 0) { + external_log_to = std::stoull(cur_str); + dout(10) << __func__ << " initialized 
external_log_to = " << external_log_to + << " (recorded log_to position)" << dendl; + } else { + // pre-quincy, we assumed that anything through summary.version was + // logged externally. + assert(r == -ENOENT); + external_log_to = summary.version; + dout(10) << __func__ << " initialized external_log_to = " << external_log_to + << " (summary v " << summary.version << ")" << dendl; + } + } + // we may have logged ahead of summary.version, but never ahead of paxos + assert(external_log_to <= get_last_committed()); + if (external_log_to >= summary.version) { + return; + } + if (auto first = get_first_committed(); external_log_to < first) { + derr << __func__ << " local logs at " << external_log_to + << ", skipping to " << first << dendl; + external_log_to = first; + // FIXME: write marker in each channel log file? + } + for (; external_log_to < summary.version; ++external_log_to) { + bufferlist bl; + int err = get_version(external_log_to+1, bl); + ceph_assert(err == 0); + ceph_assert(bl.length()); + + auto p = bl.cbegin(); + __u8 v; + decode(v, p); + while (!p.end()) { + LogEntry le; + le.decode(p); + log_external(le); + } + } + mon.store->write_meta("external_log_to", stringify(external_log_to)); +} + void LogMonitor::create_pending() { pending_log.clear(); diff --git a/src/mon/LogMonitor.h b/src/mon/LogMonitor.h index d60b57b4205b4..2be79c4ff44c9 100644 --- a/src/mon/LogMonitor.h +++ b/src/mon/LogMonitor.h @@ -45,6 +45,7 @@ private: std::multimap pending_log; LogSummary pending_summary, summary; + version_t external_log_to = 0; std::map channel_fds; fmt::memory_buffer file_log_buffer; @@ -168,6 +169,7 @@ private: void log_external_close_fds(); void log_external(const LogEntry& le); + void log_external_backlog(); /** * translate log sub name ('log-info') to integer id -- 2.39.5