]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon/LogMonitor: externally log events from when we were out of quorum
author Sage Weil <sage@newdream.net>
Tue, 22 Jun 2021 21:03:10 +0000 (17:03 -0400)
committer Sage Weil <sage@newdream.net>
Fri, 2 Jul 2021 13:00:03 +0000 (09:00 -0400)
Previously, we would externally log any item from the LogSummary.version
(written every commit, with version-1) and last_committed.  That meant
that we logged everything only if the mon was in quorum.  If we fell
out of quorum and then later caught up, we would/could miss items.

Instead, explicitly track what version we have externally logged to, and
log everything up until that point.  Only consult summary.version if we
don't have a local marker recorded (e.g., because we just upgraded).

Signed-off-by: Sage Weil <sage@newdream.net>
src/mon/LogMonitor.cc
src/mon/LogMonitor.h

index 4773a5edfcb1a960304ec74fd1ec9c902396503d..b9737902cd8d0f09c698eb191440e85dee1fbb3e 100644 (file)
@@ -242,6 +242,9 @@ void LogMonitor::update_from_paxos(bool *need_bootstrap)
   version_t version = get_last_committed();
   dout(10) << __func__ << " version " << version
            << " summary v " << summary.version << dendl;
+
+  log_external_backlog();
+
   if (version == summary.version)
     return;
   ceph_assert(version >= summary.version);
@@ -281,6 +284,8 @@ void LogMonitor::update_from_paxos(bool *need_bootstrap)
     summary.version++;
     summary.prune(g_conf()->mon_log_max_summary);
   }
+  external_log_to = version;
+  mon.store->write_meta("external_log_to", stringify(external_log_to));
 
   check_subs();
 }
@@ -373,6 +378,54 @@ void LogMonitor::log_external_close_fds()
   channel_fds.clear();
 }
 
+/// Pass every LogEntry in versions (external_log_to, summary.version] to log_external(), then persist the new marker.
+void LogMonitor::log_external_backlog()
+{
+  if (!external_log_to) {  // 0 is the member's initial value: no marker loaded yet
+    std::string cur_str;
+    int r = mon.store->read_meta("external_log_to", &cur_str);  // same key that is written at the end of this function
+    if (r == 0) {
+      external_log_to = std::stoull(cur_str);  // NOTE(review): std::stoull throws on non-numeric text; no handler is visible here
+      dout(10) << __func__ << " initialized external_log_to = " << external_log_to
+              << " (recorded log_to position)" << dendl;
+    } else {
+      // No stored marker.  Pre-quincy, we assumed that anything through
+      // summary.version was logged externally, so start from there.
+      assert(r == -ENOENT);  // NOTE(review): plain assert here, ceph_assert in the loop below -- confirm which is intended
+      external_log_to = summary.version;
+      dout(10) << __func__ << " initialized external_log_to = " << external_log_to
+              << " (summary v " << summary.version << ")" << dendl;
+    }
+  }
+  // we may have logged ahead of summary.version, but never ahead of paxos
+  assert(external_log_to <= get_last_committed());  // NOTE(review): plain assert, unlike the ceph_assert calls below
+  if (external_log_to >= summary.version) {
+    return;  // nothing to replay; the marker is not rewritten on this path
+  }
+  if (auto first = get_first_committed(); external_log_to < first) {
+    derr << __func__ << " local logs at " << external_log_to
+        << ", skipping to " << first << dendl;
+    external_log_to = first;  // NOTE(review): the loop below then reads first+1 onward, so version `first` is not replayed -- confirm
+    // FIXME: write marker in each channel log file?
+  }
+  for (; external_log_to < summary.version; ++external_log_to) {
+    bufferlist bl;
+    int err = get_version(external_log_to+1, bl);  // marker names the last version handled, so fetch the next one
+    ceph_assert(err == 0);
+    ceph_assert(bl.length());
+
+    auto p = bl.cbegin();
+    __u8 v;  // leading byte; decoded and not used again in this function (presumably an encoding version -- confirm)
+    decode(v, p);
+    while (!p.end()) {  // the remainder of the buffer is consumed as consecutive LogEntry records
+      LogEntry le;
+      le.decode(p);
+      log_external(le);
+    }
+  }
+  mon.store->write_meta("external_log_to", stringify(external_log_to));  // persist the marker reached by the loop
+}
+
 void LogMonitor::create_pending()
 {
   pending_log.clear();
index d60b57b4205b47538255b554f022f4fadec2e4d1..2be79c4ff44c9b607f1d55558ce30b83f1de7aff 100644 (file)
@@ -45,6 +45,7 @@ private:
   std::multimap<utime_t,LogEntry> pending_log;
   LogSummary pending_summary, summary;
 
+  version_t external_log_to = 0;
   std::map<std::string, int> channel_fds;
 
   fmt::memory_buffer file_log_buffer;
@@ -168,6 +169,7 @@ private:
 
   void log_external_close_fds();
   void log_external(const LogEntry& le);
+  void log_external_backlog();
 
   /**
    * translate log sub name ('log-info') to integer id