git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: rewind_divergent_log needs to dirty log if crt changes or rollback_info_trimmed_to changes
author: Bill Scales <bill_scales@uk.ibm.com>
Mon, 23 Jun 2025 09:12:10 +0000 (10:12 +0100)
committer: Jon <jonathan.bailey1@ibm.com>
Fri, 3 Oct 2025 13:31:23 +0000 (14:31 +0100)
rollback_info_trimmed_to changes

Previously, PGLog::rewind_divergent_log marked the log dirty (and
therefore checkpointed it) only if there were divergent entries.
However, after a PG split the log can be rewound, modifying
can_rollback_to (crt) and/or rollback_info_trimmed_to, without
creating any divergent entries, because the entries being rolled
back were all split into the other PG.

Failing to checkpoint the log opens a window in which, if the OSD
is reset, you can end up with crt (and rollback_info_trimmed_to) > head.
One consequence of this is assertions such as
ceph_assert(rollback_info_trimmed_to == head); firing.

Fixes: https://tracker.ceph.com/issues/55141
Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
(cherry picked from commit d8f78adf85f8cb11deeae3683a28db92046779b5)

src/osd/PGLog.cc
src/osd/PGLog.h
src/osd/osd_types.h

index 07e3f30b6c820e6a1f321f8611ab061f76127822..20d9300902e15a839903f6aa3d5175d96b16e8a2 100644 (file)
@@ -349,10 +349,18 @@ void PGLog::rewind_divergent_log(eversion_t newhead,
   if (info.last_complete > newhead)
     info.last_complete = newhead;
 
-  auto divergent = log.rewind_from_head(newhead);
+  bool need_dirty_log;
+  auto divergent = log.rewind_from_head(newhead, &need_dirty_log);
   if (!divergent.empty()) {
     mark_dirty_from(divergent.front().version);
+  } else if (need_dirty_log) {
+    // can_rollback_to and/or rollback_info_trimmed_to have been modified
+    // and need checkpointing
+    dout(10) << "rewind_divergent_log crt = "
+            << log.get_can_rollback_to() << dendl;
+    dirty_log = true;
   }
+
   for (auto &&entry: divergent) {
     dout(10) << "rewind_divergent_log future divergent " << entry << dendl;
   }
index 07390eebe1deca4ff0ac26b78ba0034e83108387..f7d0386d9cf8104c9f9c0b359dfa03cf265c94db 100644 (file)
@@ -296,8 +296,8 @@ public:
                                        eversion_t previous_version) {});
     }
 
-    mempool::osd_pglog::list<pg_log_entry_t> rewind_from_head(eversion_t newhead) {
-      auto divergent = pg_log_t::rewind_from_head(newhead);
+    mempool::osd_pglog::list<pg_log_entry_t> rewind_from_head(eversion_t newhead, bool *dirty_log = nullptr) {
+      auto divergent = pg_log_t::rewind_from_head(newhead, dirty_log);
       index();
       reset_rollback_info_trimmed_to_riter();
       return divergent;
index 5a58dddaf80e38fafb2b58f38f3f072d9319b8fb..607b030da7d1de759f2b0e187252b1a0b7a51547 100644 (file)
@@ -4738,7 +4738,7 @@ public:
       std::move(childdups));
     }
 
-  mempool::osd_pglog::list<pg_log_entry_t> rewind_from_head(eversion_t newhead) {
+  mempool::osd_pglog::list<pg_log_entry_t> rewind_from_head(eversion_t newhead, bool *dirty_log = nullptr) {
     ceph_assert(newhead >= tail);
 
     mempool::osd_pglog::list<pg_log_entry_t>::iterator p = log.end();
@@ -4768,11 +4768,19 @@ public:
     }
     head = newhead;
 
-    if (can_rollback_to > newhead)
+    if (can_rollback_to > newhead) {
       can_rollback_to = newhead;
+      if (dirty_log) {
+       *dirty_log = true;
+      }
+    }
 
-    if (rollback_info_trimmed_to > newhead)
+    if (rollback_info_trimmed_to > newhead) {
       rollback_info_trimmed_to = newhead;
+      if (dirty_log) {
+       *dirty_log = true;
+      }
+    }
 
     return divergent;
   }