From a99368b38091e55ecc7d6aaf301d1bc167ea9146 Mon Sep 17 00:00:00 2001 From: ethanwu Date: Sun, 24 Mar 2024 17:11:16 +0800 Subject: [PATCH] mds: flush mds log before finishing STATE_STARTING If we don't flush the mds log before requesting STATE_ACTIVE and the mds happens to stop before the log reaches the journal, the take-over mds will have no SubtreeMap to replay and will fail later at the non-empty subtree check. Fixes: https://tracker.ceph.com/issues/65094 Signed-off-by: ethanwu (cherry picked from commit ee5472edd6fe80ed043c3eb73322348367f9bb22) --- src/mds/MDSRank.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 9fd50b477ded2..c2f5b47f25421 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1724,7 +1724,10 @@ void MDSRank::boot_start(BootStep step, int r) } else { dout(2) << "Booting: " << step << ": positioning at end of old mds log" << dendl; mdlog->append(); - starting_done(); + auto sle = mdcache->create_subtree_map(); + mdlog->submit_entry(sle); + mdlog->flush(); + mdlog->wait_for_safe(new C_MDS_VoidFn(this, &MDSRank::starting_done)); } break; case MDS_BOOT_REPLAY_DONE: @@ -1771,9 +1774,6 @@ void MDSRank::starting_done() ceph_assert(is_starting()); request_state(MDSMap::STATE_ACTIVE); - auto sle = mdcache->create_subtree_map(); - mdlog->submit_entry(sle); - // sync snaptable cache snapclient->sync(new C_MDSInternalNoop); } -- 2.39.5