From: ethanwu Date: Sun, 24 Mar 2024 09:11:16 +0000 (+0800) Subject: mds: flush mds log before finishing STATE_STARTING X-Git-Tag: v19.1.1~79^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a99368b38091e55ecc7d6aaf301d1bc167ea9146;p=ceph.git mds: flush mds log before finishing STATE_STARTING If we don't flush the mds log before requesting STATE_ACTIVE and the mds happens to stop later, before the log reaches the journal, the take-over mds will have no SubtreeMap to replay and will fail later at the non-empty subtree check. Fixes: https://tracker.ceph.com/issues/65094 Signed-off-by: ethanwu (cherry picked from commit ee5472edd6fe80ed043c3eb73322348367f9bb22) --- diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 9fd50b477ded..c2f5b47f2542 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1724,7 +1724,10 @@ void MDSRank::boot_start(BootStep step, int r) } else { dout(2) << "Booting: " << step << ": positioning at end of old mds log" << dendl; mdlog->append(); - starting_done(); + auto sle = mdcache->create_subtree_map(); + mdlog->submit_entry(sle); + mdlog->flush(); + mdlog->wait_for_safe(new C_MDS_VoidFn(this, &MDSRank::starting_done)); } break; case MDS_BOOT_REPLAY_DONE: @@ -1771,9 +1774,6 @@ void MDSRank::starting_done() ceph_assert(is_starting()); request_state(MDSMap::STATE_ACTIVE); - auto sle = mdcache->create_subtree_map(); - mdlog->submit_entry(sle); - // sync snaptable cache snapclient->sync(new C_MDSInternalNoop); }