From da5c263b8e7797eac6c9d13d5b6a6b292d9c5def Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Mon, 20 May 2024 19:17:04 +0300 Subject: [PATCH] mds/quiesce: overdrive an export if it hasn't frozen the tree yet Just like with the fragmenting, we should abort an ongoing export if a quiesce is attempted for the directory. To minimize the stress for the system, we only allow the abort if the export hasn't yet managed to freeze the tree. Otherwise, quiesce will have to wait for the export to finish. Fixes: https://tracker.ceph.com/issues/66123 Signed-off-by: Leonid Usov --- src/mds/MDCache.cc | 2 ++ src/mds/Migrator.cc | 33 ++++++++++++++++++++++++++++----- src/mds/Migrator.h | 1 + 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index a545daded17b4..65ffac60889b0 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -13758,6 +13758,7 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr) dout(25) << " iterating " << *dir << dendl; // overdrive syncrhonously since we aren't yet on the waiting list quiesce_overdrive_fragmenting(dir, false); + migrator->quiesce_overdrive_export(dir); for (auto& [dnk, dn] : *dir) { dout(25) << " evaluating (" << dnk << ", " << *dn << ")" << dendl; auto* in = dn->get_projected_inode(); @@ -13795,6 +13796,7 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr) } } if (gather.has_subs()) { + mdr->mark_event("quiescing children"); dout(20) << __func__ << ": waiting for sub-ops to gather" << dendl; gather.activate(); return; diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 3600f78c572b6..cb77282e38443 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -242,6 +242,20 @@ void Migrator::find_stale_export_freeze() } } +void Migrator::quiesce_overdrive_export(CDir *dir) { + map::iterator it = export_state.find(dir); + if (it == export_state.end()) { + return; + } + auto state = it->second.state; + if (state <= EXPORT_FREEZING) { + 
dout(10) << "will try to cancel in state: (" << state << ") " << get_export_statename(state) << dendl; + export_try_cancel(dir, true); + } else { + dout(10) << "won't cancel in state: (" << state << ") " << get_export_statename(state) << dendl; + } +} + void Migrator::export_try_cancel(CDir *dir, bool notify_peer) { dout(10) << *dir << dendl; @@ -1229,13 +1243,18 @@ void Migrator::handle_export_discover_ack(const cref_t &m ceph_assert(it->second.state == EXPORT_DISCOVERING); if (m->is_success()) { + // move to freezing the subtree + it->second.state = EXPORT_FREEZING; + auto&& mdr = boost::static_pointer_cast(std::move(it->second.mut)); + ceph_assert(!it->second.mut); // should have been moved out of + // release locks to avoid deadlock - MDRequestRef mdr = static_cast(it->second.mut.get()); ceph_assert(mdr); + // We should only call request_finish after we changed the state. + // Other requests may run as part of the finish here, so we want them + // to see this export in the updated state. mdcache->request_finish(mdr); - it->second.mut.reset(); - // freeze the subtree - it->second.state = EXPORT_FREEZING; + dir->auth_unpin(this); ceph_assert(g_conf()->mds_kill_export_at != 3); @@ -2388,7 +2407,11 @@ void Migrator::handle_export_cancel(const cref_t &m) dirfrag_t df = m->get_dirfrag(); map::iterator it = import_state.find(df); if (it == import_state.end()) { - ceph_abort_msg("got export_cancel in weird state"); + // don't assert here: we could NACK a discovery and also + // receive an async cancel. + // In general, it shouldn't be a fatal error to receive a cancel + // for an operation we don't know about. 
+ dout(3) << "got export_cancel for an unknown fragment " << df << dendl; } else if (it->second.state == IMPORT_DISCOVERING) { import_reverse_discovering(df); } else if (it->second.state == IMPORT_DISCOVERED) { diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index 53bf99fb639c1..d6e599c06a995 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -182,6 +182,7 @@ public: void handle_mds_failure_or_stop(mds_rank_t who); void audit(); + void quiesce_overdrive_export(CDir *dir); // -- import/export -- // exporter -- 2.39.5