From ae687d35d1ff69fd5915934173a265036549a4df Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Tue, 2 Apr 2024 15:55:06 +0530 Subject: [PATCH] mds: log at a lower level when stopping Log at lower level at log level 5 while the mds is stopping to track down issues like mds becoming laggy while stopping. It was being logged at level 7 before. https://tracker.ceph.com/issues/65260 Signed-off-by: Kotresh HR --- src/mds/MDCache.cc | 30 +++++++++++++++--------------- src/mds/MDSRank.cc | 6 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 0226ce32f719a..48803667d8a17 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -6873,7 +6873,7 @@ std::pair MDCache::trim(uint64_t count) uint64_t limit = cache_memory_limit; expiremap expiremap; - dout(7) << "trim bytes_used=" << bytes2str(used) + dout(5) << "trim bytes_used=" << bytes2str(used) << " limit=" << bytes2str(limit) << " reservation=" << cache_reservation << "% count=" << count << dendl; @@ -7882,11 +7882,11 @@ void MDCache::shutdown_start() bool MDCache::shutdown_pass() { - dout(7) << "shutdown_pass" << dendl; + dout(5) << "shutdown_pass" << dendl; ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_START); if (mds->is_stopped()) { - dout(7) << " already shut down" << dendl; + dout(5) << " already shut down" << dendl; show_cache(); show_subtrees(); return true; @@ -7903,7 +7903,7 @@ bool MDCache::shutdown_pass() // Export all subtrees to another active (usually rank 0) if not rank 0 int num_auth_subtree = 0; if (!subtrees.empty() && mds->get_nodeid() != 0) { - dout(7) << "looking for subtrees to export" << dendl; + dout(5) << "looking for subtrees to export" << dendl; std::vector ls; for (auto& [dir, bounds] : subtrees) { dout(10) << " examining " << *dir << " bounds " << bounds << dendl; @@ -7927,7 +7927,7 @@ bool MDCache::shutdown_pass() mds_rank_t dest = dir->get_inode()->authority().first; if (dest > 0 && !mds->mdsmap->is_active(dest)) dest = 0; - dout(7) << "sending " << *dir << " back to mds." << dest << dendl; + dout(5) << "sending " << *dir << " back to mds." << dest << dendl; migrator->export_dir_nicely(dir, dest); ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_POSTONEEXPORT); } @@ -7936,13 +7936,13 @@ bool MDCache::shutdown_pass() ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_POSTALLEXPORTS); if (!strays_all_exported) { - dout(7) << "waiting for strays to migrate" << dendl; + dout(5) << "waiting for strays to migrate" << dendl; return false; } if (num_auth_subtree > 0) { ceph_assert(mds->get_nodeid() > 0); - dout(7) << "still have " << num_auth_subtree << " auth subtrees" << dendl; + dout(5) << "still have " << num_auth_subtree << " auth subtrees" << dendl; show_subtrees(); return false; } @@ -7971,7 +7971,7 @@ bool MDCache::shutdown_pass() } mds->mdlog->trim_all(); if (mds->mdlog->get_num_segments() > 1) { - dout(7) << "still >1 segments, waiting for log to trim" << dendl; + dout(5) << "still >1 segments, waiting for log to trim" << dendl; return false; } ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_TRIMALL); @@ -7993,7 +7993,7 @@ bool MDCache::shutdown_pass() // subtrees map not empty yet? if (subtrees.size() > (mydir ? 1 : 0)) { - dout(7) << "still have " << num_subtrees() << " subtrees" << dendl; + dout(5) << "still have " << num_subtrees() << " subtrees" << dendl; show_subtrees(); migrator->show_importing(); migrator->show_exporting(); @@ -8006,19 +8006,19 @@ bool MDCache::shutdown_pass() // replicas may dirty scatter locks if (myin && myin->is_replicated()) { - dout(7) << "still have replicated objects" << dendl; + dout(5) << "still have replicated objects" << dendl; return false; } if ((myin && myin->get_num_auth_pins()) || (mydir && (mydir->get_auth_pins() || mydir->get_dir_auth_pins()))) { - dout(7) << "still have auth pinned objects" << dendl; + dout(5) << "still have auth pinned objects" << dendl; return false; } // (only do this once!) if (!mds->mdlog->is_capped()) { - dout(7) << "capping the mdlog" << dendl; + dout(5) << "capping the mdlog" << dendl; mds->mdlog->submit_entry(new ELid()); mds->mdlog->flush(); mds->mdlog->cap(); @@ -8028,14 +8028,14 @@ bool MDCache::shutdown_pass() // filer active? if (mds->objecter->is_active()) { - dout(7) << "objecter still active" << dendl; + dout(5) << "objecter still active" << dendl; mds->objecter->dump_active(); return false; } // trim what we can from the cache if (lru.lru_get_size() > 0 || bottom_lru.lru_get_size() > 0) { - dout(7) << "there's still stuff in the cache: " << lru.lru_get_size() << "/" << bottom_lru.lru_get_size() << dendl; + dout(5) << "there's still stuff in the cache: " << lru.lru_get_size() << "/" << bottom_lru.lru_get_size() << dendl; show_cache(); //dump(); return false; @@ -8044,7 +8044,7 @@ bool MDCache::shutdown_pass() // make mydir subtree go away if (mydir) { if (mydir->get_num_ref() > 1) { // subtree pin - dout(7) << "there's still reference to mydir " << *mydir << dendl; + dout(5) << "there's still reference to mydir " << *mydir << dendl; show_cache(); return false; } diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index c9e50d79fa209..678bcd5e0d392 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -795,7 +795,7 @@ void MDSRankDispatcher::tick() uint64_t pq_total = 0; size_t pq_in_flight = 0; if (!purge_queue.drain(&pq_progress, &pq_total, &pq_in_flight)) { - dout(7) << "shutdown_pass=true, but still waiting for purge queue" + dout(5) << "shutdown_pass=true, but still waiting for purge queue" << dendl; // This takes unbounded time, so we must indicate progress // to the administrator: we do it in a slightly imperfect way @@ -805,13 +805,13 @@ void MDSRankDispatcher::tick() << std::dec << pq_progress << "/" << pq_total << " " << pq_in_flight << " files purging" << ")"; } else { - dout(7) << "shutdown_pass=true, finished w/ shutdown, moving to " + dout(5) << "shutdown_pass=true, finished w/ shutdown, moving to " "down:stopped" << dendl; stopping_done(); } } else { - dout(7) << "shutdown_pass=false" << dendl; + dout(5) << "shutdown_pass=false" << dendl; } } -- 2.39.5