From: Zhansong Gao Date: Tue, 14 Feb 2023 08:10:59 +0000 (+0800) Subject: mds: add an asok command to dump export states X-Git-Tag: v19.2.3~365^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=abeafaa0fe4cc30a9378ad4b29b4c9de059bfc53;p=ceph.git mds: add an asok command to dump export states A task to export a subtree may be blocked; use this command to find out what's going on. Fixes: https://tracker.ceph.com/issues/58835 Signed-off-by: Zhansong Gao (cherry picked from commit d34f33055d25ba78f63369f661eb75515b5f465d) --- diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 76ac7e21cc09..a5cd31017444 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -549,6 +549,16 @@ public: void maybe_finish_freeze(); + size_t count_unfreeze_tree_waiters() { + size_t n = count_unfreeze_dir_waiters(); + _walk_tree([&n](CDir *dir) { + n += dir->count_unfreeze_dir_waiters(); + return true; + }); + return n; + } + inline size_t count_unfreeze_dir_waiters() const { return count_waiters(WAIT_UNFREEZE); } + std::pair is_freezing_or_frozen_tree() const { if (freeze_tree_state) { if (freeze_tree_state->frozen) diff --git a/src/mds/MDSCacheObject.h b/src/mds/MDSCacheObject.h index d322a05851a5..3b33fe4b1940 100644 --- a/src/mds/MDSCacheObject.h +++ b/src/mds/MDSCacheObject.h @@ -279,6 +279,8 @@ class MDSCacheObject { } bool is_waiter_for(waitmask_t mask); + inline size_t count_waiters(uint64_t mask) const { return waiting.count(mask); } + virtual void add_waiter(uint64_t mask, MDSContext *c) { add_waiter(waitmask_t(mask), c); } diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index b31d9c95220c..b250dc8ae838 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -304,6 +304,10 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "show recent ops, sorted by op duration"); ceph_assert(r == 0); + r = admin_socket->register_command("dump_export_states", + asok_hook, + "dump export states"); + ceph_assert(r == 0); r = 
admin_socket->register_command("scrub_path name=path,type=CephString " "name=scrubops,type=CephChoices," "strings=force|recursive|repair,n=N,req=false " diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index c766e9ef1f98..2656ade8ad70 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -2806,6 +2806,9 @@ void MDSRankDispatcher::handle_asok_command( if (!op_tracker.dump_historic_ops(f, true)) { *css << "op_tracker disabled; set mds_enable_op_tracker=true to enable"; } + } else if (command == "dump_export_states") { + std::lock_guard l(mds_lock); + mdcache->migrator->dump_export_states(f); } else if (command == "osdmap barrier") { int64_t target_epoch = 0; bool got_val = cmd_getval(cmdmap, "target_epoch", target_epoch); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index cb77282e3844..722b6bd74226 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -268,12 +268,12 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) case EXPORT_LOCKING: dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl; num_locking_exports--; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); dir->auth_unpin(this); break; case EXPORT_DISCOVERING: dout(10) << "export state=discovering : canceling freeze and removing auth_pin" << dendl; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); dir->unfreeze_tree(); // cancel the freeze dir->auth_unpin(this); if (notify_peer && @@ -286,7 +286,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) case EXPORT_FREEZING: dout(10) << "export state=freezing : canceling freeze" << dendl; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); dir->unfreeze_tree(); // cancel the freeze if (dir->is_subtree_root()) mdcache->try_subtree_merge(dir); @@ -301,13 +301,13 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) // NOTE: state order reversal, warning comes after prepping case 
EXPORT_WARNING: dout(10) << "export state=warning : unpinning bounds, unfreezing, notifying" << dendl; - it->second.state = EXPORT_CANCELLING; + it->second.set_state(EXPORT_CANCELLING); // fall-thru case EXPORT_PREPPING: if (state != EXPORT_WARNING) { dout(10) << "export state=prepping : unpinning bounds, unfreezing" << dendl; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); } { @@ -340,7 +340,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) case EXPORT_EXPORTING: dout(10) << "export state=exporting : reversing, and unfreezing" << dendl; - it->second.state = EXPORT_CANCELLING; + it->second.set_state(EXPORT_CANCELLING); export_reverse(dir, it->second); break; @@ -865,7 +865,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) ceph_assert(export_state.count(dir) == 0); export_state_t& stat = export_state[dir]; num_locking_exports++; - stat.state = EXPORT_LOCKING; + stat.set_state(EXPORT_LOCKING); stat.peer = dest; stat.tid = mdr->reqid.tid; stat.mut = mdr; @@ -1140,7 +1140,7 @@ void Migrator::dispatch_export_dir(const MDRequestRef& mdr, int count) if (results.size() == 1 && results.front().first == dir) { num_locking_exports--; - it->second.state = EXPORT_DISCOVERING; + it->second.set_state(EXPORT_DISCOVERING); // send ExportDirDiscover (ask target) filepath path; dir->inode->make_path(path); @@ -1191,7 +1191,7 @@ void Migrator::dispatch_export_dir(const MDRequestRef& mdr, int count) ceph_assert(export_state.count(sub) == 0); auto& stat = export_state[sub]; num_locking_exports++; - stat.state = EXPORT_LOCKING; + stat.set_state(EXPORT_LOCKING); stat.peer = dest; stat.tid = _mdr->reqid.tid; stat.mut = _mdr; @@ -1244,7 +1244,7 @@ void Migrator::handle_export_discover_ack(const cref_t &m if (m->is_success()) { // move to freezing the subtree - it->second.state = EXPORT_FREEZING; + it->second.set_state(EXPORT_FREEZING); auto&& mdr = boost::static_pointer_cast(std::move(it->second.mut)); ceph_assert(!it->second.mut); 
// should have been moved out of @@ -1427,18 +1427,18 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) } // send. - it->second.state = EXPORT_PREPPING; + it->second.set_state(EXPORT_PREPPING); mds->send_message_mds(prep, it->second.peer); ceph_assert(g_conf()->mds_kill_export_at != 4); // make sure any new instantiations of caps are flushed out ceph_assert(it->second.warning_ack_waiting.empty()); - set export_client_set; - get_export_client_set(dir, export_client_set); + ceph_assert(it->second.export_client_set.empty()); + get_export_client_set(dir, it->second.export_client_set); MDSGatherBuilder gather(g_ceph_context); - mds->server->flush_client_sessions(export_client_set, gather); + mds->server->flush_client_sessions(it->second.export_client_set, gather); if (gather.has_subs()) { it->second.warning_ack_waiting.insert(MDS_RANK_NONE); gather.set_finisher(new C_M_ExportSessionsFlushed(this, dir, it->second.tid)); @@ -1537,7 +1537,7 @@ void Migrator::handle_export_prep_ack(const cref_t &m) } - it->second.state = EXPORT_WARNING; + it->second.set_state(EXPORT_WARNING); ceph_assert(g_conf()->mds_kill_export_at != 6); // nobody to warn? 
@@ -1587,8 +1587,8 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) dout(7) << *dir << " to " << dest << dendl; mdcache->show_subtrees(); - - it->second.state = EXPORT_EXPORTING; + + it->second.set_state(EXPORT_EXPORTING); ceph_assert(g_conf()->mds_kill_export_at != 7); ceph_assert(dir->is_frozen_tree_root()); @@ -1933,7 +1933,7 @@ void Migrator::handle_export_ack(const cref_t &m) auto bp = m->imported_caps.cbegin(); decode(it->second.peer_imported, bp); - it->second.state = EXPORT_LOGGINGFINISH; + it->second.set_state(EXPORT_LOGGINGFINISH); ceph_assert(g_conf()->mds_kill_export_at != 9); set bounds; mdcache->get_subtree_bounds(dir, bounds); @@ -1970,7 +1970,7 @@ void Migrator::export_notify_abort(CDir *dir, export_state_t& stat, set& ceph_assert(stat.state == EXPORT_CANCELLING); if (stat.notify_ack_waiting.empty()) { - stat.state = EXPORT_CANCELLED; + stat.set_state(EXPORT_CANCELLED); return; } @@ -2095,7 +2095,7 @@ void Migrator::export_logged_finish(CDir *dir) } // wait for notifyacks - stat.state = EXPORT_NOTIFYING; + stat.set_state(EXPORT_NOTIFYING); ceph_assert(g_conf()->mds_kill_export_at != 11); // no notifies to wait for? 
@@ -3217,6 +3217,79 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last) } } +void Migrator::dump_export_states(Formatter *f) +{ + f->open_array_section("states"); + for (const auto& [dir, state] : export_state) { + f->open_object_section("state"); + + f->dump_unsigned("tid", state.tid); + + dir->dump(f, CDir::DUMP_PATH | CDir::DUMP_DIRFRAG); + + f->dump_string("state", get_export_statename(state.state)); + + f->open_object_section("state_history"); + for (const auto& [s, _1] : state.state_history) { + f->open_object_section(get_export_statename(s)); + f->dump_stream("start_at") << state.get_start_time(s); + f->dump_float("time_spent", state.get_time_spent(s)); + f->close_section(); + } + f->close_section(); + + f->dump_int("peer", state.peer); + + switch (state.state) { + case EXPORT_DISCOVERING: + case EXPORT_FREEZING: + f->dump_stream("last_cum_auth_pins_change") << state.last_cum_auth_pins_change; + f->dump_int("last_cum_auth_pins", state.last_cum_auth_pins); + f->dump_int("num_remote_waiters", state.num_remote_waiters); + + break; + + case EXPORT_PREPPING: + case EXPORT_WARNING: + f->open_array_section("flushed_clients"); + for (const auto &client : state.export_client_set) + f->dump_int("client", client.v); + f->close_section(); + + f->open_array_section("warning_ack_waiting"); + for (const auto &rank : state.warning_ack_waiting) + f->dump_int("rank", rank); + f->close_section(); + + if (state.state == EXPORT_PREPPING) + break; + // fall-thru + + case EXPORT_EXPORTING: + case EXPORT_LOGGINGFINISH: + case EXPORT_NOTIFYING: + f->open_array_section("notify_ack_waiting"); + for (const auto &rank : state.notify_ack_waiting) + f->dump_int("rank", rank); + f->close_section(); + + break; + + default: + break; + } + + if (state.state >= EXPORT_DISCOVERING) { + f->dump_unsigned("approx_size", state.approx_size); + f->dump_unsigned("unfreeze_tree_waiters", dir->count_unfreeze_tree_waiters()); + f->dump_float("freeze_tree_time", 
state.get_freeze_tree_time()); + } + + f->close_section(); + } + f->close_section(); +} + void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, mds_rank_t oldauth, LogSegment *ls, map >& peer_exports, diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index d6e599c06a99..f733dea76b30 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -252,6 +252,8 @@ public: void import_finish(CDir *dir, bool notify, bool last=true); + void dump_export_states(Formatter *f); + protected: struct export_base_t { export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) : @@ -267,7 +269,31 @@ protected: struct export_state_t { export_state_t() {} - int state = 0; + void set_state(int s) { + ceph_assert(s != state); + if (state != EXPORT_CANCELLED) { + auto& t = state_history.at(state); + t.second = double(ceph_clock_now()) - double(t.first); + } + state = s; + state_history[state] = std::pair(ceph_clock_now(), 0.0); + } + utime_t get_start_time(int s) const { + ceph_assert(state_history.count(s) > 0); + return state_history.at(s).first; + } + double get_time_spent(int s) const { + ceph_assert(state_history.count(s) > 0); + const auto& t = state_history.at(s); + return s == state ? 
double(ceph_clock_now()) - double(t.first) : t.second; + } + double get_freeze_tree_time() const { + ceph_assert(state >= EXPORT_DISCOVERING); + ceph_assert(state_history.count((int)EXPORT_DISCOVERING) > 0); + return double(ceph_clock_now()) - double(state_history.at((int)EXPORT_DISCOVERING).first); + }; + + int state = EXPORT_CANCELLED; mds_rank_t peer = MDS_RANK_NONE; uint64_t tid = 0; std::set warning_ack_waiting; @@ -275,6 +301,10 @@ protected: std::map > peer_imported; MutationRef mut; size_t approx_size = 0; + // record the start time and time spent of each export state + std::map > state_history; + // record the clients whose sessions need to be flushed + std::set export_client_set; // for freeze tree deadlock detection utime_t last_cum_auth_pins_change; int last_cum_auth_pins = 0;