From: Zhansong Gao Date: Tue, 14 Feb 2023 08:10:59 +0000 (+0800) Subject: mds: add an asok command to dump export states X-Git-Tag: testing/wip-jcollin-testing-20250925.080745-reef~2^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=144681835af1dbfbb4f5c3792f071e0e1bff2f62;p=ceph-ci.git mds: add an asok command to dump export states A task to export a subtree may be blocked; use this command to find out what's going on. Fixes: https://tracker.ceph.com/issues/58835 Signed-off-by: Zhansong Gao (cherry picked from commit d34f33055d25ba78f63369f661eb75515b5f465d) Conflicts: src/mds/MDSCacheObject.h src/mds/Migrator.cc - conflicts due to quiesce additions in main branch --- diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 7cc4dc7ffcf..edabc61f8c1 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -548,6 +548,16 @@ public: void maybe_finish_freeze(); + size_t count_unfreeze_tree_waiters() { + size_t n = count_unfreeze_dir_waiters(); + _walk_tree([&n](CDir *dir) { + n += dir->count_unfreeze_dir_waiters(); + return true; + }); + return n; + } + inline size_t count_unfreeze_dir_waiters() const { return count_waiters(WAIT_UNFREEZE); } + std::pair is_freezing_or_frozen_tree() const { if (freeze_tree_state) { if (freeze_tree_state->frozen) diff --git a/src/mds/MDSCacheObject.h b/src/mds/MDSCacheObject.h index 8710102b70d..228b447761d 100644 --- a/src/mds/MDSCacheObject.h +++ b/src/mds/MDSCacheObject.h @@ -262,6 +262,9 @@ class MDSCacheObject { void set_replica_nonce(unsigned n) { replica_nonce = n; } bool is_waiter_for(uint64_t mask, uint64_t min=0); + + inline size_t count_waiters(uint64_t mask) const { return waiting.count(mask); } + virtual void add_waiter(uint64_t mask, MDSContext *c) { if (waiting.empty()) get(PIN_WAITER); diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index 374c5e9a17d..fab81b832c2 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -280,6 +280,10 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "show recent 
ops, sorted by op duration"); ceph_assert(r == 0); + r = admin_socket->register_command("dump_export_states", + asok_hook, + "dump export states"); + ceph_assert(r == 0); r = admin_socket->register_command("scrub_path name=path,type=CephString " "name=scrubops,type=CephChoices," "strings=force|recursive|repair,n=N,req=false " diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 04df37dded9..f1fe829a41b 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -2685,6 +2685,9 @@ void MDSRankDispatcher::handle_asok_command( if (!op_tracker.dump_historic_ops(f, true)) { *css << "op_tracker disabled; set mds_enable_op_tracker=true to enable"; } + } else if (command == "dump_export_states") { + std::lock_guard l(mds_lock); + mdcache->migrator->dump_export_states(f); } else if (command == "osdmap barrier") { int64_t target_epoch = 0; bool got_val = cmd_getval(cmdmap, "target_epoch", target_epoch); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 9c5d19ee86d..317e7efcc21 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -254,12 +254,12 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) case EXPORT_LOCKING: dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl; num_locking_exports--; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); dir->auth_unpin(this); break; case EXPORT_DISCOVERING: dout(10) << "export state=discovering : canceling freeze and removing auth_pin" << dendl; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); dir->unfreeze_tree(); // cancel the freeze dir->auth_unpin(this); if (notify_peer && @@ -272,7 +272,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) case EXPORT_FREEZING: dout(10) << "export state=freezing : canceling freeze" << dendl; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); dir->unfreeze_tree(); // cancel the freeze if (dir->is_subtree_root()) 
mdcache->try_subtree_merge(dir); @@ -287,13 +287,13 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) // NOTE: state order reversal, warning comes after prepping case EXPORT_WARNING: dout(10) << "export state=warning : unpinning bounds, unfreezing, notifying" << dendl; - it->second.state = EXPORT_CANCELLING; + it->second.set_state(EXPORT_CANCELLING); // fall-thru case EXPORT_PREPPING: if (state != EXPORT_WARNING) { dout(10) << "export state=prepping : unpinning bounds, unfreezing" << dendl; - it->second.state = EXPORT_CANCELLED; + it->second.set_state(EXPORT_CANCELLED); } { @@ -326,7 +326,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) case EXPORT_EXPORTING: dout(10) << "export state=exporting : reversing, and unfreezing" << dendl; - it->second.state = EXPORT_CANCELLING; + it->second.set_state(EXPORT_CANCELLING); export_reverse(dir, it->second); break; @@ -848,7 +848,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) ceph_assert(export_state.count(dir) == 0); export_state_t& stat = export_state[dir]; num_locking_exports++; - stat.state = EXPORT_LOCKING; + stat.set_state(EXPORT_LOCKING); stat.peer = dest; stat.tid = mdr->reqid.tid; stat.mut = mdr; @@ -1109,7 +1109,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) if (results.size() == 1 && results.front().first == dir) { num_locking_exports--; - it->second.state = EXPORT_DISCOVERING; + it->second.set_state(EXPORT_DISCOVERING); // send ExportDirDiscover (ask target) filepath path; dir->inode->make_path(path); @@ -1160,7 +1160,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) ceph_assert(export_state.count(sub) == 0); auto& stat = export_state[sub]; num_locking_exports++; - stat.state = EXPORT_LOCKING; + stat.set_state(EXPORT_LOCKING); stat.peer = dest; stat.tid = _mdr->reqid.tid; stat.mut = _mdr; @@ -1212,6 +1212,8 @@ void Migrator::handle_export_discover_ack(const cref_t &m ceph_assert(it->second.state == EXPORT_DISCOVERING); if 
(m->is_success()) { + // move to freezing the subtree + it->second.set_state(EXPORT_FREEZING); // release locks to avoid deadlock MDRequestRef mdr = static_cast(it->second.mut.get()); ceph_assert(mdr); @@ -1391,18 +1393,18 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) } // send. - it->second.state = EXPORT_PREPPING; + it->second.set_state(EXPORT_PREPPING); mds->send_message_mds(prep, it->second.peer); ceph_assert(g_conf()->mds_kill_export_at != 4); // make sure any new instantiations of caps are flushed out ceph_assert(it->second.warning_ack_waiting.empty()); - set export_client_set; - get_export_client_set(dir, export_client_set); + ceph_assert(it->second.export_client_set.empty()); + get_export_client_set(dir, it->second.export_client_set); MDSGatherBuilder gather(g_ceph_context); - mds->server->flush_client_sessions(export_client_set, gather); + mds->server->flush_client_sessions(it->second.export_client_set, gather); if (gather.has_subs()) { it->second.warning_ack_waiting.insert(MDS_RANK_NONE); gather.set_finisher(new C_M_ExportSessionsFlushed(this, dir, it->second.tid)); @@ -1501,7 +1503,7 @@ void Migrator::handle_export_prep_ack(const cref_t &m) } - it->second.state = EXPORT_WARNING; + it->second.set_state(EXPORT_WARNING); ceph_assert(g_conf()->mds_kill_export_at != 6); // nobody to warn? 
@@ -1551,8 +1553,8 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) dout(7) << *dir << " to " << dest << dendl; mdcache->show_subtrees(); - - it->second.state = EXPORT_EXPORTING; + + it->second.set_state(EXPORT_EXPORTING); ceph_assert(g_conf()->mds_kill_export_at != 7); ceph_assert(dir->is_frozen_tree_root()); @@ -1897,7 +1899,7 @@ void Migrator::handle_export_ack(const cref_t &m) auto bp = m->imported_caps.cbegin(); decode(it->second.peer_imported, bp); - it->second.state = EXPORT_LOGGINGFINISH; + it->second.set_state(EXPORT_LOGGINGFINISH); ceph_assert(g_conf()->mds_kill_export_at != 9); set bounds; mdcache->get_subtree_bounds(dir, bounds); @@ -1935,7 +1937,7 @@ void Migrator::export_notify_abort(CDir *dir, export_state_t& stat, set& ceph_assert(stat.state == EXPORT_CANCELLING); if (stat.notify_ack_waiting.empty()) { - stat.state = EXPORT_CANCELLED; + stat.set_state(EXPORT_CANCELLED); return; } @@ -2060,7 +2062,7 @@ void Migrator::export_logged_finish(CDir *dir) } // wait for notifyacks - stat.state = EXPORT_NOTIFYING; + stat.set_state(EXPORT_NOTIFYING); ceph_assert(g_conf()->mds_kill_export_at != 11); // no notifies to wait for? 
@@ -3173,6 +3175,79 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last) } } +void Migrator::dump_export_states(Formatter *f) +{ + f->open_array_section("states"); + for (const auto& [dir, state] : export_state) { + f->open_object_section("state"); + + f->dump_unsigned("tid", state.tid); + + dir->dump(f, CDir::DUMP_PATH | CDir::DUMP_DIRFRAG); + + f->dump_string("state", get_export_statename(state.state)); + + f->open_object_section("state_history"); + for (const auto& [s, _1] : state.state_history) { + f->open_object_section(get_export_statename(s)); + f->dump_stream("start_at") << state.get_start_time(s); + f->dump_float("time_spent", state.get_time_spent(s)); + f->close_section(); + } + f->close_section(); + + f->dump_int("peer", state.peer); + + switch (state.state) { + case EXPORT_DISCOVERING: + case EXPORT_FREEZING: + f->dump_stream("last_cum_auth_pins_change") << state.last_cum_auth_pins_change; + f->dump_int("last_cum_auth_pins", state.last_cum_auth_pins); + f->dump_int("num_remote_waiters", state.num_remote_waiters); + + break; + + case EXPORT_PREPPING: + case EXPORT_WARNING: + f->open_array_section("flushed_clients"); + for (const auto &client : state.export_client_set) + f->dump_int("client", client.v); + f->close_section(); + + f->open_array_section("warning_ack_waiting"); + for (const auto &rank : state.warning_ack_waiting) + f->dump_int("rank", rank); + f->close_section(); + + if (state.state == EXPORT_PREPPING) + break; + // fall-thru + + case EXPORT_EXPORTING: + case EXPORT_LOGGINGFINISH: + case EXPORT_NOTIFYING: + f->open_array_section("notify_ack_waiting"); + for (const auto &rank : state.notify_ack_waiting) + f->dump_int("rank", rank); + f->close_section(); + + break; + + default: + break; + } + + if (state.state >= EXPORT_DISCOVERING) { + f->dump_unsigned("approx_size", state.approx_size); + f->dump_unsigned("unfreeze_tree_waiters", dir->count_unfreeze_tree_waiters()); + f->dump_float("freeze_tree_time", 
state.get_freeze_tree_time()); + } + + f->close_section(); + } + f->close_section(); +} + void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, mds_rank_t oldauth, LogSegment *ls, map >& peer_exports, diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index be24e311100..bd76ed9d31d 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -251,6 +251,8 @@ public: void import_finish(CDir *dir, bool notify, bool last=true); + void dump_export_states(Formatter *f); + protected: struct export_base_t { export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) : @@ -266,7 +268,31 @@ protected: struct export_state_t { export_state_t() {} - int state = 0; + void set_state(int s) { + ceph_assert(s != state); + if (state != EXPORT_CANCELLED) { + auto& t = state_history.at(state); + t.second = double(ceph_clock_now()) - double(t.first); + } + state = s; + state_history[state] = std::pair(ceph_clock_now(), 0.0); + } + utime_t get_start_time(int s) const { + ceph_assert(state_history.count(s) > 0); + return state_history.at(s).first; + } + double get_time_spent(int s) const { + ceph_assert(state_history.count(s) > 0); + const auto& t = state_history.at(s); + return s == state ? 
double(ceph_clock_now()) - double(t.first) : t.second; + } + double get_freeze_tree_time() const { + ceph_assert(state >= EXPORT_DISCOVERING); + ceph_assert(state_history.count((int)EXPORT_DISCOVERING) > 0); + return double(ceph_clock_now()) - double(state_history.at((int)EXPORT_DISCOVERING).first); + }; + + int state = EXPORT_CANCELLED; mds_rank_t peer = MDS_RANK_NONE; uint64_t tid = 0; std::set warning_ack_waiting; @@ -274,6 +300,10 @@ protected: std::map > peer_imported; MutationRef mut; size_t approx_size = 0; + // record the start time and time spent of each export state + std::map > state_history; + // record the clients whose sessions need to be flushed + std::set export_client_set; // for freeze tree deadlock detection utime_t last_cum_auth_pins_change; int last_cum_auth_pins = 0;