git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds: add an asok command to dump export states
author Zhansong Gao <zhsgao@hotmail.com>
Tue, 14 Feb 2023 08:10:59 +0000 (16:10 +0800)
committer Milind Changire <mchangir@redhat.com>
Thu, 25 Sep 2025 02:20:18 +0000 (07:50 +0530)
A task exporting a subtree may get blocked; use this command
to find out what is going on.

Fixes: https://tracker.ceph.com/issues/58835
Signed-off-by: Zhansong Gao <zhsgao@hotmail.com>
(cherry picked from commit d34f33055d25ba78f63369f661eb75515b5f465d)

Conflicts:
src/mds/MDSCacheObject.h
src/mds/Migrator.cc
- conflicts due to quiesce additions in main branch

src/mds/CDir.h
src/mds/MDSCacheObject.h
src/mds/MDSDaemon.cc
src/mds/MDSRank.cc
src/mds/Migrator.cc
src/mds/Migrator.h

index 7cc4dc7ffcf8379104f061d19bc12572e2f076a4..edabc61f8c10e3154b38d0a3a12b9730eb1ecdbe 100644 (file)
@@ -548,6 +548,16 @@ public:
 
   void maybe_finish_freeze();
 
+  /**
+   * Total the unfreeze waiters on this dirfrag plus those on every dirfrag
+   * that _walk_tree() hands to the callback.
+   *
+   * NOTE(review): this dirfrag is counted explicitly up front, which
+   * presumes _walk_tree() does not invoke the callback on it -- confirm.
+   */
+  size_t count_unfreeze_tree_waiters() {
+    size_t total = count_unfreeze_dir_waiters();
+    auto accumulate = [&total](CDir *sub) {
+      total += sub->count_unfreeze_dir_waiters();
+      // always true; presumably tells _walk_tree() to keep descending -- confirm
+      return true;
+    };
+    _walk_tree(accumulate);
+    return total;
+  }
+  /// Waiters on this dirfrag alone, i.e. count_waiters(WAIT_UNFREEZE).
+  size_t count_unfreeze_dir_waiters() const {
+    return count_waiters(WAIT_UNFREEZE);
+  }
+
   std::pair<bool,bool> is_freezing_or_frozen_tree() const {
     if (freeze_tree_state) {
       if (freeze_tree_state->frozen)
index 8710102b70d99268881b2af09061b8bbb0f39411..228b447761d63c8fb18c8a865ab882f3344f9c2e 100644 (file)
@@ -262,6 +262,9 @@ class MDSCacheObject {
   void set_replica_nonce(unsigned n) { replica_nonce = n; }
 
   bool is_waiter_for(uint64_t mask, uint64_t min=0);
+
+  /// Number of entries that `waiting` holds under the key `mask`.
+  /// NOTE(review): presumably an exact-key match, not a bitwise test of the
+  /// mask -- confirm against the container type of `waiting`.
+  size_t count_waiters(uint64_t mask) const {
+    return waiting.count(mask);
+  }
+
   virtual void add_waiter(uint64_t mask, MDSContext *c) {
     if (waiting.empty())
       get(PIN_WAITER);
index 374c5e9a17d1d8852d842733c48e08c9d26f956f..fab81b832c260532534695d0708ccc2f05634892 100644 (file)
@@ -280,6 +280,10 @@ void MDSDaemon::set_up_admin_socket()
                                     asok_hook,
                                     "show recent ops, sorted by op duration");
   ceph_assert(r == 0);
+  r = admin_socket->register_command("dump_export_states",
+                                    asok_hook,
+                                    "dump export states");
+  ceph_assert(r == 0);
   r = admin_socket->register_command("scrub_path name=path,type=CephString "
                                     "name=scrubops,type=CephChoices,"
                                     "strings=force|recursive|repair,n=N,req=false "
index 04df37dded9e724213df86953094602f23a14136..f1fe829a41b44d6777b5eb481d50011aad2ae06f 100644 (file)
@@ -2685,6 +2685,9 @@ void MDSRankDispatcher::handle_asok_command(
     if (!op_tracker.dump_historic_ops(f, true)) {
       *css << "op_tracker disabled; set mds_enable_op_tracker=true to enable";
     }
+  } else if (command == "dump_export_states") {
+    std::lock_guard l(mds_lock);
+    mdcache->migrator->dump_export_states(f);
   } else if (command == "osdmap barrier") {
     int64_t target_epoch = 0;
     bool got_val = cmd_getval(cmdmap, "target_epoch", target_epoch);
index 9c5d19ee86daaba65d602fe15b7262a489daa273..317e7efcc21eabfe30a9a41b6fbaca99fe4b8ad3 100644 (file)
@@ -254,12 +254,12 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
   case EXPORT_LOCKING:
     dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl;
     num_locking_exports--;
-    it->second.state = EXPORT_CANCELLED;
+    it->second.set_state(EXPORT_CANCELLED);
     dir->auth_unpin(this);
     break;
   case EXPORT_DISCOVERING:
     dout(10) << "export state=discovering : canceling freeze and removing auth_pin" << dendl;
-    it->second.state = EXPORT_CANCELLED;
+    it->second.set_state(EXPORT_CANCELLED);
     dir->unfreeze_tree();  // cancel the freeze
     dir->auth_unpin(this);
     if (notify_peer &&
@@ -272,7 +272,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
 
   case EXPORT_FREEZING:
     dout(10) << "export state=freezing : canceling freeze" << dendl;
-    it->second.state = EXPORT_CANCELLED;
+    it->second.set_state(EXPORT_CANCELLED);
     dir->unfreeze_tree();  // cancel the freeze
     if (dir->is_subtree_root())
       mdcache->try_subtree_merge(dir);
@@ -287,13 +287,13 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
     // NOTE: state order reversal, warning comes after prepping
   case EXPORT_WARNING:
     dout(10) << "export state=warning : unpinning bounds, unfreezing, notifying" << dendl;
-    it->second.state = EXPORT_CANCELLING;
+    it->second.set_state(EXPORT_CANCELLING);
     // fall-thru
 
   case EXPORT_PREPPING:
     if (state != EXPORT_WARNING) {
       dout(10) << "export state=prepping : unpinning bounds, unfreezing" << dendl;
-      it->second.state = EXPORT_CANCELLED;
+      it->second.set_state(EXPORT_CANCELLED);
     }
 
     {
@@ -326,7 +326,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
 
   case EXPORT_EXPORTING:
     dout(10) << "export state=exporting : reversing, and unfreezing" << dendl;
-    it->second.state = EXPORT_CANCELLING;
+    it->second.set_state(EXPORT_CANCELLING);
     export_reverse(dir, it->second);
     break;
 
@@ -848,7 +848,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest)
   ceph_assert(export_state.count(dir) == 0);
   export_state_t& stat = export_state[dir];
   num_locking_exports++;
-  stat.state = EXPORT_LOCKING;
+  stat.set_state(EXPORT_LOCKING);
   stat.peer = dest;
   stat.tid = mdr->reqid.tid;
   stat.mut = mdr;
@@ -1109,7 +1109,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
 
   if (results.size() == 1 && results.front().first == dir) {
     num_locking_exports--;
-    it->second.state = EXPORT_DISCOVERING;
+    it->second.set_state(EXPORT_DISCOVERING);
     // send ExportDirDiscover (ask target)
     filepath path;
     dir->inode->make_path(path);
@@ -1160,7 +1160,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
     ceph_assert(export_state.count(sub) == 0);
     auto& stat = export_state[sub];
     num_locking_exports++;
-    stat.state = EXPORT_LOCKING;
+    stat.set_state(EXPORT_LOCKING);
     stat.peer = dest;
     stat.tid = _mdr->reqid.tid;
     stat.mut = _mdr;
@@ -1212,6 +1212,8 @@ void Migrator::handle_export_discover_ack(const cref_t<MExportDirDiscoverAck> &m
     ceph_assert(it->second.state == EXPORT_DISCOVERING);
 
     if (m->is_success()) {
+      // move to freezing the subtree
+      it->second.set_state(EXPORT_FREEZING);
       // release locks to avoid deadlock
       MDRequestRef mdr = static_cast<MDRequestImpl*>(it->second.mut.get());
       ceph_assert(mdr);
@@ -1391,18 +1393,18 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid)
   }
 
   // send.
-  it->second.state = EXPORT_PREPPING;
+  it->second.set_state(EXPORT_PREPPING);
   mds->send_message_mds(prep, it->second.peer);
   ceph_assert(g_conf()->mds_kill_export_at != 4);
 
   // make sure any new instantiations of caps are flushed out
   ceph_assert(it->second.warning_ack_waiting.empty());
 
-  set<client_t> export_client_set;
-  get_export_client_set(dir, export_client_set);
+  ceph_assert(it->second.export_client_set.empty());
+  get_export_client_set(dir, it->second.export_client_set);
 
   MDSGatherBuilder gather(g_ceph_context);
-  mds->server->flush_client_sessions(export_client_set, gather);
+  mds->server->flush_client_sessions(it->second.export_client_set, gather);
   if (gather.has_subs()) {
     it->second.warning_ack_waiting.insert(MDS_RANK_NONE);
     gather.set_finisher(new C_M_ExportSessionsFlushed(this, dir, it->second.tid));
@@ -1501,7 +1503,7 @@ void Migrator::handle_export_prep_ack(const cref_t<MExportDirPrepAck> &m)
     
   }
 
-  it->second.state = EXPORT_WARNING;
+  it->second.set_state(EXPORT_WARNING);
 
   ceph_assert(g_conf()->mds_kill_export_at != 6);
   // nobody to warn?
@@ -1551,8 +1553,8 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid)
   dout(7) << *dir << " to " << dest << dendl;
 
   mdcache->show_subtrees();
-  
-  it->second.state = EXPORT_EXPORTING;
+
+  it->second.set_state(EXPORT_EXPORTING);
   ceph_assert(g_conf()->mds_kill_export_at != 7);
 
   ceph_assert(dir->is_frozen_tree_root());
@@ -1897,7 +1899,7 @@ void Migrator::handle_export_ack(const cref_t<MExportDirAck> &m)
   auto bp = m->imported_caps.cbegin();
   decode(it->second.peer_imported, bp);
 
-  it->second.state = EXPORT_LOGGINGFINISH;
+  it->second.set_state(EXPORT_LOGGINGFINISH);
   ceph_assert(g_conf()->mds_kill_export_at != 9);
   set<CDir*> bounds;
   mdcache->get_subtree_bounds(dir, bounds);
@@ -1935,7 +1937,7 @@ void Migrator::export_notify_abort(CDir *dir, export_state_t& stat, set<CDir*>&
   ceph_assert(stat.state == EXPORT_CANCELLING);
 
   if (stat.notify_ack_waiting.empty()) {
-    stat.state = EXPORT_CANCELLED;
+    stat.set_state(EXPORT_CANCELLED);
     return;
   }
 
@@ -2060,7 +2062,7 @@ void Migrator::export_logged_finish(CDir *dir)
   }
 
   // wait for notifyacks
-  stat.state = EXPORT_NOTIFYING;
+  stat.set_state(EXPORT_NOTIFYING);
   ceph_assert(g_conf()->mds_kill_export_at != 11);
   
   // no notifies to wait for?
@@ -3173,6 +3175,79 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last)
   }
 }
 
+void Migrator::dump_export_states(Formatter *f)
+{
+  f->open_array_section("states");
+  for (const auto& [dir, state] : export_state) {
+    f->open_object_section("state");
+
+    f->dump_unsigned("tid", state.tid);
+
+    dir->dump(f, CDir::DUMP_PATH | CDir::DUMP_DIRFRAG);
+
+    f->dump_string("state", get_export_statename(state.state));
+
+    f->open_object_section("state_history");
+    for (const auto& [s, _1] : state.state_history) {
+      f->open_object_section(get_export_statename(s));
+      f->dump_stream("start_at") << state.get_start_time(s);
+      f->dump_float("time_spent", state.get_time_spent(s));
+      f->close_section();
+    }
+    f->close_section();
+
+    f->dump_int("peer", state.peer);
+
+    switch (state.state) {
+    case EXPORT_DISCOVERING:
+    case EXPORT_FREEZING:
+      f->dump_stream("last_cum_auth_pins_change") << state.last_cum_auth_pins_change;
+      f->dump_int("last_cum_auth_pins", state.last_cum_auth_pins);
+      f->dump_int("num_remote_waiters", state.num_remote_waiters);
+
+      break;
+
+    case EXPORT_PREPPING:
+    case EXPORT_WARNING:
+      f->open_array_section("flushed_clients");
+      for (const auto &client : state.export_client_set)
+       f->dump_int("client", client.v);
+      f->close_section();
+
+      f->open_array_section("warning_ack_waiting");
+      for (const auto &rank : state.warning_ack_waiting)
+       f->dump_int("rank", rank);
+      f->close_section();
+
+      if (state.state == EXPORT_PREPPING)
+       break;
+      // fall-thru
+
+    case EXPORT_EXPORTING:
+    case EXPORT_LOGGINGFINISH:
+    case EXPORT_NOTIFYING:
+      f->open_array_section("notify_ack_waiting");
+      for (const auto &rank : state.notify_ack_waiting)
+       f->dump_int("rank", rank);
+      f->close_section();
+
+      break;
+
+    default:
+      break;
+    }
+
+    if (state.state >= EXPORT_DISCOVERING) {
+      f->dump_unsigned("approx_size", state.approx_size);
+      f->dump_unsigned("unfreeze_tree_waiters", dir->count_unfreeze_tree_waiters());
+      f->dump_float("freeze_tree_time", state.get_freeze_tree_time());
+    }
+
+    f->close_section();
+  }
+  f->close_section();
+}
+
 void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
                                   mds_rank_t oldauth, LogSegment *ls,
                                   map<CInode*, map<client_t,Capability::Export> >& peer_exports,
index be24e311100064225c02974aec5d67dee65f735d..bd76ed9d31da90ece6704e2c6f95d41515529472 100644 (file)
@@ -251,6 +251,8 @@ public:
 
   void import_finish(CDir *dir, bool notify, bool last=true);
 
+  void dump_export_states(Formatter *f);
+
 protected:
   struct export_base_t {
     export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) :
@@ -266,7 +268,31 @@ protected:
   struct export_state_t {
     export_state_t() {}
 
-    int state = 0;
+    /**
+     * Switch to state @p s and record the transition in state_history.
+     *
+     * The time spent in the state being left is stored in its history
+     * entry, and a fresh entry (start time = now, time spent = 0) is
+     * written for @p s. Asserts that @p s differs from the current state.
+     *
+     * @param s the export state to enter
+     */
+    void set_state(int s) {
+      ceph_assert(s != state);
+      // read the clock once so that the time charged to the old state ends
+      // at exactly the instant recorded as the start of the new one
+      const utime_t now = ceph_clock_now();
+      // nothing is closed out when leaving EXPORT_CANCELLED; it is also the
+      // initial value of `state`, for which no history entry exists yet
+      if (state != EXPORT_CANCELLED) {
+        // at() requires the state being left to have a history entry
+        auto& t = state_history.at(state);
+        t.second = double(now) - double(t.first);
+      }
+      state = s;
+      state_history[state] = std::pair<utime_t, double>(now, 0.0);
+    }
+    /// Start time recorded for state @p s; asserts that @p s has a
+    /// state_history entry.
+    utime_t get_start_time(int s) const {
+      const auto it = state_history.find(s);
+      ceph_assert(it != state_history.end());
+      return it->second.first;
+    }
+    /// Time spent in state @p s: for the current state, now minus its
+    /// recorded start; for any other state, the value stored in its
+    /// history entry. Asserts that @p s has a state_history entry.
+    double get_time_spent(int s) const {
+      const auto it = state_history.find(s);
+      ceph_assert(it != state_history.end());
+      const auto& record = it->second;
+      if (s == state) {
+        // still in this state, so measure up to now
+        return double(ceph_clock_now()) - double(record.first);
+      }
+      return record.second;
+    }
+    /// Now minus the recorded start of EXPORT_DISCOVERING. Asserts that the
+    /// current state is at least EXPORT_DISCOVERING and that
+    /// EXPORT_DISCOVERING has a state_history entry.
+    double get_freeze_tree_time() const {
+      ceph_assert(state >= EXPORT_DISCOVERING);
+      const auto discovering = state_history.find(static_cast<int>(EXPORT_DISCOVERING));
+      ceph_assert(discovering != state_history.end());
+      return double(ceph_clock_now()) - double(discovering->second.first);
+    }
+
+    int state = EXPORT_CANCELLED;
     mds_rank_t peer = MDS_RANK_NONE;
     uint64_t tid = 0;
     std::set<mds_rank_t> warning_ack_waiting;
@@ -274,6 +300,10 @@ protected:
     std::map<inodeno_t,std::map<client_t,Capability::Import> > peer_imported;
     MutationRef mut;
     size_t approx_size = 0;
+    // record the start time and time spent of each export state
+    std::map<int, std::pair<utime_t, double> > state_history;
+    // record the clients whose sessions need to be flushed
+    std::set<client_t> export_client_set;
     // for freeze tree deadlock detection
     utime_t last_cum_auth_pins_change;
     int last_cum_auth_pins = 0;