]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds: add an asok command to dump export states
author Zhansong Gao <zhsgao@hotmail.com>
Tue, 14 Feb 2023 08:10:59 +0000 (16:10 +0800)
committer Venky Shankar <vshankar@redhat.com>
Tue, 26 Nov 2024 05:14:20 +0000 (10:44 +0530)
A task that exports a subtree may get blocked; use this command
to find out what is going on.

Fixes: https://tracker.ceph.com/issues/58835
Signed-off-by: Zhansong Gao <zhsgao@hotmail.com>
(cherry picked from commit d34f33055d25ba78f63369f661eb75515b5f465d)

src/mds/CDir.h
src/mds/MDSCacheObject.h
src/mds/MDSDaemon.cc
src/mds/MDSRank.cc
src/mds/Migrator.cc
src/mds/Migrator.h

index 76ac7e21cc0902229ea7f067d29298e956bfe23d..a5cd31017444eeb6735dda49bc8c65131e039b8d 100644 (file)
@@ -549,6 +549,16 @@ public:
 
   void maybe_finish_freeze();
 
+  /**
+   * Total number of WAIT_UNFREEZE waiters for this dirfrag's tree.
+   *
+   * Starts from this dirfrag's own count and adds the count of every
+   * dirfrag that _walk_tree() passes to the callback.
+   *
+   * @return the summed waiter count
+   */
+  size_t count_unfreeze_tree_waiters() {
+    size_t total = count_unfreeze_dir_waiters();
+    auto tally = [&total](CDir *sub) {
+      total += sub->count_unfreeze_dir_waiters();
+      return true;
+    };
+    _walk_tree(tally);
+    return total;
+  }
+  /// Number of waiters on this dirfrag reported by count_waiters() for WAIT_UNFREEZE.
+  size_t count_unfreeze_dir_waiters() const {
+    return count_waiters(WAIT_UNFREEZE);
+  }
+
   std::pair<bool,bool> is_freezing_or_frozen_tree() const {
     if (freeze_tree_state) {
       if (freeze_tree_state->frozen)
index d322a05851a56fcf8f2c7e3669cf1246a97e2a34..3b33fe4b19405468385be3ded50fd424a139f367 100644 (file)
@@ -279,6 +279,8 @@ class MDSCacheObject {
   }
   bool is_waiter_for(waitmask_t mask);
 
+  /// Number of entries that `waiting.count(mask)` reports for @p mask.
+  /// NOTE(review): presumably an exact-key match on the wait mask, not a
+  /// bitwise intersection -- confirm against the type of `waiting`.
+  size_t count_waiters(uint64_t mask) const {
+    return waiting.count(mask);
+  }
+
   virtual void add_waiter(uint64_t mask, MDSContext *c) {
     add_waiter(waitmask_t(mask), c);
   }
index b31d9c95220cc2fbcd7ea55c3a8d36eeef2ac6be..b250dc8ae83857680b2c1425e0c398997874772b 100644 (file)
@@ -304,6 +304,10 @@ void MDSDaemon::set_up_admin_socket()
                                     asok_hook,
                                     "show recent ops, sorted by op duration");
   ceph_assert(r == 0);
+  r = admin_socket->register_command("dump_export_states",
+                                    asok_hook,
+                                    "dump export states");
+  ceph_assert(r == 0);
   r = admin_socket->register_command("scrub_path name=path,type=CephString "
                                     "name=scrubops,type=CephChoices,"
                                     "strings=force|recursive|repair,n=N,req=false "
index c766e9ef1f982aa5ed29c0bfc44af68607d1f425..2656ade8ad70a3bb7f3253cfaaba3fc59d656b81 100644 (file)
@@ -2806,6 +2806,9 @@ void MDSRankDispatcher::handle_asok_command(
     if (!op_tracker.dump_historic_ops(f, true)) {
       *css << "op_tracker disabled; set mds_enable_op_tracker=true to enable";
     }
+  } else if (command == "dump_export_states") {
+    std::lock_guard l(mds_lock);
+    mdcache->migrator->dump_export_states(f);
   } else if (command == "osdmap barrier") {
     int64_t target_epoch = 0;
     bool got_val = cmd_getval(cmdmap, "target_epoch", target_epoch);
index cb77282e384432fe7292c71069984f658412dfd7..722b6bd742262289f61dc2fae7e16ee4f208ab04 100644 (file)
@@ -268,12 +268,12 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
   case EXPORT_LOCKING:
     dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl;
     num_locking_exports--;
-    it->second.state = EXPORT_CANCELLED;
+    it->second.set_state(EXPORT_CANCELLED);
     dir->auth_unpin(this);
     break;
   case EXPORT_DISCOVERING:
     dout(10) << "export state=discovering : canceling freeze and removing auth_pin" << dendl;
-    it->second.state = EXPORT_CANCELLED;
+    it->second.set_state(EXPORT_CANCELLED);
     dir->unfreeze_tree();  // cancel the freeze
     dir->auth_unpin(this);
     if (notify_peer &&
@@ -286,7 +286,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
 
   case EXPORT_FREEZING:
     dout(10) << "export state=freezing : canceling freeze" << dendl;
-    it->second.state = EXPORT_CANCELLED;
+    it->second.set_state(EXPORT_CANCELLED);
     dir->unfreeze_tree();  // cancel the freeze
     if (dir->is_subtree_root())
       mdcache->try_subtree_merge(dir);
@@ -301,13 +301,13 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
     // NOTE: state order reversal, warning comes after prepping
   case EXPORT_WARNING:
     dout(10) << "export state=warning : unpinning bounds, unfreezing, notifying" << dendl;
-    it->second.state = EXPORT_CANCELLING;
+    it->second.set_state(EXPORT_CANCELLING);
     // fall-thru
 
   case EXPORT_PREPPING:
     if (state != EXPORT_WARNING) {
       dout(10) << "export state=prepping : unpinning bounds, unfreezing" << dendl;
-      it->second.state = EXPORT_CANCELLED;
+      it->second.set_state(EXPORT_CANCELLED);
     }
 
     {
@@ -340,7 +340,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
 
   case EXPORT_EXPORTING:
     dout(10) << "export state=exporting : reversing, and unfreezing" << dendl;
-    it->second.state = EXPORT_CANCELLING;
+    it->second.set_state(EXPORT_CANCELLING);
     export_reverse(dir, it->second);
     break;
 
@@ -865,7 +865,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest)
   ceph_assert(export_state.count(dir) == 0);
   export_state_t& stat = export_state[dir];
   num_locking_exports++;
-  stat.state = EXPORT_LOCKING;
+  stat.set_state(EXPORT_LOCKING);
   stat.peer = dest;
   stat.tid = mdr->reqid.tid;
   stat.mut = mdr;
@@ -1140,7 +1140,7 @@ void Migrator::dispatch_export_dir(const MDRequestRef& mdr, int count)
 
   if (results.size() == 1 && results.front().first == dir) {
     num_locking_exports--;
-    it->second.state = EXPORT_DISCOVERING;
+    it->second.set_state(EXPORT_DISCOVERING);
     // send ExportDirDiscover (ask target)
     filepath path;
     dir->inode->make_path(path);
@@ -1191,7 +1191,7 @@ void Migrator::dispatch_export_dir(const MDRequestRef& mdr, int count)
     ceph_assert(export_state.count(sub) == 0);
     auto& stat = export_state[sub];
     num_locking_exports++;
-    stat.state = EXPORT_LOCKING;
+    stat.set_state(EXPORT_LOCKING);
     stat.peer = dest;
     stat.tid = _mdr->reqid.tid;
     stat.mut = _mdr;
@@ -1244,7 +1244,7 @@ void Migrator::handle_export_discover_ack(const cref_t<MExportDirDiscoverAck> &m
 
     if (m->is_success()) {
       // move to freezing the subtree
-      it->second.state = EXPORT_FREEZING;
+      it->second.set_state(EXPORT_FREEZING);
       auto&& mdr = boost::static_pointer_cast<MDRequestImpl>(std::move(it->second.mut));
       ceph_assert(!it->second.mut); // should have been moved out of
 
@@ -1427,18 +1427,18 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid)
   }
 
   // send.
-  it->second.state = EXPORT_PREPPING;
+  it->second.set_state(EXPORT_PREPPING);
   mds->send_message_mds(prep, it->second.peer);
   ceph_assert(g_conf()->mds_kill_export_at != 4);
 
   // make sure any new instantiations of caps are flushed out
   ceph_assert(it->second.warning_ack_waiting.empty());
 
-  set<client_t> export_client_set;
-  get_export_client_set(dir, export_client_set);
+  ceph_assert(it->second.export_client_set.empty());
+  get_export_client_set(dir, it->second.export_client_set);
 
   MDSGatherBuilder gather(g_ceph_context);
-  mds->server->flush_client_sessions(export_client_set, gather);
+  mds->server->flush_client_sessions(it->second.export_client_set, gather);
   if (gather.has_subs()) {
     it->second.warning_ack_waiting.insert(MDS_RANK_NONE);
     gather.set_finisher(new C_M_ExportSessionsFlushed(this, dir, it->second.tid));
@@ -1537,7 +1537,7 @@ void Migrator::handle_export_prep_ack(const cref_t<MExportDirPrepAck> &m)
     
   }
 
-  it->second.state = EXPORT_WARNING;
+  it->second.set_state(EXPORT_WARNING);
 
   ceph_assert(g_conf()->mds_kill_export_at != 6);
   // nobody to warn?
@@ -1587,8 +1587,8 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid)
   dout(7) << *dir << " to " << dest << dendl;
 
   mdcache->show_subtrees();
-  
-  it->second.state = EXPORT_EXPORTING;
+
+  it->second.set_state(EXPORT_EXPORTING);
   ceph_assert(g_conf()->mds_kill_export_at != 7);
 
   ceph_assert(dir->is_frozen_tree_root());
@@ -1933,7 +1933,7 @@ void Migrator::handle_export_ack(const cref_t<MExportDirAck> &m)
   auto bp = m->imported_caps.cbegin();
   decode(it->second.peer_imported, bp);
 
-  it->second.state = EXPORT_LOGGINGFINISH;
+  it->second.set_state(EXPORT_LOGGINGFINISH);
   ceph_assert(g_conf()->mds_kill_export_at != 9);
   set<CDir*> bounds;
   mdcache->get_subtree_bounds(dir, bounds);
@@ -1970,7 +1970,7 @@ void Migrator::export_notify_abort(CDir *dir, export_state_t& stat, set<CDir*>&
   ceph_assert(stat.state == EXPORT_CANCELLING);
 
   if (stat.notify_ack_waiting.empty()) {
-    stat.state = EXPORT_CANCELLED;
+    stat.set_state(EXPORT_CANCELLED);
     return;
   }
 
@@ -2095,7 +2095,7 @@ void Migrator::export_logged_finish(CDir *dir)
   }
 
   // wait for notifyacks
-  stat.state = EXPORT_NOTIFYING;
+  stat.set_state(EXPORT_NOTIFYING);
   ceph_assert(g_conf()->mds_kill_export_at != 11);
   
   // no notifies to wait for?
@@ -3217,6 +3217,79 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last)
   }
 }
 
+void Migrator::dump_export_states(Formatter *f)
+{
+  f->open_array_section("states");
+  for (const auto& [dir, state] : export_state) {
+    f->open_object_section("state");
+
+    f->dump_unsigned("tid", state.tid);
+
+    dir->dump(f, CDir::DUMP_PATH | CDir::DUMP_DIRFRAG);
+
+    f->dump_string("state", get_export_statename(state.state));
+
+    f->open_object_section("state_history");
+    for (const auto& [s, _1] : state.state_history) {
+      f->open_object_section(get_export_statename(s));
+      f->dump_stream("start_at") << state.get_start_time(s);
+      f->dump_float("time_spent", state.get_time_spent(s));
+      f->close_section();
+    }
+    f->close_section();
+
+    f->dump_int("peer", state.peer);
+
+    switch (state.state) {
+    case EXPORT_DISCOVERING:
+    case EXPORT_FREEZING:
+      f->dump_stream("last_cum_auth_pins_change") << state.last_cum_auth_pins_change;
+      f->dump_int("last_cum_auth_pins", state.last_cum_auth_pins);
+      f->dump_int("num_remote_waiters", state.num_remote_waiters);
+
+      break;
+
+    case EXPORT_PREPPING:
+    case EXPORT_WARNING:
+      f->open_array_section("flushed_clients");
+      for (const auto &client : state.export_client_set)
+       f->dump_int("client", client.v);
+      f->close_section();
+
+      f->open_array_section("warning_ack_waiting");
+      for (const auto &rank : state.warning_ack_waiting)
+       f->dump_int("rank", rank);
+      f->close_section();
+
+      if (state.state == EXPORT_PREPPING)
+       break;
+      // fall-thru
+
+    case EXPORT_EXPORTING:
+    case EXPORT_LOGGINGFINISH:
+    case EXPORT_NOTIFYING:
+      f->open_array_section("notify_ack_waiting");
+      for (const auto &rank : state.notify_ack_waiting)
+       f->dump_int("rank", rank);
+      f->close_section();
+
+      break;
+
+    default:
+      break;
+    }
+
+    if (state.state >= EXPORT_DISCOVERING) {
+      f->dump_unsigned("approx_size", state.approx_size);
+      f->dump_unsigned("unfreeze_tree_waiters", dir->count_unfreeze_tree_waiters());
+      f->dump_float("freeze_tree_time", state.get_freeze_tree_time());
+    }
+
+    f->close_section();
+  }
+  f->close_section();
+}
+
 void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
                                   mds_rank_t oldauth, LogSegment *ls,
                                   map<CInode*, map<client_t,Capability::Export> >& peer_exports,
index d6e599c06a995aa9c2cf18a258817d32350060b9..f733dea76b30168f6340e3719cfda4613e545621 100644 (file)
@@ -252,6 +252,8 @@ public:
 
   void import_finish(CDir *dir, bool notify, bool last=true);
 
+  void dump_export_states(Formatter *f);
+
 protected:
   struct export_base_t {
     export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) :
@@ -267,7 +269,31 @@ protected:
   struct export_state_t {
     export_state_t() {}
 
-    int state = 0;
+    /**
+     * Move this export to state @p s and record the transition in
+     * state_history.
+     *
+     * The time spent in the state being left is finalized, and a new history
+     * entry is started for @p s. When the current state is EXPORT_CANCELLED
+     * (the value `state` is initialized with) there is no entry to finalize,
+     * so that step is skipped.
+     *
+     * @param s the new state; must differ from the current state
+     */
+    void set_state(int s) {
+      ceph_assert(s != state);
+      // Sample the clock once: the same instant ends the old state and
+      // starts the new one, so adjacent history entries leave no gap.
+      const auto now = ceph_clock_now();
+      if (state != EXPORT_CANCELLED) {
+        auto& t = state_history.at(state);
+        t.second = double(now) - double(t.first);
+      }
+      state = s;
+      state_history[state] = std::pair<utime_t, double>(now, 0.0);
+    }
+    /// Time at which state @p s was entered; asserts that @p s has a
+    /// state_history entry.
+    utime_t get_start_time(int s) const {
+      const auto entry = state_history.find(s);
+      ceph_assert(entry != state_history.end());
+      return entry->second.first;
+    }
+    /// Seconds spent in state @p s; asserts that @p s has a state_history
+    /// entry. For the current state this is measured up to now, otherwise
+    /// the stored duration is returned.
+    double get_time_spent(int s) const {
+      const auto entry = state_history.find(s);
+      ceph_assert(entry != state_history.end());
+      if (s != state) {
+        return entry->second.second;
+      }
+      return double(ceph_clock_now()) - double(entry->second.first);
+    }
+    /// Seconds elapsed since EXPORT_DISCOVERING was entered. Asserts that the
+    /// current state is at least EXPORT_DISCOVERING and that the history
+    /// holds an entry for it.
+    double get_freeze_tree_time() const {
+      ceph_assert(state >= EXPORT_DISCOVERING);
+      const auto discovering = state_history.find(static_cast<int>(EXPORT_DISCOVERING));
+      ceph_assert(discovering != state_history.end());
+      return double(ceph_clock_now()) - double(discovering->second.first);
+    }
+
+    int state = EXPORT_CANCELLED;
     mds_rank_t peer = MDS_RANK_NONE;
     uint64_t tid = 0;
     std::set<mds_rank_t> warning_ack_waiting;
@@ -275,6 +301,10 @@ protected:
     std::map<inodeno_t,std::map<client_t,Capability::Import> > peer_imported;
     MutationRef mut;
     size_t approx_size = 0;
+    // record the start time and time spent of each export state
+    std::map<int, std::pair<utime_t, double> > state_history;
+    // record the clients whose sessions need to be flushed
+    std::set<client_t> export_client_set;
     // for freeze tree deadlock detection
     utime_t last_cum_auth_pins_change;
     int last_cum_auth_pins = 0;