git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: add killpoints for MDS shutdown
author Patrick Donnelly <pdonnell@redhat.com>
Tue, 7 Mar 2023 18:20:19 +0000 (13:20 -0500)
committer Patrick Donnelly <pdonnell@redhat.com>
Tue, 1 Aug 2023 15:16:01 +0000 (11:16 -0400)
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
src/common/options/mds.yaml.in
src/mds/MDCache.cc
src/mds/MDCache.h
src/mds/MDSRank.cc

index 3214970ce02fdb1fa9a7ff0594ae05caf099b1be..87f216433b587dd588c417ce2502813ade0c30a5 100644 (file)
@@ -999,6 +999,15 @@ options:
   services:
   - mds
   fmt_desc: probabilistically inject corrupt CDentry::first at journal load
   flags:
   - runtime
+# Dev-only killpoint selector: MDCache::shutdown_pass() asserts when it reaches
+# the KILL_SHUTDOWN_AT step whose numeric value equals this option.
+# The default 0 (SHUTDOWN_NULL) matches no step.
+- name: mds_kill_shutdown_at
+  type: uint
+  level: dev
+  default: 0
+  services:
+  - mds
+  flags:
+  - runtime
 - name: mds_kill_mdstable_at
index f4ff185056a86d3d722a13ed5be0edae358293d4..2cc3b177e3448663f74f8976f9adedb3bf6fc6a9 100644 (file)
@@ -139,6 +139,8 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
 
   symlink_recovery = g_conf().get_val<bool>("mds_symlink_recovery");
 
+  kill_shutdown_at = g_conf().get_val<uint64_t>("mds_kill_shutdown_at");
+
   lru.lru_set_midpoint(g_conf().get_val<double>("mds_cache_mid"));
 
   bottom_lru.lru_set_midpoint(0);
@@ -199,6 +201,9 @@ void MDCache::handle_conf_change(const std::set<std::string>& changed, const MDS
     symlink_recovery = g_conf().get_val<bool>("mds_symlink_recovery");
     dout(10) << "Storing symlink targets on file object's head " << symlink_recovery << dendl;
   }
+  if (changed.count("mds_kill_shutdown_at")) {
+    kill_shutdown_at = g_conf().get_val<uint64_t>("mds_kill_shutdown_at");
+  }
 
   migrator->handle_conf_change(changed, mdsmap);
   mds->balancer->handle_conf_change(changed, mdsmap);
@@ -7878,6 +7883,7 @@ void MDCache::shutdown_start()
 bool MDCache::shutdown_pass()
 {
   dout(7) << "shutdown_pass" << dendl;
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_START);
 
   if (mds->is_stopped()) {
     dout(7) << " already shut down" << dendl;
@@ -7892,6 +7898,7 @@ bool MDCache::shutdown_pass()
   // trim cache
   trim(UINT64_MAX);
   dout(5) << "lru size now " << lru.lru_get_size() << "/" << bottom_lru.lru_get_size() << dendl;
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_POSTTRIM);
 
   // Export all subtrees to another active (usually rank 0) if not rank 0
   int num_auth_subtree = 0;
@@ -7922,9 +7929,12 @@ bool MDCache::shutdown_pass()
        dest = 0;
       dout(7) << "sending " << *dir << " back to mds." << dest << dendl;
       migrator->export_dir_nicely(dir, dest);
+      ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_POSTONEEXPORT);
     }
   }
 
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_POSTALLEXPORTS);
+
   if (!strays_all_exported) {
     dout(7) << "waiting for strays to migrate" << dendl;
     return false;
@@ -7943,6 +7953,7 @@ bool MDCache::shutdown_pass()
       mds->server->terminate_sessions();
     return false;
   }
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_SESSIONTERMINATE);
 
   // Fully trim the log so that all objects in cache are clean and may be
   // trimmed by a future MDCache::trim. Note that MDSRank::tick does not
@@ -7955,6 +7966,7 @@ bool MDCache::shutdown_pass()
       auto sle = create_subtree_map();
       mds->mdlog->submit_entry(sle);
       mds->mdlog->flush();
+      ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_SUBTREEMAP);
     }
   }
   mds->mdlog->trim_all();
@@ -7962,6 +7974,7 @@ bool MDCache::shutdown_pass()
     dout(7) << "still >1 segments, waiting for log to trim" << dendl;
     return false;
   }
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_TRIMALL);
 
   // drop our reference to our stray dir inode
   for (int i = 0; i < NUM_STRAY; ++i) {
@@ -7972,6 +7985,7 @@ bool MDCache::shutdown_pass()
       strays[i]->put_stickydirs();
     }
   }
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_STRAYPUT);
 
   CDir *mydir = myin ? myin->get_dirfrag(frag_t()) : NULL;
   if (mydir && !mydir->is_subtree_root())
@@ -8007,6 +8021,7 @@ bool MDCache::shutdown_pass()
     dout(7) << "capping the mdlog" << dendl;
     mds->mdlog->cap();
   }
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_LOGCAP);
   
   if (!mds->mdlog->empty())
     mds->mdlog->trim(0);
@@ -8054,16 +8069,19 @@ bool MDCache::shutdown_pass()
     myin->close_dirfrag(mydir->get_frag());
   }
   ceph_assert(subtrees.empty());
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_EMPTYSUBTREES);
 
   if (myin) {
     remove_inode(myin);
     ceph_assert(!myin);
   }
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_MYINREMOVAL);
 
   if (global_snaprealm) {
     remove_inode(global_snaprealm->inode);
     global_snaprealm = nullptr;
   }
+  ceph_assert(kill_shutdown_at != KILL_SHUTDOWN_AT::SHUTDOWN_GLOBALSNAPREALMREMOVAL);
   
   // done!
   dout(5) << "shutdown done." << dendl;
index 1280fa1d01e44deead773a34f9ec1baf29554571..fabd847cf9d1b2fd05f8a367e086c6dd035cdeb2 100644 (file)
@@ -1290,6 +1290,23 @@ class MDCache {
     int num_remote_waiters = 0;        // number of remote authpin waiters
   };
 
+  enum KILL_SHUTDOWN_AT {  // kill points compared against kill_shutdown_at by ceph_assert() in MDCache::shutdown_pass()
+    SHUTDOWN_NULL,  // 0, the default of mds_kill_shutdown_at; no assert in shutdown_pass() compares against it
+    SHUTDOWN_START,  // on entry to shutdown_pass(), before the is_stopped() check
+    SHUTDOWN_POSTTRIM,  // after trim(UINT64_MAX) and the "lru size now" log line
+    SHUTDOWN_POSTONEEXPORT,  // right after a single migrator->export_dir_nicely() call
+    SHUTDOWN_POSTALLEXPORTS,  // after the subtree export block, before the strays_all_exported check
+    SHUTDOWN_SESSIONTERMINATE,  // after the block that calls terminate_sessions() and returns false
+    SHUTDOWN_SUBTREEMAP,  // after the subtree map log entry has been submitted and the log flushed
+    SHUTDOWN_TRIMALL,  // after mdlog->trim_all() and the ">1 segments" wait
+    SHUTDOWN_STRAYPUT,  // after the loop that drops references to the stray dir inodes
+    SHUTDOWN_LOGCAP,  // after the "capping the mdlog" block
+    SHUTDOWN_EMPTYSUBTREES,  // after ceph_assert(subtrees.empty())
+    SHUTDOWN_MYINREMOVAL,  // after myin has been removed (if it was set)
+    SHUTDOWN_GLOBALSNAPREALMREMOVAL,  // after the global_snaprealm inode has been removed (if it was set)
+    SHUTDOWN_UNUSED  // NOTE(review): not referenced by any visible hunk; presumably an end marker -- confirm
+  };
+
   typedef std::map<dirfrag_t,fragment_info_t>::iterator fragment_info_iterator;
 
   friend class EFragment;
@@ -1389,6 +1406,8 @@ class MDCache {
   time upkeep_last_trim = time::min();
   time upkeep_last_release = time::min();
   std::atomic<bool> upkeep_trim_shutdown{false};
+
+  uint64_t kill_shutdown_at = 0;  // cached value of the mds_kill_shutdown_at option; compared against KILL_SHUTDOWN_AT values in shutdown_pass()
 };
 
 class C_MDS_RetryRequest : public MDSInternalContext {
index da681e735a3f967ec1d7eff6a607e5bc31940622..6e6e518870d29b7d78d28f46d28f51b2cf1d96ae 100644 (file)
@@ -3820,6 +3820,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const
     "mds_inject_journal_corrupt_dentry_first",
     "mds_inject_migrator_session_race",
     "mds_inject_rename_corrupt_dentry_first",
+    "mds_kill_shutdown_at",
     "mds_log_event_large_threshold",
     "mds_log_events_per_segment",
     "mds_log_major_segment_event_ratio",