git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: migrate strays part by part when shutdown mds (ref: 24324/head)
author: Yan, Zheng <zyan@redhat.com>
Mon, 13 Aug 2018 08:47:53 +0000 (16:47 +0800)
committer: Nathan Cutler <ncutler@suse.com>
Fri, 19 Oct 2018 20:57:55 +0000 (22:57 +0200)
Migrating all strays at once may require lots of memory and CPU time.

Fixes: http://tracker.ceph.com/issues/26926
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
(cherry picked from commit 24eea7e11d014fbb7cb468b66e84d0b747cd9359)

Conflicts:
        src/mds/MDCache.cc
        src/mds/Server.cc

src/mds/MDCache.cc
src/mds/MDCache.h
src/mds/Server.cc
src/mds/StrayManager.cc

index 869f4b6d147780f3560a605db7a718f8d71e0cfd..afd327d36625238eb695acfd7da1b4a23657c1bf 100644 (file)
@@ -3068,7 +3068,7 @@ void MDCache::handle_mds_failure(mds_rank_t who)
 
   // MDCache::shutdown_export_strays() always exports strays to mds.0
   if (who == mds_rank_t(0))
-    shutdown_exported_strays.clear();
+    shutdown_exporting_strays.clear();
 
   show_subtrees();  
 }
@@ -7744,60 +7744,119 @@ bool MDCache::shutdown_pass()
 
 bool MDCache::shutdown_export_strays()
 {
+  static const unsigned MAX_EXPORTING = 100;
+
   if (mds->get_nodeid() == 0)
     return true;
-  
-  dout(10) << "shutdown_export_strays" << dendl;
+
+  if (shutdown_exporting_strays.size() * 3 >= MAX_EXPORTING * 2)
+    return false;
+
+  dout(10) << "shutdown_export_strays " << shutdown_export_next.first
+          << " '" << shutdown_export_next.second << "'" << dendl;
 
   bool mds0_active = mds->mdsmap->is_active(mds_rank_t(0));
+  bool all_exported = false;
 
-  bool done = true;
+again:
+  auto next = shutdown_export_next;
 
-  list<CDir*> dfs;
   for (int i = 0; i < NUM_STRAY; ++i) {
-    if (!strays[i] ||
-       !strays[i]->state_test(CInode::STATE_STRAYPINNED))
+    CInode *strayi = strays[i];
+    if (!strayi ||
+       !strayi->state_test(CInode::STATE_STRAYPINNED))
+      continue;
+    if (strayi->ino() < next.first.ino)
       continue;
-    strays[i]->get_dirfrags(dfs);
-  }
 
-  for (std::list<CDir*>::iterator dfs_i = dfs.begin();
-       dfs_i != dfs.end(); ++dfs_i)
-  {
-    CDir *dir = *dfs_i;
+    deque<CDir*> dfls;
+    strayi->get_dirfrags(dfls);
 
-    if (!dir->is_complete()) {
-      dir->fetch(0);
-      done = false;
-      if (!mds0_active)
-       break;
-    }
-    
-    for (auto &p : dir->items) {
-      CDentry *dn = p.second;
-      CDentry::linkage_t *dnl = dn->get_projected_linkage();
-      if (dnl->is_null())
+    while (!dfls.empty()) {
+      CDir *dir = dfls.front();
+      dfls.pop_front();
+
+      if (dir->dirfrag() < next.first)
        continue;
-      done = false;
-      if (!mds0_active)
-       break;
-      
-      if (dn->state_test(CDentry::STATE_PURGING)) {
-        // Don't try to migrate anything that is actually
-        // being purged right now
-        continue;
+      if (next.first < dir->dirfrag()) {
+       next.first = dir->dirfrag();
+       next.second.clear();
+      }
+
+      if (!dir->is_complete()) {
+       MDSInternalContextBase *fin = nullptr;
+       if (shutdown_exporting_strays.empty()) {
+         fin = new MDSInternalContextWrapper(mds,
+                 new FunctionContext([this](int r) {
+                   shutdown_export_strays();
+                 })
+               );
+       }
+       dir->fetch(fin);
+       goto done;
       }
 
-      if (shutdown_exported_strays.count(dnl->get_inode()->ino()) == 0) {
-       shutdown_exported_strays.insert(dnl->get_inode()->ino());
-       stray_manager.migrate_stray(dn, mds_rank_t(0));  // send to root!
+      CDir::dentry_key_map::iterator it;
+      if (next.second.empty()) {
+       it = dir->begin();
       } else {
-       dout(10) << "already exporting " << *dn << dendl;
+       auto hash = ceph_frag_value(strayi->hash_dentry_name(next.second));
+       it = dir->lower_bound(dentry_key_t(0, next.second, hash));
       }
+
+      for (; it != dir->end(); ++it) {
+       CDentry *dn = it->second;
+       CDentry::linkage_t *dnl = dn->get_projected_linkage();
+       if (dnl->is_null())
+         continue;
+
+       if (!mds0_active && !dn->state_test(CDentry::STATE_PURGING)) {
+         next.second = string(it->first.name);
+         goto done;
+       }
+
+       auto ret = shutdown_exporting_strays.insert(dnl->get_inode()->ino());
+       if (!ret.second) {
+         dout(10) << "already exporting/purging " << *dn << dendl;
+         continue;
+       }
+
+       // Don't try to migrate anything that is actually
+       // being purged right now
+       if (!dn->state_test(CDentry::STATE_PURGING))
+         stray_manager.migrate_stray(dn, mds_rank_t(0));  // send to root!
+
+       if (shutdown_exporting_strays.size() >= MAX_EXPORTING) {
+         ++it;
+         if (it != dir->end()) {
+           next.second = string(it->first.name);
+         } else {
+           if (dfls.empty())
+             next.first.ino.val++;
+           else
+             next.first = dfls.front()->dirfrag();
+           next.second.clear();
+         }
+         goto done;
+       }
+      }
+    }
+  }
+
+  if (shutdown_exporting_strays.empty()) {
+    dirfrag_t first_df(MDS_INO_STRAY(mds->get_nodeid(), 0), 0);
+    if (first_df < shutdown_export_next.first ||
+       !shutdown_export_next.second.empty()) {
+      shutdown_export_next.first = first_df;
+      shutdown_export_next.second.clear();
+      goto again;
     }
+    all_exported = true;
   }
 
-  return done;
+done:
+  shutdown_export_next = next;
+  return all_exported;
 }
 
 // ========= messaging ==============
index 6aa08ec22e34a503ed2aaf0afbf93efe1382c516..20fe1f0bdbb30a46de9f57282272dc111df5d5f6 100644 (file)
@@ -756,13 +756,18 @@ public:
 
   // shutdown
 private:
-  set<inodeno_t> shutdown_exported_strays;
+  set<inodeno_t> shutdown_exporting_strays;
+  pair<dirfrag_t, string> shutdown_export_next;
 public:
   void shutdown_start();
   void shutdown_check();
   bool shutdown_pass();
-  bool shutdown_export_strays();
   bool shutdown();                    // clear cache (ie at shutodwn)
+  bool shutdown_export_strays();
+  void shutdown_export_stray_finish(inodeno_t ino) {
+    if (shutdown_exporting_strays.erase(ino))
+      shutdown_export_strays();
+  }
 
   bool did_shutdown_log_cap;
 
index c163c16ae3250237d3873d1aa0cc3cd214901a16..e006b878d1d7b4038bbb2acec6df7520d0ce3e77 100644 (file)
@@ -7879,16 +7879,17 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
 {
   dout(10) << "_commit_slave_rename " << *mdr << " r=" << r << dendl;
 
-  CDentry::linkage_t *destdnl = destdn->get_linkage();
+  CInode *in = destdn->get_linkage()->get_inode();
+
+  inodeno_t migrated_stray;
+  if (srcdn->is_auth() && srcdn->get_dir()->inode->is_stray())
+    migrated_stray = in->ino();
 
   list<MDSInternalContextBase*> finished;
   if (r == 0) {
     // unfreeze+singleauth inode
     //  hmm, do i really need to delay this?
     if (mdr->more()->is_inode_exporter) {
-
-      CInode *in = destdnl->get_inode();
-
       // drop our pins
       // we exported, clear out any xlocks that we moved to another MDS
       set<SimpleLock*>::iterator i = mdr->xlocks.begin();
@@ -7905,14 +7906,14 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
       bufferlist::iterator bp = mdr->more()->inode_import.begin();
       ::decode(peer_imported, bp);
 
-      dout(10) << " finishing inode export on " << *destdnl->get_inode() << dendl;
-      mdcache->migrator->finish_export_inode(destdnl->get_inode(), ceph_clock_now(),
-                                            mdr->slave_to_mds, peer_imported, finished);
+      dout(10) << " finishing inode export on " << *in << dendl;
+      mdcache->migrator->finish_export_inode(in, ceph_clock_now(), mdr->slave_to_mds,
+                                            peer_imported, finished);
       mds->queue_waiters(finished);   // this includes SINGLEAUTH waiters.
 
       // unfreeze
-      assert(destdnl->get_inode()->is_frozen_inode());
-      destdnl->get_inode()->unfreeze_inode(finished);
+      assert(in->is_frozen_inode());
+      in->unfreeze_inode(finished);
     }
 
     // singleauth
@@ -7948,8 +7949,8 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
     // witness list from the master, and they failed before we tried prep again.
     if (mdr->more()->rollback_bl.length()) {
       if (mdr->more()->is_inode_exporter) {
-       dout(10) << " reversing inode export of " << *destdnl->get_inode() << dendl;
-       destdnl->get_inode()->abort_export();
+       dout(10) << " reversing inode export of " << *in << dendl;
+       in->abort_export();
       }
       if (mdcache->is_ambiguous_slave_update(mdr->reqid, mdr->slave_to_mds)) {
        mdcache->remove_ambiguous_slave_update(mdr->reqid, mdr->slave_to_mds);
@@ -7972,6 +7973,9 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
       mdcache->request_finish(mdr);
     }
   }
+
+  if (migrated_stray && mds->is_stopping())
+    mdcache->shutdown_export_stray_finish(migrated_stray);
 }
 
 void _rollback_repair_dir(MutationRef& mut, CDir *dir, rename_rollback::drec &r, utime_t ctime,
index b94dd57cba292fec2c0c1fee3a4750c55c361c95..608e37de2c6bc1503432061a7bcadc07df9baef2 100644 (file)
@@ -263,9 +263,13 @@ void StrayManager::_purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *l
   }
 
   // drop inode
+  inodeno_t ino = in->ino();
   if (in->is_dirty())
     in->mark_clean();
-  in->mdcache->remove_inode(in);
+  mds->mdcache->remove_inode(in);
+
+  if (mds->is_stopping())
+    mds->mdcache->shutdown_export_stray_finish(ino);
 }
 
 void StrayManager::enqueue(CDentry *dn, bool trunc)
@@ -465,7 +469,7 @@ bool StrayManager::_eval_stray(CDentry *dn, bool delay)
        return false;  // not until some snaps are deleted.
       }
 
-      in->mdcache->clear_dirty_bits_for_stray(in);
+      mds->mdcache->clear_dirty_bits_for_stray(in);
 
       if (!in->remote_parents.empty()) {
        // unlink any stale remote snap dentry.
@@ -748,11 +752,15 @@ void StrayManager::_truncate_stray_logged(CDentry *dn, LogSegment *ls)
 
   dout(10) << __func__ << ": " << *dn << " " << *in << dendl;
 
+  in->pop_and_dirty_projected_inode(ls);
+
+  in->state_clear(CInode::STATE_PURGING);
   dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED);
   dn->put(CDentry::PIN_PURGING);
 
-  in->pop_and_dirty_projected_inode(ls);
-
   eval_stray(dn);
+
+  if (!dn->state_test(CDentry::STATE_PURGING) &&  mds->is_stopping())
+    mds->mdcache->shutdown_export_stray_finish(in->ino());
 }