git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: migrate strays part by part when shutdown mds (ref: 23548/head)
author: Yan, Zheng <zyan@redhat.com>
Mon, 13 Aug 2018 08:47:53 +0000 (16:47 +0800)
committer: Yan, Zheng <zyan@redhat.com>
Tue, 14 Aug 2018 04:34:56 +0000 (12:34 +0800)
Migrating all strays at once may require lots of memory and CPU time.

Fixes: http://tracker.ceph.com/issues/26926
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
src/mds/MDCache.cc
src/mds/MDCache.h
src/mds/Server.cc
src/mds/StrayManager.cc

index dd57feeed80f0c4f58786d0d52ae24124decff81..41dd8095081c4ba8271aec76527b14987f54f7f6 100644 (file)
@@ -3111,7 +3111,7 @@ void MDCache::handle_mds_failure(mds_rank_t who)
 
   // MDCache::shutdown_export_strays() always exports strays to mds.0
   if (who == mds_rank_t(0))
-    shutdown_exported_strays.clear();
+    shutdown_exporting_strays.clear();
 
   show_subtrees();  
 }
@@ -7903,60 +7903,119 @@ bool MDCache::shutdown_pass()
 
 bool MDCache::shutdown_export_strays()
 {
+  static const unsigned MAX_EXPORTING = 100;
+
   if (mds->get_nodeid() == 0)
     return true;
-  
-  dout(10) << "shutdown_export_strays" << dendl;
+
+  if (shutdown_exporting_strays.size() * 3 >= MAX_EXPORTING * 2)
+    return false;
+
+  dout(10) << "shutdown_export_strays " << shutdown_export_next.first
+          << " '" << shutdown_export_next.second << "'" << dendl;
 
   bool mds0_active = mds->mdsmap->is_active(mds_rank_t(0));
+  bool all_exported = false;
 
-  bool done = true;
+again:
+  auto next = shutdown_export_next;
 
-  list<CDir*> dfs;
   for (int i = 0; i < NUM_STRAY; ++i) {
-    if (!strays[i] ||
-       !strays[i]->state_test(CInode::STATE_STRAYPINNED))
+    CInode *strayi = strays[i];
+    if (!strayi ||
+       !strayi->state_test(CInode::STATE_STRAYPINNED))
+      continue;
+    if (strayi->ino() < next.first.ino)
       continue;
-    strays[i]->get_dirfrags(dfs);
-  }
 
-  for (std::list<CDir*>::iterator dfs_i = dfs.begin();
-       dfs_i != dfs.end(); ++dfs_i)
-  {
-    CDir *dir = *dfs_i;
+    deque<CDir*> dfls;
+    strayi->get_dirfrags(dfls);
 
-    if (!dir->is_complete()) {
-      dir->fetch(0);
-      done = false;
-      if (!mds0_active)
-       break;
-    }
-    
-    for (auto &p : dir->items) {
-      CDentry *dn = p.second;
-      CDentry::linkage_t *dnl = dn->get_projected_linkage();
-      if (dnl->is_null())
+    while (!dfls.empty()) {
+      CDir *dir = dfls.front();
+      dfls.pop_front();
+
+      if (dir->dirfrag() < next.first)
        continue;
-      done = false;
-      if (!mds0_active)
-       break;
-      
-      if (dn->state_test(CDentry::STATE_PURGING)) {
-        // Don't try to migrate anything that is actually
-        // being purged right now
-        continue;
+      if (next.first < dir->dirfrag()) {
+       next.first = dir->dirfrag();
+       next.second.clear();
+      }
+
+      if (!dir->is_complete()) {
+       MDSInternalContextBase *fin = nullptr;
+       if (shutdown_exporting_strays.empty()) {
+         fin = new MDSInternalContextWrapper(mds,
+                 new FunctionContext([this](int r) {
+                   shutdown_export_strays();
+                 })
+               );
+       }
+       dir->fetch(fin);
+       goto done;
       }
 
-      if (shutdown_exported_strays.count(dnl->get_inode()->ino()) == 0) {
-       shutdown_exported_strays.insert(dnl->get_inode()->ino());
-       stray_manager.migrate_stray(dn, mds_rank_t(0));  // send to root!
+      CDir::dentry_key_map::iterator it;
+      if (next.second.empty()) {
+       it = dir->begin();
       } else {
-       dout(10) << "already exporting " << *dn << dendl;
+       auto hash = ceph_frag_value(strayi->hash_dentry_name(next.second));
+       it = dir->lower_bound(dentry_key_t(0, next.second, hash));
+      }
+
+      for (; it != dir->end(); ++it) {
+       CDentry *dn = it->second;
+       CDentry::linkage_t *dnl = dn->get_projected_linkage();
+       if (dnl->is_null())
+         continue;
+
+       if (!mds0_active && !dn->state_test(CDentry::STATE_PURGING)) {
+         next.second = it->first.name;
+         goto done;
+       }
+
+       auto ret = shutdown_exporting_strays.insert(dnl->get_inode()->ino());
+       if (!ret.second) {
+         dout(10) << "already exporting/purging " << *dn << dendl;
+         continue;
+       }
+
+       // Don't try to migrate anything that is actually
+       // being purged right now
+       if (!dn->state_test(CDentry::STATE_PURGING))
+         stray_manager.migrate_stray(dn, mds_rank_t(0));  // send to root!
+
+       if (shutdown_exporting_strays.size() >= MAX_EXPORTING) {
+         ++it;
+         if (it != dir->end()) {
+           next.second = it->first.name;
+         } else {
+           if (dfls.empty())
+             next.first.ino.val++;
+           else
+             next.first = dfls.front()->dirfrag();
+           next.second.clear();
+         }
+         goto done;
+       }
       }
     }
   }
 
-  return done;
+  if (shutdown_exporting_strays.empty()) {
+    dirfrag_t first_df(MDS_INO_STRAY(mds->get_nodeid(), 0), 0);
+    if (first_df < shutdown_export_next.first ||
+       !shutdown_export_next.second.empty()) {
+      shutdown_export_next.first = first_df;
+      shutdown_export_next.second.clear();
+      goto again;
+    }
+    all_exported = true;
+  }
+
+done:
+  shutdown_export_next = next;
+  return all_exported;
 }
 
 // ========= messaging ==============
index 8e323dbade42136f74e102ce25d2f3a095e4d51f..15697b92e60778c1eee2939ad992257e9d7b596c 100644 (file)
@@ -775,13 +775,18 @@ public:
 
   // shutdown
 private:
-  set<inodeno_t> shutdown_exported_strays;
+  set<inodeno_t> shutdown_exporting_strays;
+  pair<dirfrag_t, string> shutdown_export_next;
 public:
   void shutdown_start();
   void shutdown_check();
   bool shutdown_pass();
-  bool shutdown_export_strays();
   bool shutdown();                    // clear cache (ie at shutodwn)
+  bool shutdown_export_strays();
+  void shutdown_export_stray_finish(inodeno_t ino) {
+    if (shutdown_exporting_strays.erase(ino))
+      shutdown_export_strays();
+  }
 
   bool did_shutdown_log_cap;
 
index 722ce887011d4b256f73e80ace71d8af3beea94f..5862de3edcc84c6f3d2a89ed1d61af38c5ec9e4d 100644 (file)
@@ -8469,16 +8469,17 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
 {
   dout(10) << "_commit_slave_rename " << *mdr << " r=" << r << dendl;
 
-  CDentry::linkage_t *destdnl = destdn->get_linkage();
+  CInode *in = destdn->get_linkage()->get_inode();
+
+  inodeno_t migrated_stray;
+  if (srcdn->is_auth() && srcdn->get_dir()->inode->is_stray())
+    migrated_stray = in->ino();
 
   MDSInternalContextBase::vec finished;
   if (r == 0) {
     // unfreeze+singleauth inode
     //  hmm, do i really need to delay this?
     if (mdr->more()->is_inode_exporter) {
-
-      CInode *in = destdnl->get_inode();
-
       // drop our pins
       // we exported, clear out any xlocks that we moved to another MDS
       set<SimpleLock*>::iterator i = mdr->xlocks.begin();
@@ -8495,14 +8496,13 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
       auto bp = mdr->more()->inode_import.cbegin();
       decode(peer_imported, bp);
 
-      dout(10) << " finishing inode export on " << *destdnl->get_inode() << dendl;
-      mdcache->migrator->finish_export_inode(destdnl->get_inode(),
-                                            mdr->slave_to_mds, peer_imported, finished);
+      dout(10) << " finishing inode export on " << *in << dendl;
+      mdcache->migrator->finish_export_inode(in, mdr->slave_to_mds, peer_imported, finished);
       mds->queue_waiters(finished);   // this includes SINGLEAUTH waiters.
 
       // unfreeze
-      assert(destdnl->get_inode()->is_frozen_inode());
-      destdnl->get_inode()->unfreeze_inode(finished);
+      assert(in->is_frozen_inode());
+      in->unfreeze_inode(finished);
     }
 
     // singleauth
@@ -8538,8 +8538,8 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
     // witness list from the master, and they failed before we tried prep again.
     if (mdr->more()->rollback_bl.length()) {
       if (mdr->more()->is_inode_exporter) {
-       dout(10) << " reversing inode export of " << *destdnl->get_inode() << dendl;
-       destdnl->get_inode()->abort_export();
+       dout(10) << " reversing inode export of " << *in << dendl;
+       in->abort_export();
       }
       if (mdcache->is_ambiguous_slave_update(mdr->reqid, mdr->slave_to_mds)) {
        mdcache->remove_ambiguous_slave_update(mdr->reqid, mdr->slave_to_mds);
@@ -8562,6 +8562,9 @@ void Server::_commit_slave_rename(MDRequestRef& mdr, int r,
       mdcache->request_finish(mdr);
     }
   }
+
+  if (migrated_stray && mds->is_stopping())
+    mdcache->shutdown_export_stray_finish(migrated_stray);
 }
 
 void _rollback_repair_dir(MutationRef& mut, CDir *dir, rename_rollback::drec &r, utime_t ctime,
index 08c64f61c204268977dc82b375534c4d0b986830..ffc96aa084c95a19e00b20ede9e7f1e40bc8d21b 100644 (file)
@@ -263,9 +263,13 @@ void StrayManager::_purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *l
   }
 
   // drop inode
+  inodeno_t ino = in->ino();
   if (in->is_dirty())
     in->mark_clean();
-  in->mdcache->remove_inode(in);
+  mds->mdcache->remove_inode(in);
+
+  if (mds->is_stopping())
+    mds->mdcache->shutdown_export_stray_finish(ino);
 }
 
 void StrayManager::enqueue(CDentry *dn, bool trunc)
@@ -465,7 +469,7 @@ bool StrayManager::_eval_stray(CDentry *dn, bool delay)
        return false;  // not until some snaps are deleted.
       }
 
-      in->mdcache->clear_dirty_bits_for_stray(in);
+      mds->mdcache->clear_dirty_bits_for_stray(in);
 
       if (!in->remote_parents.empty()) {
        // unlink any stale remote snap dentry.
@@ -748,11 +752,15 @@ void StrayManager::_truncate_stray_logged(CDentry *dn, LogSegment *ls)
 
   dout(10) << __func__ << ": " << *dn << " " << *in << dendl;
 
+  in->pop_and_dirty_projected_inode(ls);
+
+  in->state_clear(CInode::STATE_PURGING);
   dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED);
   dn->put(CDentry::PIN_PURGING);
 
-  in->pop_and_dirty_projected_inode(ls);
-
   eval_stray(dn);
+
+  if (!dn->state_test(CDentry::STATE_PURGING) &&  mds->is_stopping())
+    mds->mdcache->shutdown_export_stray_finish(in->ino());
 }