]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds: create subtree root immediately after directory tree becomes frozen
author: Yan, Zheng <zyan@redhat.com>
Tue, 6 Jun 2017 07:43:28 +0000 (15:43 +0800)
committer: John Spray <john.spray@redhat.com>
Fri, 23 Jun 2017 16:07:32 +0000 (17:07 +0100)
When a directory tree becomes frozen, its WAIT_FROZEN contexts are
executed asynchronously. Before Migrator::export_frozen() sets the export
bounds, MDCache::try_subtree_merge_at() can merge a newly imported
subtree into the frozen directory tree. This causes problems if there
are auth pins in the newly imported subtree.

The fix is to create the subtree root immediately after the directory tree
becomes frozen. The new subtree root has dir_auth 'me, me', so it's
not mergeable.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
src/mds/CDir.cc
src/mds/Migrator.cc

index 6561d2fb257c4f6eecdaf8a3d2bfc7741e687fae..c190cca175b479f89f74d8dc55c118adff491351 100644 (file)
@@ -2583,8 +2583,11 @@ void CDir::set_dir_auth(mds_authority_t a)
       inode->adjust_nested_auth_pins(-1, NULL);
     
     // unpin parent of frozen dir/tree?
-    if (inode->is_auth() && (is_frozen_tree_root() || is_frozen_dir()))
-      inode->auth_unpin(this);
+    if (inode->is_auth()) {
+      assert(!is_frozen_tree_root());
+      if (is_frozen_dir())
+       inode->auth_unpin(this);
+    }
   } 
   if (was_subtree && !is_subtree_root()) {
     dout(10) << " old subtree root, adjusting auth_pins" << dendl;
@@ -2594,8 +2597,11 @@ void CDir::set_dir_auth(mds_authority_t a)
       inode->adjust_nested_auth_pins(1, NULL);
 
     // pin parent of frozen dir/tree?
-    if (inode->is_auth() && (is_frozen_tree_root() || is_frozen_dir()))
-      inode->auth_pin(this);
+    if (inode->is_auth()) {
+      assert(!is_frozen_tree_root());
+      if (is_frozen_dir())
+       inode->auth_pin(this);
+    }
   }
 
   // newly single auth?
@@ -2774,13 +2780,30 @@ void CDir::_freeze_tree()
     state_clear(STATE_FREEZINGTREE);   // actually, this may get set again by next context?
     --num_freezing_trees;
   }
+
+  if (is_auth()) {
+    mds_authority_t auth;
+    bool was_subtree = is_subtree_root();
+    if (was_subtree) {
+      auth = get_dir_auth();
+    } else {
+      // temporarily prevent parent subtree from becoming frozen.
+      inode->auth_pin(this);
+      // create new subtree
+      auth = authority();
+    }
+
+    assert(auth.first >= 0);
+    assert(auth.second == CDIR_AUTH_UNKNOWN);
+    auth.second = auth.first;
+    inode->mdcache->adjust_subtree_auth(this, auth);
+    if (!was_subtree)
+      inode->auth_unpin(this);
+  }
+
   state_set(STATE_FROZENTREE);
   ++num_frozen_trees;
   get(PIN_FROZEN);
-
-  // auth_pin inode for duration of freeze, if we are not a subtree root.
-  if (is_auth() && !is_subtree_root())
-    inode->auth_pin(this);
 }
 
 void CDir::unfreeze_tree()
@@ -2794,9 +2817,16 @@ void CDir::unfreeze_tree()
 
     put(PIN_FROZEN);
 
-    // unpin  (may => FREEZEABLE)   FIXME: is this order good?
-    if (is_auth() && !is_subtree_root())
-      inode->auth_unpin(this);
+    if (is_auth()) {
+      // must be subtree
+      assert(is_subtree_root());
+      // for debug purpose, caller should ensure 'dir_auth.second == dir_auth.first'
+      mds_authority_t auth = get_dir_auth();
+      assert(auth.first >= 0);
+      assert(auth.second == auth.first);
+      auth.second = CDIR_AUTH_UNKNOWN;
+      inode->mdcache->adjust_subtree_auth(this, auth);
+    }
 
     // waiters?
     finish_waiting(WAIT_UNFREEZE);
index aafd89c2820c6ae66dbe8712eae9b28575a9a3d1..52df04f8d7a5598dbb93239cd1ab781ab632945a 100644 (file)
@@ -288,6 +288,8 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
     dout(10) << "export state=freezing : canceling freeze" << dendl;
     it->second.state = EXPORT_CANCELLED;
     dir->unfreeze_tree();  // cancel the freeze
+    if (dir->is_subtree_root())
+      cache->try_subtree_merge(dir);
     if (notify_peer &&
        (!mds->is_cluster_degraded() ||
         mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer))) // tell them.
@@ -325,7 +327,6 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
       }
     }
     dir->unfreeze_tree();
-    cache->adjust_subtree_auth(dir, mds->get_nodeid());
     cache->try_subtree_merge(dir);
     if (notify_peer &&
        (!mds->is_cluster_degraded() ||
@@ -518,7 +519,6 @@ void Migrator::handle_mds_failure_or_stop(mds_rank_t who)
          
          // adjust auth back to the exporter
          cache->adjust_subtree_auth(dir, q->second.peer);
-         cache->try_subtree_merge(dir);
 
          // notify bystanders ; wait in aborting state
          import_state[df].state = IMPORT_ABORTING;
@@ -564,9 +564,8 @@ void Migrator::handle_mds_failure_or_stop(mds_rank_t who)
          dout(10) << "faking export_notify_ack from mds." << who
                   << " on aborting import " << *dir << " from mds." << q->second.peer
                   << dendl;
-         if (q->second.bystanders.empty()) {
+         if (q->second.bystanders.empty())
            import_reverse_unfreeze(dir);
-         }
        }
       }
     }
@@ -1046,14 +1045,15 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid)
       !diri->nestlock.can_wrlock(-1)) {
     dout(7) << "export_dir couldn't acquire all needed locks, failing. "
            << *dir << dendl;
-
     // .. unwind ..
     dir->unfreeze_tree();
-    dir->state_clear(CDir::STATE_EXPORTING);
+    cache->try_subtree_merge(dir);
 
     mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer);
-
     export_state.erase(it);
+
+    dir->state_clear(CDir::STATE_EXPORTING);
+    cache->maybe_send_pending_resolves();
     return;
   }
 
@@ -1066,9 +1066,9 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid)
 
   cache->show_subtrees();
 
+  // CDir::_freeze_tree() should have forced it into subtree.
+  assert(dir->get_dir_auth() == mds_authority_t(mds->get_nodeid(), mds->get_nodeid()));
   // note the bounds.
-  //  force it into a subtree by listing auth as <me,me>.
-  cache->adjust_subtree_auth(dir, mds->get_nodeid(), mds->get_nodeid());
   set<CDir*> bounds;
   cache->get_subtree_bounds(dir, bounds);
 
@@ -1783,18 +1783,17 @@ void Migrator::export_reverse(CDir *dir)
     bd->state_clear(CDir::STATE_EXPORTBOUND);
   }
 
-  // adjust auth, with possible subtree merge.
-  cache->adjust_subtree_auth(dir, mds->get_nodeid());
-  cache->try_subtree_merge(dir);
-
   // notify bystanders
   export_notify_abort(dir, bounds);
 
+  // unfreeze tree, with possible subtree merge.
+  cache->adjust_subtree_auth(dir, mds->get_nodeid(), mds->get_nodeid());
+
   // process delayed expires
   cache->process_delayed_expire(dir);
-  
-  // unfreeze
+
   dir->unfreeze_tree();
+  cache->try_subtree_merge(dir);
 
   // revoke/resume stale caps
   for (auto in : to_eval) {
@@ -1951,10 +1950,13 @@ void Migrator::export_finish(CDir *dir)
   
   // finish export (adjust local cache state)
   int num_dentries = 0;
-  C_ContextsBase<MDSInternalContextBase, MDSInternalContextGather> *fin = new C_ContextsBase<MDSInternalContextBase, MDSInternalContextGather>(g_ceph_context);
+  list<MDSInternalContextBase*> finished;
   finish_export_dir(dir, ceph_clock_now(), it->second.peer,
-                   it->second.peer_imported, fin->contexts, &num_dentries);
-  
+                   it->second.peer_imported, finished, &num_dentries);
+
+  assert(!dir->is_auth());
+  cache->adjust_subtree_auth(dir, it->second.peer);
+
   // unpin bounds
   set<CDir*> bounds;
   cache->get_subtree_bounds(dir, bounds);
@@ -1969,9 +1971,14 @@ void Migrator::export_finish(CDir *dir)
   if (dir->state_test(CDir::STATE_AUXSUBTREE))
     dir->state_clear(CDir::STATE_AUXSUBTREE);
 
-  // adjust auth, with possible subtree merge.
+  // discard delayed expires
+  cache->discard_delayed_expire(dir);
+
+  dout(7) << "export_finish unfreezing" << dendl;
+
+  // unfreeze tree, with possible subtree merge.
   //  (we do this _after_ removing EXPORTBOUND pins, to allow merges)
-  cache->adjust_subtree_auth(dir, it->second.peer);
+  dir->unfreeze_tree();
   cache->try_subtree_merge(dir);
 
   // no more auth subtree? clear scatter dirty
@@ -1979,17 +1986,11 @@ void Migrator::export_finish(CDir *dir)
       !dir->get_inode()->has_subtree_root_dirfrag(mds->get_nodeid())) {
     dir->get_inode()->clear_scatter_dirty();
     // wake up scatter_nudge waiters
-    dir->get_inode()->take_waiting(CInode::WAIT_ANY_MASK, fin->contexts);
+    dir->get_inode()->take_waiting(CInode::WAIT_ANY_MASK, finished);
   }
 
-  dir->add_waiter(CDir::WAIT_UNFREEZE, fin);
-
-  // unfreeze
-  dout(7) << "export_finish unfreezing" << dendl;
-  dir->unfreeze_tree();
-
-  // discard delayed expires
-  cache->discard_delayed_expire(dir);
+  if (!finished.empty())
+    mds->queue_waiters(finished);
 
   MutationRef mut = it->second.mut;
   // remove from exporting list, clean up state
@@ -2139,7 +2140,6 @@ void Migrator::handle_export_cancel(MExportDirCancel *m)
     import_remove_pins(dir, bounds);
     // adjust auth back to the exportor
     cache->adjust_subtree_auth(dir, it->second.peer);
-    cache->try_subtree_merge(dir);
     import_reverse_unfreeze(dir);
   } else {
     assert(0 == "got export_cancel in weird state");
@@ -2628,8 +2628,6 @@ void Migrator::import_reverse(CDir *dir)
   // log our failure
   mds->mdlog->start_submit_entry(new EImportFinish(dir, false));       // log failure
 
-  cache->try_subtree_merge(dir);
-
   cache->trim(-1, num_dentries); // try trimming dentries
 
   // notify bystanders; wait in aborting state
@@ -2686,10 +2684,12 @@ void Migrator::import_notify_abort(CDir *dir, set<CDir*>& bounds)
 
 void Migrator::import_reverse_unfreeze(CDir *dir)
 {
-  assert(dir);
   dout(7) << "import_reverse_unfreeze " << *dir << dendl;
-  dir->unfreeze_tree();
+  assert(!dir->is_auth());
   cache->discard_delayed_expire(dir);
+  dir->unfreeze_tree();
+  if (dir->is_subtree_root())
+    cache->try_subtree_merge(dir);
   import_reverse_final(dir);
 }
 
@@ -2789,6 +2789,11 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last)
   assert(it != import_state.end());
   assert(it->second.state == IMPORT_ACKING || it->second.state == IMPORT_FINISHING);
 
+  if (it->second.state == IMPORT_ACKING) {
+    assert(dir->is_auth());
+    cache->adjust_subtree_auth(dir, mds->get_nodeid(), mds->get_nodeid());
+  }
+
   // log finish
   assert(g_conf->mds_kill_import_at != 9);
 
@@ -2844,18 +2849,15 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last)
   MutationRef mut = it->second.mut;
   import_state.erase(it);
 
-  // adjust auth, with possible subtree merge.
-  cache->adjust_subtree_auth(dir, mds->get_nodeid());
-
   mds->mdlog->start_submit_entry(new EImportFinish(dir, true));
 
-  cache->try_subtree_merge(dir);
-
   // process delayed expires
   cache->process_delayed_expire(dir);
 
-  // ok now unfreeze (and thus kick waiters)
+  // unfreeze tree, with possible subtree merge.
   dir->unfreeze_tree();
+  cache->try_subtree_merge(dir);
+
   cache->show_subtrees();
   //audit();  // this fails, bc we munge up the subtree map during handle_import_map (resolve phase)