git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: limit total size of exporting subtrees
author: Yan, Zheng <zyan@redhat.com>
Tue, 17 Jul 2018 03:57:22 +0000 (11:57 +0800)
committer: Yan, Zheng <zyan@redhat.com>
Tue, 28 Aug 2018 23:12:42 +0000 (07:12 +0800)
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
src/mds/MDSDaemon.cc
src/mds/Migrator.cc
src/mds/Migrator.h

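diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index 0b651a7d523bc3ddec1fcd51930da14968305a8f..1e00138cfa31f5d0d85f3afd5c1a61c8d9969cbd 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc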
@@ -362,6 +362,8 @@ const char** MDSDaemon::get_tracked_conf_keys() const
     "mds_max_purge_ops",
     "mds_max_purge_ops_per_pg",
     "mds_max_purge_files",
+    // Migrator
+    "mds_max_export_size",
     "mds_inject_migrator_session_race",
     "mds_inject_migrator_message_loss",
     "clog_to_graylog",
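diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index e530146b2ac2d9f5ac308251b08f0af44ba2278e..83c73444979d36d70049b028690c0ebe28d95f8d 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc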
@@ -287,6 +287,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
   switch (state) {
   case EXPORT_LOCKING:
     dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl;
+    num_locking_exports--;
     it->second.state = EXPORT_CANCELLED;
     dir->auth_unpin(this);
     break;
@@ -376,10 +377,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
     mut.swap(it->second.mut);
 
     if (it->second.state == EXPORT_CANCELLED) {
-      export_state.erase(it);
-      dir->clear_exporting();
-      // send pending import_maps?
-      cache->maybe_send_pending_resolves();
+      export_cancel_finish(it);
     }
 
     // drop locks
@@ -398,13 +396,21 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer)
   }
 }
 
-void Migrator::export_cancel_finish(CDir *dir)
+void Migrator::export_cancel_finish(export_state_iterator& it)
 {
+  CDir *dir = it->first;
+  bool unpin = (it->second.state == EXPORT_CANCELLING);
+
+  total_exporting_size -= it->second.approx_size;
+  export_state.erase(it);
+
   assert(dir->state_test(CDir::STATE_EXPORTING));
   dir->clear_exporting();
 
-  // pinned by Migrator::export_notify_abort()
-  dir->auth_unpin(this);
+  if (unpin) {
+    // pinned by Migrator::export_notify_abort()
+    dir->auth_unpin(this);
+  }
   // send pending import_maps?  (these need to go out when all exports have finished.)
   cache->maybe_send_pending_resolves();
 }
@@ -478,8 +484,7 @@ void Migrator::handle_mds_failure_or_stop(mds_rank_t who)
            export_finish(dir);
        } else if (p->second.state == EXPORT_CANCELLING) {
          if (p->second.notify_ack_waiting.empty()) {
-           export_state.erase(p);
-           export_cancel_finish(dir);
+           export_cancel_finish(p);
          }
        }
       }
@@ -706,8 +711,14 @@ void Migrator::maybe_do_queued_export()
   if (running)
     return;
   running = true;
+
+  uint64_t max_total_size = max_export_size * 2;
+
   while (!export_queue.empty() &&
-        export_state.size() <= 4) {
+        max_total_size > total_exporting_size &&
+        max_total_size - total_exporting_size >=
+        max_export_size * (num_locking_exports + 1)) {
+
     dirfrag_t df = export_queue.front().first;
     mds_rank_t dest = export_queue.front().second;
     export_queue.pop_front();
@@ -720,6 +731,7 @@ void Migrator::maybe_do_queued_export()
 
     export_dir(dir, dest);
   }
+
   running = false;
 }
 
@@ -861,6 +873,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest)
 
   assert(export_state.count(dir) == 0);
   export_state_t& stat = export_state[dir];
+  num_locking_exports++;
   stat.state = EXPORT_LOCKING;
   stat.peer = dest;
   stat.tid = mdr->reqid.tid;
@@ -898,7 +911,7 @@ void Migrator::maybe_split_export(CDir* dir, vector<pair<CDir*, size_t> >& resul
   vector<LevelData> stack;
   stack.emplace_back(dir);
 
-  uint64_t max_size = g_conf->get_val<uint64_t>("mds_max_export_size");
+  uint64_t max_size = max_export_size;
   size_t found_size = 0;
   size_t skipped_size = 0;
 
@@ -1095,6 +1108,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
   maybe_split_export(dir, results);
 
   if (results.size() == 1 && results.front().first == dir) {
+    num_locking_exports--;
     it->second.state = EXPORT_DISCOVERING;
     // send ExportDirDiscover (ask target)
     filepath path;
@@ -1105,6 +1119,8 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
     assert(g_conf->mds_kill_export_at != 2);
 
     it->second.last_cum_auth_pins_change = ceph_clock_now();
+    it->second.approx_size = results.front().second;
+    total_exporting_size += it->second.approx_size;
 
     // start the freeze, but hold it up with an auth_pin.
     dir->freeze_tree();
@@ -1126,8 +1142,8 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
     _mdr->more()->export_dir = sub;
 
     assert(export_state.count(sub) == 0);
-    export_state_t& stat = export_state[sub];
-
+    auto& stat = export_state[sub];
+    num_locking_exports++;
     stat.state = EXPORT_LOCKING;
     stat.peer = dest;
     stat.tid = _mdr->reqid.tid;
@@ -1243,15 +1259,7 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid)
       !diri->nestlock.can_wrlock(-1)) {
     dout(7) << "export_dir couldn't acquire all needed locks, failing. "
            << *dir << dendl;
-    // .. unwind ..
-    dir->unfreeze_tree();
-    cache->try_subtree_merge(dir);
-
-    mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer);
-    export_state.erase(it);
-
-    dir->clear_exporting();
-    cache->maybe_send_pending_resolves();
+    export_try_cancel(dir);
     return;
   }
 
@@ -2097,8 +2105,7 @@ void Migrator::handle_export_notify_ack(MExportDirNotifyAck *m)
       dout(7) << "handle_export_notify_ack from " << m->get_source()
              << ": cancelling export, processing notify on " << *dir << dendl;
       if (stat.notify_ack_waiting.empty()) {
-       export_state.erase(export_state_entry);
-       export_cancel_finish(dir);
+       export_cancel_finish(export_state_entry);
       }
     }
   }
@@ -2187,7 +2194,10 @@ void Migrator::export_finish(CDir *dir)
 
   MutationRef mut = it->second.mut;
   // remove from exporting list, clean up state
+  total_exporting_size -= it->second.approx_size;
   export_state.erase(it);
+
+  assert(dir->state_test(CDir::STATE_EXPORTING));
   dir->clear_exporting();
 
   cache->show_subtrees();
@@ -3566,10 +3576,23 @@ void Migrator::logged_import_caps(CInode *in,
   in->auth_unpin(this);
 }
 
+// Seed the cached config values from g_conf at construction time;
+// handle_conf_change() refreshes them when an option is changed later.
+Migrator::Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) {
+  max_export_size = g_conf->get_val<uint64_t>("mds_max_export_size");
+  inject_session_race = g_conf->get_val<bool>("mds_inject_migrator_session_race");
+  // The in-header constructor this replaces also seeded inject_message_loss;
+  // keep doing so, otherwise a value set at startup stays at the default (0).
+  inject_message_loss = g_conf->get_val<int64_t>("mds_inject_migrator_message_loss");
+}
+
 void Migrator::handle_conf_change(const struct md_config_t *conf,
                                   const std::set <std::string> &changed,
                                   const MDSMap &mds_map)
 {
+  if (changed.count("mds_max_export_size"))
+    max_export_size = conf->get_val<uint64_t>("mds_max_export_size");
+
   if (changed.count("mds_inject_migrator_session_race")) {
     inject_session_race = conf->get_val<bool>("mds_inject_migrator_session_race");
     dout(0) << "mds_inject_migrator_session_race is " << inject_session_race << dendl;
diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h
index bff35e37371105ee7e1de50f3fe6958413888e4b..71607376be5f73f5c3c7d518492be91997a375e3 100644
--- a/src/mds/Migrator.h
+++ b/src/mds/Migrator.h
@@ -102,10 +102,7 @@ public:
   }
 
   // -- cons --
-  Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) {
-    inject_session_race = g_conf->get_val<bool>("mds_inject_migrator_session_race");
-    inject_message_loss = g_conf->get_val<int64_t>("mds_inject_migrator_message_loss");
-  }
+  Migrator(MDSRank *m, MDCache *c);
 
   void handle_conf_change(const struct md_config_t *conf,
                           const std::set <std::string> &changed,
@@ -114,22 +111,25 @@ public:
 protected:
   // export fun
   struct export_state_t {
-    int state;
-    mds_rank_t peer;
-    uint64_t tid;
+    int state = 0;
+    mds_rank_t peer = MDS_RANK_NONE;
+    uint64_t tid = 0;
     set<mds_rank_t> warning_ack_waiting;
     set<mds_rank_t> notify_ack_waiting;
     map<inodeno_t,map<client_t,Capability::Import> > peer_imported;
     MutationRef mut;
+    size_t approx_size = 0;
     // for freeze tree deadlock detection
     utime_t last_cum_auth_pins_change;
-    int last_cum_auth_pins;
-    int num_remote_waiters; // number of remote authpin waiters
-    export_state_t() : state(0), peer(0), tid(0), mut(),
-                      last_cum_auth_pins(0), num_remote_waiters(0) {}
+    int last_cum_auth_pins = 0;
+    int num_remote_waiters = 0; // number of remote authpin waiters
+    export_state_t() {}
   };
-
   map<CDir*, export_state_t>  export_state;
+  typedef map<CDir*, export_state_t>::iterator export_state_iterator;
+
+  uint64_t total_exporting_size = 0;
+  unsigned num_locking_exports = 0; // exports in locking state (approx_size == 0)
 
   list<pair<dirfrag_t,mds_rank_t> >  export_queue;
 
@@ -156,7 +156,7 @@ protected:
   void export_go(CDir *dir);
   void export_go_synced(CDir *dir, uint64_t tid);
   void export_try_cancel(CDir *dir, bool notify_peer=true);
-  void export_cancel_finish(CDir *dir);
+  void export_cancel_finish(export_state_iterator& it);
   void export_reverse(CDir *dir, export_state_t& stat);
   void export_notify_abort(CDir *dir, export_state_t& stat, set<CDir*>& bounds);
   void handle_export_ack(MExportDirAck *m);
@@ -352,6 +352,7 @@ public:
 private:
   MDSRank *mds;
   MDCache *cache;
+  uint64_t max_export_size = 0;
   bool inject_session_race = false;
   int inject_message_loss = 0;
 };