From: Yan, Zheng Date: Tue, 17 Jul 2018 03:57:22 +0000 (+0800) Subject: mds: limit total size of exporting subtrees X-Git-Tag: v12.2.9~88^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3e3d8dbc74f1074816ea9e01e5422d26ea9d3e24;p=ceph.git mds: limit total size of exporting subtrees Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index 0b651a7d523..1e00138cfa3 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -362,6 +362,8 @@ const char** MDSDaemon::get_tracked_conf_keys() const "mds_max_purge_ops", "mds_max_purge_ops_per_pg", "mds_max_purge_files", + // Migrator + "mds_max_export_size", "mds_inject_migrator_session_race", "mds_inject_migrator_message_loss", "clog_to_graylog", diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index e530146b2ac..83c73444979 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -287,6 +287,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) switch (state) { case EXPORT_LOCKING: dout(10) << "export state=locking : dropping locks and removing auth_pin" << dendl; + num_locking_exports--; it->second.state = EXPORT_CANCELLED; dir->auth_unpin(this); break; @@ -376,10 +377,7 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) mut.swap(it->second.mut); if (it->second.state == EXPORT_CANCELLED) { - export_state.erase(it); - dir->clear_exporting(); - // send pending import_maps? 
- cache->maybe_send_pending_resolves(); + export_cancel_finish(it); } // drop locks @@ -398,13 +396,21 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) } } -void Migrator::export_cancel_finish(CDir *dir) +void Migrator::export_cancel_finish(export_state_iterator& it) { + CDir *dir = it->first; + bool unpin = (it->second.state == EXPORT_CANCELLING); + + total_exporting_size -= it->second.approx_size; + export_state.erase(it); + assert(dir->state_test(CDir::STATE_EXPORTING)); dir->clear_exporting(); - // pinned by Migrator::export_notify_abort() - dir->auth_unpin(this); + if (unpin) { + // pinned by Migrator::export_notify_abort() + dir->auth_unpin(this); + } // send pending import_maps? (these need to go out when all exports have finished.) cache->maybe_send_pending_resolves(); } @@ -478,8 +484,7 @@ void Migrator::handle_mds_failure_or_stop(mds_rank_t who) export_finish(dir); } else if (p->second.state == EXPORT_CANCELLING) { if (p->second.notify_ack_waiting.empty()) { - export_state.erase(p); - export_cancel_finish(dir); + export_cancel_finish(p); } } } @@ -706,8 +711,14 @@ void Migrator::maybe_do_queued_export() if (running) return; running = true; + + uint64_t max_total_size = max_export_size * 2; + while (!export_queue.empty() && - export_state.size() <= 4) { + max_total_size > total_exporting_size && + max_total_size - total_exporting_size >= + max_export_size * (num_locking_exports + 1)) { + dirfrag_t df = export_queue.front().first; mds_rank_t dest = export_queue.front().second; export_queue.pop_front(); @@ -720,6 +731,7 @@ void Migrator::maybe_do_queued_export() export_dir(dir, dest); } + running = false; } @@ -861,6 +873,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) assert(export_state.count(dir) == 0); export_state_t& stat = export_state[dir]; + num_locking_exports++; stat.state = EXPORT_LOCKING; stat.peer = dest; stat.tid = mdr->reqid.tid; @@ -898,7 +911,7 @@ void Migrator::maybe_split_export(CDir* dir, vector<pair<CDir*, size_t> >& resul vector 
stack; stack.emplace_back(dir); - uint64_t max_size = g_conf->get_val<uint64_t>("mds_max_export_size"); + uint64_t max_size = max_export_size; size_t found_size = 0; size_t skipped_size = 0; @@ -1095,6 +1108,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) maybe_split_export(dir, results); if (results.size() == 1 && results.front().first == dir) { + num_locking_exports--; it->second.state = EXPORT_DISCOVERING; // send ExportDirDiscover (ask target) filepath path; @@ -1105,6 +1119,8 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) assert(g_conf->mds_kill_export_at != 2); it->second.last_cum_auth_pins_change = ceph_clock_now(); + it->second.approx_size = results.front().second; + total_exporting_size += it->second.approx_size; // start the freeze, but hold it up with an auth_pin. dir->freeze_tree(); @@ -1126,8 +1142,8 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) _mdr->more()->export_dir = sub; assert(export_state.count(sub) == 0); - export_state_t& stat = export_state[sub]; - + auto& stat = export_state[sub]; + num_locking_exports++; stat.state = EXPORT_LOCKING; stat.peer = dest; stat.tid = _mdr->reqid.tid; @@ -1243,15 +1259,7 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) !diri->nestlock.can_wrlock(-1)) { dout(7) << "export_dir couldn't acquire all needed locks, failing. " << *dir << dendl; - // .. unwind .. 
- dir->unfreeze_tree(); - cache->try_subtree_merge(dir); - - mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer); - export_state.erase(it); - - dir->clear_exporting(); - cache->maybe_send_pending_resolves(); + export_try_cancel(dir); return; } @@ -2097,8 +2105,7 @@ void Migrator::handle_export_notify_ack(MExportDirNotifyAck *m) dout(7) << "handle_export_notify_ack from " << m->get_source() << ": cancelling export, processing notify on " << *dir << dendl; if (stat.notify_ack_waiting.empty()) { - export_state.erase(export_state_entry); - export_cancel_finish(dir); + export_cancel_finish(export_state_entry); } } } @@ -2187,7 +2194,10 @@ void Migrator::export_finish(CDir *dir) MutationRef mut = it->second.mut; // remove from exporting list, clean up state + total_exporting_size -= it->second.approx_size; export_state.erase(it); + + assert(dir->state_test(CDir::STATE_EXPORTING)); dir->clear_exporting(); cache->show_subtrees(); @@ -3566,10 +3576,18 @@ void Migrator::logged_import_caps(CInode *in, in->auth_unpin(this); } +Migrator::Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) { + max_export_size = g_conf->get_val<uint64_t>("mds_max_export_size"); + inject_session_race = g_conf->get_val<bool>("mds_inject_migrator_session_race"); +} + void Migrator::handle_conf_change(const struct md_config_t *conf, const std::set<std::string> &changed, const MDSMap &mds_map) { + if (changed.count("mds_max_export_size")) + max_export_size = conf->get_val<uint64_t>("mds_max_export_size"); + if (changed.count("mds_inject_migrator_session_race")) { inject_session_race = conf->get_val<bool>("mds_inject_migrator_session_race"); dout(0) << "mds_inject_migrator_session_race is " << inject_session_race << dendl; diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index bff35e37371..71607376be5 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -102,10 +102,7 @@ public: } // -- cons -- - Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) { - inject_session_race = 
g_conf->get_val("mds_inject_migrator_session_race"); - inject_message_loss = g_conf->get_val("mds_inject_migrator_message_loss"); - } + Migrator(MDSRank *m, MDCache *c); void handle_conf_change(const struct md_config_t *conf, const std::set &changed, @@ -114,22 +111,25 @@ public: protected: // export fun struct export_state_t { - int state; - mds_rank_t peer; - uint64_t tid; + int state = 0; + mds_rank_t peer = MDS_RANK_NONE; + uint64_t tid = 0; set warning_ack_waiting; set notify_ack_waiting; map > peer_imported; MutationRef mut; + size_t approx_size = 0; // for freeze tree deadlock detection utime_t last_cum_auth_pins_change; - int last_cum_auth_pins; - int num_remote_waiters; // number of remote authpin waiters - export_state_t() : state(0), peer(0), tid(0), mut(), - last_cum_auth_pins(0), num_remote_waiters(0) {} + int last_cum_auth_pins = 0; + int num_remote_waiters = 0; // number of remote authpin waiters + export_state_t() {} }; - map export_state; + typedef map::iterator export_state_iterator; + + uint64_t total_exporting_size = 0; + unsigned num_locking_exports = 0; // exports in locking state (approx_size == 0) list > export_queue; @@ -156,7 +156,7 @@ protected: void export_go(CDir *dir); void export_go_synced(CDir *dir, uint64_t tid); void export_try_cancel(CDir *dir, bool notify_peer=true); - void export_cancel_finish(CDir *dir); + void export_cancel_finish(export_state_iterator& it); void export_reverse(CDir *dir, export_state_t& stat); void export_notify_abort(CDir *dir, export_state_t& stat, set& bounds); void handle_export_ack(MExportDirAck *m); @@ -352,6 +352,7 @@ public: private: MDSRank *mds; MDCache *cache; + uint64_t max_export_size = 0; bool inject_session_race = false; int inject_message_loss = 0; };