From: Patrick Donnelly
Date: Tue, 18 Apr 2017 02:39:19 +0000 (-0400)
Subject: mds: unify export_targets handling for all exports
X-Git-Tag: v12.0.3~38^2~24
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=082e86c58f5abb93e2a912e603d86f12bc47972c;p=ceph.git

mds: unify export_targets handling for all exports

This commit moves the MDSMap export_targets updates/handling to MDSRank.
It is necessary to wait for export_targets to be updated before performing
any export, because clients must have sessions open with the target of an
export before the export can proceed. Prior to this commit, that handling
was only done for migrations initiated by the balancer and not for manual
migrations requested via the admin socket.

This fix and refactoring does the following:

o MDSRank now manages export_targets via a map of ranks with DecayCounters.
  MDSRank::hit_export_target lets the Migrator/MDBalancer hit a rank to
  indicate that a migration to it is desired or in progress. Importantly,
  updating export_targets is no longer tied to the previous MDBalancer
  cycle.

o mds_bal_target_removal_min and mds_bal_target_removal_max are removed in
  favor of a DecayRate, configured via mds_bal_target_decay, which is
  independent of the tick rate.

o Certain balancing state has been pulled out of MDBalancer into a
  stack-allocated type (balance_state_t). This avoids unnecessary
  persistence of the my_targets, imported, and exported maps, which made
  the code confusing.

o try_rebalance is no longer called on MDSMap updates. That was done before
  export-target checking became part of the balancer, in 3e36d3202.

o The Migrator now hits a rank in export_targets via
  MDSRank::hit_export_target, in proportion to how much is being exported
  and periodically during the course of the export. In my testing, one
  "default" hit (-1) will keep the target in the export map for at least
  roughly 2 minutes.

o The Migrator waits until a target is in export_targets before it actually
  performs the export, and aborts the export if the target is not added in
  a timely manner.

Signed-off-by: Patrick Donnelly
---

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 8984c35594e3..4f0632db7787 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -546,8 +546,7 @@ OPTION(mds_bal_need_min, OPT_FLOAT, .8) // take within this range of what
 OPTION(mds_bal_need_max, OPT_FLOAT, 1.2)
 OPTION(mds_bal_midchunk, OPT_FLOAT, .3) // any sub bigger than this taken in full
 OPTION(mds_bal_minchunk, OPT_FLOAT, .001) // never take anything smaller than this
-OPTION(mds_bal_target_removal_min, OPT_INT, 5) // min balance iterations before old target is removed
-OPTION(mds_bal_target_removal_max, OPT_INT, 10) // max balance iterations before old target is removed
+OPTION(mds_bal_target_decay, OPT_DOUBLE, 10.0) // target decay half-life in MDSMap (2x larger is approx.
2x slower) OPTION(mds_replay_interval, OPT_FLOAT, 1.0) // time to wait before starting replay again OPTION(mds_shutdown_check, OPT_INT, 0) OPTION(mds_thrash_exports, OPT_INT, 0) diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index f52abe0f61c2..2f9b26fa78c5 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -27,7 +27,6 @@ #include "include/Context.h" #include "msg/Messenger.h" #include "messages/MHeartbeat.h" -#include "messages/MMDSLoadTargets.h" #include #include @@ -352,7 +351,7 @@ void MDBalancer::export_empties() -double MDBalancer::try_match(mds_rank_t ex, double& maxex, +double MDBalancer::try_match(balance_state_t& state, mds_rank_t ex, double& maxex, mds_rank_t im, double& maxim) { if (maxex <= 0 || maxim <= 0) return 0.0; @@ -363,10 +362,10 @@ double MDBalancer::try_match(mds_rank_t ex, double& maxex, dout(5) << " - mds." << ex << " exports " << howmuch << " to mds." << im << dendl; if (ex == mds->get_nodeid()) - my_targets[im] += howmuch; + state.targets[im] += howmuch; - exported[ex] += howmuch; - imported[im] += howmuch; + state.exported[ex] += howmuch; + state.imported[im] += howmuch; maxex -= howmuch; maxim -= howmuch; @@ -495,25 +494,20 @@ void MDBalancer::queue_merge(CDir *dir) void MDBalancer::prep_rebalance(int beat) { + balance_state_t state; + if (g_conf->mds_thrash_exports) { //we're going to randomly export to all the mds in the cluster - my_targets.clear(); set up_mds; mds->get_mds_map()->get_up_mds_set(up_mds); - for (set::iterator i = up_mds.begin(); - i != up_mds.end(); - ++i) - my_targets[*i] = 0.0; + for (const auto &rank : up_mds) { + state.targets[rank] = 0.0; + } } else { int cluster_size = mds->get_mds_map()->get_num_in_mds(); mds_rank_t whoami = mds->get_nodeid(); rebalance_time = ceph_clock_now(); - // reset - my_targets.clear(); - imported.clear(); - exported.clear(); - dout(5) << " prep_rebalance: cluster loads are" << dendl; mds->mdcache->migrator->clear_export_queue(); @@ -611,17 +605,16 @@ void MDBalancer::prep_rebalance(int beat) for (multimap::reverse_iterator ex = exporters.rbegin(); ex != exporters.rend(); ++ex) { - double maxex = get_maxex(ex->second); + double maxex = get_maxex(state, ex->second); if (maxex <= .001) continue; // check importers. for now, just in arbitrary order (no intelligent matching). 
for (map::iterator im = mds_import_map[ex->second].begin(); im != mds_import_map[ex->second].end(); ++im) { - double maxim = get_maxim(im->first); + double maxim = get_maxim(state, im->first); if (maxim <= .001) continue; - try_match(ex->second, maxex, - im->first, maxim); + try_match(state, ex->second, maxex, im->first, maxim); if (maxex <= .001) break; } } @@ -635,11 +628,10 @@ void MDBalancer::prep_rebalance(int beat) multimap::iterator im = importers.begin(); while (ex != exporters.rend() && im != importers.end()) { - double maxex = get_maxex(ex->second); - double maxim = get_maxim(im->second); + double maxex = get_maxex(state, ex->second); + double maxim = get_maxim(state, im->second); if (maxex < .001 || maxim < .001) break; - try_match(ex->second, maxex, - im->second, maxim); + try_match(state, ex->second, maxex, im->second, maxim); if (maxex <= .001) ++ex; if (maxim <= .001) ++im; } @@ -650,23 +642,31 @@ void MDBalancer::prep_rebalance(int beat) multimap::iterator im = importers.begin(); while (ex != exporters.end() && im != importers.end()) { - double maxex = get_maxex(ex->second); - double maxim = get_maxim(im->second); + double maxex = get_maxex(state, ex->second); + double maxim = get_maxim(state, im->second); if (maxex < .001 || maxim < .001) break; - try_match(ex->second, maxex, - im->second, maxim); + try_match(state, ex->second, maxex, im->second, maxim); if (maxex <= .001) ++ex; if (maxim <= .001) ++im; } } } - try_rebalance(); + try_rebalance(state); } - +void MDBalancer::hit_targets(const balance_state_t& state) +{ + utime_t now = ceph_clock_now(); + for (auto &it : state.targets) { + mds_rank_t target = it.first; + mds->hit_export_target(now, target, g_conf->mds_bal_target_decay); + } +} int MDBalancer::mantle_prep_rebalance() { + balance_state_t state; + /* refresh balancer if it has changed */ if (bal_version != mds->mdsmap->get_balancer()) { bal_version.assign(""); @@ -681,9 +681,6 @@ int MDBalancer::mantle_prep_rebalance() /* prepare for balancing */ int cluster_size = mds->get_mds_map()->get_num_in_mds(); rebalance_time = ceph_clock_now(); - my_targets.clear(); - imported.clear(); - exported.clear(); mds->mdcache->migrator->clear_export_queue(); /* fill in the metrics for each mds by grabbing load struct */ @@ -704,24 +701,24 @@ int MDBalancer::mantle_prep_rebalance() /* execute the balancer */ Mantle mantle; - int ret = mantle.balance(bal_code, mds->get_nodeid(), metrics, my_targets); - dout(2) << " mantle decided that new targets=" << my_targets << dendl; + int ret = mantle.balance(bal_code, mds->get_nodeid(), metrics, state.targets); + dout(2) << " mantle decided that new targets=" << state.targets << dendl; /* mantle doesn't know about cluster size, so check target len here */ - if ((int) my_targets.size() != cluster_size) + if ((int) state.targets.size() != cluster_size) return -EINVAL; else if (ret) return ret; - try_rebalance(); + try_rebalance(state); return 0; } -void MDBalancer::try_rebalance() +void MDBalancer::try_rebalance(balance_state_t& state) { - if (!check_targets()) + if (!check_targets(state)) return; if (g_conf->mds_thrash_exports) { @@ -765,11 +762,9 @@ void MDBalancer::try_rebalance() // do my exports! 
set already_exporting; - for (map::iterator it = my_targets.begin(); - it != my_targets.end(); - ++it) { - mds_rank_t target = (*it).first; - double amount = (*it).second; + for (auto &it : state.targets) { + mds_rank_t target = it.first; + double amount = it.second; if (amount < MIN_OFFLOAD) continue; if (amount / target_load < .2) continue; @@ -877,62 +872,15 @@ void MDBalancer::try_rebalance() } -/* returns true if all my_target MDS are in the MDSMap. - */ -bool MDBalancer::check_targets() +/* Check that all targets are in the MDSMap export_targets for my rank. */ +bool MDBalancer::check_targets(const balance_state_t& state) { - // get MonMap's idea of my_targets - const set& map_targets = mds->mdsmap->get_mds_info(mds->get_nodeid()).export_targets; - - bool send = false; - bool ok = true; - - // make sure map targets are in the old_prev_targets map - for (set::iterator p = map_targets.begin(); p != map_targets.end(); ++p) { - if (old_prev_targets.count(*p) == 0) - old_prev_targets[*p] = 0; - if (my_targets.count(*p) == 0) - old_prev_targets[*p]++; - } - - // check if the current MonMap has all our targets - set need_targets; - for (map::iterator i = my_targets.begin(); - i != my_targets.end(); - ++i) { - need_targets.insert(i->first); - old_prev_targets[i->first] = 0; - - if (!map_targets.count(i->first)) { - dout(20) << " target mds." << i->first << " not in map's export_targets" << dendl; - send = true; - ok = false; - } - } - - set want_targets = need_targets; - map::iterator p = old_prev_targets.begin(); - while (p != old_prev_targets.end()) { - if (map_targets.count(p->first) == 0 && - need_targets.count(p->first) == 0) { - old_prev_targets.erase(p++); - continue; + for (const auto &it : state.targets) { + if (!mds->is_export_target(it.first)) { + return false; } - dout(20) << " target mds." << p->first << " has been non-target for " << p->second << dendl; - if (p->second < g_conf->mds_bal_target_removal_min) - want_targets.insert(p->first); - if (p->second >= g_conf->mds_bal_target_removal_max) - send = true; - ++p; - } - - dout(10) << "check_targets have " << map_targets << " need " << need_targets << " want " << want_targets << dendl; - - if (send) { - MMDSLoadTargets* m = new MMDSLoadTargets(mds_gid_t(mon_client->get_global_id()), want_targets); - mon_client->send_mon_message(m); } - return ok; + return true; } void MDBalancer::find_exports(CDir *dir, diff --git a/src/mds/MDBalancer.h b/src/mds/MDBalancer.h index b62b0bea0565..ca5f290657eb 100644 --- a/src/mds/MDBalancer.h +++ b/src/mds/MDBalancer.h @@ -36,14 +36,14 @@ class MonClient; class MDBalancer { friend class C_Bal_SendHeartbeat; - public: MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc) : mds(m), messenger(msgr), mon_client(monc), beat_epoch(0), - last_epoch_under(0), last_epoch_over(0), my_load(0.0), target_load(0.0) { } + last_epoch_under(0), last_epoch_over(0), my_load(0.0), target_load(0.0) + { } mds_load_t get_load(utime_t); @@ -56,15 +56,6 @@ public: */ void tick(); - /** - * Try to rebalance after receiving monitor mdsmap update. - * - * Check if the monitor has recorded the current export targets; - * if it has then do the actual export. Otherwise send off our - * export targets message again. 
- */ - void try_rebalance(); - void subtract_export(CDir *ex, utime_t now); void add_import(CDir *im, utime_t now); @@ -83,6 +74,12 @@ public: void maybe_fragment(CDir *dir, bool hot); private: + typedef struct { + std::map targets; + std::map imported; + std::map exported; + } balance_state_t; + //set up the rebalancing targets for export and do one if the //MDSMap is up to date void prep_rebalance(int beat); @@ -90,7 +87,8 @@ private: void export_empties(); int localize_balancer(); - bool check_targets(); + bool check_targets(const balance_state_t& state); + void hit_targets(const balance_state_t& state); void send_heartbeat(); void handle_heartbeat(MHeartbeat *m); void find_exports(CDir *dir, @@ -99,15 +97,26 @@ private: double& have, set& already_exporting); - double try_match(mds_rank_t ex, double& maxex, + double try_match(balance_state_t &state, + mds_rank_t ex, double& maxex, mds_rank_t im, double& maxim); - double get_maxim(mds_rank_t im) { - return target_load - mds_meta_load[im] - imported[im]; + + double get_maxim(balance_state_t &state, mds_rank_t im) { + return target_load - mds_meta_load[im] - state.imported[im]; } - double get_maxex(mds_rank_t ex) { - return mds_meta_load[ex] - target_load - exported[ex]; + double get_maxex(balance_state_t &state, mds_rank_t ex) { + return mds_meta_load[ex] - target_load - state.exported[ex]; } + /** + * Try to rebalance. + * + * Check if the monitor has recorded the current export targets; + * if it has then do the actual export. Otherwise send off our + * export targets message again. + */ + void try_rebalance(balance_state_t& state); + MDSRank *mds; Messenger *messenger; MonClient *mon_client; @@ -135,11 +144,6 @@ private: // per-epoch state double my_load, target_load; - map my_targets; - map imported; - map exported; - - map old_prev_targets; // # iterations they _haven't_ been targets }; #endif diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 49577fc99b88..faaaef32110f 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -8998,7 +8998,7 @@ void MDCache::dispatch_request(MDRequestRef& mdr) dispatch_fragment_dir(mdr); break; case CEPH_MDS_OP_EXPORTDIR: - migrator->dispatch_export_dir(mdr); + migrator->dispatch_export_dir(mdr, 0); break; case CEPH_MDS_OP_ENQUEUE_SCRUB: enqueue_scrub_work(mdr); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 9652e26d9ea5..4d0dc1c0c5ba 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -16,6 +16,7 @@ #include "common/errno.h" #include "messages/MClientRequestForward.h" +#include "messages/MMDSLoadTargets.h" #include "messages/MMDSMap.h" #include "messages/MMDSTableRequest.h" #include "messages/MCommand.h" @@ -180,6 +181,62 @@ void MDSRankDispatcher::init() finisher->start(); } +void MDSRank::update_targets(utime_t now) +{ + // get MonMap's idea of my export_targets + const set& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; + + dout(20) << "updating export targets, currently " << map_targets.size() << " ranks are targets" << dendl; + + bool send = false; + set new_map_targets; + + auto it = export_targets.begin(); + while (it != export_targets.end()) { + mds_rank_t rank = it->first; + double val = it->second.get(now); + dout(20) << "export target mds." << rank << " value is " << val << " @ " << now << dendl; + + if (val <= 0.01) { + dout(15) << "export target mds." << rank << " is no longer an export target" << dendl; + export_targets.erase(it++); + send = true; + continue; + } + if (!map_targets.count(rank)) { + dout(15) << "export target mds." 
<< rank << " not in map's export_targets" << dendl; + send = true; + } + new_map_targets.insert(rank); + it++; + } + if (new_map_targets.size() < map_targets.size()) { + dout(15) << "export target map holds stale targets, sending update" << dendl; + send = true; + } + + if (send) { + dout(15) << "updating export_targets, now " << new_map_targets.size() << " ranks are targets" << dendl; + MMDSLoadTargets* m = new MMDSLoadTargets(mds_gid_t(monc->get_global_id()), new_map_targets); + monc->send_mon_message(m); + } +} + +void MDSRank::hit_export_target(utime_t now, mds_rank_t rank, double amount) +{ + double rate = g_conf->mds_bal_target_decay; + if (amount < 0.0) { + amount = 100.0/g_conf->mds_bal_target_decay; /* a good default for "i am trying to keep this export_target active" */ + } + auto em = export_targets.emplace(std::piecewise_construct, std::forward_as_tuple(rank), std::forward_as_tuple(now, DecayRate(rate))); + if (em.second) { + dout(15) << "hit export target (new) " << amount << " @ " << now << dendl; + } else { + dout(15) << "hit export target " << amount << " @ " << now << dendl; + } + em.first->second.hit(now, amount); +} + void MDSRankDispatcher::tick() { heartbeat_reset(); @@ -206,8 +263,7 @@ void MDSRankDispatcher::tick() } // log - utime_t now = ceph_clock_now(); - mds_load_t load = balancer->get_load(now); + mds_load_t load = balancer->get_load(ceph_clock_now()); if (logger) { logger->set(l_mds_load_cent, 100 * load.mds_load()); @@ -228,6 +284,7 @@ void MDSRankDispatcher::tick() if (is_active()) { balancer->tick(); + update_targets(ceph_clock_now()); mdcache->find_stale_fragment_freeze(); mdcache->migrator->find_stale_export_freeze(); if (snapserver) @@ -1635,9 +1692,6 @@ void MDSRankDispatcher::handle_mds_map( mdcache->migrator->handle_mds_failure_or_stop(*p); } - if (!is_any_replay()) - balancer->try_rebalance(); - { map >::iterator p = waiting_for_mdsmap.begin(); while (p != waiting_for_mdsmap.end() && p->first <= mdsmap->get_epoch()) { diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 8c0802561b69..e53ece154d9d 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -15,9 +15,10 @@ #ifndef MDS_RANK_H_ #define MDS_RANK_H_ -#include "common/TrackedOp.h" +#include "common/DecayCounter.h" #include "common/LogClient.h" #include "common/Timer.h" +#include "common/TrackedOp.h" #include "messages/MCommand.h" @@ -271,6 +272,8 @@ class MDSRank { void bcast_mds_map(); // to mounted clients epoch_t last_client_mdsmap_bcast; + map export_targets; /* targets this MDS is exporting to or wants/tries to */ + void create_logger(); public: @@ -397,6 +400,12 @@ class MDSRank { void dump_status(Formatter *f) const; + void hit_export_target(utime_t now, mds_rank_t rank, double amount=-1.0); + bool is_export_target(mds_rank_t rank) { + const set& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; + return map_targets.count(rank); + } + protected: void dump_clientreplay_status(Formatter *f) const; void command_scrub_path(Formatter *f, const string& path, vector& scrubop_vec); @@ -486,6 +495,9 @@ class MDSRank { void handle_mds_recovery(mds_rank_t who); void handle_mds_failure(mds_rank_t who); // <<< + + /* Update MDSMap export_targets for this rank. Called on ::tick(). */ + void update_targets(utime_t now); }; /* This expects to be given a reference which it is responsible for. 
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 9a62999cf4e5..0d9a5c15235b 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -757,6 +757,18 @@ void Migrator::get_export_lock_set(CDir *dir, set& locks) } +class C_M_ExportTargetWait : public MigratorContext { + MDRequestRef mdr; + int count; +public: + C_M_ExportTargetWait(Migrator *m, MDRequestRef mdr, int count) + : MigratorContext(m), mdr(mdr), count(count) {} + void finish(int r) override { + mig->dispatch_export_dir(mdr, count); + } +}; + + /** export_dir(dir, dest) * public method to initiate an export. * will fail if the directory is freezing, frozen, unpinnable, or root. @@ -797,6 +809,8 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) return; } + mds->hit_export_target(ceph_clock_now(), dest, -1); + dir->auth_pin(this); dir->state_set(CDir::STATE_EXPORTING); @@ -813,11 +827,11 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) return mds->mdcache->dispatch_request(mdr); } -void Migrator::dispatch_export_dir(MDRequestRef& mdr) +void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) { dout(7) << "dispatch_export_dir " << *mdr << dendl; - CDir *dir = mdr->more()->export_dir; + CDir *dir = mdr->more()->export_dir; map::iterator it = export_state.find(dir); if (it == export_state.end() || it->second.tid != mdr->reqid.tid) { // export must have aborted. @@ -827,6 +841,19 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr) } assert(it->second.state == EXPORT_LOCKING); + mds_rank_t dest = it->second.peer; + + if (!mds->is_export_target(dest)) { + dout(7) << "dest is not yet an export target" << dendl; + if (count > 3) { + dout(5) << "dest has not been added as export target after three MDSMap epochs, canceling export" << dendl; + mds->mdcache->request_finish(mdr); + return; + } + mds->wait_for_mdsmap(mds->mdsmap->get_epoch(), new C_M_ExportTargetWait(this, mdr, count+1)); + return; + } + if (mdr->aborted || dir->is_frozen() || dir->is_freezing()) { dout(7) << "wouldblock|freezing|frozen, canceling export" << dendl; export_try_cancel(dir); @@ -867,7 +894,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr) MExportDirDiscover *discover = new MExportDirDiscover(dir->dirfrag(), path, mds->get_nodeid(), it->second.tid); - mds->send_message_mds(discover, it->second.peer); + mds->send_message_mds(discover, dest); assert(g_conf->mds_kill_export_at != 2); it->second.last_cum_auth_pins_change = ceph_clock_now(); @@ -887,15 +914,19 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr) void Migrator::handle_export_discover_ack(MExportDirDiscoverAck *m) { CDir *dir = cache->get_dirfrag(m->get_dirfrag()); + mds_rank_t dest(m->get_source().num()); + utime_t now = ceph_clock_now(); assert(dir); dout(7) << "export_discover_ack from " << m->get_source() << " on " << *dir << dendl; + mds->hit_export_target(now, dest, -1); + map::iterator it = export_state.find(dir); if (it == export_state.end() || it->second.tid != m->get_tid() || - it->second.peer != mds_rank_t(m->get_source().num())) { + it->second.peer != dest) { dout(7) << "must have aborted" << dendl; } else { assert(it->second.state == EXPORT_DISCOVERING); @@ -1150,10 +1181,14 @@ void Migrator::get_export_client_set(CInode *in, set& client_set) void Migrator::handle_export_prep_ack(MExportDirPrepAck *m) { CDir *dir = cache->get_dirfrag(m->get_dirfrag()); + mds_rank_t dest(m->get_source().num()); + utime_t now = ceph_clock_now(); assert(dir); dout(7) << "export_prep_ack " << *dir << dendl; + mds->hit_export_target(now, dest, -1); + 
map::iterator it = export_state.find(dir); if (it == export_state.end() || it->second.tid != m->get_tid() || @@ -1238,7 +1273,6 @@ void Migrator::export_go(CDir *dir) void Migrator::export_go_synced(CDir *dir, uint64_t tid) { - map::iterator it = export_state.find(dir); if (it == export_state.end() || it->second.state == EXPORT_CANCELLING || @@ -1248,8 +1282,9 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) return; } assert(it->second.state == EXPORT_WARNING); + mds_rank_t dest = it->second.peer; - dout(7) << "export_go_synced " << *dir << " to " << it->second.peer << dendl; + dout(7) << "export_go_synced " << *dir << " to " << dest << dendl; cache->show_subtrees(); @@ -1260,7 +1295,7 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) assert(dir->get_cum_auth_pins() == 0); // set ambiguous auth - cache->adjust_subtree_auth(dir, mds->get_nodeid(), it->second.peer); + cache->adjust_subtree_auth(dir, mds->get_nodeid(), dest); // take away the popularity we're sending. utime_t now = ceph_clock_now(); @@ -1285,9 +1320,11 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) req->add_export((*p)->dirfrag()); // send - mds->send_message_mds(req, it->second.peer); + mds->send_message_mds(req, dest); assert(g_conf->mds_kill_export_at != 8); + mds->hit_export_target(now, dest, num_exported_inodes+1); + // stats if (mds->logger) mds->logger->inc(l_mds_exported); if (mds->logger) mds->logger->inc(l_mds_exported_inodes, num_exported_inodes); @@ -1590,12 +1627,16 @@ public: void Migrator::handle_export_ack(MExportDirAck *m) { CDir *dir = cache->get_dirfrag(m->get_dirfrag()); + mds_rank_t dest(m->get_source().num()); + utime_t now = ceph_clock_now(); assert(dir); assert(dir->is_frozen_tree_root()); // i'm exporting! // yay! dout(7) << "handle_export_ack " << *dir << dendl; + mds->hit_export_target(now, dest, -1); + map::iterator it = export_state.find(dir); assert(it != export_state.end()); assert(it->second.state == EXPORT_EXPORTING); @@ -1797,9 +1838,13 @@ void Migrator::export_logged_finish(CDir *dir) void Migrator::handle_export_notify_ack(MExportDirNotifyAck *m) { CDir *dir = cache->get_dirfrag(m->get_dirfrag()); + mds_rank_t dest(m->get_source().num()); + utime_t now = ceph_clock_now(); assert(dir); mds_rank_t from = mds_rank_t(m->get_source().num()); + mds->hit_export_target(now, dest, -1); + auto export_state_entry = export_state.find(dir); if (export_state_entry != export_state.end()) { export_state_t& stat = export_state_entry->second; diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index 3e1df9d43454..d14561920a41 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -284,7 +284,7 @@ public: // -- import/export -- // exporter - void dispatch_export_dir(MDRequestRef& mdr); + void dispatch_export_dir(MDRequestRef& mdr, int count); void export_dir(CDir *dir, mds_rank_t dest); void export_empty_import(CDir *dir);
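
As a rough illustration of the bookkeeping described in the commit message
(implemented in the patch with DecayCounter inside MDSRank), here is a small
self-contained C++ sketch. It deliberately does not use Ceph's
DecayCounter/DecayRate/utime_t/mds_rank_t types; DecayingCounter and
ExportTargets are hypothetical stand-ins that assume a simple exponential
half-life decay. Only the 100.0/mds_bal_target_decay default hit and the
0.01 removal threshold are taken from the patch itself.

// Minimal standalone model of decayed export-target tracking, assuming an
// exponential decay with a configurable half-life (mds_bal_target_decay,
// default 10.0 seconds). Not Ceph's actual DecayCounter API.
#include <chrono>
#include <cmath>
#include <map>

using Clock = std::chrono::steady_clock;

struct DecayingCounter {
  double value = 0.0;
  Clock::time_point last = Clock::now();
  double half_life;                        // seconds

  explicit DecayingCounter(double hl) : half_life(hl) {}

  // Decay the stored value up to "now", then return it.
  double get(Clock::time_point now) {
    double dt = std::chrono::duration<double>(now - last).count();
    value *= std::exp2(-dt / half_life);   // halves every half_life seconds
    last = now;
    return value;
  }

  void hit(Clock::time_point now, double amount) {
    get(now);                              // decay first, then add
    value += amount;
  }
};

struct ExportTargets {
  double half_life;
  std::map<int, DecayingCounter> targets;  // rank -> decaying interest

  explicit ExportTargets(double hl) : half_life(hl) {}

  // Analogous to MDSRank::hit_export_target(): amount < 0 means
  // "keep this rank an export target for a while" (the default hit).
  void hit(int rank, double amount = -1.0) {
    if (amount < 0.0)
      amount = 100.0 / half_life;          // 10.0 with the default config
    auto it = targets.emplace(rank, DecayingCounter(half_life)).first;
    it->second.hit(Clock::now(), amount);
  }

  // Analogous to the pruning in MDSRank::update_targets(): drop ranks
  // whose counter has decayed below the 0.01 threshold used in the patch.
  void prune() {
    auto now = Clock::now();
    for (auto it = targets.begin(); it != targets.end(); ) {
      if (it->second.get(now) <= 0.01)
        it = targets.erase(it);
      else
        ++it;
    }
  }
};

As a rough consistency check of the "about 2 minutes" observation above: with
the default half-life of 10 s, a single default hit stores 100.0/10.0 = 10.0,
which decays below the 0.01 removal threshold after roughly
10 * log2(10.0/0.01) ≈ 100 seconds.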
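
The other behavioral change, the bounded wait in dispatch_export_dir(), can be
summarized by the following sketch. The MapService callbacks are hypothetical
stand-ins for the MDSMap/wait_for_mdsmap plumbing; only the "retry on the next
map epoch a few times, then cancel" policy mirrors the patch.

// Minimal sketch of the bounded wait before an export is dispatched: if the
// destination rank is not yet in the MDSMap's export_targets, retry once the
// next map epoch arrives and give up after a few tries.
#include <functional>

struct MapService {                        // hypothetical stand-in for MDSMap plumbing
  std::function<bool(int)> is_export_target;            // is this rank listed?
  std::function<void(std::function<void()>)> wait_for_next_epoch;
};

enum class ExportDecision { Proceed, Waiting, Cancelled };

// 'count' is how many MDSMap epochs we have already waited for.
ExportDecision dispatch_export(MapService& map, int dest, int count,
                               std::function<void(int)> retry) {
  if (!map.is_export_target(dest)) {
    if (count > 3) {
      // The target never appeared in export_targets; cancel so the
      // export request does not hang forever.
      return ExportDecision::Cancelled;
    }
    // Re-run with count+1 once the next MDSMap epoch is received,
    // mirroring C_M_ExportTargetWait in the patch.
    map.wait_for_next_epoch([retry, count] { retry(count + 1); });
    return ExportDecision::Waiting;
  }
  return ExportDecision::Proceed;          // safe to send MExportDirDiscover
}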