From: Yan, Zheng Date: Tue, 17 Jul 2018 06:32:24 +0000 (+0800) Subject: mds: export subtree part by part X-Git-Tag: v13.2.3~115^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=767efe0ff5122f7b684f35ff663fac4ed561eb9c;p=ceph.git mds: export subtree part by part When exporting a large subtree, the migrator may export only some portions of the subtree. This patch makes the migrator continue to export the remaining portions when the previous operations finish. Signed-off-by: "Yan, Zheng" (cherry picked from commit da7fc1ad2cf08d1a7cc5754257a8009181b58ff7) --- diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 287afce65850..0a6f06e19b60 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -392,6 +392,7 @@ void Migrator::export_cancel_finish(export_state_iterator& it) { CDir *dir = it->first; bool unpin = (it->second.state == EXPORT_CANCELLING); + auto parent = std::move(it->second.parent); total_exporting_size -= it->second.approx_size; export_state.erase(it); @@ -405,6 +406,9 @@ void Migrator::export_cancel_finish(export_state_iterator& it) } // send pending import_maps? (these need to go out when all exports have finished.) cache->maybe_send_pending_resolves(); + + if (parent) + child_export_finish(parent, false); } // ========================================================== @@ -878,7 +882,8 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) * check if directory is too large to be export in whole. If it is, * choose some subdirs, whose total size is suitable. 
*/ -void Migrator::maybe_split_export(CDir* dir, vector >& results) +void Migrator::maybe_split_export(CDir* dir, uint64_t max_size, bool null_okay, + vector >& results) { static const unsigned frag_size = 800; static const unsigned inode_size = 1000; @@ -903,7 +908,6 @@ void Migrator::maybe_split_export(CDir* dir, vector >& resul vector stack; stack.emplace_back(dir); - uint64_t max_size = max_export_size; size_t found_size = 0; size_t skipped_size = 0; @@ -1007,7 +1011,7 @@ void Migrator::maybe_split_export(CDir* dir, vector >& resul for (auto& p : stack) results.insert(results.end(), p.subdirs.begin(), p.subdirs.end()); - if (results.empty()) + if (results.empty() && (!skipped_size || !null_okay)) results.emplace_back(dir, found_size + skipped_size); } @@ -1096,8 +1100,10 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) assert(g_conf->mds_kill_export_at != 1); + auto parent = it->second.parent; + vector > results; - maybe_split_export(dir, results); + maybe_split_export(dir, max_export_size, (bool)parent, results); if (results.size() == 1 && results.front().first == dir) { num_locking_exports--; @@ -1122,7 +1128,21 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) return; } - dout(7) << "subtree is too large, splitting it into: " << dendl; + if (parent) { + parent->pending_children += results.size(); + } else { + parent = std::make_shared(dir->dirfrag(), dest, + results.size(), export_queue_gen); + } + + if (results.empty()) { + dout(7) << "subtree's children all are under exporting, retry rest parts of parent export " + << parent->dirfrag << dendl; + parent->restart = true; + } else { + dout(7) << "subtree is too large, splitting it into: " << dendl; + } + for (auto& p : results) { CDir *sub = p.first; assert(sub != dir); @@ -1141,6 +1161,7 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) stat.peer = dest; stat.tid = _mdr->reqid.tid; stat.mut = _mdr; + stat.parent = parent; 
mds->mdcache->dispatch_request(_mdr); } @@ -1148,6 +1169,22 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) export_try_cancel(dir); } +void Migrator::child_export_finish(std::shared_ptr& parent, bool success) +{ + if (success) + parent->restart = true; + if (--parent->pending_children == 0) { + if (parent->restart && + parent->export_queue_gen == export_queue_gen) { + CDir *origin = mds->mdcache->get_dirfrag(parent->dirfrag); + if (origin && origin->is_auth()) { + dout(7) << "child_export_finish requeue " << *origin << dendl; + export_queue.emplace_front(origin->dirfrag(), parent->dest); + } + } + } +} + /* * called on receipt of MExportDirDiscoverAck * the importer now has the directory's _inode_ in memory, and pinned. @@ -2183,7 +2220,8 @@ void Migrator::export_finish(CDir *dir) if (!finished.empty()) mds->queue_waiters(finished); - MutationRef mut = it->second.mut; + MutationRef mut = std::move(it->second.mut); + auto parent = std::move(it->second.parent); // remove from exporting list, clean up state total_exporting_size -= it->second.approx_size; export_state.erase(it); @@ -2204,7 +2242,10 @@ void Migrator::export_finish(CDir *dir) mds->locker->drop_locks(mut.get()); mut->cleanup(); } - + + if (parent) + child_export_finish(parent, true); + maybe_do_queued_export(); } diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index 5fa93e963863..caee7f074c0a 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -109,6 +109,16 @@ public: const MDSMap &mds_map); protected: + struct export_base_t { + dirfrag_t dirfrag; + mds_rank_t dest; + unsigned pending_children; + uint64_t export_queue_gen; + bool restart = false; + export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) : + dirfrag(df), dest(d), pending_children(c), export_queue_gen(g) {} + }; + // export fun struct export_state_t { int state = 0; @@ -124,6 +134,8 @@ protected: int last_cum_auth_pins = 0; int num_remote_waiters = 0; // number of remote authpin waiters 
export_state_t() {} + + std::shared_ptr parent; }; map export_state; typedef map::iterator export_state_iterator; @@ -132,6 +144,7 @@ protected: unsigned num_locking_exports = 0; // exports in locking state (approx_size == 0) list > export_queue; + uint64_t export_queue_gen = 1; // import fun struct import_state_t { @@ -297,9 +310,12 @@ public: void maybe_do_queued_export(); void clear_export_queue() { export_queue.clear(); + export_queue_gen++; } - void maybe_split_export(CDir* dir, vector >& results); + void maybe_split_export(CDir* dir, uint64_t max_size, bool null_okay, + vector >& results); + void child_export_finish(std::shared_ptr& parent, bool success); void get_export_lock_set(CDir *dir, set& locks); void get_export_client_set(CDir *dir, set &client_set); void get_export_client_set(CInode *in, set &client_set);