From b43af152bab2c9f67fe311ba9450e06fd41e82e4 Mon Sep 17 00:00:00 2001 From: Simon Gao Date: Fri, 15 May 2020 17:19:14 +0800 Subject: [PATCH] mds: change scrub traverse from post-order to breadth-first search After using breadth-first search, scrubbing a dir inode does not need to wait until all of its descendant dirfrags/inodes are scrubbed. This simplifies scrub code a lot. The downside is that a scrubbed dir inode no longer implies that the corresponding subtree has been fully scrubbed. This makes later scrubs (without the force option) less efficient. Signed-off-by: Simon Gao Signed-off-by: "Yan, Zheng" --- src/mds/CDir.cc | 209 +++---------------------------- src/mds/CDir.h | 77 ++---------- src/mds/CInode.cc | 192 +++-------------------------- src/mds/CInode.h | 71 ++--------- src/mds/ScrubStack.cc | 278 +++++++++++++----------------------------- src/mds/ScrubStack.h | 51 ++------ 6 files changed, 140 insertions(+), 738 deletions(-) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index cab73b2352e..5073c6b4ea3 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2049,11 +2049,6 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, wanted_items.clear(); mark_complete(); state_clear(STATE_FETCHING); - - if (scrub_infop && scrub_infop->need_scrub_local) { - scrub_infop->need_scrub_local = false; - scrub_local(); - } } // open & force frags @@ -3469,10 +3464,8 @@ void CDir::scrub_info_create() const std::unique_ptr si(new scrub_info_t()); - si->last_recursive.version = si->recursive_start.version = - pf->recursive_scrub_version; - si->last_recursive.time = si->recursive_start.time = - pf->recursive_scrub_stamp; + si->last_recursive.version = pf->recursive_scrub_version; + si->last_recursive.time = pf->recursive_scrub_stamp; si->last_local.version = pf->localized_scrub_version; si->last_local.time = pf->localized_scrub_stamp; @@ -3488,227 +3481,55 @@ void CDir::scrub_initialize(const ScrubHeaderRef& header, MDSContext* f) scrub_info(); scrub_infop->header = header; 
scrub_infop->on_finish = f; -} - -void CDir::scrub_initialize_data() -{ - dout(20) << __func__ << dendl; - ceph_assert(is_complete()); - - // FIXME: weird implicit construction, is someone else meant - // to be calling scrub_info_create first? - scrub_info(); - ceph_assert(scrub_infop && !scrub_infop->directory_scrubbing); - - scrub_infop->recursive_start.version = get_projected_version(); - scrub_infop->recursive_start.time = ceph_clock_now(); - - scrub_infop->directories_to_scrub.clear(); - scrub_infop->directories_scrubbing.clear(); - scrub_infop->directories_scrubbed.clear(); - scrub_infop->others_to_scrub.clear(); - scrub_infop->others_scrubbing.clear(); - scrub_infop->others_scrubbed.clear(); - - for (auto i = items.begin(); - i != items.end(); - ++i) { - // TODO: handle snapshot scrubbing - if (i->first.snapid != CEPH_NOSNAP) - continue; - - CDentry::linkage_t *dnl = i->second->get_projected_linkage(); - if (dnl->is_primary()) { - if (dnl->get_inode()->is_dir()) - scrub_infop->directories_to_scrub.insert(i->first); - else - scrub_infop->others_to_scrub.insert(i->first); - } else if (dnl->is_remote()) { - // TODO: check remote linkage - } - } scrub_infop->directory_scrubbing = true; } void CDir::scrub_aborted(MDSContext **c) { dout(20) << __func__ << dendl; - ceph_assert(scrub_infop && scrub_infop->directory_scrubbing); + ceph_assert(scrub_is_in_progress()); *c = scrub_infop->on_finish; scrub_infop->on_finish = nullptr; scrub_infop->directory_scrubbing = false; - scrub_infop->need_scrub_local = false; scrub_infop->last_scrub_dirty = false; - scrub_infop->pending_scrub_error = false; - scrub_infop->dirty_scrub_stamps.clear(); scrub_infop.reset(); } void CDir::scrub_finished(MDSContext **c) { dout(20) << __func__ << dendl; - ceph_assert(scrub_infop && scrub_infop->directory_scrubbing); - - ceph_assert(scrub_infop->directories_to_scrub.empty()); - ceph_assert(scrub_infop->directories_scrubbing.empty()); - scrub_infop->directories_scrubbed.clear(); - 
ceph_assert(scrub_infop->others_to_scrub.empty()); - ceph_assert(scrub_infop->others_scrubbing.empty()); - scrub_infop->others_scrubbed.clear(); + ceph_assert(scrub_is_in_progress()); scrub_infop->directory_scrubbing = false; - scrub_infop->last_recursive = scrub_infop->recursive_start; + scrub_infop->last_local.time = ceph_clock_now(); + scrub_infop->last_local.version = get_version(); + if (scrub_infop->header->get_recursive()) + scrub_infop->last_recursive = scrub_infop->last_local; + scrub_infop->last_scrub_dirty = true; *c = scrub_infop->on_finish; scrub_infop->on_finish = nullptr; } -int CDir::_next_dentry_on_set(dentry_key_set &dns, bool missing_okay, - MDSContext *cb, CDentry **dnout) -{ - dentry_key_t dnkey; - CDentry *dn; - - while (!dns.empty()) { - set::iterator front = dns.begin(); - dnkey = *front; - dn = lookup(dnkey.name); - if (!dn) { - if (!is_complete() && - (!has_bloom() || is_in_bloom(dnkey.name))) { - // need to re-read this dirfrag - fetch(cb); - return EAGAIN; - } - // okay, we lost it - if (missing_okay) { - dout(15) << " we no longer have directory dentry " - << dnkey.name << ", assuming it got renamed" << dendl; - dns.erase(dnkey); - continue; - } else { - dout(5) << " we lost dentry " << dnkey.name - << ", bailing out because that's impossible!" 
<< dendl; - ceph_abort(); - } - } - // okay, we got a dentry - dns.erase(dnkey); - - if (dn->get_projected_version() < scrub_infop->last_recursive.version && - !(scrub_infop->header->get_force())) { - dout(15) << " skip dentry " << dnkey.name - << ", no change since last scrub" << dendl; - continue; - } - - if (!dn->get_linkage()->is_primary()) { - dout(15) << " skip dentry " << dnkey.name - << ", no longer primary" << dendl; - continue; - } - - *dnout = dn; - return 0; - } - *dnout = NULL; - return ENOENT; -} - -int CDir::scrub_dentry_next(MDSContext *cb, CDentry **dnout) -{ - dout(20) << __func__ << dendl; - ceph_assert(scrub_infop && scrub_infop->directory_scrubbing); - - dout(20) << "trying to scrub directories underneath us" << dendl; - int rval = _next_dentry_on_set(scrub_infop->directories_to_scrub, true, - cb, dnout); - if (rval == 0) { - dout(20) << __func__ << " inserted to directories scrubbing: " - << *dnout << dendl; - scrub_infop->directories_scrubbing.insert((*dnout)->key()); - } else if (rval == EAGAIN) { - // we don't need to do anything else - } else { // we emptied out the directory scrub set - ceph_assert(rval == ENOENT); - dout(20) << "no directories left, moving on to other kinds of dentries" - << dendl; - - rval = _next_dentry_on_set(scrub_infop->others_to_scrub, false, cb, dnout); - if (rval == 0) { - dout(20) << __func__ << " inserted to others scrubbing: " - << *dnout << dendl; - scrub_infop->others_scrubbing.insert((*dnout)->key()); - } - } - dout(20) << " returning " << rval << " with dn=" << *dnout << dendl; - return rval; -} - -std::vector CDir::scrub_dentries_scrubbing() -{ - dout(20) << __func__ << dendl; - ceph_assert(scrub_infop && scrub_infop->directory_scrubbing); - - std::vector result; - for (auto& scrub_info : scrub_infop->directories_scrubbing) { - CDentry *d = lookup(scrub_info.name, scrub_info.snapid); - ceph_assert(d); - result.push_back(d); - } - for (auto& scrub_info : scrub_infop->others_scrubbing) { - CDentry *d = 
lookup(scrub_info.name, scrub_info.snapid); - ceph_assert(d); - result.push_back(d); - } - return result; -} - -void CDir::scrub_dentry_finished(CDentry *dn) -{ - dout(20) << __func__ << " on dn " << *dn << dendl; - ceph_assert(scrub_infop && scrub_infop->directory_scrubbing); - dentry_key_t dn_key = dn->key(); - if (scrub_infop->directories_scrubbing.erase(dn_key)) { - scrub_infop->directories_scrubbed.insert(dn_key); - } else { - ceph_assert(scrub_infop->others_scrubbing.count(dn_key)); - scrub_infop->others_scrubbing.erase(dn_key); - scrub_infop->others_scrubbed.insert(dn_key); - } -} - void CDir::scrub_maybe_delete_info() { if (scrub_infop && !scrub_infop->directory_scrubbing && - !scrub_infop->need_scrub_local && - !scrub_infop->last_scrub_dirty && - !scrub_infop->pending_scrub_error && - scrub_infop->dirty_scrub_stamps.empty()) { + !scrub_infop->last_scrub_dirty) scrub_infop.reset(); - } } bool CDir::scrub_local() { ceph_assert(is_complete()); - bool rval = check_rstats(true); - - scrub_info(); - if (rval) { - scrub_infop->last_local.time = ceph_clock_now(); - scrub_infop->last_local.version = get_projected_version(); - scrub_infop->pending_scrub_error = false; - scrub_infop->last_scrub_dirty = true; - } else { - scrub_infop->pending_scrub_error = true; - if (scrub_infop->header->get_repair()) - mdcache->repair_dirfrag_stats(this); + bool good = check_rstats(true); + if (!good && scrub_infop->header->get_repair()) { + mdcache->repair_dirfrag_stats(this); + scrub_infop->header->set_repaired(); } - return rval; + return good; } std::string CDir::get_path() const diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 1dc6e63d2d9..db9dee02801 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -90,41 +90,19 @@ public: public: MEMPOOL_CLASS_HELPERS(); struct scrub_stamps { - version_t version; + version_t version = 0; utime_t time; - scrub_stamps() : version(0) {} - void operator=(const scrub_stamps &o) { - version = o.version; - time = o.time; - } }; - scrub_info_t() 
: - directory_scrubbing(false), - need_scrub_local(false), - last_scrub_dirty(false), - pending_scrub_error(false) {} - - /// inodes we contain with dirty scrub stamps - dentry_key_map dirty_scrub_stamps; // TODO: make use of this! + scrub_info_t() {} MDSContext *on_finish = nullptr; - scrub_stamps recursive_start; // when we last started a recursive scrub + scrub_stamps last_recursive; // when we last finished a recursive scrub scrub_stamps last_local; // when we last did a local scrub - bool directory_scrubbing; /// safety check - bool need_scrub_local; - bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode? - bool pending_scrub_error; - - /// these are lists of children in each stage of scrubbing - dentry_key_set directories_to_scrub; - dentry_key_set directories_scrubbing; - dentry_key_set directories_scrubbed; - dentry_key_set others_to_scrub; - dentry_key_set others_scrubbing; - dentry_key_set others_scrubbed; + bool directory_scrubbing = false; /// safety check + bool last_scrub_dirty = false; /// is scrub info dirty or is it flushed to fnode? ScrubHeaderRef header; }; @@ -321,41 +299,15 @@ public: * @pre The CDir is marked complete. * @post It has set up its internal scrubbing state. */ - void scrub_initialize(const ScrubHeaderRef& header, - MDSContext* f); - void scrub_initialize_data(); + void scrub_initialize(const ScrubHeaderRef& header, MDSContext* f); ScrubHeaderRef get_scrub_header() { return scrub_infop ? scrub_infop->header : nullptr; } - /** - * Get the next dentry to scrub. Gives you a CDentry* and its meaning. This - * function will give you all directory-representing dentries before any - * others. - * 0: success, you should scrub this CDentry right now - * EAGAIN: is currently fetching the next CDentry into memory for you. - * It will activate your callback when done; try again when it does! 
- * ENOENT: there are no remaining dentries to scrub - * <0: There was an unexpected error - * - * @param cb An MDSContext which will be activated only if - * we return EAGAIN via rcode, or else ignored - * @param dnout CDentry * which you should next scrub, or NULL - * @returns a value as described above - */ - int scrub_dentry_next(MDSContext *cb, CDentry **dnout); - /** - * Get the currently scrubbing dentries. When returned, the passed-in - * list will be filled with all CDentry * which have been returned - * from scrub_dentry_next() but not sent back via scrub_dentry_finished(). - */ - std::vector scrub_dentries_scrubbing(); - /** - * Report to the CDir that a CDentry has been scrubbed. Call this - * for every CDentry returned from scrub_dentry_next(). - * @param dn The CDentry which has been scrubbed. - */ - void scrub_dentry_finished(CDentry *dn); + bool scrub_is_in_progress() const { + return (scrub_infop && scrub_infop->directory_scrubbing); + } + /** * Call this once all CDentries have been scrubbed, according to * scrub_dentry_next's listing. It finalizes the scrub statistics. @@ -371,9 +323,8 @@ public: bool scrub_local(); const scrub_info_t *scrub_info() const { - if (!scrub_infop) { + if (!scrub_infop) scrub_info_create(); - } return scrub_infop.get(); } @@ -806,12 +757,6 @@ private: * Delete the scrub_infop if it's not got any useful data. */ void scrub_maybe_delete_info(); - /** - * Check the given set (presumably one of those in scrub_info_t) for the - * next key to scrub and look it up (or fail!). 
- */ - int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay, - MDSContext *cb, CDentry **dnout); void link_inode_work( CDentry *dn, CInode *in ); void unlink_inode_work( CDentry *dn ); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 94790889e88..63f6548b0ae 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -306,17 +306,9 @@ ostream& operator<<(ostream& out, const CInode& in) return out; } -ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si) -{ - out << "{scrub_start_version: " << si.scrub_start_version - << ", scrub_start_stamp: " << si.scrub_start_stamp - << ", last_scrub_version: " << si.last_scrub_version - << ", last_scrub_stamp: " << si.last_scrub_stamp; - return out; -} - CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l) : - mdcache(c), first(f), last(l), item_dirty(this), + mdcache(c), first(f), last(l), + item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this), @@ -4819,32 +4811,10 @@ next: } bool check_dirfrag_rstats() { - MDSGatherBuilder gather(g_ceph_context); - frag_vec_t leaves; - in->dirfragtree.get_leaves(leaves); - for (const auto& leaf : leaves) { - CDir *dir = in->get_or_open_dirfrag(in->mdcache, leaf); - dir->scrub_info(); - if (!dir->scrub_infop->header) - dir->scrub_infop->header = in->scrub_infop->header; - if (dir->is_complete()) { - dir->scrub_local(); - } else { - dir->scrub_infop->need_scrub_local = true; - dir->fetch(gather.new_sub(), false); - } - } - if (gather.has_subs()) { - gather.set_finisher(get_internal_callback(DIRFRAGS)); - gather.activate(); - return false; - } else { - return immediate(DIRFRAGS, 0); - } + return immediate(DIRFRAGS, 0); } bool _dirfrags(int rval) { - int frags_errors = 0; // basic reporting setup results->raw_stats.checked = true; results->raw_stats.ondisk_read_retval = rval; @@ -4865,18 +4835,6 @@ next: ceph_assert(dir->get_version() > 0); nest_info.add(dir->get_fnode()->accounted_rstat); 
dir_info.add(dir->get_fnode()->accounted_fragstat); - if (dir->scrub_infop->pending_scrub_error) { - dir->scrub_infop->pending_scrub_error = false; - if (dir->scrub_infop->header->get_repair()) { - results->raw_stats.repaired = true; - results->raw_stats.error_str - << "dirfrag(" << p.first << ") has bad stats (will be fixed); "; - } else { - results->raw_stats.error_str - << "dirfrag(" << p.first << ") has bad stats; "; - } - frags_errors++; - } } nest_info.rsubdirs++; // it gets one to account for self if (const sr_t *srnode = in->get_projected_srnode(); srnode) @@ -4896,8 +4854,6 @@ next: } goto next; } - if (frags_errors > 0) - goto next; results->raw_stats.passed = true; next: @@ -5127,11 +5083,11 @@ void CInode::scrub_info_create() const CInode *me = const_cast(this); const auto& pi = me->get_projected_inode(); - scrub_info_t *si = new scrub_info_t(); - si->scrub_start_stamp = si->last_scrub_stamp = pi->last_scrub_stamp; - si->scrub_start_version = si->last_scrub_version = pi->last_scrub_version; + std::unique_ptr si(new scrub_info_t()); + si->last_scrub_stamp = pi->last_scrub_stamp; + si->last_scrub_version = pi->last_scrub_version; - me->scrub_infop = si; + me->scrub_infop.swap(si); } void CInode::scrub_maybe_delete_info() @@ -5139,118 +5095,22 @@ void CInode::scrub_maybe_delete_info() if (scrub_infop && !scrub_infop->scrub_in_progress && !scrub_infop->last_scrub_dirty) { - delete scrub_infop; - scrub_infop = NULL; + scrub_infop.reset(); } } -void CInode::scrub_initialize(CDentry *scrub_parent, - ScrubHeaderRef& header, +void CInode::scrub_initialize(ScrubHeaderRef& header, MDSContext *f) { dout(20) << __func__ << " with scrub_version " << get_version() << dendl; - if (scrub_is_in_progress()) { - dout(20) << __func__ << " inode moved during scrub, reinitializing " - << dendl; - ceph_assert(scrub_infop->scrub_parent); - CDentry *dn = scrub_infop->scrub_parent; - CDir *dir = dn->dir; - dn->put(CDentry::PIN_SCRUBPARENT); - ceph_assert(dir->scrub_infop && 
dir->scrub_infop->directory_scrubbing); - dir->scrub_infop->directories_scrubbing.erase(dn->key()); - dir->scrub_infop->others_scrubbing.erase(dn->key()); - } - scrub_info(); - if (!scrub_infop) - scrub_infop = new scrub_info_t(); - if (get_projected_inode()->is_dir()) { - // fill in dirfrag_stamps with initial state - frag_vec_t leaves; - dirfragtree.get_leaves(leaves); - for (const auto& leaf : leaves) { - if (header->get_force()) - scrub_infop->dirfrag_stamps[leaf].reset(); - else - scrub_infop->dirfrag_stamps[leaf]; - } - } - - if (scrub_parent) - scrub_parent->get(CDentry::PIN_SCRUBPARENT); - scrub_infop->scrub_parent = scrub_parent; + scrub_info(); scrub_infop->on_finish = f; scrub_infop->scrub_in_progress = true; - scrub_infop->children_scrubbed = false; scrub_infop->header = header; - - scrub_infop->scrub_start_version = get_version(); - scrub_infop->scrub_start_stamp = ceph_clock_now(); // right now we don't handle remote inodes } -int CInode::scrub_dirfrag_next(frag_t* out_dirfrag) -{ - dout(20) << __func__ << dendl; - ceph_assert(scrub_is_in_progress()); - - if (!is_dir()) { - return -ENOTDIR; - } - - std::map::iterator i = - scrub_infop->dirfrag_stamps.begin(); - - while (i != scrub_infop->dirfrag_stamps.end()) { - if (i->second.scrub_start_version < scrub_infop->scrub_start_version) { - i->second.scrub_start_version = get_projected_version(); - i->second.scrub_start_stamp = ceph_clock_now(); - *out_dirfrag = i->first; - dout(20) << " return frag " << *out_dirfrag << dendl; - return 0; - } - ++i; - } - - dout(20) << " no frags left, ENOENT " << dendl; - return ENOENT; -} - -void CInode::scrub_dirfrags_scrubbing(frag_vec_t* out_dirfrags) -{ - ceph_assert(out_dirfrags != NULL); - ceph_assert(scrub_infop != NULL); - - out_dirfrags->clear(); - std::map::iterator i = - scrub_infop->dirfrag_stamps.begin(); - - while (i != scrub_infop->dirfrag_stamps.end()) { - if (i->second.scrub_start_version >= scrub_infop->scrub_start_version) { - if 
(i->second.last_scrub_version < scrub_infop->scrub_start_version) - out_dirfrags->push_back(i->first); - } else { - return; - } - - ++i; - } -} - -void CInode::scrub_dirfrag_finished(frag_t dirfrag) -{ - dout(20) << __func__ << " on frag " << dirfrag << dendl; - ceph_assert(scrub_is_in_progress()); - - std::map::iterator i = - scrub_infop->dirfrag_stamps.find(dirfrag); - ceph_assert(i != scrub_infop->dirfrag_stamps.end()); - - scrub_stamp_info_t &si = i->second; - si.last_scrub_stamp = si.scrub_start_stamp; - si.last_scrub_version = si.scrub_start_version; -} - void CInode::scrub_aborted(MDSContext **c) { dout(20) << __func__ << dendl; ceph_assert(scrub_is_in_progress()); @@ -5258,43 +5118,19 @@ void CInode::scrub_aborted(MDSContext **c) { *c = nullptr; std::swap(*c, scrub_infop->on_finish); - if (scrub_infop->scrub_parent) { - CDentry *dn = scrub_infop->scrub_parent; - scrub_infop->scrub_parent = NULL; - dn->dir->scrub_dentry_finished(dn); - dn->put(CDentry::PIN_SCRUBPARENT); - } - - delete scrub_infop; - scrub_infop = nullptr; + scrub_infop->scrub_in_progress = false; + scrub_maybe_delete_info(); } void CInode::scrub_finished(MDSContext **c) { dout(20) << __func__ << dendl; ceph_assert(scrub_is_in_progress()); - for (std::map::iterator i = - scrub_infop->dirfrag_stamps.begin(); - i != scrub_infop->dirfrag_stamps.end(); - ++i) { - if(i->second.last_scrub_version != i->second.scrub_start_version) { - derr << i->second.last_scrub_version << " != " - << i->second.scrub_start_version << dendl; - } - ceph_assert(i->second.last_scrub_version == i->second.scrub_start_version); - } - scrub_infop->last_scrub_version = scrub_infop->scrub_start_version; - scrub_infop->last_scrub_stamp = scrub_infop->scrub_start_stamp; + scrub_infop->last_scrub_version = get_version(); + scrub_infop->last_scrub_stamp = ceph_clock_now(); scrub_infop->last_scrub_dirty = true; scrub_infop->scrub_in_progress = false; - if (scrub_infop->scrub_parent) { - CDentry *dn = scrub_infop->scrub_parent; - 
scrub_infop->scrub_parent = NULL; - dn->dir->scrub_dentry_finished(dn); - dn->put(CDentry::PIN_SCRUBPARENT); - } - *c = scrub_infop->on_finish; scrub_infop->on_finish = NULL; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 2168284819b..e38286dbc6e 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -294,36 +294,17 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter dirfrag_stamps; // XXX not part of mempool ScrubHeaderRef header; }; @@ -444,15 +425,11 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counterheader; - } + return scrub_infop ? scrub_infop->header : nullptr; } bool scrub_is_in_progress() const { @@ -466,32 +443,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counterchildren_scrubbed = true; - } void scrub_set_finisher(MDSContext *c) { ceph_assert(!scrub_infop->on_finish); scrub_infop->on_finish = c; @@ -1290,12 +1236,11 @@ private: int num_exporting_dirs = 0; int stickydir_ref = 0; - scrub_info_t *scrub_infop = nullptr; + std::unique_ptr scrub_infop; /** @} Scrubbing and fsck */ }; std::ostream& operator<<(std::ostream& out, const CInode& in); -std::ostream& operator<<(std::ostream& out, const CInode::scrub_stamp_info_t& si); extern cinode_lock_info_t cinode_lock_info[]; extern int num_cinode_locks; diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index c7da5a1c03f..3e66f7603dd 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -58,15 +58,14 @@ void ScrubStack::dequeue(MDSCacheObject *obj) stack_size--; } -void ScrubStack::_enqueue(MDSCacheObject *obj, CDentry *parent, - ScrubHeaderRef& header, +void ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, MDSContext *on_finish, bool top) { ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock)); if (CInode *in = dynamic_cast(obj)) { dout(10) << __func__ << " with {" << *in << "}" << ", on_finish=" << on_finish << ", top=" << top << dendl; - in->scrub_initialize(parent, header, 
on_finish); + in->scrub_initialize(header, on_finish); } else if (CDir *dir = dynamic_cast(obj)) { dout(10) << __func__ << " with {" << *dir << "}" << ", on_finish=" << on_finish << ", top=" << top << dendl; @@ -99,7 +98,7 @@ void ScrubStack::enqueue(CInode *in, ScrubHeaderRef& header, scrub_origins.emplace(in); clog_scrub_summary(in); - _enqueue(in, nullptr, header, on_finish, top); + _enqueue(in, header, on_finish, top); kick_off_scrubs(); } @@ -131,10 +130,8 @@ void ScrubStack::kick_off_scrubs() dout(20) << __func__ << " entering with " << scrubs_in_progress << " in " "progress and " << stack_size << " in the stack" << dendl; - bool can_continue = true; elist::iterator it = scrub_stack.begin(); - while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress && - can_continue) { + while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress) { if (it.end()) { if (scrubs_in_progress == 0) { set_state(STATE_IDLE); @@ -147,9 +144,8 @@ void ScrubStack::kick_off_scrubs() set_state(STATE_RUNNING); if (CInode *in = dynamic_cast(*it)) { - ++it; // we have our reference, push iterator forward dout(20) << __func__ << " examining " << *in << dendl; - + ++it; if (!in->is_dir()) { // it's a regular file, symlink, or hard link dequeue(in); // we only touch it this once, so remove from stack @@ -159,37 +155,26 @@ void ScrubStack::kick_off_scrubs() in->scrub_set_finisher(&scrub_kick); } scrub_file_inode(in); - can_continue = true; } else { - bool done; // it's done, so pop it off the stack - bool added_children; // it added new dentries to the top of the stack + bool added_children = false; + bool done = false; // it's done, so pop it off the stack scrub_dir_inode(in, &added_children, &done); if (done) { dout(20) << __func__ << " dir inode, done" << dendl; dequeue(in); - } else if (added_children) { - dout(20) << __func__ << " dir inode, added_children" << dendl; - // we added new stuff to top of stack, so reset ourselves there + } + if (added_children) { + // 
dirfrags were queued at top of stack it = scrub_stack.begin(); - } else { - dout(20) << __func__ << " dir inode, no progress" << dendl; - can_continue = false; } } } else if (CDir *dir = dynamic_cast(*it)) { - bool done; // it's done, so pop it off the stack - bool added_children; // it added new dentries to the top of the stack - scrub_dirfrag(dir, &added_children, &done); + bool done = false; // it's done, so pop it off the stack + scrub_dirfrag(dir, &done); + ++it; if (done) { dout(20) << __func__ << " dirfrag, done" << dendl; dequeue(dir); - } else if (added_children) { - dout(20) << __func__ << " dirfrag, added_children" << dendl; - // we added new stuff to top of stack, so reset ourselves there - it = scrub_stack.begin(); - } else { - dout(20) << __func__ << " dirfrag, no progress" << dendl; - can_continue = false; } } else { ceph_assert(0 == "dentry in scrub stack"); @@ -197,118 +182,47 @@ void ScrubStack::kick_off_scrubs() } } -void ScrubStack::scrub_dir_inode(CInode *in, - bool *added_children, - bool *done) +void ScrubStack::scrub_dir_inode(CInode *in, bool *added_children, bool *done) { dout(10) << __func__ << " " << *in << dendl; - *added_children = false; - bool all_frags_done = true; - ScrubHeaderRef header = in->get_scrub_header(); - ceph_assert(header != nullptr); + ceph_assert(header); - if (header->get_recursive()) { - frag_vec_t scrubbing_frags; - std::queue scrubbing_cdirs; - in->scrub_dirfrags_scrubbing(&scrubbing_frags); - dout(20) << __func__ << " iterating over " << scrubbing_frags.size() - << " scrubbing frags" << dendl; - for (const auto& fg : scrubbing_frags) { - // turn frags into CDir * - CDir *dir = in->get_dirfrag(fg); - if (dir) { - scrubbing_cdirs.push(dir); - dout(25) << __func__ << " got CDir " << *dir << " presently scrubbing" << dendl; - } else { - in->scrub_dirfrag_finished(fg); - dout(25) << __func__ << " missing dirfrag " << fg << " skip scrubbing" << dendl; - } - } + MDSGatherBuilder gather(g_ceph_context); - dout(20) << 
__func__ << " consuming from " << scrubbing_cdirs.size() - << " scrubbing cdirs" << dendl; + frag_vec_t frags; + in->dirfragtree.get_leaves(frags); + dout(20) << __func__ << "recursive mode, frags " << frags << dendl; - while (g_conf()->mds_max_scrub_ops_in_progress > scrubs_in_progress) { - // select next CDir - CDir *cur_dir = NULL; - if (!scrubbing_cdirs.empty()) { - cur_dir = scrubbing_cdirs.front(); - scrubbing_cdirs.pop(); - dout(20) << __func__ << " got cur_dir = " << *cur_dir << dendl; - } else { - bool ready = get_next_cdir(in, &cur_dir); - dout(20) << __func__ << " get_next_cdir ready=" << ready << dendl; - - if (ready && cur_dir) { - cur_dir->scrub_initialize(header, nullptr); - scrubbing_cdirs.push(cur_dir); - } else if (!ready) { - // We are waiting for load of a frag - all_frags_done = false; - break; - } else { - // Finished with all frags - break; - } - } - // scrub that CDir - bool frag_added_children = false; - bool frag_done = false; - scrub_dirfrag(cur_dir, - &frag_added_children, &frag_done); - if (frag_done) { - cur_dir->inode->scrub_dirfrag_finished(cur_dir->frag); - } - *added_children |= frag_added_children; - all_frags_done = all_frags_done && frag_done; - } - - dout(20) << "finished looping, all_frags_done=" << all_frags_done << dendl; - } else { - dout(20) << "!scrub_recursive" << dendl; + for (auto &fg : frags) { + CDir *dir = in->get_or_open_dirfrag(mdcache, fg); + if (dir->get_version() == 0) + dir->fetch(gather.new_sub()); } - - if (all_frags_done) { - assert (!*added_children); // can't do this if children are still pending - - // OK, so now I can... 
fire off a validate on the dir inode, and - // when it completes, come through here again, noticing that we've - // set a flag to indicate the validate happened, and - scrub_dir_inode_final(in); + if (gather.has_subs()) { + scrubs_in_progress++; + gather.set_finisher(&scrub_kick); + gather.activate(); + dout(10) << __func__ << " barebones dirfrags, fetching" << dendl; + return; } - *done = all_frags_done; - dout(10) << __func__ << " is exiting " << *done << dendl; - return; -} - -bool ScrubStack::get_next_cdir(CInode *in, CDir **new_dir) -{ - dout(20) << __func__ << " on " << *in << dendl; - frag_t next_frag; - int r = in->scrub_dirfrag_next(&next_frag); - assert (r >= 0); - - if (r == 0) { - // we got a frag to scrub, otherwise it would be ENOENT - dout(25) << "looking up new frag " << next_frag << dendl; - CDir *next_dir = in->get_or_open_dirfrag(mdcache, next_frag); - if (!next_dir->is_complete()) { - scrubs_in_progress++; - next_dir->fetch(&scrub_kick); - dout(25) << "fetching frag from RADOS" << dendl; - return false; + std::vector dfs; + in->get_dirfrags(dfs); + for (auto &dir : dfs) { + if (dir->is_auth()){ + _enqueue(dir, header, nullptr, true); + *added_children = true; + } else { + // FIXME: ask auth mds to scrub } - *new_dir = next_dir; - dout(25) << "returning dir " << *new_dir << dendl; - return true; } - ceph_assert(r == ENOENT); - // there are no dirfrags left - *new_dir = NULL; - return true; + + scrub_dir_inode_final(in); + + *done = true; + dout(10) << __func__ << " done" << dendl; } class C_InodeValidated : public MDSInternalContext @@ -333,90 +247,62 @@ void ScrubStack::scrub_dir_inode_final(CInode *in) { dout(20) << __func__ << " " << *in << dendl; - // Two passes through this function. 
First one triggers inode validation, - // second one sets finally_done - // FIXME: kind of overloading scrub_in_progress here, using it while - // dentry is still on stack to indicate that we have finished - // doing our validate_disk_state on the inode - // FIXME: the magic-constructing scrub_info() is going to leave - // an unneeded scrub_infop lying around here - if (!in->scrub_info()->children_scrubbed) { - if (!in->scrub_info()->on_finish) { - scrubs_in_progress++; - in->scrub_set_finisher(&scrub_kick); - } - - in->scrub_children_finished(); - C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in); - in->validate_disk_state(&fin->result, fin); + if (!in->scrub_info()->on_finish) { + scrubs_in_progress++; + in->scrub_set_finisher(&scrub_kick); } + C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in); + in->validate_disk_state(&fin->result, fin); + return; } -void ScrubStack::scrub_dirfrag(CDir *dir, bool *added_children, bool *done) +void ScrubStack::scrub_dirfrag(CDir *dir, bool *done) { ceph_assert(dir != NULL); - dout(20) << __func__ << " on " << *dir << dendl; - *added_children = false; - *done = false; - - ScrubHeaderRef header = dir->get_scrub_header(); - - if (!dir->scrub_info()->directory_scrubbing) { - // Get the frag complete before calling - // scrub initialize, so that it can populate its lists - // of dentries. 
- if (!dir->is_complete()) { - scrubs_in_progress++; - dir->fetch(&scrub_kick); - return; - } - - dir->scrub_initialize_data(); - } + dout(10) << __func__ << " " << *dir << dendl; - int r = 0; - while(r == 0) { - CDentry *dn = NULL; + if (!dir->is_complete()) { scrubs_in_progress++; - r = dir->scrub_dentry_next(&scrub_kick, &dn); - if (r != EAGAIN) { - scrubs_in_progress--; - } - - if (r == EAGAIN) { - // Drop out, CDir fetcher will call back our kicker context - dout(20) << __func__ << " waiting for fetch on " << *dir << dendl; - return; - } + dir->fetch(&scrub_kick, true); // already auth pinned + dout(10) << __func__ << " incomplete, fetching" << dendl; + return; + } - if (r == ENOENT) { - // Nothing left to scrub, are we done? - auto&& scrubbing = dir->scrub_dentries_scrubbing(); - if (scrubbing.empty()) { - dout(20) << __func__ << " dirfrag done: " << *dir << dendl; - MDSContext *c = nullptr; - dir->scrub_finished(&c); - if (c) - finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0); - *done = true; - } else { - dout(20) << __func__ << " " << scrubbing.size() << " dentries still " - "scrubbing in " << *dir << dendl; + ScrubHeaderRef header = dir->get_scrub_header(); + version_t last_scrub = dir->scrub_info()->last_recursive.version; + if (header->get_recursive()) { + for (auto it = dir->begin(); it != dir->end(); ++it) { + if (it->first.snapid != CEPH_NOSNAP) + continue; + CDentry *dn = it->second; + CDentry::linkage_t *dnl = dn->get_linkage(); + if (dn->get_version() <= last_scrub && + dnl->get_remote_d_type() != DT_DIR && + !header->get_force()) { + dout(15) << __func__ << " skip dentry " << it->first + << ", no change since last scrub" << dendl; + continue; + } + if (dnl->is_primary()) { + _enqueue(dnl->get_inode(), header, nullptr, false); + } else if (dnl->is_remote()) { + // TODO: check remote linkage } - return; } + } - // scrub_dentry_next defined to only give EAGAIN, ENOENT, 0 -- we should - // never get random IO errors here. 
- ceph_assert(r == 0); + dir->scrub_local(); - _enqueue(dn->get_projected_inode(), dn, header, nullptr, true); + MDSContext *c = nullptr; + dir->scrub_finished(&c); + if (c) + finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0); - *added_children = true; - } + *done = true; + dout(10) << __func__ << " done" << dendl; } void ScrubStack::scrub_file_inode(CInode *in) diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h index 768d466b846..02c17299be1 100644 --- a/src/mds/ScrubStack.h +++ b/src/mds/ScrubStack.h @@ -141,7 +141,7 @@ private: friend class C_InodeValidated; - void _enqueue(MDSCacheObject *obj, CDentry *parent, ScrubHeaderRef& header, + void _enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, MDSContext *on_finish, bool top); /** * Remove the inode/dirfrag from the stack. @@ -169,59 +169,28 @@ private: const CInode::validated_data &result); /** - * Make progress on scrubbing a directory-representing dirfrag and - * its children.. + * Scrub a directory inode. It queues child dirfrags, then does + * final scrub of the inode. * - * 1) Select the next dirfrag which hasn't been scrubbed, and make progress - * on it if possible. - * - * 2) If not, move on to the next dirfrag and start it up, if any. - * - * 3) If waiting for results from dirfrag scrubs, do nothing. - * - * 4) If all dirfrags have been scrubbed, scrub my inode. - * - * @param in The CInode to scrub as a directory + * @param in The directory inode to scrub * @param added_children set to true if we pushed some of our children - * onto the ScrubStack - * remaining to start scrubbing. - * @param done set to true if we and all our children have finished scrubbing + * @param done set to true if we started to do final scrub */ void scrub_dir_inode(CInode *in, bool *added_children, bool *done); /** - * Make progress on scrubbing a dirfrag. It may return after each of the - * following steps, but will report making progress on each one. 
- * - * 1) enqueues the next unscrubbed child directory dentry at the - * top of the stack. - * - * 2) Initiates a scrub on the next unscrubbed file dentry - * - * If there are scrubs currently in progress on child dentries, no more child - * dentries to scrub, and this function is invoked, it will report no - * progress. Try again later. + * Scrub a dirfrag. It queues child dentries, then does final + * scrub of the dirfrag. * + * @param dir The dirfrag to scrub + * @param done set to true if we started to do final scrub */ - void scrub_dirfrag(CDir *dir, bool *added_children, bool *done); + void scrub_dirfrag(CDir *dir, bool *done); /** * Scrub a directory-representing dentry. * * @param in The directory inode we're doing final scrub on. */ void scrub_dir_inode_final(CInode *in); - - /** - * Get a CDir into memory, and return it if it's already complete. - * Otherwise, fetch it and kick off scrubbing when done. - * - * @param in The Inode to get the next directory from - * @param new_dir The CDir we're returning to you. NULL if - * not ready yet or there aren't any. - * @returns false if you have to wait, true if there's no work - * left to do (we returned it, or there are none left in this inode). - */ - bool get_next_cdir(CInode *in, CDir **new_dir); - /** * Set scrub state * @param next_state State to move the scrub to. -- 2.39.5