From: Shen, Hang Date: Tue, 19 Jan 2021 06:25:47 +0000 (+0800) Subject: mds: fetch single dentry instead of complete dirfrag for lookup X-Git-Tag: v18.0.0~1394^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=91746db4d6c2192ea1d0dea585bc3bc74858d9ea;p=ceph.git mds: fetch single dentry instead of complete dirfrag for lookup Signed-off-by: "Shen, Hang" --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index d9db8a88b25..162edff6b4e 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -315,7 +315,7 @@ void CDir::adjust_num_inodes_with_caps(int d) CDentry *CDir::lookup(std::string_view name, snapid_t snap) { - dout(20) << "lookup (" << snap << ", '" << name << "')" << dendl; + dout(20) << "lookup (" << name << ", '" << snap << "')" << dendl; auto iter = items.lower_bound(dentry_key_t(snap, name, inode->hash_dentry_name(name))); if (iter == items.end()) return 0; @@ -1495,20 +1495,18 @@ void CDir::last_put() // ----------------------- // FETCH -void CDir::fetch(MDSContext *c, bool ignore_authpinnability) +void CDir::fetch(std::string_view dname, snapid_t last, + MDSContext *c, bool ignore_authpinnability) { - string want; - return fetch(c, want, ignore_authpinnability); -} - -void CDir::fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability) -{ - dout(10) << "fetch on " << *this << dendl; + if (dname.empty()) + dout(10) << "fetch on " << *this << dendl; + else + dout(10) << "fetch key(" << dname << ", '" << last << "')" << dendl; ceph_assert(is_auth()); ceph_assert(!is_complete()); - if (!can_auth_pin() && !ignore_authpinnability) { + if (!ignore_authpinnability && !can_auth_pin()) { if (c) { dout(7) << "fetch waiting for authpinnable" << dendl; add_waiter(WAIT_UNFREEZE, c); @@ -1518,8 +1516,8 @@ void CDir::fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnab } // unlinked directory inode shouldn't have any entry - if (!inode->is_base() && get_parent_dir()->inode->is_stray() && - 
!inode->snaprealm) { + if (CDir *pdir = get_parent_dir(); + pdir && pdir->inode->is_stray() && !inode->snaprealm) { dout(7) << "fetch dirfrag for unlinked directory, mark complete" << dendl; if (get_version() == 0) { ceph_assert(inode->is_auth()); @@ -1540,8 +1538,16 @@ void CDir::fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnab return; } - if (c) add_waiter(WAIT_COMPLETE, c); - if (!want_dn.empty()) wanted_items.insert(mempool::mds_co::string(want_dn)); + // FIXME: to fetch a snap dentry, we need to get omap key in range + // [(name, last), (name, CEPH_NOSNAP)) + if (!dname.empty() && last == CEPH_NOSNAP) { + dentry_key_t key(last, dname, inode->hash_dentry_name(dname)); + fetch_keys({key}, c); + return; + } + + if (c) + add_waiter(WAIT_COMPLETE, c); // already fetching? if (state_test(CDir::STATE_FETCHING)) { @@ -1552,38 +1558,76 @@ void CDir::fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnab auth_pin(this); state_set(CDir::STATE_FETCHING); - if (mdcache->mds->logger) mdcache->mds->logger->inc(l_mds_dir_fetch); + _omap_fetch(nullptr, nullptr); + if (mdcache->mds->logger) + mdcache->mds->logger->inc(l_mds_dir_fetch_complete); mdcache->mds->balancer->hit_dir(this, META_POP_FETCH); - - std::set empty; - _omap_fetch(NULL, empty); } -void CDir::fetch(MDSContext *c, const std::set& keys) +void CDir::fetch_keys(const std::vector& keys, MDSContext *c) { dout(10) << "fetch " << keys.size() << " keys on " << *this << dendl; - ceph_assert(is_auth()); ceph_assert(!is_complete()); - if (!can_auth_pin()) { - dout(7) << "fetch keys waiting for authpinnable" << dendl; - add_waiter(WAIT_UNFREEZE, c); + if (CDir *pdir = get_parent_dir(); + pdir && pdir->inode->is_stray() && !inode->snaprealm) { + fetch(c, true); return; } + + MDSContext::vec_alloc *fallback_waiting = nullptr; + std::set str_keys; + for (auto& key : keys) { + ceph_assert(key.snapid == CEPH_NOSNAP); + if (waiting_on_dentry.empty()) + get(PIN_DNWAITER); + auto em = 
waiting_on_dentry.emplace(std::piecewise_construct, + std::forward_as_tuple(key.name, key.snapid), + std::forward_as_tuple()); + if (!em.second) { + if (!fallback_waiting) + fallback_waiting = &em.first->second; + continue; + } + + if (c) { + em.first->second.push_back(c); + c = nullptr; + } + + string str; + key.encode(str); + str_keys.emplace(std::move(str)); + } + + if (str_keys.empty()) { + if (c && fallback_waiting) { + fallback_waiting->push_back(c); + c = nullptr; + } + + if (get_version() > 0) { + dout(7) << "fetch keys, all are already being fetched" << dendl; + ceph_assert(!c); + return; + } + } + if (state_test(CDir::STATE_FETCHING)) { - dout(7) << "fetch keys waiting for full fetch" << dendl; - add_waiter(WAIT_COMPLETE, c); + dout(7) << "fetch keys, waiting for full fetch" << dendl; + if (c) + add_waiter(WAIT_COMPLETE, c); return; } auth_pin(this); - if (mdcache->mds->logger) mdcache->mds->logger->inc(l_mds_dir_fetch); + _omap_fetch(&str_keys, c); + if (mdcache->mds->logger) + mdcache->mds->logger->inc(l_mds_dir_fetch_keys); mdcache->mds->balancer->hit_dir(this, META_POP_FETCH); - - _omap_fetch(c, keys); } class C_IO_Dir_OMAP_FetchedMore : public CDirIOContext { @@ -1600,7 +1644,7 @@ public: void finish(int r) { if (omap_version < dir->get_committed_version()) { omap.clear(); - dir->_omap_fetch(fin, {}); + dir->_omap_fetch(nullptr, fin); return; } @@ -1613,7 +1657,7 @@ public: if (more) { dir->_omap_fetch_more(omap_version, hdrbl, omap, fin); } else { - dir->_omap_fetched(hdrbl, omap, !fin, r); + dir->_omap_fetched(hdrbl, omap, true, {}, r); if (fin) fin->complete(r); } @@ -1627,6 +1671,8 @@ class C_IO_Dir_OMAP_Fetched : public CDirIOContext { MDSContext *fin; public: const version_t omap_version; + bool complete = true; + std::set keys; bufferlist hdrbl; bool more = false; map omap; @@ -1646,44 +1692,37 @@ public: if (more) { if (omap_version < dir->get_committed_version()) { - omap.clear(); - dir->_omap_fetch(fin, {}); + dir->_omap_fetch(nullptr, 
fin); } else { - dir->_omap_fetch_more(omap_version, hdrbl, omap, fin); + dir->_omap_fetch_more(omap_version, hdrbl, omap, fin); } return; } - dir->_omap_fetched(hdrbl, omap, !fin, r); + dir->_omap_fetched(hdrbl, omap, complete, keys, r); if (fin) fin->complete(r); - } void print(ostream& out) const override { out << "dirfrag_fetch(" << dir->dirfrag() << ")"; } }; -void CDir::_omap_fetch(MDSContext *c, const std::set& keys) +void CDir::_omap_fetch(std::set *keys, MDSContext *c) { C_IO_Dir_OMAP_Fetched *fin = new C_IO_Dir_OMAP_Fetched(this, c); object_t oid = get_ondisk_object(); object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool()); ObjectOperation rd; rd.omap_get_header(&fin->hdrbl, &fin->ret1); - if (keys.empty()) { + if (keys) { + fin->complete = false; + fin->keys.swap(*keys); + rd.omap_get_vals_by_keys(fin->keys, &fin->omap, &fin->ret2); + } else { ceph_assert(!c); rd.omap_get_vals("", "", g_conf()->mds_dir_keys_per_op, &fin->omap, &fin->more, &fin->ret2); - } else { - ceph_assert(c); - std::set str_keys; - for (auto p : keys) { - string str; - p.encode(str); - str_keys.insert(str); - } - rd.omap_get_vals_by_keys(str_keys, &fin->omap, &fin->ret2); } // check the correctness of backtrace if (g_conf()->mds_verify_backtrace > 0 && frag == frag_t()) { @@ -1930,7 +1969,7 @@ CDentry *CDir::_load_dentry( } void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, - bool complete, int r) + bool complete, const std::set& keys, int r) { LogChannelRef clog = mdcache->mds->clog; dout(10) << "_fetched header " << hdrbl.length() << " bytes " @@ -2007,22 +2046,85 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, } } + + MDSContext::vec finished; + std::vector null_keys; + + auto k_it = keys.rbegin(); + auto w_it = waiting_on_dentry.rbegin(); + std::string_view last_name = ""; + + auto proc_waiters = [&](const string_snap_t& key) { + bool touch = false; + if (last_name < key.name) { + // string_snap_t and key string are not in the same order + w_it = 
decltype(w_it)(waiting_on_dentry.upper_bound(key)); + } + while (w_it != waiting_on_dentry.rend()) { + int cmp = w_it->first.compare(key); + if (cmp < 0) + break; + if (cmp == 0) { + touch = true; + std::copy(w_it->second.begin(), w_it->second.end(), + std::back_inserter(finished)); + waiting_on_dentry.erase(std::next(w_it).base()); + if (waiting_on_dentry.empty()) + put(PIN_DNWAITER); + break; + } + ++w_it; + } + return touch; + }; + auto proc_nulls_and_waiters = [&](const string& str_key, const string_snap_t& key) { + bool touch = false; + while (k_it != keys.rend()) { + int cmp = k_it->compare(str_key); + if (cmp < 0) + break; + if (cmp == 0) { + touch = true; + proc_waiters(key); + ++k_it; + break; + } + string_snap_t n_key; + dentry_key_t::decode_helper(*k_it, n_key.name, n_key.snapid); + ceph_assert(n_key.snapid == CEPH_NOSNAP); + proc_waiters(n_key); + last_name = std::string_view(k_it->c_str(), n_key.name.length()); + null_keys.emplace_back(std::move(n_key)); + ++k_it; + } + return touch; + }; + unsigned pos = omap.size() - 1; double rand_threshold = get_inode()->get_ephemeral_rand(); - for (map::reverse_iterator p = omap.rbegin(); - p != omap.rend(); - ++p, --pos) { - string dname; - snapid_t last; - dentry_key_t::decode_helper(p->first, dname, last); - - CDentry *dn = NULL; + for (auto p = omap.rbegin(); p != omap.rend(); ++p, --pos) { + string_snap_t key; + dentry_key_t::decode_helper(p->first, key.name, key.snapid); + bool touch; + + if (key.snapid == CEPH_NOSNAP) { + if (complete) { + touch = proc_waiters(key); + } else { + touch = proc_nulls_and_waiters(p->first, key); + } + last_name = std::string_view(p->first.c_str(), key.name.length()); + } else { + touch = false; + } + + CDentry *dn = nullptr; try { dn = _load_dentry( - p->first, dname, last, p->second, pos, snaps, + p->first, key.name, key.snapid, p->second, pos, snaps, rand_threshold, &force_dirty); } catch (const buffer::error &err) { - mdcache->mds->clog->warn() << "Corrupt dentry '" << dname 
<< "' in " + mdcache->mds->clog->warn() << "Corrupt dentry '" << key.name << "' in " "dir frag " << dirfrag() << ": " << err.what() << "(" << get_path() << ")"; @@ -2030,7 +2132,7 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, // that try to act directly on it will get their CEPHFS_EIOs, but this // dirfrag as a whole will continue to look okay (minus the // mysteriously-missing dentry) - go_bad_dentry(last, dname); + go_bad_dentry(key.snapid, key.name); // Anyone who was WAIT_DENTRY for this guy will get kicked // to RetryRequest, and hit the DamageTable-interrogating path. @@ -2042,12 +2144,40 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, if (!dn) continue; + if (touch) { + dout(10) << " touching wanted dn " << *dn << dendl; + mdcache->touch_dentry(dn); + } + CDentry::linkage_t *dnl = dn->get_linkage(); if (dnl->is_primary() && dnl->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) undef_inodes.push_back(dnl->get_inode()); + } - if (wanted_items.count(mempool::mds_co::string(dname)) > 0 || !complete) { - dout(10) << " touching wanted dn " << *dn << dendl; + if (complete) { + if (!waiting_on_dentry.empty()) { + for (auto &p : waiting_on_dentry) { + std::copy(p.second.begin(), p.second.end(), std::back_inserter(finished)); + if (p.first.snapid == CEPH_NOSNAP) + null_keys.emplace_back(p.first); + } + waiting_on_dentry.clear(); + put(PIN_DNWAITER); + } + } else { + proc_nulls_and_waiters("", string_snap_t()); + } + + if (!null_keys.empty()) { + snapid_t first = mdcache->get_global_snaprealm()->get_newest_seq() + 1; + for (auto& key : null_keys) { + CDentry* dn = lookup(key.name, key.snapid); + if (dn) { + dout(12) << "_fetched got null for key " << key << ", have " << *dn << dendl; + } else { + dn = add_null_dentry(key.name, first, key.snapid); + dout(12) << "_fetched got null for key " << key << ", added " << *dn << dendl; + } mdcache->touch_dentry(dn); } } @@ -2056,9 +2186,9 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, // mark 
complete, !fetching if (complete) { - wanted_items.clear(); mark_complete(); state_clear(STATE_FETCHING); + take_waiting(WAIT_COMPLETE, finished); } // open & force frags @@ -2075,10 +2205,8 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map<string, bufferlist>& omap, auth_unpin(this); - if (complete) { - // kick waiters - finish_waiting(WAIT_COMPLETE, 0); - } + if (!finished.empty()) + mdcache->mds->queue_waiters(finished); } void CDir::go_bad_dentry(snapid_t last, std::string_view dname) diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 0f203872fce..064061f1c8b 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -471,9 +471,12 @@ public: object_t get_ondisk_object() { return file_object_t(ino(), frag); } - void fetch(MDSContext *c, bool ignore_authpinnability=false); - void fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability=false); - void fetch(MDSContext *c, const std::set<dentry_key_t>& keys); + void fetch(std::string_view dname, snapid_t last, + MDSContext *c, bool ignore_authpinnability=false); + void fetch(MDSContext *c, bool ignore_authpinnability=false) { + fetch("", CEPH_NOSNAP, c, ignore_authpinnability); + } + void fetch_keys(const std::vector<dentry_key_t>& keys, MDSContext *c); #if 0 // unused? void wait_for_commit(Context *c, version_t v=0); @@ -643,7 +646,7 @@ protected: friend class C_IO_Dir_Committed; friend class C_IO_Dir_Commit_Ops; - void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys); + void _omap_fetch(std::set<std::string> *keys, MDSContext *fin=nullptr); void _omap_fetch_more(version_t omap_version, bufferlist& hdrbl, std::map<std::string, bufferlist>& omap, MDSContext *fin); CDentry *_load_dentry( @@ -667,7 +670,7 @@ protected: void go_bad(bool complete); void _omap_fetched(ceph::buffer::list& hdrbl, std::map<std::string, ceph::buffer::list>& omap, - bool complete, int r); + bool complete, const std::set<std::string>& keys, int r); // -- commit -- void _commit(version_t want, int op_prio); @@ -741,11 +744,10 @@ protected: /* If you set up the bloom filter, you must keep it accurate!
* It's deleted when you mark_complete() and is deliberately not serialized.*/ - mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items; mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit; // -- waiters -- - mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool + mempool::mds_co::map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool private: friend std::ostream& operator<<(std::ostream& out, const class CDir& dir); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 5ed8a9181d2..3616c91eaff 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -799,7 +799,7 @@ void MDCache::populate_mydir() ceph_assert(!dir->state_test(CDir::STATE_BADFRAG)); if (dir->get_version() == 0) { - dir->fetch(new C_MDS_RetryOpenRoot(this)); + dir->fetch_keys({}, new C_MDS_RetryOpenRoot(this)); return; } @@ -5966,15 +5966,23 @@ bool MDCache::open_undef_inodes_dirfrags() << rejoin_undef_inodes.size() << " inodes " << rejoin_undef_dirfrags.size() << " dirfrags" << dendl; - set<CDir*> fetch_queue = rejoin_undef_dirfrags; + // dirfrag -> (fetch_complete, keys_to_fetch) + map<CDir*, pair<bool, std::vector<dentry_key_t> > > fetch_queue; + for (auto& dir : rejoin_undef_dirfrags) { + ceph_assert(dir->get_version() == 0); + (void)fetch_queue[dir]; + } - for (set<CInode*>::iterator p = rejoin_undef_inodes.begin(); - p != rejoin_undef_inodes.end(); - ++p) { - CInode *in = *p; - ceph_assert(!in->is_base()); - ceph_assert(in->get_parent_dir()); - fetch_queue.insert(in->get_parent_dir()); + for (auto& in : rejoin_undef_inodes) { + assert(!in->is_base()); + CDentry *dn = in->get_parent_dn(); + auto& p = fetch_queue[dn->get_dir()]; + if (dn->last != CEPH_NOSNAP) { + p.first = true; + p.second.clear(); + } else if (!p.first) { + p.second.push_back(dn->key()); + } } if (fetch_queue.empty()) @@ -5989,16 +5997,17 @@ bool MDCache::open_undef_inodes_dirfrags() ) ); - for (set<CDir*>::iterator p = fetch_queue.begin(); - p != fetch_queue.end(); - ++p) { - CDir *dir
= *p; + for (auto& p : fetch_queue) { + CDir *dir = p.first; CInode *diri = dir->get_inode(); if (diri->state_test(CInode::STATE_REJOINUNDEF)) continue; if (dir->state_test(CDir::STATE_REJOINUNDEF)) ceph_assert(diri->dirfragtree.is_leaf(dir->get_frag())); - dir->fetch(gather.new_sub()); + if (p.second.first) + dir->fetch(gather.new_sub()); + else + dir->fetch_keys(p.second.second, gather.new_sub()); } ceph_assert(gather.has_subs()); gather.activate(); @@ -8478,7 +8487,7 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, // directory isn't complete; reload dout(7) << "traverse: incomplete dir contents for " << *cur << ", fetching" << dendl; touch_inode(cur); - curdir->fetch(cf.build(), path[depth]); + curdir->fetch(path[depth], snapid, cf.build()); if (mds->logger) mds->logger->inc(l_mds_traverse_dir_fetch); return 1; } @@ -8890,11 +8899,12 @@ void MDCache::_open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int r do_open_ino(ino, info, ret); } -void MDCache::_open_ino_fetch_dir(inodeno_t ino, const cref_t &m, CDir *dir, bool parent) +void MDCache::_open_ino_fetch_dir(inodeno_t ino, const cref_t &m, bool parent, + CDir *dir, std::string_view dname) { if (dir->state_test(CDir::STATE_REJOINUNDEF)) ceph_assert(dir->get_inode()->dirfragtree.is_leaf(dir->get_frag())); - dir->fetch(new C_MDC_OpenInoTraverseDir(this, ino, m, parent)); + dir->fetch(dname, CEPH_NOSNAP, new C_MDC_OpenInoTraverseDir(this, ino, m, parent)); if (mds->logger) mds->logger->inc(l_mds_openino_dir_fetch); } @@ -8918,11 +8928,14 @@ int MDCache::open_ino_traverse_dir(inodeno_t ino, const cref_t &m, } if (diri->state_test(CInode::STATE_REJOINUNDEF)) { - CDir *dir = diri->get_parent_dir(); + CDentry *dn = diri->get_parent_dn(); + CDir *dir = dn->get_dir(); while (dir->state_test(CDir::STATE_REJOINUNDEF) && - dir->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) - dir = dir->get_inode()->get_parent_dir(); - _open_ino_fetch_dir(ino, m, dir, i == 0); + 
dir->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) { + dn = dir->get_inode()->get_parent_dn(); + dir = dn->get_dir(); + } + _open_ino_fetch_dir(ino, m, i == 0, dir, dn->name); return 1; } @@ -8957,14 +8970,14 @@ int MDCache::open_ino_traverse_dir(inodeno_t ino, const cref_t &m, if (dnl && dnl->is_primary() && dnl->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) { dout(10) << " fetching undef " << *dnl->get_inode() << dendl; - _open_ino_fetch_dir(ino, m, dir, i == 0); + _open_ino_fetch_dir(ino, m, i == 0, dir, name); return 1; } if (!dnl && !dir->is_complete() && (!dir->has_bloom() || dir->is_in_bloom(name))) { dout(10) << " fetching incomplete " << *dir << dendl; - _open_ino_fetch_dir(ino, m, dir, i == 0); + _open_ino_fetch_dir(ino, m, i == 0, dir, name); return 1; } @@ -10310,31 +10323,35 @@ void MDCache::handle_discover(const cref_t &dis) // lookup CDentry *dn = 0; + std::string_view dname; + if (dis->get_want().depth() > 0) + dname = dis->get_dentry(i); if (curdir->get_version() == 0) { // fetch newly opened dir ceph_assert(!curdir->has_bloom()); - } else if (dis->get_want().depth() > 0) { + } else if (dname.size() > 0) { // lookup dentry - dn = curdir->lookup(dis->get_dentry(i), snapid); + dn = curdir->lookup(dname, snapid); } else break; // done! // incomplete dir? 
if (!dn) { if (!curdir->is_complete() && - !(snapid == CEPH_NOSNAP && + !(dname.size() > 0 && + snapid == CEPH_NOSNAP && curdir->has_bloom() && - !curdir->is_in_bloom(dis->get_dentry(i)))) { + !curdir->is_in_bloom(dname))) { // readdir dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << dendl; if (reply->is_empty()) { // fetch and wait - curdir->fetch(new C_MDS_RetryMessage(mds, dis), + curdir->fetch(dname, snapid, new C_MDS_RetryMessage(mds, dis), dis->wants_base_dir() && curdir->get_version() == 0); return; } else { // initiate fetch, but send what we have so far - curdir->fetch(0); + curdir->fetch(dname, snapid, nullptr); break; } } @@ -12966,7 +12983,7 @@ void MDCache::repair_inode_stats_work(MDRequestRef& mdr) } if (dir->get_version() == 0) { ceph_assert(dir->is_auth()); - dir->fetch(new C_MDS_RetryRequest(this, mdr)); + dir->fetch_keys({}, new C_MDS_RetryRequest(this, mdr)); return; } } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index e08168b7141..8f3ea5be26f 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1099,7 +1099,8 @@ class MDCache { void _open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err); void _open_ino_parent_opened(inodeno_t ino, int ret); void _open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int err); - void _open_ino_fetch_dir(inodeno_t ino, const cref_t &m, CDir *dir, bool parent); + void _open_ino_fetch_dir(inodeno_t ino, const cref_t &m, bool parent, + CDir *dir, std::string_view dname); int open_ino_traverse_dir(inodeno_t ino, const cref_t &m, const std::vector& ancestors, bool discover, bool want_xlocked, mds_rank_t *hint); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 886a379dd7a..a3eb88dbe51 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -3374,7 +3374,10 @@ void MDSRank::create_logger() mds_plb.add_u64(l_mds_root_rfiles, "root_rfiles", "root inode rfiles"); mds_plb.add_u64(l_mds_root_rbytes, "root_rbytes", "root inode rbytes"); 
mds_plb.add_u64(l_mds_root_rsnaps, "root_rsnaps", "root inode rsnaps"); - mds_plb.add_u64_counter(l_mds_dir_fetch, "dir_fetch", "Directory fetch"); + mds_plb.add_u64_counter(l_mds_dir_fetch_complete, + "dir_fetch_complete", "Fetch complete dirfrag"); + mds_plb.add_u64_counter(l_mds_dir_fetch_keys, + "dir_fetch_keys", "Fetch keys from dirfrag"); mds_plb.add_u64_counter(l_mds_dir_commit, "dir_commit", "Directory commit"); mds_plb.add_u64_counter(l_mds_dir_split, "dir_split", "Directory split"); mds_plb.add_u64_counter(l_mds_dir_merge, "dir_merge", "Directory merge"); diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 784aa574158..3df1abb5d13 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -54,7 +54,8 @@ enum { l_mds_reply, l_mds_reply_latency, l_mds_forward, - l_mds_dir_fetch, + l_mds_dir_fetch_complete, + l_mds_dir_fetch_keys, l_mds_dir_commit, l_mds_dir_split, l_mds_dir_merge, diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index f709ecbe271..1e0bf8d7742 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -318,7 +318,7 @@ void ScrubStack::scrub_dir_inode(CInode *in, bool *added_children, bool *done) dir->add_waiter(CDir::WAIT_UNFREEZE, gather.new_sub()); } else if (dir->get_version() == 0) { dout(20) << __func__ << " barebones " << *dir << dendl; - dir->fetch(gather.new_sub()); + dir->fetch_keys({}, gather.new_sub()); } else { _enqueue(dir, header, true); queued.insert_raw(dir->get_frag()); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index bfb27910846..70f0700eca6 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -1431,6 +1431,15 @@ struct string_snap_t { string_snap_t() {} string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {} + int compare(const string_snap_t& r) const { + int ret = name.compare(r.name); + if (ret) + return ret; + if (snapid == r.snapid) + return 0; + return snapid > r.snapid ? 
1 : -1; + } + void encode(ceph::buffer::list& bl) const; void decode(ceph::buffer::list::const_iterator& p); void dump(ceph::Formatter *f) const; @@ -1441,6 +1450,10 @@ struct string_snap_t { }; WRITE_CLASS_ENCODER(string_snap_t) +inline bool operator==(const string_snap_t& l, const string_snap_t& r) { + return l.name == r.name && l.snapid == r.snapid; +} + inline bool operator<(const string_snap_t& l, const string_snap_t& r) { int c = l.name.compare(r.name); return c < 0 || (c == 0 && l.snapid < r.snapid);