CDentry *CDir::lookup(std::string_view name, snapid_t snap)
{
- dout(20) << "lookup (" << snap << ", '" << name << "')" << dendl;
+ dout(20) << "lookup (" << name << ", '" << snap << "')" << dendl;
auto iter = items.lower_bound(dentry_key_t(snap, name, inode->hash_dentry_name(name)));
if (iter == items.end())
return 0;
// -----------------------
// FETCH
-void CDir::fetch(MDSContext *c, bool ignore_authpinnability)
+void CDir::fetch(std::string_view dname, snapid_t last,
+ MDSContext *c, bool ignore_authpinnability)
{
- string want;
- return fetch(c, want, ignore_authpinnability);
-}
-
-void CDir::fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability)
-{
- dout(10) << "fetch on " << *this << dendl;
+ if (dname.empty())
+ dout(10) << "fetch on " << *this << dendl;
+ else
+ dout(10) << "fetch key(" << dname << ", '" << last << "')" << dendl;
ceph_assert(is_auth());
ceph_assert(!is_complete());
- if (!can_auth_pin() && !ignore_authpinnability) {
+ if (!ignore_authpinnability && !can_auth_pin()) {
if (c) {
dout(7) << "fetch waiting for authpinnable" << dendl;
add_waiter(WAIT_UNFREEZE, c);
}
// unlinked directory inode shouldn't have any entry
- if (!inode->is_base() && get_parent_dir()->inode->is_stray() &&
- !inode->snaprealm) {
+ if (CDir *pdir = get_parent_dir();
+ pdir && pdir->inode->is_stray() && !inode->snaprealm) {
dout(7) << "fetch dirfrag for unlinked directory, mark complete" << dendl;
if (get_version() == 0) {
ceph_assert(inode->is_auth());
return;
}
- if (c) add_waiter(WAIT_COMPLETE, c);
- if (!want_dn.empty()) wanted_items.insert(mempool::mds_co::string(want_dn));
+ // FIXME: to fetch a snap dentry, we would need to read the omap keys in the range
+ // [(name, last), (name, CEPH_NOSNAP)); until then only last == CEPH_NOSNAP is fetched by key.
+ if (!dname.empty() && last == CEPH_NOSNAP) {
+ dentry_key_t key(last, dname, inode->hash_dentry_name(dname));
+ fetch_keys({key}, c);
+ return;
+ }
+
+ if (c)
+ add_waiter(WAIT_COMPLETE, c);
// already fetching?
if (state_test(CDir::STATE_FETCHING)) {
auth_pin(this);
state_set(CDir::STATE_FETCHING);
- if (mdcache->mds->logger) mdcache->mds->logger->inc(l_mds_dir_fetch);
+ _omap_fetch(nullptr, nullptr);
+ if (mdcache->mds->logger)
+ mdcache->mds->logger->inc(l_mds_dir_fetch_complete);
mdcache->mds->balancer->hit_dir(this, META_POP_FETCH);
-
- std::set<dentry_key_t> empty;
- _omap_fetch(NULL, empty);
}
-void CDir::fetch(MDSContext *c, const std::set<dentry_key_t>& keys)
+void CDir::fetch_keys(const std::vector<dentry_key_t>& keys, MDSContext *c) // Fetch only the omap entries for 'keys'; 'c' (may be null) is queued on a dentry waiter list, on WAIT_COMPLETE, or handed to _omap_fetch().
{
dout(10) << "fetch " << keys.size() << " keys on " << *this << dendl;
-
ceph_assert(is_auth());
ceph_assert(!is_complete());
- if (!can_auth_pin()) {
- dout(7) << "fetch keys waiting for authpinnable" << dendl;
- add_waiter(WAIT_UNFREEZE, c);
+ if (CDir *pdir = get_parent_dir();
+ pdir && pdir->inode->is_stray() && !inode->snaprealm) { // parent dir's inode is a stray and this inode has no snaprealm
+ fetch(c, true); // hand over to fetch() with ignore_authpinnability = true
return;
}
+ // fallback_waiting: waiter list of the first requested key that already had an entry in waiting_on_dentry (if any)
+ MDSContext::vec_alloc<mempool::mds_co::pool_allocator> *fallback_waiting = nullptr;
+ std::set<std::string> str_keys; // encoded key strings that still need to be read
+ for (auto& key : keys) {
+ ceph_assert(key.snapid == CEPH_NOSNAP); // only CEPH_NOSNAP keys are accepted here
+ if (waiting_on_dentry.empty())
+ get(PIN_DNWAITER); // take the pin when the waiter map is about to go from empty to non-empty
+ auto em = waiting_on_dentry.emplace(std::piecewise_construct,
+ std::forward_as_tuple(key.name, key.snapid),
+ std::forward_as_tuple());
+ if (!em.second) { // an entry for this key already existed: do not request it again
+ if (!fallback_waiting)
+ fallback_waiting = &em.first->second;
+ continue;
+ }
+ // attach the context to the first newly inserted key only, then clear it so it is not queued twice
+ if (c) {
+ em.first->second.push_back(c);
+ c = nullptr;
+ }
+ // remember the encoded form of the key; str_keys is what gets passed to _omap_fetch() below
+ string str;
+ key.encode(str);
+ str_keys.emplace(std::move(str));
+ }
+ // no new key was inserted above (every key already had a waiter entry, or 'keys' was empty)
+ if (str_keys.empty()) {
+ if (c && fallback_waiting) { // context not queued yet: park it on an existing entry's waiter list
+ fallback_waiting->push_back(c);
+ c = nullptr;
+ }
+ // with version > 0 there is nothing new to read: return without issuing a fetch ('c' must have been queued by now)
+ if (get_version() > 0) {
+ dout(7) << "fetch keys, all are already being fetched" << dendl;
+ ceph_assert(!c);
+ return;
+ }
+ }
+ // STATE_FETCHING is already set on this dirfrag: queue any remaining context on WAIT_COMPLETE instead of issuing another read
if (state_test(CDir::STATE_FETCHING)) {
- dout(7) << "fetch keys waiting for full fetch" << dendl;
- add_waiter(WAIT_COMPLETE, c);
+ dout(7) << "fetch keys, waiting for full fetch" << dendl;
+ if (c)
+ add_waiter(WAIT_COMPLETE, c);
return;
}
auth_pin(this);
- if (mdcache->mds->logger) mdcache->mds->logger->inc(l_mds_dir_fetch);
+ _omap_fetch(&str_keys, c); // 'c' is non-null here only if it was not queued on a dentry waiter list above
+ if (mdcache->mds->logger)
+ mdcache->mds->logger->inc(l_mds_dir_fetch_keys);
mdcache->mds->balancer->hit_dir(this, META_POP_FETCH);
-
- _omap_fetch(c, keys);
}
class C_IO_Dir_OMAP_FetchedMore : public CDirIOContext {
void finish(int r) {
if (omap_version < dir->get_committed_version()) {
omap.clear();
- dir->_omap_fetch(fin, {});
+ dir->_omap_fetch(nullptr, fin);
return;
}
if (more) {
dir->_omap_fetch_more(omap_version, hdrbl, omap, fin);
} else {
- dir->_omap_fetched(hdrbl, omap, !fin, r);
+ dir->_omap_fetched(hdrbl, omap, true, {}, r);
if (fin)
fin->complete(r);
}
MDSContext *fin;
public:
const version_t omap_version;
+ bool complete = true;
+ std::set<string> keys;
bufferlist hdrbl;
bool more = false;
map<string, bufferlist> omap;
if (more) {
if (omap_version < dir->get_committed_version()) {
- omap.clear();
- dir->_omap_fetch(fin, {});
+ dir->_omap_fetch(nullptr, fin);
} else {
- dir->_omap_fetch_more(omap_version, hdrbl, omap, fin);
+ dir->_omap_fetch_more(omap_version, hdrbl, omap, fin);
}
return;
}
- dir->_omap_fetched(hdrbl, omap, !fin, r);
+ dir->_omap_fetched(hdrbl, omap, complete, keys, r);
if (fin)
fin->complete(r);
-
}
void print(ostream& out) const override {
out << "dirfrag_fetch(" << dir->dirfrag() << ")";
}
};
-void CDir::_omap_fetch(MDSContext *c, const std::set<dentry_key_t>& keys)
+void CDir::_omap_fetch(std::set<string> *keys, MDSContext *c)
{
C_IO_Dir_OMAP_Fetched *fin = new C_IO_Dir_OMAP_Fetched(this, c);
object_t oid = get_ondisk_object();
object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool());
ObjectOperation rd;
rd.omap_get_header(&fin->hdrbl, &fin->ret1);
- if (keys.empty()) {
+ if (keys) {
+ fin->complete = false;
+ fin->keys.swap(*keys);
+ rd.omap_get_vals_by_keys(fin->keys, &fin->omap, &fin->ret2);
+ } else {
ceph_assert(!c);
rd.omap_get_vals("", "", g_conf()->mds_dir_keys_per_op,
&fin->omap, &fin->more, &fin->ret2);
- } else {
- ceph_assert(c);
- std::set<std::string> str_keys;
- for (auto p : keys) {
- string str;
- p.encode(str);
- str_keys.insert(str);
- }
- rd.omap_get_vals_by_keys(str_keys, &fin->omap, &fin->ret2);
}
// check the correctness of backtrace
if (g_conf()->mds_verify_backtrace > 0 && frag == frag_t()) {
}
void CDir::_omap_fetched(bufferlist& hdrbl, map<string, bufferlist>& omap,
- bool complete, int r)
+ bool complete, const std::set<string>& keys, int r)
{
LogChannelRef clog = mdcache->mds->clog;
dout(10) << "_fetched header " << hdrbl.length() << " bytes "
}
}
+
+ MDSContext::vec finished;
+ std::vector<string_snap_t> null_keys;
+
+ auto k_it = keys.rbegin();
+ auto w_it = waiting_on_dentry.rbegin();
+ std::string_view last_name = "";
+
+ auto proc_waiters = [&](const string_snap_t& key) {
+ bool touch = false;
+ if (last_name < key.name) {
+ // string_snap_t and key string are not in the same order
+ w_it = decltype(w_it)(waiting_on_dentry.upper_bound(key));
+ }
+ while (w_it != waiting_on_dentry.rend()) {
+ int cmp = w_it->first.compare(key);
+ if (cmp < 0)
+ break;
+ if (cmp == 0) {
+ touch = true;
+ std::copy(w_it->second.begin(), w_it->second.end(),
+ std::back_inserter(finished));
+ waiting_on_dentry.erase(std::next(w_it).base());
+ if (waiting_on_dentry.empty())
+ put(PIN_DNWAITER);
+ break;
+ }
+ ++w_it;
+ }
+ return touch;
+ };
+ auto proc_nulls_and_waiters = [&](const string& str_key, const string_snap_t& key) {
+ bool touch = false;
+ while (k_it != keys.rend()) {
+ int cmp = k_it->compare(str_key);
+ if (cmp < 0)
+ break;
+ if (cmp == 0) {
+ touch = true;
+ proc_waiters(key);
+ ++k_it;
+ break;
+ }
+ string_snap_t n_key;
+ dentry_key_t::decode_helper(*k_it, n_key.name, n_key.snapid);
+ ceph_assert(n_key.snapid == CEPH_NOSNAP);
+ proc_waiters(n_key);
+ last_name = std::string_view(k_it->c_str(), n_key.name.length());
+ null_keys.emplace_back(std::move(n_key));
+ ++k_it;
+ }
+ return touch;
+ };
+
unsigned pos = omap.size() - 1;
double rand_threshold = get_inode()->get_ephemeral_rand();
- for (map<string, bufferlist>::reverse_iterator p = omap.rbegin();
- p != omap.rend();
- ++p, --pos) {
- string dname;
- snapid_t last;
- dentry_key_t::decode_helper(p->first, dname, last);
-
- CDentry *dn = NULL;
+ for (auto p = omap.rbegin(); p != omap.rend(); ++p, --pos) {
+ string_snap_t key;
+ dentry_key_t::decode_helper(p->first, key.name, key.snapid);
+ bool touch;
+
+ if (key.snapid == CEPH_NOSNAP) {
+ if (complete) {
+ touch = proc_waiters(key);
+ } else {
+ touch = proc_nulls_and_waiters(p->first, key);
+ }
+ last_name = std::string_view(p->first.c_str(), key.name.length());
+ } else {
+ touch = false;
+ }
+
+ CDentry *dn = nullptr;
try {
dn = _load_dentry(
- p->first, dname, last, p->second, pos, snaps,
+ p->first, key.name, key.snapid, p->second, pos, snaps,
rand_threshold, &force_dirty);
} catch (const buffer::error &err) {
- mdcache->mds->clog->warn() << "Corrupt dentry '" << dname << "' in "
+ mdcache->mds->clog->warn() << "Corrupt dentry '" << key.name << "' in "
"dir frag " << dirfrag() << ": "
<< err.what() << "(" << get_path() << ")";
// that try to act directly on it will get their CEPHFS_EIOs, but this
// dirfrag as a whole will continue to look okay (minus the
// mysteriously-missing dentry)
- go_bad_dentry(last, dname);
+ go_bad_dentry(key.snapid, key.name);
// Anyone who was WAIT_DENTRY for this guy will get kicked
// to RetryRequest, and hit the DamageTable-interrogating path.
if (!dn)
continue;
+ if (touch) {
+ dout(10) << " touching wanted dn " << *dn << dendl;
+ mdcache->touch_dentry(dn);
+ }
+
CDentry::linkage_t *dnl = dn->get_linkage();
if (dnl->is_primary() && dnl->get_inode()->state_test(CInode::STATE_REJOINUNDEF))
undef_inodes.push_back(dnl->get_inode());
+ }
- if (wanted_items.count(mempool::mds_co::string(dname)) > 0 || !complete) {
- dout(10) << " touching wanted dn " << *dn << dendl;
+ if (complete) {
+ if (!waiting_on_dentry.empty()) {
+ for (auto &p : waiting_on_dentry) {
+ std::copy(p.second.begin(), p.second.end(), std::back_inserter(finished));
+ if (p.first.snapid == CEPH_NOSNAP)
+ null_keys.emplace_back(p.first);
+ }
+ waiting_on_dentry.clear();
+ put(PIN_DNWAITER);
+ }
+ } else {
+ proc_nulls_and_waiters("", string_snap_t());
+ }
+
+ if (!null_keys.empty()) {
+ snapid_t first = mdcache->get_global_snaprealm()->get_newest_seq() + 1;
+ for (auto& key : null_keys) {
+ CDentry* dn = lookup(key.name, key.snapid);
+ if (dn) {
+ dout(12) << "_fetched got null for key " << key << ", have " << *dn << dendl;
+ } else {
+ dn = add_null_dentry(key.name, first, key.snapid);
+ dout(12) << "_fetched got null for key " << key << ", added " << *dn << dendl;
+ }
mdcache->touch_dentry(dn);
}
}
// mark complete, !fetching
if (complete) {
- wanted_items.clear();
mark_complete();
state_clear(STATE_FETCHING);
+ take_waiting(WAIT_COMPLETE, finished);
}
// open & force frags
auth_unpin(this);
- if (complete) {
- // kick waiters
- finish_waiting(WAIT_COMPLETE, 0);
- }
+ if (!finished.empty())
+ mdcache->mds->queue_waiters(finished);
}
void CDir::go_bad_dentry(snapid_t last, std::string_view dname)
ceph_assert(!dir->state_test(CDir::STATE_BADFRAG));
if (dir->get_version() == 0) {
- dir->fetch(new C_MDS_RetryOpenRoot(this));
+ dir->fetch_keys({}, new C_MDS_RetryOpenRoot(this));
return;
}
<< rejoin_undef_inodes.size() << " inodes "
<< rejoin_undef_dirfrags.size() << " dirfrags" << dendl;
- set<CDir*> fetch_queue = rejoin_undef_dirfrags;
+ // dirfrag -> (fetch_complete, keys_to_fetch)
+ map<CDir*, pair<bool, std::vector<dentry_key_t> > > fetch_queue;
+ for (auto& dir : rejoin_undef_dirfrags) {
+ ceph_assert(dir->get_version() == 0);
+ (void)fetch_queue[dir];
+ }
- for (set<CInode*>::iterator p = rejoin_undef_inodes.begin();
- p != rejoin_undef_inodes.end();
- ++p) {
- CInode *in = *p;
- ceph_assert(!in->is_base());
- ceph_assert(in->get_parent_dir());
- fetch_queue.insert(in->get_parent_dir());
+ for (auto& in : rejoin_undef_inodes) {
+ assert(!in->is_base());
+ CDentry *dn = in->get_parent_dn();
+ auto& p = fetch_queue[dn->get_dir()];
+ if (dn->last != CEPH_NOSNAP) {
+ p.first = true;
+ p.second.clear();
+ } else if (!p.first) {
+ p.second.push_back(dn->key());
+ }
}
if (fetch_queue.empty())
)
);
- for (set<CDir*>::iterator p = fetch_queue.begin();
- p != fetch_queue.end();
- ++p) {
- CDir *dir = *p;
+ for (auto& p : fetch_queue) {
+ CDir *dir = p.first;
CInode *diri = dir->get_inode();
if (diri->state_test(CInode::STATE_REJOINUNDEF))
continue;
if (dir->state_test(CDir::STATE_REJOINUNDEF))
ceph_assert(diri->dirfragtree.is_leaf(dir->get_frag()));
- dir->fetch(gather.new_sub());
+ if (p.second.first)
+ dir->fetch(gather.new_sub());
+ else
+ dir->fetch_keys(p.second.second, gather.new_sub());
}
ceph_assert(gather.has_subs());
gather.activate();
// directory isn't complete; reload
dout(7) << "traverse: incomplete dir contents for " << *cur << ", fetching" << dendl;
touch_inode(cur);
- curdir->fetch(cf.build(), path[depth]);
+ curdir->fetch(path[depth], snapid, cf.build());
if (mds->logger) mds->logger->inc(l_mds_traverse_dir_fetch);
return 1;
}
do_open_ino(ino, info, ret);
}
-void MDCache::_open_ino_fetch_dir(inodeno_t ino, const cref_t<MMDSOpenIno> &m, CDir *dir, bool parent)
+void MDCache::_open_ino_fetch_dir(inodeno_t ino, const cref_t<MMDSOpenIno> &m, bool parent,
+ CDir *dir, std::string_view dname)
{
if (dir->state_test(CDir::STATE_REJOINUNDEF))
ceph_assert(dir->get_inode()->dirfragtree.is_leaf(dir->get_frag()));
- dir->fetch(new C_MDC_OpenInoTraverseDir(this, ino, m, parent));
+ dir->fetch(dname, CEPH_NOSNAP, new C_MDC_OpenInoTraverseDir(this, ino, m, parent));
if (mds->logger)
mds->logger->inc(l_mds_openino_dir_fetch);
}
}
if (diri->state_test(CInode::STATE_REJOINUNDEF)) {
- CDir *dir = diri->get_parent_dir();
+ CDentry *dn = diri->get_parent_dn();
+ CDir *dir = dn->get_dir();
while (dir->state_test(CDir::STATE_REJOINUNDEF) &&
- dir->get_inode()->state_test(CInode::STATE_REJOINUNDEF))
- dir = dir->get_inode()->get_parent_dir();
- _open_ino_fetch_dir(ino, m, dir, i == 0);
+ dir->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) {
+ dn = dir->get_inode()->get_parent_dn();
+ dir = dn->get_dir();
+ }
+ _open_ino_fetch_dir(ino, m, i == 0, dir, dn->name);
return 1;
}
if (dnl && dnl->is_primary() &&
dnl->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) {
dout(10) << " fetching undef " << *dnl->get_inode() << dendl;
- _open_ino_fetch_dir(ino, m, dir, i == 0);
+ _open_ino_fetch_dir(ino, m, i == 0, dir, name);
return 1;
}
if (!dnl && !dir->is_complete() &&
(!dir->has_bloom() || dir->is_in_bloom(name))) {
dout(10) << " fetching incomplete " << *dir << dendl;
- _open_ino_fetch_dir(ino, m, dir, i == 0);
+ _open_ino_fetch_dir(ino, m, i == 0, dir, name);
return 1;
}
// lookup
CDentry *dn = 0;
+ std::string_view dname;
+ if (dis->get_want().depth() > 0)
+ dname = dis->get_dentry(i);
if (curdir->get_version() == 0) {
// fetch newly opened dir
ceph_assert(!curdir->has_bloom());
- } else if (dis->get_want().depth() > 0) {
+ } else if (dname.size() > 0) {
// lookup dentry
- dn = curdir->lookup(dis->get_dentry(i), snapid);
+ dn = curdir->lookup(dname, snapid);
} else
break; // done!
// incomplete dir?
if (!dn) {
if (!curdir->is_complete() &&
- !(snapid == CEPH_NOSNAP &&
+ !(dname.size() > 0 &&
+ snapid == CEPH_NOSNAP &&
curdir->has_bloom() &&
- !curdir->is_in_bloom(dis->get_dentry(i)))) {
+ !curdir->is_in_bloom(dname))) {
// readdir
dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << dendl;
if (reply->is_empty()) {
// fetch and wait
- curdir->fetch(new C_MDS_RetryMessage(mds, dis),
+ curdir->fetch(dname, snapid, new C_MDS_RetryMessage(mds, dis),
dis->wants_base_dir() && curdir->get_version() == 0);
return;
} else {
// initiate fetch, but send what we have so far
- curdir->fetch(0);
+ curdir->fetch(dname, snapid, nullptr);
break;
}
}
}
if (dir->get_version() == 0) {
ceph_assert(dir->is_auth());
- dir->fetch(new C_MDS_RetryRequest(this, mdr));
+ dir->fetch_keys({}, new C_MDS_RetryRequest(this, mdr));
return;
}
}