static const int STATE_PURGING = (1<<2);
static const int STATE_BADREMOTEINO = (1<<3);
static const int STATE_EVALUATINGSTRAY = (1<<4);
+ static const int STATE_PURGINGPINNED = (1<<5); // set together with get(PIN_PURGING); tested before put(PIN_PURGING) when aborting queued purges
// stray dentry needs notification of releasing reference
static const int STATE_STRAY = STATE_NOTIFYREF;
if (cache->mds->logger) cache->mds->logger->inc(l_mds_dir_fetch);
- _omap_fetch();
+ std::set<dentry_key_t> empty;
+ _omap_fetch(NULL, empty);
+}
+
+void CDir::fetch(MDSInternalContextBase *c, const std::set<dentry_key_t>& keys) // partial fetch: reads only the omap entries named by `keys`
+{
+ dout(10) << "fetch " << keys.size() << " keys on " << *this << dendl;
+
+ assert(is_auth()); // precondition: only valid on the auth copy of the dirfrag
+ assert(!is_complete()); // precondition: dirfrag must not already be complete
+
+ if (!can_auth_pin()) {
+ dout(7) << "fetch keys waiting for authpinnable" << dendl;
+ add_waiter(WAIT_UNFREEZE, c); // c is queued as a waiter; no read is issued on this path
+ return;
+ }
+ if (state_test(CDir::STATE_FETCHING)) {
+ dout(7) << "fetch keys waiting for full fetch" << dendl;
+ add_waiter(WAIT_COMPLETE, c); // STATE_FETCHING is already set: queue c on WAIT_COMPLETE instead of reading again
+ return;
+ }
+
+ auth_pin(this); // taken before the read is issued; go_bad(false) drops it via auth_unpin(this)
+ if (cache->mds->logger) cache->mds->logger->inc(l_mds_dir_fetch);
+
+ _omap_fetch(c, keys); // _omap_fetch() asserts (keys.empty() <=> c == NULL), so keys must be non-empty and c non-NULL here
}
class C_IO_Dir_TMAP_Fetched : public CDirIOContext {
bl.clear();
}
- _omap_fetched(header, omap, r);
+ _omap_fetched(header, omap, true, r);
}
class C_IO_Dir_OMAP_Fetched : public CDirIOContext {
- public:
+ MDSInternalContextBase *fin; // optional completion for a by-key fetch; NULL when _omap_fetch() is called with an empty key set
+public:
bufferlist hdrbl;
map<string, bufferlist> omap;
bufferlist btbl;
int ret1, ret2, ret3;
- C_IO_Dir_OMAP_Fetched(CDir *d) :
- CDirIOContext(d),
- ret1(0), ret2(0), ret3(0) {}
+ C_IO_Dir_OMAP_Fetched(CDir *d, MDSInternalContextBase *f) :
+ CDirIOContext(d), fin(f), ret1(0), ret2(0), ret3(0) { }
void finish(int r) {
// check the correctness of backtrace
if (r >= 0 && ret3 != -ECANCELED)
dir->inode->verify_diri_backtrace(btbl, ret3);
if (r >= 0) r = ret1;
if (r >= 0) r = ret2;
- dir->_omap_fetched(hdrbl, omap, r);
+ dir->_omap_fetched(hdrbl, omap, !fin, r); // complete == true only when no per-key completion was supplied
+ if (fin)
+ fin->complete(r); // invoked after _omap_fetched() has run, with the same r
}
};
-void CDir::_omap_fetch()
+void CDir::_omap_fetch(MDSInternalContextBase *c, const std::set<dentry_key_t>& keys)
{
- C_IO_Dir_OMAP_Fetched *fin = new C_IO_Dir_OMAP_Fetched(this);
+ C_IO_Dir_OMAP_Fetched *fin = new C_IO_Dir_OMAP_Fetched(this, c);
object_t oid = get_ondisk_object();
object_locator_t oloc(cache->mds->mdsmap->get_metadata_pool());
ObjectOperation rd;
rd.omap_get_header(&fin->hdrbl, &fin->ret1);
- rd.omap_get_vals("", "", (uint64_t)-1, &fin->omap, &fin->ret2);
+ if (keys.empty()) {
+ assert(!c);
+ rd.omap_get_vals("", "", (uint64_t)-1, &fin->omap, &fin->ret2);
+ } else {
+ assert(c);
+ std::set<std::string> str_keys;
+ for (auto p = keys.begin(); p != keys.end(); ++p) {
+ string str;
+ p->encode(str);
+ str_keys.insert(str);
+ }
+ rd.omap_get_vals_by_keys(str_keys, &fin->omap, &fin->ret2);
+ }
// check the correctness of backtrace
if (g_conf->mds_verify_backtrace > 0 && frag == frag_t()) {
rd.getxattr("parent", &fin->btbl, &fin->ret3);
in->mark_dirty_rstat();
if (inode->is_stray()) {
- dn->state_set(CDentry::STATE_STRAY);
- if (in->inode.nlink == 0)
- in->state_set(CInode::STATE_ORPHAN);
+ cache->notify_stray_loaded(dn);
}
//in->hack_accessed = false;
}
void CDir::_omap_fetched(bufferlist& hdrbl, map<string, bufferlist>& omap,
- int r)
+ bool complete, int r)
{
LogChannelRef clog = cache->mds->clog;
dout(10) << "_fetched header " << hdrbl.length() << " bytes "
dout(0) << "_fetched missing object for " << *this << dendl;
clog->error() << "dir " << dirfrag() << " object missing on disk; some files may be lost\n";
- go_bad();
+ go_bad(complete);
return;
}
<< ": " << err << dendl;
clog->warn() << "Corrupt fnode header in " << dirfrag() << ": "
<< err;
- go_bad();
+ go_bad(complete);
return;
}
if (!p.end()) {
clog->warn() << "header buffer of dir " << dirfrag() << " has "
<< hdrbl.length() - p.get_off() << " extra bytes\n";
- go_bad();
+ go_bad(complete);
return;
}
}
continue;
}
- if (dn && wanted_items.count(dname) > 0) {
+ if (dn && (wanted_items.count(dname) > 0 || !complete)) {
dout(10) << " touching wanted dn " << *dn << dendl;
inode->mdcache->touch_dentry(dn);
}
//cache->mds->logger->inc("newin", num_new_inodes_loaded);
// mark complete, !fetching
- wanted_items.clear();
- mark_complete();
- state_clear(STATE_FETCHING);
+ if (complete) {
+ wanted_items.clear();
+ mark_complete();
+ state_clear(STATE_FETCHING);
- if (scrub_infop && scrub_infop->need_scrub_local) {
- scrub_infop->need_scrub_local = false;
- scrub_local();
+ if (scrub_infop && scrub_infop->need_scrub_local) {
+ scrub_infop->need_scrub_local = false;
+ scrub_local();
+ }
}
// open & force frags
else
auth_unpin(this);
- // kick waiters
- finish_waiting(WAIT_COMPLETE, 0);
+ if (complete) {
+ // kick waiters
+ finish_waiting(WAIT_COMPLETE, 0);
+ }
}
void CDir::_go_bad()
}
}
-void CDir::go_bad()
+void CDir::go_bad(bool complete) // `complete` is the flag _omap_fetched() was called with
{
const bool fatal = cache->mds->damage_table.notify_dirfrag(inode->ino(), frag);
if (fatal) {
assert(0); // unreachable, damaged() respawns us
}
- _go_bad();
+ if (complete)
+ _go_bad();
+ else
+ auth_unpin(this); // partial fetch: _go_bad() is skipped and only the auth pin is released
}
// -----------------------
}
void fetch(MDSInternalContextBase *c, bool ignore_authpinnability=false);
void fetch(MDSInternalContextBase *c, const std::string& want_dn, bool ignore_authpinnability=false);
+ void fetch(MDSInternalContextBase *c, const std::set<dentry_key_t>& keys);
protected:
compact_set<string> wanted_items;
- void _omap_fetch();
+ void _omap_fetch(MDSInternalContextBase *fin, const std::set<dentry_key_t>& keys);
CDentry *_load_dentry(
const std::string &key,
const std::string &dname,
/**
* Go bad due to a damaged header (register with damagetable and go BADFRAG)
*/
- void go_bad();
+ void go_bad(bool complete);
- void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap, int r);
+ void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
+ bool complete, int r);
void _tmap_fetch();
void _tmap_fetched(bufferlist &bl, int r);
null_dentry = true;
}
- // notify dentry authority?
- if (!dn->is_auth()) {
+ if (dn->is_auth()) {
+ if (dn->state_test(CDentry::STATE_PURGING)) {
+ stray_manager.notify_stray_trimmed(dn);
+ }
+ } else {
+ // notify dentry authority.
+
// If null replica dentry is not readable, it's likely we will
// receive a MDentryLink message soon. MDentryLink message only
// replicates an inode, so we should avoid trimming the inode's
// INODE
if (in->is_auth()) {
// eval stray after closing dirfrags
- if (dn) {
+ if (dn && !dn->state_test(CDentry::STATE_PURGING)) {
maybe_eval_stray(in);
- if (dn->get_num_ref() > 0) {
- // Independent of whether we passed this on to the purge queue,
- // if it still has refs then don't count it as trimmed
+ if (dn->state_test(CDentry::STATE_PURGING) || dn->get_num_ref() > 0)
return true;
- }
}
} else {
mds_authority_t auth = in->authority();
for (CDir::map_t::iterator q = dir->items.begin(); q != dir->items.end(); ++q) {
CDentry *dn = q->second;
dn->state_set(CDentry::STATE_STRAY);
- CDentry::linkage_t *dnl = dn->get_projected_linkage();
stray_manager.notify_stray_created();
+ CDentry::linkage_t *dnl = dn->get_projected_linkage();
if (dnl->is_primary()) {
CInode *in = dnl->get_inode();
if (in->inode.nlink == 0)
stray_manager.eval_stray(dn);
}
+ void notify_stray_loaded(CDentry *dn) { // thin forwarder; reached via cache->notify_stray_loaded(dn) when inode->is_stray()
+ stray_manager.notify_stray_loaded(dn);
+ }
+
void handle_conf_change(const struct md_config_t *conf,
const std::set <std::string> &changed);
dn->dir->pop_and_dirty_projected_fnode(ls);
in->state_clear(CInode::STATE_ORPHAN);
- dn->state_clear(CDentry::STATE_PURGING);
+ dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED);
dn->put(CDentry::PIN_PURGING);
// drop inode
/* We consider a stray to be purging as soon as it is enqueued, to avoid
* enqueing it twice */
dn->state_set(CDentry::STATE_PURGING);
- dn->get(CDentry::PIN_PURGING);
in->state_set(CInode::STATE_PURGING);
/* We must clear this as soon as enqueuing it, to prevent the journal
} else {
dout(10) << __func__ << ": enqueuing this dentry for later purge: "
<< *dn << dendl;
+ if (!dn->state_test(CDentry::STATE_PURGINGPINNED) &&
+ ready_for_purge.size() < g_conf->mds_max_purge_files) {
+ dn->get(CDentry::PIN_PURGING);
+ dn->state_set(CDentry::STATE_PURGINGPINNED);
+ }
ready_for_purge.push_back(QueuedStray(dn, trunc, ops_required));
}
}
+class C_StraysFetched : public StrayManagerContext { // finisher installed by StrayManager::_advance() on its fetch gather
+public:
+ C_StraysFetched(StrayManager *sm_) :
+ StrayManagerContext(sm_) { }
+ void finish(int r) {
+ sm->_advance(); // r is not inspected; the ready_for_purge scan is simply run again
+ }
+};
+
void StrayManager::_advance()
{
- std::list<QueuedStray>::iterator i;
- for (i = ready_for_purge.begin();
- i != ready_for_purge.end(); ++i) {
- const QueuedStray &qs = *i;
- const bool consumed = _consume(qs.dn, qs.trunc, qs.ops_required);
+ std::map<CDir*, std::set<dentry_key_t> > to_fetch; // per directory: keys of queued strays whose dentry was not found in cache
+
+ for (auto p = ready_for_purge.begin();
+ p != ready_for_purge.end();) {
+ const QueuedStray &qs = *p;
+ auto q = p++; // advance p first so that erase(q) below leaves p usable
+ CDentry *dn = qs.dir->lookup_exact_snap(qs.name, CEPH_NOSNAP);
+ if (!dn) {
+ assert(trimmed_strays.count(qs.name) > 0); // a queued name with no dentry must have been recorded by notify_stray_trimmed()
+ if (fetching_strays.size() >= g_conf->mds_max_purge_files) {
+ break; // fetch limit reached; qs stays in ready_for_purge
+ }
+
+ dout(10) << __func__ << ": fetching stray dentry " << qs.name << dendl;
+
+ auto it = fetching_strays.insert(qs); // stores a copy of qs
+ assert(it.second); // the name must not already be in fetching_strays
+ to_fetch[qs.dir].insert(dentry_key_t(CEPH_NOSNAP, (it.first)->name.c_str())); // key is built from the name of the copy held in fetching_strays
+ ready_for_purge.erase(q); // qs must not be used after this point
+ continue;
+ }
+
+ const bool consumed = _consume(dn, qs.trunc, qs.ops_required);
if (!consumed) {
break;
}
+ ready_for_purge.erase(q); // consumed entries are removed one at a time
}
- // Erase all the ones that returned true from _consume
- ready_for_purge.erase(ready_for_purge.begin(), i);
+ MDSGatherBuilder gather(g_ceph_context);
+ for (auto p = to_fetch.begin(); p != to_fetch.end(); ++p)
+ p->first->fetch(gather.new_sub(), p->second); // one by-key CDir::fetch() per directory
+
+ if (gather.has_subs()) {
+ gather.set_finisher(new C_StraysFetched(this)); // C_StraysFetched::finish() calls _advance() again
+ gather.activate();
+ }
}
/*
return false;
}
+ if (!dn->state_test(CDentry::STATE_PURGINGPINNED)) {
+ dn->get(CDentry::PIN_PURGING);
+ dn->state_set(CDentry::STATE_PURGINGPINNED);
+ }
+
// Resources are available, acquire them and execute the purge
files_purging += 1;
dout(10) << __func__ << ": allocating allowance "
<< ops_required << " to " << ops_in_flight << " in flight" << dendl;
ops_in_flight += ops_required;
logger->set(l_mdc_num_purge_ops, ops_in_flight);
+
+ _process(dn, trunc, ops_required);
+ return true;
+}
+
+class C_OpenSnapParents : public StrayManagerContext { // carries the _process() arguments so the call can be repeated later
+ CDentry *dn;
+ bool trunc;
+ uint32_t ops_required;
+ public:
+ C_OpenSnapParents(StrayManager *sm_, CDentry *dn_, bool t, uint32_t ops) :
+ StrayManagerContext(sm_), dn(dn_), trunc(t), ops_required(ops) { }
+ void finish(int r) {
+ sm->_process(dn, trunc, ops_required); // r is not inspected; _process() is re-run with the saved arguments
+ }
+};
+
+void StrayManager::_process(CDentry *dn, bool trunc, uint32_t ops_required) // final step after _consume(): dispatches to truncate() or purge()
+{
+ CInode *in = dn->get_linkage()->get_inode();
+ if (in->snaprealm &&
+ !in->snaprealm->have_past_parents_open() &&
+ !in->snaprealm->open_parents(new C_OpenSnapParents(this, dn, trunc,
+ ops_required))) {
+ // this can happen if the dentry had been trimmed from cache. Return without purging; C_OpenSnapParents::finish() calls _process() again (presumably once the parents are open - confirm against open_parents()).
+ return;
+ }
+
if (trunc) {
truncate(dn, ops_required);
} else {
purge(dn, ops_required);
}
- return true;
}
uint32_t StrayManager::_calculate_ops_required(CInode *in, bool trunc)
i != ready_for_purge.end(); ++i)
{
const QueuedStray &qs = *i;
- CDentry *dn = qs.dn;
+ CDentry *dn = qs.dir->lookup_exact_snap(qs.name, CEPH_NOSNAP);
+ if (!dn)
+ continue;
+
dout(10) << __func__ << ": aborting enqueued purge " << *dn << dendl;
CDentry::linkage_t *dnl = dn->get_projected_linkage();
assert(in);
// Clear flags set in enqueue
- dn->state_clear(CDentry::STATE_PURGING);
- dn->put(CDentry::PIN_PURGING);
+ if (dn->state_test(CDentry::STATE_PURGINGPINNED))
+ dn->put(CDentry::PIN_PURGING);
+ dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED);
in->state_clear(CInode::STATE_PURGING);
}
ready_for_purge.clear();
+
+ trimmed_strays.clear();
+ fetching_strays.clear();
}
void StrayManager::truncate(CDentry *dn, uint32_t op_allowance)
dout(10) << __func__ << ": " << *dn << " " << *in << dendl;
- dn->state_clear(CDentry::STATE_PURGING);
+ dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED);
dn->put(CDentry::PIN_PURGING);
in->pop_and_dirty_projected_inode(ls);
}
}
+void StrayManager::notify_stray_loaded(CDentry *dn) // called for a dentry loaded into a stray directory; re-applies stray/orphan/purging flags
+{
+ dout(10) << __func__ << ": " << *dn << dendl;
+
+ dn->state_set(CDentry::STATE_STRAY);
+ CInode *in = dn->get_linkage()->get_inode();
+ if (in->inode.nlink == 0)
+ in->state_set(CInode::STATE_ORPHAN);
+
+ auto p = trimmed_strays.find(dn->name); // was this name recorded by notify_stray_trimmed()?
+ if (p != trimmed_strays.end()) {
+ dn->state_set(CDentry::STATE_PURGING); // put back the purging flags on the reloaded dentry and inode
+ in->state_set(CInode::STATE_PURGING);
+ trimmed_strays.erase(p);
+
+ QueuedStray key(dn, false, 0); // lookup key only: QueuedStray::operator< compares name, so trunc/ops values here are irrelevant
+ auto q = fetching_strays.find(key);
+ if (q != fetching_strays.end()) {
+ ready_for_purge.push_front(*q); // re-queue at the front using the stored entry (its own trunc/ops_required)
+ fetching_strays.erase(q);
+ }
+ }
+}
+
+void StrayManager::notify_stray_trimmed(CDentry *dn) // records that a stray dentry has left the cache
+{
+ dout(10) << __func__ << ": " << *dn << dendl;
+
+ trimmed_strays.insert(dn->name); // kept by name only; consulted by _advance() and notify_stray_loaded()
+}
protected:
class QueuedStray {
public:
- CDentry *dn;
+ CDir *dir; // dn->get_dir() at enqueue time
+ std::string name; // dn->name; the dentry is looked up again by (dir, name) instead of holding a CDentry pointer
bool trunc;
uint32_t ops_required;
- QueuedStray(CDentry *dn_, bool t, uint32_t ops)
- : dn(dn_), trunc(t), ops_required(ops) {}
+ QueuedStray(CDentry *dn, bool t, uint32_t ops)
+ : dir(dn->get_dir()), name(dn->name),
+ trunc(t), ops_required(ops) {}
+ bool operator<(const QueuedStray& o) const {
+ return (name < o.name); // ordering (and std::set uniqueness) uses name only; dir, trunc and ops_required are not compared
+ }
};
// Has passed through eval_stray and still has refs
// No more refs, can purge these
std::list<QueuedStray> ready_for_purge;
+ // strays that have been trimmed from cache
+ std::set<std::string> trimmed_strays;
+ // strays that are being fetched
+ std::set<QueuedStray> fetching_strays;
+
// Global references for doing I/O
MDSRank *mds;
PerfCounters *logger;
friend class StrayManagerIOContext;
friend class StrayManagerContext;
+ friend class C_StraysFetched;
+ friend class C_OpenSnapParents;
friend class C_PurgeStrayLogged;
friend class C_TruncateStrayLogged;
friend class C_IO_PurgeStrayPurged;
*/
bool _consume(CDentry *dn, bool trunc, uint32_t ops_required);
+ void _process(CDentry *dn, bool trunc, uint32_t ops_required);
+
+
/**
* Return the maximum number of concurrent RADOS ops that
* may be executed while purging this inode.
* Call this whenever one of those operands changes.
*/
void update_op_limit();
+
+ /*
+ * track stray dentries that have been trimmed from cache
+ */
+ void notify_stray_trimmed(CDentry *dn);
+ /*
+ * restore stray dentry's previous state
+ */
+ void notify_stray_loaded(CDentry *dn);
};
#endif // STRAY_MANAGER_H