From: John Spray Date: Thu, 4 Aug 2016 11:25:39 +0000 (+0100) Subject: mds: trim null dentries proactively X-Git-Tag: v11.0.1~276^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=86f6522ded2ec2df943832c33aecd4460aead7df;p=ceph.git mds: trim null dentries proactively Instead of leaving null dentries (e.g. left behind from unlinks) in the cache until they fall out of the LRU, actively push them to the bottom of the LRU and then consume all nulls at the bottom in trim() even if the cache is not oversized yet. This fixes the case where standby replay daemons would otherwise accumulate a cache full of null dentries resulting from unlinks, and it makes the behaviour of active daemons more deterministic. Fixes: http://tracker.ceph.com/issues/16919 Signed-off-by: John Spray --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 5953ed66963e..72eb7a95b76e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -6356,10 +6356,22 @@ bool MDCache::trim(int max, int count) bool is_standby_replay = mds->is_standby_replay(); int unexpirable = 0; list<CDentry*> unexpirables; - // trim dentries from the LRU - while (lru.lru_get_size() + unexpirable > (unsigned)max) { + + // trim dentries from the LRU: only enough to satisfy `max`, + // unless we see null dentries at the bottom of the LRU, + // in which case trim all those. 
+ bool trimming_nulls = true; + while (trimming_nulls || lru.lru_get_size() + unexpirable > (unsigned)max) { CDentry *dn = static_cast<CDentry*>(lru.lru_expire()); - if (!dn) break; + if (!dn) { + break; + } + if (!dn->get_linkage()->is_null()) { + trimming_nulls = false; + if (lru.lru_get_size() + unexpirable <= (unsigned)max) { + break; + } + } if ((is_standby_replay && dn->get_linkage()->inode && dn->get_linkage()->inode->item_open_file.is_on_list()) || trim_dentry(dn, expiremap)) { diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index bc018fabcc6e..ccb02a02a15f 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -850,6 +850,7 @@ void MDLog::replay(MDSInternalContextBase *c) // empty? if (journaler->get_read_pos() == journaler->get_write_pos()) { dout(10) << "replay - journal empty, done." << dendl; + mds->mdcache->trim(-1); if (c) { c->complete(0); } diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 3954b612b081..70a843be1df2 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1276,7 +1276,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast); dn->set_version(p->dnv); if (p->is_dirty()) dn->_mark_dirty(logseg); - dout(10) << "EMetaBlob.replay added " << *dn << dendl; + dout(10) << "EMetaBlob.replay added (full) " << *dn << dendl; } else { dn->set_version(p->dnv); if (p->is_dirty()) dn->_mark_dirty(logseg); @@ -1302,6 +1302,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) mds->clog->warn(ss); } dir->unlink_inode(dn); + mds->mdcache->touch_dentry_bottom(dn); } if (unlinked.count(in)) linked.insert(in); @@ -1313,7 +1314,9 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (dn->get_linkage()->get_inode() != in && in->get_parent_dn()) { dout(10) << "EMetaBlob.replay unlinking " << *in << dendl; unlinked[in] = in->get_parent_dir(); + CDentry *unlinked_dn = in->get_parent_dn(); 
in->get_parent_dir()->unlink_inode(in->get_parent_dn()); + mds->mdcache->touch_dentry_bottom(unlinked_dn); } if (dn->get_linkage()->get_inode() != in) { if (!dn->get_linkage()->is_null()) { // note: might be remote. as with stray reintegration. @@ -1326,6 +1329,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) mds->clog->warn(ss); } dir->unlink_inode(dn); + mds->mdcache->touch_dentry_bottom(dn); } if (unlinked.count(in)) linked.insert(in); @@ -1371,6 +1375,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dout(0) << ss.str() << dendl; } dir->unlink_inode(dn); + mds->mdcache->touch_dentry_bottom(dn); } dir->link_remote_inode(dn, p->ino, p->d_type); dn->set_version(p->dnv); @@ -1392,7 +1397,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast); dn->set_version(p->dnv); if (p->dirty) dn->_mark_dirty(logseg); - dout(10) << "EMetaBlob.replay added " << *dn << dendl; + dout(10) << "EMetaBlob.replay added (nullbit) " << *dn << dendl; } else { dn->first = p->dnfirst; if (!dn->get_linkage()->is_null()) { @@ -1405,6 +1410,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (dn->get_linkage()->is_primary()) unlinked[in] = dir; dir->unlink_inode(dn); + mds->mdcache->touch_dentry_bottom(dn); } } dn->set_version(p->dnv); @@ -1415,6 +1421,10 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) olddir = dir; if (lump.is_importing()) dn->state_set(CDentry::STATE_AUTH); + + // Make null dentries the first things we trim + dout(10) << "EMetaBlob.replay pushing to bottom of lru " << *dn << dendl; + mds->mdcache->touch_dentry_bottom(dn); } } @@ -1622,7 +1632,13 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) CInode *in = mds->mdcache->get_inode(*p); if (in) { dout(10) << "EMetaBlob.replay destroyed " << *p << ", dropping " << 
*in << dendl; + CDentry *parent = in->get_parent_dn(); mds->mdcache->remove_inode(in); + if (parent) { + dout(10) << "EMetaBlob.replay unlinked from dentry " << *parent << dendl; + assert(parent->get_linkage()->is_null()); + mds->mdcache->touch_dentry_bottom(parent); + } } else { dout(10) << "EMetaBlob.replay destroyed " << *p << ", not in cache" << dendl; }