From: Yan, Zheng Date: Thu, 11 Sep 2014 01:36:44 +0000 (+0800) Subject: client: trim unused inodes before reconnecting to recovering MDS X-Git-Tag: v0.88~142^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2bd7ceeff53ad0f49d5825b6e7f378683616dffb;p=ceph.git client: trim unused inodes before reconnecting to recovering MDS So the recovering MDS does not need to fetch these unused inodes during cache rejoin. This may reduce MDS recovery time. Signed-off-by: Yan, Zheng --- diff --git a/src/client/Client.cc b/src/client/Client.cc index b7ae2b149773..4b7e9bc2fb32 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -505,6 +505,36 @@ void Client::trim_cache() } } +void Client::trim_cache_for_reconnect(MetaSession *s) +{ + int mds = s->mds_num; + ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds << dendl; + + int trimmed = 0; + list skipped; + while (lru.lru_get_size() > 0) { + Dentry *dn = static_cast(lru.lru_expire()); + if (!dn) + break; + + if ((dn->inode && dn->inode->caps.count(mds)) || + dn->dir->parent_inode->caps.count(mds)) { + trim_dentry(dn); + trimmed++; + } else + skipped.push_back(dn); + } + + for(list::iterator p = skipped.begin(); p != skipped.end(); ++p) + lru.lru_insert_mid(*p); + + ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds + << " trimmed " << trimmed << " dentries" << dendl; + + if (s->caps.size() > 0) + _invalidate_kernel_dcache(); +} + void Client::trim_dentry(Dentry *dn) { ldout(cct, 15) << "trim_dentry unlinking dn " << dn->name @@ -2051,8 +2081,13 @@ void Client::handle_mds_map(MMDSMap* m) if (!mdsmap->is_up(p->first) || mdsmap->get_inst(p->first) != p->second->inst) { p->second->con->mark_down(); - if (mdsmap->is_up(p->first)) + if (mdsmap->is_up(p->first)) { p->second->inst = mdsmap->get_inst(p->first); + // When new MDS starts to take over, notify kernel to trim unused entries + // in its dcache/icache. 
Hopefully, the kernel will release some unused + // inodes before the new MDS enters reconnect state. + trim_cache_for_reconnect(p->second); + } } else if (oldstate == newstate) continue; // no change @@ -2090,6 +2125,14 @@ void Client::send_reconnect(MetaSession *session) int mds = session->mds_num; ldout(cct, 10) << "send_reconnect to mds." << mds << dendl; + // trim unused caps to reduce MDS's cache rejoin time + trim_cache_for_reconnect(session); + + if (session->release) { + session->release->put(); + session->release = NULL; + } + MClientReconnect *m = new MClientReconnect; // i have an open session. @@ -3195,6 +3238,20 @@ void Client::remove_session_caps(MetaSession *s) sync_cond.Signal(); } +void Client::_invalidate_kernel_dcache() +{ + // notify kernel to invalidate top level directory entries. As a side effect, + // unused inodes underneath these entries get pruned. + if (dentry_invalidate_cb && root->dir) { + for (ceph::unordered_map::iterator p = root->dir->dentries.begin(); + p != root->dir->dentries.end(); + ++p) { + if (p->second->inode) + _schedule_invalidate_dentry_callback(p->second, false); + } + } +} + void Client::trim_caps(MetaSession *s, int max) { int mds = s->mds_num; @@ -3253,23 +3310,8 @@ void Client::trim_caps(MetaSession *s, int max) } s->s_cap_iterator = NULL; - - // notify kernel to invalidate top level directory entries. As a side effect, - // unused inodes underneath these entries get pruned. - if (dentry_invalidate_cb && s->caps.size() > max) { - assert(root); - if (root->dir) { - for (ceph::unordered_map::iterator p = root->dir->dentries.begin(); - p != root->dir->dentries.end(); - ++p) { - if (p->second->inode) - _schedule_invalidate_dentry_callback(p->second, false); - } - } else { - // This seems unnatural, as long as we are holding caps they must be on - // some descendent of the root, so why don't we have the root open? 
- } - } + if (s->caps.size() > max) + _invalidate_kernel_dcache(); } void Client::mark_caps_dirty(Inode *in, int caps) diff --git a/src/client/Client.h b/src/client/Client.h index 1ac9754e7675..26c5c4228bd5 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -401,8 +401,10 @@ protected: // trim cache. void trim_cache(); + void trim_cache_for_reconnect(MetaSession *s); void trim_dentry(Dentry *dn); void trim_caps(MetaSession *s, int max); + void _invalidate_kernel_dcache(); void dump_inode(Formatter *f, Inode *in, set& did, bool disconnected); void dump_cache(Formatter *f); // debug