From 2bd7ceeff53ad0f49d5825b6e7f378683616dffb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 11 Sep 2014 09:36:44 +0800 Subject: [PATCH] client: trim unused inodes before reconnecting to recovering MDS So the recovering MDS does not need to fetch these unused inodes during cache rejoin. This may reduce MDS recovery time. Signed-off-by: Yan, Zheng --- src/client/Client.cc | 78 ++++++++++++++++++++++++++++++++++---------- src/client/Client.h | 2 ++ 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index b7ae2b149773a..4b7e9bc2fb329 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -505,6 +505,36 @@ void Client::trim_cache() } } +void Client::trim_cache_for_reconnect(MetaSession *s) +{ + int mds = s->mds_num; + ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds << dendl; + + int trimmed = 0; + list skipped; + while (lru.lru_get_size() > 0) { + Dentry *dn = static_cast(lru.lru_expire()); + if (!dn) + break; + + if ((dn->inode && dn->inode->caps.count(mds)) || + dn->dir->parent_inode->caps.count(mds)) { + trim_dentry(dn); + trimmed++; + } else + skipped.push_back(dn); + } + + for(list::iterator p = skipped.begin(); p != skipped.end(); ++p) + lru.lru_insert_mid(*p); + + ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds + << " trimmed " << trimmed << " dentries" << dendl; + + if (s->caps.size() > 0) + _invalidate_kernel_dcache(); +} + void Client::trim_dentry(Dentry *dn) { ldout(cct, 15) << "trim_dentry unlinking dn " << dn->name @@ -2051,8 +2081,13 @@ void Client::handle_mds_map(MMDSMap* m) if (!mdsmap->is_up(p->first) || mdsmap->get_inst(p->first) != p->second->inst) { p->second->con->mark_down(); - if (mdsmap->is_up(p->first)) + if (mdsmap->is_up(p->first)) { p->second->inst = mdsmap->get_inst(p->first); + // When new MDS starts to take over, notify kernel to trim unused entries + // in its dcache/icache.
Hopefully, the kernel will release some unused + // inodes before the new MDS enters reconnect state. + trim_cache_for_reconnect(p->second); + } } else if (oldstate == newstate) continue; // no change @@ -2090,6 +2125,14 @@ void Client::send_reconnect(MetaSession *session) int mds = session->mds_num; ldout(cct, 10) << "send_reconnect to mds." << mds << dendl; + // trim unused caps to reduce MDS's cache rejoin time + trim_cache_for_reconnect(session); + + if (session->release) { + session->release->put(); + session->release = NULL; + } + MClientReconnect *m = new MClientReconnect; // i have an open session. @@ -3195,6 +3238,20 @@ void Client::remove_session_caps(MetaSession *s) sync_cond.Signal(); } +void Client::_invalidate_kernel_dcache() +{ + // notify kernel to invalidate top level directory entries. As a side effect, + // unused inodes underneath these entries get pruned. + if (dentry_invalidate_cb && root->dir) { + for (ceph::unordered_map::iterator p = root->dir->dentries.begin(); + p != root->dir->dentries.end(); + ++p) { + if (p->second->inode) + _schedule_invalidate_dentry_callback(p->second, false); + } + } +} + void Client::trim_caps(MetaSession *s, int max) { int mds = s->mds_num; @@ -3253,23 +3310,8 @@ void Client::trim_caps(MetaSession *s, int max) } s->s_cap_iterator = NULL; - - // notify kernel to invalidate top level directory entries. As a side effect, - // unused inodes underneath these entries get pruned. - if (dentry_invalidate_cb && s->caps.size() > max) { - assert(root); - if (root->dir) { - for (ceph::unordered_map::iterator p = root->dir->dentries.begin(); - p != root->dir->dentries.end(); - ++p) { - if (p->second->inode) - _schedule_invalidate_dentry_callback(p->second, false); - } - } else { - // This seems unnatural, as long as we are holding caps they must be on - // some descendent of the root, so why don't we have the root open? 
- } - } + if (s->caps.size() > max) + _invalidate_kernel_dcache(); } void Client::mark_caps_dirty(Inode *in, int caps) diff --git a/src/client/Client.h b/src/client/Client.h index 1ac9754e76750..26c5c4228bd54 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -401,8 +401,10 @@ protected: // trim cache. void trim_cache(); + void trim_cache_for_reconnect(MetaSession *s); void trim_dentry(Dentry *dn); void trim_caps(MetaSession *s, int max); + void _invalidate_kernel_dcache(); void dump_inode(Formatter *f, Inode *in, set& did, bool disconnected); void dump_cache(Formatter *f); // debug -- 2.39.5