From f5e6b6ad19cbd60dea125818ee3490f7ba07567b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 23 Aug 2017 19:37:54 +0800 Subject: [PATCH] mds: force client to do snapflush Snap inodes that are waiting for flush are auth pinned, they affect subtree/dirfrag frozen. Force client to do snapflush after waiting for a period of time. Signed-off-by: "Yan, Zheng" --- src/mds/Locker.cc | 73 +++++++++++++++++++++++++++++++++++++++++++--- src/mds/Locker.h | 9 ++++-- src/mds/MDCache.cc | 12 +++++--- 3 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 7b214eadb6f90..ad76f34132b18 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -82,6 +82,10 @@ public: } }; +Locker::Locker(MDSRank *m, MDCache *c) : + mds(m), mdcache(c), need_snapflush_inodes(member_offset(CInode, item_caps)) {} + + /* This function DOES put the passed message before returning */ void Locker::dispatch(Message *m) { @@ -1875,8 +1879,10 @@ void Locker::file_update_finish(CInode *in, MutationRef& mut, bool share_max, bo } } if (gather) { - if (in->client_snap_caps.empty()) + if (in->client_snap_caps.empty()) { in->item_open_file.remove_myself(); + in->item_caps.remove_myself(); + } eval_cap_gather(in, &need_issue); } } else { @@ -2564,7 +2570,47 @@ void Locker::adjust_cap_wanted(Capability *cap, int wanted, int issue_seq) } } +void Locker::snapflush_nudge(CInode *in) +{ + assert(in->last != CEPH_NOSNAP); + if (in->client_snap_caps.empty()) + return; + + CInode *head = mdcache->get_inode(in->ino()); + assert(head); + assert(head->is_auth()); + if (head->client_need_snapflush.empty()) + return; + + SimpleLock *hlock = head->get_lock(CEPH_LOCK_IFILE); + if (hlock->get_state() == LOCK_SYNC || !hlock->is_stable()) { + hlock = NULL; + for (int i = 0; i < num_cinode_locks; i++) { + SimpleLock *lock = head->get_lock(cinode_lock_info[i].lock); + if (lock->get_state() != LOCK_SYNC && lock->is_stable()) { + hlock = lock; + break; + } + } + } + if (hlock) { 
+ _rdlock_kick(hlock, true); + } else { + // also, requeue, in case of unstable lock + need_snapflush_inodes.push_back(&in->item_caps); + } +} +void Locker::mark_need_snapflush_inode(CInode *in) +{ + assert(in->last != CEPH_NOSNAP); + if (!in->item_caps.is_on_list()) { + need_snapflush_inodes.push_back(&in->item_caps); + utime_t now = ceph_clock_now(); + in->last_dirstat_prop = now; + dout(10) << "mark_need_snapflush_inode " << *in << " - added at " << now << dendl; + } +} void Locker::_do_null_snapflush(CInode *head_in, client_t client, snapid_t last) { @@ -3575,9 +3621,28 @@ void Locker::caps_tick() { utime_t now = ceph_clock_now(); + if (!need_snapflush_inodes.empty()) { + // snap inodes that need flush are auth pinned, they affect + // subtree/dirfrag freeze. + utime_t cutoff = now; + cutoff -= g_conf->mds_freeze_tree_timeout / 3; + + CInode *last = need_snapflush_inodes.back(); + while (!need_snapflush_inodes.empty()) { + CInode *in = need_snapflush_inodes.front(); + if (in->last_dirstat_prop >= cutoff) + break; + in->item_caps.remove_myself(); + snapflush_nudge(in); + if (in == last) + break; + } + } + dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl; - int i = 0; + now = ceph_clock_now(); + int n = 0; for (xlist::iterator p = revoking_caps.begin(); !p.end(); ++p) { Capability *cap = *p; @@ -3587,8 +3652,8 @@ void Locker::caps_tick() dout(20) << __func__ << " age below timeout " << g_conf->mds_revoke_cap_timeout << dendl; break; } else { - ++i; - if (i > MAX_WARN_CAPS) { + ++n; + if (n > MAX_WARN_CAPS) { dout(1) << __func__ << " more than " << MAX_WARN_CAPS << " caps are late" << "revoking, ignoring subsequent caps" << dendl; break; diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 4f47344dbea0c..b878451712864 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -49,7 +49,7 @@ private: MDCache *mdcache; public: - Locker(MDSRank *m, MDCache *c) : mds(m), mdcache(c) {} + Locker(MDSRank *m, MDCache *c); SimpleLock 
*get_lock(int lock_type, MDSCacheObjectInfo &info); @@ -187,7 +187,7 @@ public: void get_late_revoking_clients(std::list *result) const; bool any_late_revoking_caps(xlist const &revoking) const; - protected: +protected: bool _need_flush_mdlog(CInode *in, int wanted_caps); void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq); void handle_client_caps(class MClientCaps *m); @@ -205,6 +205,11 @@ public: // Maintain a per-client list to find clients responsible for late ones quickly std::map > revoking_caps_by_client; + elist need_snapflush_inodes; +public: + void snapflush_nudge(CInode *in); + void mark_need_snapflush_inode(CInode *in); + // local public: void local_wrlock_grab(LocalLock *lock, MutationRef& mut); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index ff0607f8ff479..e3a02985b923f 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -309,10 +309,12 @@ void MDCache::remove_inode(CInode *o) export_pin_queue.erase(o); // remove from inode map - if (o->last == CEPH_NOSNAP) + if (o->last == CEPH_NOSNAP) { inode_map.erase(o->ino()); - else + } else { + o->item_caps.remove_myself(); snap_inode_map.erase(o->vino()); + } if (o->ino() < MDS_INO_SYSTEM_BASE) { if (o == root) root = 0; @@ -1670,8 +1672,10 @@ void MDCache::journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, oldin->inode.version = olddn->pre_dirty(); dout(10) << " olddn " << *olddn << dendl; bool need_snapflush = !oldin->client_snap_caps.empty(); - if (need_snapflush) + if (need_snapflush) { mut->ls->open_files.push_back(&oldin->item_open_file); + mds->locker->mark_need_snapflush_inode(oldin); + } metablob->add_primary_dentry(olddn, 0, true, false, false, need_snapflush); mut->add_cow_dentry(olddn); } else { @@ -5484,7 +5488,7 @@ void MDCache::rebuild_need_snapflush(CInode *head_in, SnapRealm *realm, lock->set_state(LOCK_SNAP_SYNC); lock->get_wrlock(true); } - + mds->locker->mark_need_snapflush_inode(in); } } -- 2.39.5