From fec1912111b34467f54243f6e151fcf62895b3d0 Mon Sep 17 00:00:00 2001 From: Sam Lang Date: Fri, 28 Sep 2012 19:26:16 -0700 Subject: [PATCH] client: Fix #2215 with cache inval in thread The client currently deadlocks with kernel buffer cache invalidation enabled, due to the client lock calling the invalidate callback, which in turn sends up calls back to the userspace process which try to lock the same client lock. The fix is to invoke the invalidate callback in a separate thread, allowing _release, _flushed, etc. to complete, unlocking the client lock so that the invalidate callback avoids deadlock when the up call is made. We construct a separate work queue (Finisher) that allows scheduling the invalidate callbacks in a separate thread. The thread only starts when the invalidate callback is set. If no callback is set, the cache capability reference is decremented inline as before. Some callers of invalidate_inode_cache (flush and update_inode_file_bits) don't expect the cache capability to be decremented. Pass a keep_caps flag to only decrement the capability ref in the _release case. Also, we need to make sure the mds is aware that the client has dropped the cache capability, so we add a call to check_caps in put_cap_ref for the CEPH_CAP_FILE_CACHE capability. Signed-off-by: Sam Lang --- src/client/Client.cc | 115 +++++++++++++++++++++++++++++-------------- src/client/Client.h | 10 +++- 2 files changed, 87 insertions(+), 38 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index a1f06f8667d04..3c10f72be5faf 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -107,14 +107,13 @@ dir_result_t::dir_result_t(Inode *in) inode->get(); } - - // cons/des Client::Client(Messenger *m, MonClient *mc) : Dispatcher(m->cct), cct(m->cct), logger(NULL), timer(m->cct, client_lock), ino_invalidate_cb(NULL), ino_invalidate_cb_handle(NULL), + async_ino_invalidator(m->cct), tick_event(NULL), monclient(mc), messenger(m), whoami(m->get_myname().num()), initialized(false), mounted(false), unmounting(false), @@ -315,6 +314,11 @@ void Client::shutdown() { ldout(cct, 1) << "shutdown" << dendl; + if (ino_invalidate_cb) { + async_ino_invalidator.wait_for_empty(); + async_ino_invalidator.stop(); + } + objectcacher->stop(); // outside of client_lock! this does a join. client_lock.Lock(); @@ -407,7 +411,7 @@ void Client::update_inode_file_bits(Inode *in, // truncate cached file data if (prior_size > size) { - _invalidate_inode_cache(in, truncate_size, prior_size - truncate_size); + _invalidate_inode_cache(in, truncate_size, prior_size - truncate_size, true); } } } @@ -1914,28 +1918,32 @@ void Client::get_cap_ref(Inode *in, int cap) void Client::put_cap_ref(Inode *in, int cap) { - if (in->put_cap_ref(cap) && in->snapid == CEPH_NOSNAP) { - if ((cap & CEPH_CAP_FILE_WR) && - in->cap_snaps.size() && - in->cap_snaps.rbegin()->second->writing) { - ldout(cct, 10) << "put_cap_ref finishing pending cap_snap on " << *in << dendl; - in->cap_snaps.rbegin()->second->writing = 0; - finish_cap_snap(in, in->cap_snaps.rbegin()->second, in->caps_used()); - signal_cond_list(in->waitfor_caps); // wake up blocked sync writers - } - if (cap & CEPH_CAP_FILE_BUFFER) { - bool last = (in->cap_refs[CEPH_CAP_FILE_BUFFER] == 0); - for (map::iterator p = in->cap_snaps.begin(); - p != in->cap_snaps.end(); - p++) - p->second->dirty_data = 0; - check_caps(in, false); - signal_cond_list(in->waitfor_commit); - if (last) { + bool last = in->put_cap_ref(cap); + if (last) { + if (in->snapid == CEPH_NOSNAP) { + if ((cap & CEPH_CAP_FILE_WR) && + in->cap_snaps.size() && + in->cap_snaps.rbegin()->second->writing) { + ldout(cct, 10) << "put_cap_ref finishing pending cap_snap on " << *in << dendl; + in->cap_snaps.rbegin()->second->writing = 0; + finish_cap_snap(in, in->cap_snaps.rbegin()->second, in->caps_used()); + signal_cond_list(in->waitfor_caps); // wake up blocked sync writers + } + if (cap & CEPH_CAP_FILE_BUFFER) { + for (map::iterator p = in->cap_snaps.begin(); + p != in->cap_snaps.end(); + p++) + p->second->dirty_data = 0; + check_caps(in, false); + signal_cond_list(in->waitfor_commit); ldout(cct, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in << dendl; put_inode(in); } } + if (cap & CEPH_CAP_FILE_CACHE) { + check_caps(in, false); + ldout(cct, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in << dendl; + } } } @@ -2328,41 +2336,75 @@ void Client::wake_inode_waiters(int mds_num) } +// flush dirty data (from objectcache) + +class C_Client_CacheInvalidate : public Context { +private: + Client *client; + Inode *inode; + int64_t offset, length; + bool keep_caps; +public: + C_Client_CacheInvalidate(Client *c, Inode *in, int64_t off, int64_t len, bool keep) : + client(c), inode(in), offset(off), length(len), keep_caps(keep) { + inode->get(); + } + void finish(int r) { + client->_async_invalidate(inode, offset, length, keep_caps); + } +}; + +void Client::_async_invalidate(Inode *in, int64_t off, int64_t len, bool keep_caps) { + + ino_invalidate_cb(ino_invalidate_cb_handle, in->vino(), off, len); -void Client::_invalidate_inode_cache(Inode *in) + client_lock.Lock(); + if (!keep_caps) { + put_cap_ref(in, CEPH_CAP_FILE_CACHE); + } + put_inode(in); + client_lock.Unlock(); +} + +void Client::_schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps) { + + if (ino_invalidate_cb) + // we queue the invalidate, which calls the callback and decrements the ref + async_ino_invalidator.queue(new C_Client_CacheInvalidate(this, in, off, len, keep_caps)); + else if (!keep_caps) + // if not set, we just decrement the cap ref here + in->put_cap_ref(CEPH_CAP_FILE_CACHE); +} + +void Client::_invalidate_inode_cache(Inode *in, bool keep_caps) { ldout(cct, 10) << "_invalidate_inode_cache " << *in << dendl; + // invalidate our userspace inode cache if (cct->_conf->client_oc) objectcacher->release_set(&in->oset); - - if (ino_invalidate_cb) - ino_invalidate_cb(ino_invalidate_cb_handle, in->vino(), 0, 0); + + _schedule_invalidate_callback(in, 0, 0, keep_caps); } -void Client::_invalidate_inode_cache(Inode *in, int64_t off, int64_t len) +void Client::_invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps) { ldout(cct, 10) << "_invalidate_inode_cache " << *in << " " << off << "~" << len << dendl; + // invalidate our userspace inode cache if (cct->_conf->client_oc) { vector ls; Filer::file_to_extents(cct, in->ino, &in->layout, off, len, ls); objectcacher->discard_set(&in->oset, ls); } - - if (ino_invalidate_cb) - ino_invalidate_cb(ino_invalidate_cb_handle, in->vino(), off, len); -} - -// flush dirty data (from objectcache) + _schedule_invalidate_callback(in, off, len, keep_caps); +} void Client::_release(Inode *in) { if (in->cap_refs[CEPH_CAP_FILE_CACHE]) { - - _invalidate_inode_cache(in); - in->put_cap_ref(CEPH_CAP_FILE_CACHE); + _invalidate_inode_cache(in, false); } } @@ -2411,7 +2453,7 @@ void Client::_flushed(Inode *in) // release clean pages too, if we dont hold RDCACHE reference if (in->cap_refs[CEPH_CAP_FILE_CACHE] == 0) { - _invalidate_inode_cache(in); + _invalidate_inode_cache(in, true); } put_cap_ref(in, CEPH_CAP_FILE_BUFFER); @@ -5631,6 +5673,7 @@ void Client::ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handl Mutex::Locker l(client_lock); ino_invalidate_cb = cb; ino_invalidate_cb_handle = handle; + async_ino_invalidator.start(); } int Client::_sync_fs() diff --git a/src/client/Client.h b/src/client/Client.h index ffc285555d89c..e937b55cc8d2b 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -42,6 +42,7 @@ using namespace __gnu_cxx; #include "common/Mutex.h" #include "common/Timer.h" +#include "common/Finisher.h" #include "common/compiler_extensions.h" @@ -198,6 +199,8 @@ class Client : public Dispatcher { client_ino_callback_t ino_invalidate_cb; void *ino_invalidate_cb_handle; + Finisher async_ino_invalidator; + Context *tick_event; utime_t last_cap_renew; void renew_caps(); @@ -318,6 +321,7 @@ protected: void close_dir(Dir *dir); friend class C_Client_PutInode; // calls put_inode() + friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb //int get_cache_size() { return lru.lru_get_size(); } //void set_cache_size(int m) { lru.lru_set_max(m); } @@ -420,8 +424,10 @@ protected: void finish_cap_snap(Inode *in, CapSnap *capsnap, int used); void _flushed_cap_snap(Inode *in, snapid_t seq); - void _invalidate_inode_cache(Inode *in); - void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len); + void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps); + void _invalidate_inode_cache(Inode *in, bool keep_caps); + void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps); + void _async_invalidate(Inode *in, int64_t off, int64_t len, bool keep_caps); void _release(Inode *in); bool _flush(Inode *in); void _flushed(Inode *in); -- 2.39.5