From 65e6daa7f198b18137919e121ff578cdca401da9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 13 Jun 2018 16:53:20 +0800 Subject: [PATCH] client: introduce ceph_abort_conn() the function forcibly close mds sessions, leave client state intact on MDS. Fixes: http://tracker.ceph.com/issues/24465 Signed-off-by: "Yan, Zheng" --- src/client/Client.cc | 138 +++++++++++++++++++-------------- src/client/Client.h | 12 +-- src/client/Delegation.cc | 2 +- src/include/cephfs/libcephfs.h | 8 ++ src/libcephfs.cc | 13 ++++ src/pybind/cephfs/cephfs.pyx | 12 +++ 6 files changed, 122 insertions(+), 63 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 213b78465d4..e2d92c23712 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -248,8 +248,6 @@ Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_) objecter(objecter_), whoami(mc->get_global_id()), cap_epoch_barrier(0), last_tid(0), oldest_tid(0), last_flush_tid(1), - initialized(false), - mounted(false), unmounting(false), blacklisted(false), local_osd(-ENXIO), local_osd_epoch(0), unsafe_sync_write(0), client_lock("Client::client_lock"), @@ -2421,29 +2419,8 @@ void Client::handle_osd_map(MOSDMap *m) }); lderr(cct) << "I was blacklisted at osd epoch " << epoch << dendl; blacklisted = true; - for (std::map::iterator p = mds_requests.begin(); - p != mds_requests.end(); ) { - auto req = p->second; - ++p; - req->abort(-EBLACKLISTED); - if (req->caller_cond) { - req->kick = true; - req->caller_cond->Signal(); - } - } - // Progress aborts on any requests that were on this waitlist. Any - // requests that were on a waiting_for_open session waitlist - // will get kicked during close session below. - signal_cond_list(waiting_for_mdsmap); - - // Force-close all sessions: assume this is not abandoning any state - // on the MDS side because the MDS will have seen the blacklist too. - while(!mds_sessions.empty()) { - auto i = mds_sessions.begin(); - auto &session = i->second; - _closed_mds_session(&session); - } + _abort_mds_sessions(-EBLACKLISTED); // Since we know all our OSD ops will fail, cancel them all preemtively, // so that on an unhealthy cluster we can umount promptly even if e.g. @@ -5784,17 +5761,58 @@ void Client::flush_mdlog(MetaSession *session) } -void Client::_unmount() +void Client::_abort_mds_sessions(int err) +{ + for (auto p = mds_requests.begin(); p != mds_requests.end(); ) { + auto req = p->second; + ++p; + // unsafe requests will be removed during close session below. + if (req->got_unsafe) + continue; + + req->abort(err); + if (req->caller_cond) { + req->kick = true; + req->caller_cond->Signal(); + } + } + + // Process aborts on any requests that were on this waitlist. + // Any requests that were on a waiting_for_open session waitlist + // will get kicked during close session below. + signal_cond_list(waiting_for_mdsmap); + + // Force-close all sessions + while(!mds_sessions.empty()) { + auto& session = mds_sessions.begin()->second; + _closed_mds_session(&session); + } +} + +void Client::_unmount(bool abort) { if (unmounting) return; - ldout(cct, 2) << "unmounting" << dendl; + if (abort || blacklisted) { + ldout(cct, 2) << "unmounting (" << (abort ? "abort)" : "blacklisted)") << dendl; + } else { + ldout(cct, 2) << "unmounting" << dendl; + } unmounting = true; deleg_timeout = 0; - flush_mdlog_sync(); // flush the mdlog for pending requests, if any + if (abort) { + // Abort all mds sessions + _abort_mds_sessions(-ENOTCONN); + + objecter->op_cancel_writes(-ENOTCONN); + } else { + // flush the mdlog for pending requests, if any + flush_mdlog_sync(); + } + while (!mds_requests.empty()) { ldout(cct, 10) << "waiting on " << mds_requests.size() << " requests" << dendl; mount_cond.Wait(client_lock); @@ -5830,23 +5848,6 @@ void Client::_unmount() _ll_drop_pins(); - if (blacklisted) { - ldout(cct, 0) << " skipping clean shutdown, we are blacklisted" << dendl; - - if (cct->_conf->client_oc) { - // Purge all cached data so that ObjectCacher doesn't get hung up - // trying to flush it. ObjectCacher's behaviour on EBLACKLISTED - // is to just leave things marked dirty - // (http://tracker.ceph.com/issues/9105) - for (const auto &i : inode_map) { - objectcacher->purge_set(&(i.second->oset)); - } - } - - mounted = false; - return; - } - while (unsafe_sync_write > 0) { ldout(cct, 0) << unsafe_sync_write << " unsafe_sync_writes, waiting" << dendl; mount_cond.Wait(client_lock); @@ -5854,27 +5855,40 @@ void Client::_unmount() if (cct->_conf->client_oc) { // flush/release all buffered data - ceph::unordered_map::iterator next; - for (ceph::unordered_map::iterator p = inode_map.begin(); - p != inode_map.end(); - p = next) { - next = p; - ++next; - Inode *in = p->second; + std::list anchor; + for (auto& p : inode_map) { + Inode *in = p.second; if (!in) { - ldout(cct, 0) << "null inode_map entry ino " << p->first << dendl; + ldout(cct, 0) << "null inode_map entry ino " << p.first << dendl; assert(in); } - if (!in->caps.empty()) { - InodeRef tmp_ref(in); + + // prevent inode from getting freed + anchor.emplace_back(in); + + if (abort || blacklisted) { + objectcacher->purge_set(&in->oset); + } else if (!in->caps.empty()) { _release(in); _flush(in, new C_Client_FlushComplete(this, in)); } } } - flush_caps_sync(); - wait_sync_caps(last_flush_tid); + if (abort || blacklisted) { + for (auto p = dirty_list.begin(); !p.end(); ) { + Inode *in = *p; + ++p; + if (in->dirty_caps) { + ldout(cct, 0) << " drop dirty caps on " << *in << dendl; + in->mark_caps_clean(); + put_inode(in); + } + } + } else { + flush_caps_sync(); + wait_sync_caps(last_flush_tid); + } // empty lru cache trim_cache(); @@ -5910,7 +5924,13 @@ void Client::_unmount() void Client::unmount() { Mutex::Locker lock(client_lock); - _unmount(); + _unmount(false); +} + +void Client::abort_conn() +{ + Mutex::Locker lock(client_lock); + _unmount(true); } void Client::flush_cap_releases() @@ -9895,6 +9915,10 @@ void Client::_release_filelocks(Fh *fh) if (to_release.empty()) return; + // mds has already released filelocks if session was closed. + if (in->caps.empty()) + return; + struct flock fl; memset(&fl, 0, sizeof(fl)); fl.l_whence = SEEK_SET; diff --git a/src/client/Client.h b/src/client/Client.h index cb0af08c74e..c19f4de7edf 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -381,10 +381,10 @@ protected: void handle_client_reply(MClientReply *reply); bool is_dir_operation(MetaRequest *request); - bool initialized; - bool mounted; - bool unmounting; - bool blacklisted; + bool initialized = false; + bool mounted = false; + bool unmounting = false; + bool blacklisted = false; // When an MDS has sent us a REJECT, remember that and don't // contact it again. Remember which inst rejected us, so that @@ -492,8 +492,9 @@ protected: void put_inode(Inode *in, int n=1); void close_dir(Dir *dir); + void _abort_mds_sessions(int err); // same as unmount() but for when the client_lock is already held - void _unmount(); + void _unmount(bool abort); friend class C_Client_FlushComplete; // calls put_inode() friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb @@ -952,6 +953,7 @@ public: int mount(const std::string &mount_root, const UserPerm& perms, bool require_mds=false); void unmount(); + void abort_conn(); int mds_command( const std::string &mds_spec, diff --git a/src/client/Delegation.cc b/src/client/Delegation.cc index 6c972fde05a..c628cf66df0 100644 --- a/src/client/Delegation.cc +++ b/src/client/Delegation.cc @@ -23,7 +23,7 @@ public: ": delegation return timeout for inode 0x" << std::hex << in->ino << ". Forcibly unmounting client. "<< client << std::dec << dendl; - client->_unmount(); + client->_unmount(false); } }; diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h index e23bcf24d1b..77ebb5107b5 100644 --- a/src/include/cephfs/libcephfs.h +++ b/src/include/cephfs/libcephfs.h @@ -298,6 +298,14 @@ void ceph_buffer_free(char *buf); */ int ceph_unmount(struct ceph_mount_info *cmount); +/** + * Abort mds connections + * + * @param cmount the mount handle + * @return 0 on success, negative error code on failure + */ +int ceph_abort_conn(struct ceph_mount_info *cmount); + /** * Destroy the mount handle. * diff --git a/src/libcephfs.cc b/src/libcephfs.cc index a65e763da7e..e2d4b513731 100644 --- a/src/libcephfs.cc +++ b/src/libcephfs.cc @@ -147,6 +147,14 @@ public: shutdown(); return 0; } + int abort_conn() + { + if (mounted) { + client->abort_conn(); + mounted = false; + } + return 0; + } void shutdown() { @@ -349,6 +357,11 @@ extern "C" int ceph_unmount(struct ceph_mount_info *cmount) return cmount->unmount(); } +extern "C" int ceph_abort_conn(struct ceph_mount_info *cmount) +{ + return cmount->abort_conn(); +} + extern "C" int ceph_release(struct ceph_mount_info *cmount) { if (cmount->is_mounted()) diff --git a/src/pybind/cephfs/cephfs.pyx b/src/pybind/cephfs/cephfs.pyx index 47f2b1ea8bc..049d5be664d 100644 --- a/src/pybind/cephfs/cephfs.pyx +++ b/src/pybind/cephfs/cephfs.pyx @@ -111,6 +111,7 @@ cdef extern from "cephfs/libcephfs.h" nogil: int ceph_mount(ceph_mount_info *cmount, const char *root) int ceph_unmount(ceph_mount_info *cmount) + int ceph_abort_conn(ceph_mount_info *cmount) int ceph_fstatx(ceph_mount_info *cmount, int fd, statx *stx, unsigned want, unsigned flags) int ceph_statx(ceph_mount_info *cmount, const char *path, statx *stx, unsigned want, unsigned flags) int ceph_statfs(ceph_mount_info *cmount, const char *path, statvfs *stbuf) @@ -559,6 +560,17 @@ cdef class LibCephFS(object): raise make_ex(ret, "error calling ceph_unmount") self.state = "initialized" + def abort_conn(self): + """ + Abort mds connections. + """ + self.require_state("mounted") + with nogil: + ret = ceph_abort_conn(self.cluster) + if ret != 0: + raise make_ex(ret, "error calling ceph_abort_conn") + self.state = "initialized" + def statfs(self, path): """ Perform a statfs on the ceph file system. This call fills in file system wide statistics -- 2.39.5