From 616064e1d8bbd97c4d3f591c39852fd3ec59a9c7 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 13 Jul 2018 16:10:39 +0800 Subject: [PATCH] mds: health warning for slow metadata IO Fixes: http://tracker.ceph.com/issues/24879 Signed-off-by: "Yan, Zheng" (cherry picked from commit 0f735f40315448560fde049ed3ea019a7d30d868) Conflicts: src/mds/Beacon.cc : Resolved in notify_health --- src/mds/Beacon.cc | 24 ++++++++++++++++-- src/mds/CDir.cc | 9 +++++++ src/mds/CInode.cc | 9 +++++++ src/mds/MDCache.cc | 15 +++++++++-- src/mds/MDLog.cc | 6 ++++- src/mds/MDSContext.cc | 52 +++++++++++++++++++++++++++++++++++++++ src/mds/MDSContext.h | 30 +++++++++++++++++++++- src/mds/MDSRank.cc | 3 +++ src/mds/MDSTable.cc | 6 +++++ src/mds/MDSTable.h | 3 +++ src/mds/OpenFileTable.cc | 16 ++++++++++-- src/mds/RecoveryQueue.cc | 6 ++++- src/mds/SessionMap.cc | 16 ++++++++++-- src/mds/StrayManager.cc | 4 +++ src/messages/MMDSBeacon.h | 6 ++++- 15 files changed, 193 insertions(+), 12 deletions(-) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 94cd8ea6fd222..673f016262cff 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -459,16 +459,36 @@ void Beacon::notify_health(MDSRank const *mds) // Detect MDS_HEALTH_SLOW_REQUEST condition { int slow = mds->get_mds_slow_req_count(); - dout(20) << slow << " slow request found" << dendl; if (slow) { + dout(20) << slow << " slow request found" << dendl; std::ostringstream oss; - oss << slow << " slow requests are blocked > " << g_conf->mds_op_complaint_time << " sec"; + oss << slow << " slow requests are blocked > " << g_conf->mds_op_complaint_time << " secs"; MDSHealthMetric m(MDS_HEALTH_SLOW_REQUEST, HEALTH_WARN, oss.str()); health.metrics.push_back(m); } } + { + auto complaint_time = g_conf->osd_op_complaint_time; + auto now = ceph::coarse_mono_clock::now(); + auto cutoff = now - ceph::make_timespan(complaint_time); + + std::string count; + ceph::coarse_mono_time oldest; + if (MDSIOContextBase::check_ios_in_flight(cutoff, count, oldest)) { + dout(20) << count << " slow metadata IOs found" << dendl; + + auto oldest_secs = std::chrono::duration(now - oldest).count(); + std::ostringstream oss; + oss << count << " slow metadata IOs are blocked > " << complaint_time + << " secs, oldest blocked for " << (int64_t)oldest_secs << " secs"; + + MDSHealthMetric m(MDS_HEALTH_SLOW_METADATA_IO, HEALTH_WARN, oss.str()); + health.metrics.push_back(m); + } + } + // Report a health warning if we are readonly if (mds->mdcache->is_readonly()) { MDSHealthMetric m(MDS_HEALTH_READ_ONLY, HEALTH_WARN, diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 2c76aa461b2c2..3487b10cc9884 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1601,6 +1601,9 @@ public: fin->complete(r); } } + void print(ostream& out) const override { + out << "dirfrag_fetch_more(" << dir->dirfrag() << ")"; + } }; class C_IO_Dir_OMAP_Fetched : public CDirIOContext { @@ -1628,6 +1631,9 @@ public: fin->complete(r); } } + void print(ostream& out) const override { + out << "dirfrag_fetch(" << dir->dirfrag() << ")"; + } }; void CDir::_omap_fetch(MDSInternalContextBase *c, const std::set& keys) @@ -2127,6 +2133,9 @@ public: void finish(int r) override { dir->_committed(r, version); } + void print(ostream& out) const override { + out << "dirfrag_commit(" << dir->dirfrag() << ")"; + } }; /** diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 34c246d56fc4e..fb9b010b87424 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1059,6 +1059,9 @@ struct C_IO_Inode_Stored : public CInodeIOContext { void finish(int r) override { in->_stored(r, version, fin); } + void print(ostream& out) const override { + out << "inode_store(" << in->ino() << ")"; + } }; object_t InodeStoreBase::get_object_name(inodeno_t ino, frag_t fg, const char *suffix) @@ -1151,6 +1154,9 @@ struct C_IO_Inode_Fetched : public CInodeIOContext { // Ignore 'r', because we fetch from two places, so r is usually ENOENT in->_fetched(bl, bl2, fin); } + void print(ostream& out) const override { + out << "inode_fetch(" << in->ino() << ")"; + } }; void CInode::fetch(MDSInternalContextBase *fin) @@ -1240,6 +1246,9 @@ struct C_IO_Inode_StoredBacktrace : public CInodeIOContext { void finish(int r) override { in->_stored_backtrace(r, version, fin); } + void print(ostream& out) const override { + out << "backtrace_store(" << in->ino() << ")"; + } }; void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 41d77edbdd3d1..66913b377949e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -152,7 +152,8 @@ protected: return mdcache->mds; } public: - explicit MDCacheIOContext(MDCache *mdc_) : mdcache(mdc_) {} + explicit MDCacheIOContext(MDCache *mdc_, bool track=true) : + MDSIOContextBase(track), mdcache(mdc_) {} }; class MDCacheLogContext : public virtual MDSLogContextBase { @@ -6442,11 +6443,15 @@ struct C_IO_MDC_TruncateFinish : public MDCacheIOContext { CInode *in; LogSegment *ls; C_IO_MDC_TruncateFinish(MDCache *c, CInode *i, LogSegment *l) : - MDCacheIOContext(c), in(i), ls(l) {} + MDCacheIOContext(c, false), in(i), ls(l) { + } void finish(int r) override { assert(r == 0 || r == -ENOENT); mdcache->truncate_inode_finish(in, ls); } + void print(ostream& out) const override { + out << "file_truncate(" << in->ino() << ")"; + } }; void MDCache::_truncate_inode(CInode *in, LogSegment *ls) @@ -8516,6 +8521,9 @@ class C_IO_MDC_OpenInoBacktraceFetched : public MDCacheIOContext { void finish(int r) override { mdcache->_open_ino_backtrace_fetched(ino, bl, r); } + void print(ostream& out) const override { + out << "openino_backtrace_fetch" << ino << ")"; + } }; struct C_MDC_OpenInoTraverseDir : public MDCacheContext { @@ -11440,6 +11448,9 @@ public: assert(r == 0 || r == -ENOENT); mdcache->_fragment_finish(basedirfrag, resultfrags); } + void print(ostream& out) const override { + out << "dirfrags_commit(" << basedirfrag << ")"; + } }; void MDCache::fragment_frozen(MDRequestRef& mdr, int r) diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 785a7d1b41f53..2dbc9935a96ff 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -111,7 +111,11 @@ class C_MDL_WriteError : public MDSIOContextBase { } public: - explicit C_MDL_WriteError(MDLog *m) : mdlog(m) {} + explicit C_MDL_WriteError(MDLog *m) : + MDSIOContextBase(false), mdlog(m) {} + void print(ostream& out) const override { + out << "mdlog_write_error"; + } }; diff --git a/src/mds/MDSContext.cc b/src/mds/MDSContext.cc index 3d1d1688620ee..ca62a2acb7987 100644 --- a/src/mds/MDSContext.cc +++ b/src/mds/MDSContext.cc @@ -45,12 +45,64 @@ void MDSInternalContextWrapper::finish(int r) fin->complete(r); } +elist MDSIOContextBase::ctx_list(member_offset(MDSIOContextBase, list_item)); +ceph::spinlock MDSIOContextBase::ctx_list_lock; + +MDSIOContextBase::MDSIOContextBase(bool track) +{ + created_at = ceph::coarse_mono_clock::now(); + if (track) { + ctx_list_lock.lock(); + ctx_list.push_back(&list_item); + ctx_list_lock.unlock(); + } +} + +MDSIOContextBase::~MDSIOContextBase() +{ + ctx_list_lock.lock(); + list_item.remove_myself(); + ctx_list_lock.unlock(); +} + +bool MDSIOContextBase::check_ios_in_flight(ceph::coarse_mono_time cutoff, + std::string& slow_count, + ceph::coarse_mono_time& oldest) +{ + static const unsigned MAX_COUNT = 100; + unsigned slow = 0; + + ctx_list_lock.lock(); + for (elist::iterator p = ctx_list.begin(); !p.end(); ++p) { + MDSIOContextBase *c = *p; + if (c->created_at >= cutoff) + break; + ++slow; + if (slow > MAX_COUNT) + break; + if (slow == 1) + oldest = c->created_at; + } + ctx_list_lock.unlock(); + + if (slow > 0) { + if (slow > MAX_COUNT) + slow_count = std::to_string(MAX_COUNT) + "+"; + else + slow_count = std::to_string(slow); + return true; + } else { + return false; + } +} + void MDSIOContextBase::complete(int r) { MDSRank *mds = get_mds(); dout(10) << "MDSIOContextBase::complete: " << typeid(*this).name() << dendl; assert(mds != NULL); Mutex::Locker l(mds->mds_lock); + if (mds->is_daemon_stopping()) { dout(4) << "MDSIOContextBase::complete: dropping for stopping " << typeid(*this).name() << dendl; diff --git a/src/mds/MDSContext.h b/src/mds/MDSContext.h index 11bde1b4fac81..4e6372110aa75 100644 --- a/src/mds/MDSContext.h +++ b/src/mds/MDSContext.h @@ -17,6 +17,8 @@ #define MDS_CONTEXT_H #include "include/Context.h" +#include "include/elist.h" +#include "common/ceph_time.h" class MDSRank; @@ -70,15 +72,32 @@ protected: MDSRank *mds; Context *fin; MDSRank *get_mds() override; + void finish(int r) override; public: MDSInternalContextWrapper(MDSRank *m, Context *c) : mds(m), fin(c) {} - void finish(int r) override; }; class MDSIOContextBase : public MDSContext { public: + MDSIOContextBase(bool track=true); + virtual ~MDSIOContextBase(); + MDSIOContextBase(const MDSIOContextBase&) = delete; + MDSIOContextBase& operator=(const MDSIOContextBase&) = delete; + void complete(int r) override; + + virtual void print(ostream& out) const = 0; + + static bool check_ios_in_flight(ceph::coarse_mono_time cutoff, + std::string& slow_count, + ceph::coarse_mono_time& oldest); +private: + ceph::coarse_mono_time created_at; + elist::item list_item; + + static elist ctx_list; + static ceph::spinlock ctx_list_lock; }; /** @@ -95,6 +114,9 @@ public: void complete(int r) final; void set_write_pos(uint64_t wp) { write_pos = wp; } virtual void pre_finish(int r) {} + void print(ostream& out) const override { + out << "log_event(" << write_pos << ")"; + } }; /** @@ -126,6 +148,9 @@ protected: public: MDSIOContextWrapper(MDSRank *m, Context *c) : mds(m), fin(c) {} void finish(int r) override; + void print(ostream& out) const override { + out << "io_context_wrapper(" << fin << ")"; + } }; /** @@ -166,6 +191,9 @@ public: } } void complete(int r) final; + void print(ostream& out) const override { + out << "io_wrapper(" << wrapped << ")"; + } }; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 93a0f4839f29a..1ef0ba5944e3e 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1297,6 +1297,9 @@ public: void finish(int r) override { mds->_standby_replay_restart_finish(r, old_read_pos); } + void print(ostream& out) const override { + out << "standby_replay_restart"; + } }; void MDSRank::_standby_replay_restart_finish(int r, uint64_t old_read_pos) diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc index be12dbfc0d63e..f4104239f6188 100644 --- a/src/mds/MDSTable.cc +++ b/src/mds/MDSTable.cc @@ -53,6 +53,9 @@ public: void finish(int r) override { ida->save_2(r, version); } + void print(ostream& out) const override { + out << "table_save(" << ida->table_name << ")"; + } }; void MDSTable::save(MDSInternalContextBase *onfinish, version_t v) @@ -130,6 +133,9 @@ public: void finish(int r) override { ida->load_2(r, bl, onfinish); } + void print(ostream& out) const override { + out << "table_load(" << ida->table_name << ")"; + } }; object_t MDSTable::get_object_name() const diff --git a/src/mds/MDSTable.h b/src/mds/MDSTable.h index 13e9bf55f96c8..3b62a123fb214 100644 --- a/src/mds/MDSTable.h +++ b/src/mds/MDSTable.h @@ -87,6 +87,9 @@ public: virtual void reset_state() = 0; virtual void decode_state(bufferlist::iterator& p) = 0; virtual void encode_state(bufferlist& bl) const = 0; + + friend class C_IO_MT_Load; + friend class C_IO_MT_Save; }; #endif diff --git a/src/mds/OpenFileTable.cc b/src/mds/OpenFileTable.cc index 2e3e2faa590c5..aa8e2000b1630 100644 --- a/src/mds/OpenFileTable.cc +++ b/src/mds/OpenFileTable.cc @@ -216,6 +216,9 @@ public: void finish(int r) { oft->_commit_finish(r, log_seq, fin); } + void print(ostream& out) const override { + out << "openfiles_save"; + } }; void OpenFileTable::_commit_finish(int r, uint64_t log_seq, MDSInternalContextBase *fin) @@ -251,6 +254,9 @@ public: void finish(int r) { oft->_journal_finish(r, log_seq, fin, ops_map); } + void print(ostream& out) const override { + out << "openfiles_journal"; + } }; void OpenFileTable::_journal_finish(int r, uint64_t log_seq, MDSInternalContextBase *c, @@ -646,9 +652,12 @@ public: C_IO_OFT_Load(OpenFileTable *t, unsigned i, bool f) : oft(t), index(i), first(f) {} - void finish(int r) { + void finish(int r) override { oft->_load_finish(r, header_r, values_r, index, first, more, header_bl, values); } + void print(ostream& out) const override { + out << "openfiles_load"; + } }; class C_IO_OFT_Recover : public MDSIOContextBase { @@ -657,9 +666,12 @@ protected: MDSRank *get_mds() override { return oft->mds; } public: C_IO_OFT_Recover(OpenFileTable *t) : oft(t) {} - void finish(int r) { + void finish(int r) override { oft->_recover_finish(r); } + void print(ostream& out) const override { + out << "openfiles_recover"; + } }; void OpenFileTable::_recover_finish(int r) diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc index 6956c6355e7c0..16e89980c67d7 100644 --- a/src/mds/RecoveryQueue.cc +++ b/src/mds/RecoveryQueue.cc @@ -41,9 +41,13 @@ public: uint64_t size; utime_t mtime; - C_MDC_Recover(RecoveryQueue *rq_, CInode *i) : rq(rq_), in(i), size(0) { + C_MDC_Recover(RecoveryQueue *rq_, CInode *i) : + MDSIOContextBase(false), rq(rq_), in(i), size(0) { assert(rq != NULL); } + void print(ostream& out) const override { + out << "file_recover(" << in->ino() << ")"; + } }; diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index dff223cf61171..8cb449a98a540 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -99,6 +99,9 @@ public: sessionmap->_load_finish(r, header_r, values_r, first, header_bl, session_vals, more_session_vals); } + void print(ostream& out) const override { + out << "session_load"; + } }; } @@ -279,6 +282,9 @@ public: void finish(int r) override { sessionmap->_load_legacy_finish(r, bl); } + void print(ostream& out) const override { + out << "session_load_legacy"; + } }; } @@ -346,6 +352,9 @@ public: sessionmap->_save_finish(version); } } + void print(ostream& out) const override { + out << "session_save"; + } }; } @@ -690,6 +699,9 @@ public: on_safe->complete(r); } } + void print(ostream& out) const override { + out << "session_save_one"; + } }; } @@ -766,8 +778,8 @@ void SessionMap::save_if_dirty(const std::set &tgt_sessions, object_locator_t oloc(mds->mdsmap->get_metadata_pool()); MDSInternalContextBase *on_safe = gather_bld->new_sub(); mds->objecter->mutate(oid, oloc, op, snapc, - ceph::real_clock::now(), - 0, new C_OnFinisher( + ceph::real_clock::now(), 0, + new C_OnFinisher( new C_IO_SM_Save_One(this, on_safe), mds->finisher)); } diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index a310fb34f27ef..08c64f61c2042 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -80,6 +80,10 @@ public: assert(r == 0 || r == -ENOENT); sm->_purge_stray_purged(dn, only_head); } + void print(ostream& out) const override { + CInode *in = dn->get_projected_linkage()->get_inode(); + out << "purge_stray(" << in->ino() << ")"; + } }; diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index 44a3eb9c63cb7..136b5ecd214f8 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -42,7 +42,8 @@ enum mds_metric_t { MDS_HEALTH_DAMAGE, MDS_HEALTH_READ_ONLY, MDS_HEALTH_SLOW_REQUEST, - MDS_HEALTH_CACHE_OVERSIZED + MDS_HEALTH_CACHE_OVERSIZED, + MDS_HEALTH_SLOW_METADATA_IO, }; inline const char *mds_metric_name(mds_metric_t m) @@ -59,6 +60,7 @@ inline const char *mds_metric_name(mds_metric_t m) case MDS_HEALTH_READ_ONLY: return "MDS_READ_ONLY"; case MDS_HEALTH_SLOW_REQUEST: return "MDS_SLOW_REQUEST"; case MDS_HEALTH_CACHE_OVERSIZED: return "MDS_CACHE_OVERSIZED"; + case MDS_HEALTH_SLOW_METADATA_IO: return "MDS_SLOW_METADATA_IO"; default: return "???"; } @@ -90,6 +92,8 @@ inline const char *mds_metric_summary(mds_metric_t m) return "%num% MDSs report slow requests"; case MDS_HEALTH_CACHE_OVERSIZED: return "%num% MDSs report oversized cache"; + case MDS_HEALTH_SLOW_METADATA_IO: + return "%num% MDSs report slow metadata IOs"; default: return "???"; } -- 2.39.5