From 163be0c4df6ec9f7d795512157459c65714e6ad2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 13 Jul 2018 16:10:39 +0800 Subject: [PATCH] mds: health warning for slow metadata IO Fixes: http://tracker.ceph.com/issues/24879 Signed-off-by: "Yan, Zheng" (cherry picked from commit 0f735f40315448560fde049ed3ea019a7d30d868) Conflicts: src/mds/Beacon.cc src/mds/OpenFileTable.cc src/mds/MDSContext.h src/mds/MDSContext.cc --- src/mds/Beacon.cc | 22 ++++++++++++++++- src/mds/CDir.cc | 9 +++++++ src/mds/CInode.cc | 9 +++++++ src/mds/MDCache.cc | 15 +++++++++-- src/mds/MDLog.cc | 6 ++++- src/mds/MDSContext.cc | 52 +++++++++++++++++++++++++++++++++++++++ src/mds/MDSContext.h | 31 ++++++++++++++++++++++- src/mds/MDSRank.cc | 3 +++ src/mds/MDSTable.cc | 6 +++++ src/mds/MDSTable.h | 3 +++ src/mds/RecoveryQueue.cc | 6 ++++- src/mds/SessionMap.cc | 16 ++++++++++-- src/mds/StrayManager.cc | 4 +++ src/messages/MMDSBeacon.h | 6 ++++- 14 files changed, 179 insertions(+), 9 deletions(-) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 10a388b84f65..60b0cbe3a698 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -471,8 +471,8 @@ void Beacon::notify_health(MDSRank const *mds) // Detect MDS_HEALTH_SLOW_REQUEST condition { int slow = mds->get_mds_slow_req_count(); - dout(20) << slow << " slow request found" << dendl; if (slow) { + dout(20) << slow << " slow request found" << dendl; std::ostringstream oss; oss << slow << " slow requests are blocked > " << g_conf->mds_op_complaint_time << " sec"; @@ -481,6 +481,26 @@ void Beacon::notify_health(MDSRank const *mds) } } + { + auto complaint_time = g_conf->osd_op_complaint_time; + auto now = ceph::coarse_mono_clock::now(); + auto cutoff = now - ceph::make_timespan(complaint_time); + + std::string count; + ceph::coarse_mono_time oldest; + if (MDSIOContextBase::check_ios_in_flight(cutoff, count, oldest)) { + dout(20) << count << " slow metadata IOs found" << dendl; + + auto oldest_secs = std::chrono::duration(now - oldest).count(); + std::ostringstream oss; + oss << count << " slow metadata IOs are blocked > " << complaint_time + << " secs, oldest blocked for " << (int64_t)oldest_secs << " secs"; + + MDSHealthMetric m(MDS_HEALTH_SLOW_METADATA_IO, HEALTH_WARN, oss.str()); + health.metrics.push_back(m); + } + } + // Report a health warning if we are readonly if (mds->mdcache->is_readonly()) { MDSHealthMetric m(MDS_HEALTH_READ_ONLY, HEALTH_WARN, diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 93974895c697..c3ffbc796360 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1570,6 +1570,9 @@ public: fin->complete(r); } } + void print(ostream& out) const override { + out << "dirfrag_fetch_more(" << dir->dirfrag() << ")"; + } }; class C_IO_Dir_OMAP_Fetched : public CDirIOContext { @@ -1597,6 +1600,9 @@ public: fin->complete(r); } } + void print(ostream& out) const override { + out << "dirfrag_fetch(" << dir->dirfrag() << ")"; + } }; void CDir::_omap_fetch(MDSInternalContextBase *c, const std::set& keys) @@ -2096,6 +2102,9 @@ public: void finish(int r) override { dir->_committed(r, version); } + void print(ostream& out) const override { + out << "dirfrag_commit(" << dir->dirfrag() << ")"; + } }; /** diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index bf1ad8f26b6f..8ba75de12c81 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -922,6 +922,9 @@ struct C_IO_Inode_Stored : public CInodeIOContext { void finish(int r) override { in->_stored(r, version, fin); } + void print(ostream& out) const override { + out << "inode_store(" << in->ino() << ")"; + } }; object_t InodeStoreBase::get_object_name(inodeno_t ino, frag_t fg, const char *suffix) @@ -1013,6 +1016,9 @@ struct C_IO_Inode_Fetched : public CInodeIOContext { // Ignore 'r', because we fetch from two places, so r is usually ENOENT in->_fetched(bl, bl2, fin); } + void print(ostream& out) const override { + out << "inode_fetch(" << in->ino() << ")"; + } }; void CInode::fetch(MDSInternalContextBase *fin) @@ -1101,6 +1107,9 @@ struct C_IO_Inode_StoredBacktrace : public CInodeIOContext { void finish(int r) override { in->_stored_backtrace(r, version, fin); } + void print(ostream& out) const override { + out << "backtrace_store(" << in->ino() << ")"; + } }; void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 59a365ef0675..869f4b6d1477 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -152,7 +152,8 @@ protected: return mdcache->mds; } public: - explicit MDCacheIOContext(MDCache *mdc_) : mdcache(mdc_) {} + explicit MDCacheIOContext(MDCache *mdc_, bool track=true) : + MDSIOContextBase(track), mdcache(mdc_) {} }; class MDCacheLogContext : public virtual MDSLogContextBase { @@ -6314,11 +6315,15 @@ struct C_IO_MDC_TruncateFinish : public MDCacheIOContext { CInode *in; LogSegment *ls; C_IO_MDC_TruncateFinish(MDCache *c, CInode *i, LogSegment *l) : - MDCacheIOContext(c), in(i), ls(l) {} + MDCacheIOContext(c, false), in(i), ls(l) { + } void finish(int r) override { assert(r == 0 || r == -ENOENT); mdcache->truncate_inode_finish(in, ls); } + void print(ostream& out) const override { + out << "file_truncate(" << in->ino() << ")"; + } }; void MDCache::_truncate_inode(CInode *in, LogSegment *ls) @@ -8363,6 +8368,9 @@ class C_IO_MDC_OpenInoBacktraceFetched : public MDCacheIOContext { void finish(int r) override { mdcache->_open_ino_backtrace_fetched(ino, bl, r); } + void print(ostream& out) const override { + out << "openino_backtrace_fetch" << ino << ")"; + } }; struct C_MDC_OpenInoTraverseDir : public MDCacheContext { @@ -11242,6 +11250,9 @@ public: assert(r == 0 || r == -ENOENT); mdcache->_fragment_finish(basedirfrag, resultfrags); } + void print(ostream& out) const override { + out << "dirfrags_commit(" << basedirfrag << ")"; + } }; void MDCache::fragment_frozen(MDRequestRef& mdr, int r) diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index a38f2d0db205..ba148b3be679 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -111,7 +111,11 @@ class C_MDL_WriteError : public MDSIOContextBase { } public: - explicit C_MDL_WriteError(MDLog *m) : mdlog(m) {} + explicit C_MDL_WriteError(MDLog *m) : + MDSIOContextBase(false), mdlog(m) {} + void print(ostream& out) const override { + out << "mdlog_write_error"; + } }; diff --git a/src/mds/MDSContext.cc b/src/mds/MDSContext.cc index 3d1d1688620e..429c94967277 100644 --- a/src/mds/MDSContext.cc +++ b/src/mds/MDSContext.cc @@ -45,12 +45,64 @@ void MDSInternalContextWrapper::finish(int r) fin->complete(r); } +elist MDSIOContextBase::ctx_list(member_offset(MDSIOContextBase, list_item)); +std::atomic_flag MDSIOContextBase::ctx_list_lock = ATOMIC_FLAG_INIT; + +MDSIOContextBase::MDSIOContextBase(bool track) +{ + created_at = ceph::coarse_mono_clock::now(); + if (track) { + simple_spin_lock(&ctx_list_lock); + ctx_list.push_back(&list_item); + simple_spin_unlock(&ctx_list_lock); + } +} + +MDSIOContextBase::~MDSIOContextBase() +{ + simple_spin_lock(&ctx_list_lock); + list_item.remove_myself(); + simple_spin_unlock(&ctx_list_lock); +} + +bool MDSIOContextBase::check_ios_in_flight(ceph::coarse_mono_time cutoff, + std::string& slow_count, + ceph::coarse_mono_time& oldest) +{ + static const unsigned MAX_COUNT = 100; + unsigned slow = 0; + + simple_spin_lock(&ctx_list_lock); + for (elist::iterator p = ctx_list.begin(); !p.end(); ++p) { + MDSIOContextBase *c = *p; + if (c->created_at >= cutoff) + break; + ++slow; + if (slow > MAX_COUNT) + break; + if (slow == 1) + oldest = c->created_at; + } + simple_spin_unlock(&ctx_list_lock); + + if (slow > 0) { + if (slow > MAX_COUNT) + slow_count = std::to_string(MAX_COUNT) + "+"; + else + slow_count = std::to_string(slow); + return true; + } else { + return false; + } +} + void MDSIOContextBase::complete(int r) { MDSRank *mds = get_mds(); dout(10) << "MDSIOContextBase::complete: " << typeid(*this).name() << dendl; assert(mds != NULL); Mutex::Locker l(mds->mds_lock); + if (mds->is_daemon_stopping()) { dout(4) << "MDSIOContextBase::complete: dropping for stopping " << typeid(*this).name() << dendl; diff --git a/src/mds/MDSContext.h b/src/mds/MDSContext.h index 11bde1b4fac8..d338169b534c 100644 --- a/src/mds/MDSContext.h +++ b/src/mds/MDSContext.h @@ -17,6 +17,9 @@ #define MDS_CONTEXT_H #include "include/Context.h" +#include "include/elist.h" +#include "common/ceph_time.h" +#include "common/simple_spin.h" class MDSRank; @@ -70,15 +73,32 @@ protected: MDSRank *mds; Context *fin; MDSRank *get_mds() override; + void finish(int r) override; public: MDSInternalContextWrapper(MDSRank *m, Context *c) : mds(m), fin(c) {} - void finish(int r) override; }; class MDSIOContextBase : public MDSContext { public: + MDSIOContextBase(bool track=true); + virtual ~MDSIOContextBase(); + MDSIOContextBase(const MDSIOContextBase&) = delete; + MDSIOContextBase& operator=(const MDSIOContextBase&) = delete; + void complete(int r) override; + + virtual void print(ostream& out) const = 0; + + static bool check_ios_in_flight(ceph::coarse_mono_time cutoff, + std::string& slow_count, + ceph::coarse_mono_time& oldest); +private: + ceph::coarse_mono_time created_at; + elist::item list_item; + + static elist ctx_list; + static std::atomic_flag ctx_list_lock; }; /** @@ -95,6 +115,9 @@ public: void complete(int r) final; void set_write_pos(uint64_t wp) { write_pos = wp; } virtual void pre_finish(int r) {} + void print(ostream& out) const override { + out << "log_event(" << write_pos << ")"; + } }; /** @@ -126,6 +149,9 @@ protected: public: MDSIOContextWrapper(MDSRank *m, Context *c) : mds(m), fin(c) {} void finish(int r) override; + void print(ostream& out) const override { + out << "io_context_wrapper(" << fin << ")"; + } }; /** @@ -166,6 +192,9 @@ public: } } void complete(int r) final; + void print(ostream& out) const override { + out << "io_wrapper(" << wrapped << ")"; + } }; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 94dc44e2d897..51fedf726495 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1243,6 +1243,9 @@ public: void finish(int r) override { mds->_standby_replay_restart_finish(r, old_read_pos); } + void print(ostream& out) const override { + out << "standby_replay_restart"; + } }; void MDSRank::_standby_replay_restart_finish(int r, uint64_t old_read_pos) diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc index 13a007d29bb2..78541aa4f715 100644 --- a/src/mds/MDSTable.cc +++ b/src/mds/MDSTable.cc @@ -53,6 +53,9 @@ public: void finish(int r) override { ida->save_2(r, version); } + void print(ostream& out) const override { + out << "table_save(" << ida->table_name << ")"; + } }; void MDSTable::save(MDSInternalContextBase *onfinish, version_t v) @@ -128,6 +131,9 @@ public: void finish(int r) override { ida->load_2(r, bl, onfinish); } + void print(ostream& out) const override { + out << "table_load(" << ida->table_name << ")"; + } }; object_t MDSTable::get_object_name() const diff --git a/src/mds/MDSTable.h b/src/mds/MDSTable.h index 13e9bf55f96c..3b62a123fb21 100644 --- a/src/mds/MDSTable.h +++ b/src/mds/MDSTable.h @@ -87,6 +87,9 @@ public: virtual void reset_state() = 0; virtual void decode_state(bufferlist::iterator& p) = 0; virtual void encode_state(bufferlist& bl) const = 0; + + friend class C_IO_MT_Load; + friend class C_IO_MT_Save; }; #endif diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc index 2e6ff30f2b8d..4349ac1f4cfa 100644 --- a/src/mds/RecoveryQueue.cc +++ b/src/mds/RecoveryQueue.cc @@ -41,9 +41,13 @@ public: uint64_t size; utime_t mtime; - C_MDC_Recover(RecoveryQueue *rq_, CInode *i) : rq(rq_), in(i), size(0) { + C_MDC_Recover(RecoveryQueue *rq_, CInode *i) : + MDSIOContextBase(false), rq(rq_), in(i), size(0) { assert(rq != NULL); } + void print(ostream& out) const override { + out << "file_recover(" << in->ino() << ")"; + } }; diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index eadae2770dfb..2d06e4eb1b68 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -99,6 +99,9 @@ public: sessionmap->_load_finish(r, header_r, values_r, first, header_bl, session_vals, more_session_vals); } + void print(ostream& out) const override { + out << "session_load"; + } }; } @@ -279,6 +282,9 @@ public: void finish(int r) override { sessionmap->_load_legacy_finish(r, bl); } + void print(ostream& out) const override { + out << "session_load_legacy"; + } }; } @@ -346,6 +352,9 @@ public: sessionmap->_save_finish(version); } } + void print(ostream& out) const override { + out << "session_save"; + } }; } @@ -690,6 +699,9 @@ public: on_safe->complete(r); } } + void print(ostream& out) const override { + out << "session_save_one"; + } }; } @@ -770,8 +782,8 @@ void SessionMap::save_if_dirty(const std::set &tgt_sessions, object_locator_t oloc(mds->mdsmap->get_metadata_pool()); MDSInternalContextBase *on_safe = gather_bld->new_sub(); mds->objecter->mutate(oid, oloc, op, snapc, - ceph::real_clock::now(), - 0, new C_OnFinisher( + ceph::real_clock::now(), 0, + new C_OnFinisher( new C_IO_SM_Save_One(this, on_safe), mds->finisher)); } diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index 81fa7ba1e9e3..b94dd57cba29 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -80,6 +80,10 @@ public: assert(r == 0 || r == -ENOENT); sm->_purge_stray_purged(dn, only_head); } + void print(ostream& out) const override { + CInode *in = dn->get_projected_linkage()->get_inode(); + out << "purge_stray(" << in->ino() << ")"; + } }; diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index 83583bb63a74..3ed7113917f5 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -42,7 +42,8 @@ enum mds_metric_t { MDS_HEALTH_DAMAGE, MDS_HEALTH_READ_ONLY, MDS_HEALTH_SLOW_REQUEST, - MDS_HEALTH_CACHE_OVERSIZED + MDS_HEALTH_CACHE_OVERSIZED, + MDS_HEALTH_SLOW_METADATA_IO, }; static inline const char *mds_metric_name(mds_metric_t m) @@ -59,6 +60,7 @@ static inline const char *mds_metric_name(mds_metric_t m) case MDS_HEALTH_READ_ONLY: return "MDS_READ_ONLY"; case MDS_HEALTH_SLOW_REQUEST: return "MDS_SLOW_REQUEST"; case MDS_HEALTH_CACHE_OVERSIZED: return "MDS_CACHE_OVERSIZED"; + case MDS_HEALTH_SLOW_METADATA_IO: return "MDS_SLOW_METADATA_IO"; default: return "???"; } @@ -90,6 +92,8 @@ static inline const char *mds_metric_summary(mds_metric_t m) return "%num% MDSs report slow requests"; case MDS_HEALTH_CACHE_OVERSIZED: return "%num% MDSs report oversized cache"; + case MDS_HEALTH_SLOW_METADATA_IO: + return "%num% MDSs report slow metadata IOs"; default: return "???"; } -- 2.47.3