From: Patrick Donnelly Date: Fri, 28 Jul 2017 00:21:54 +0000 (-0700) Subject: mds: use mempool for cache objects X-Git-Tag: v13.0.1~926^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e035b64fcb0482c3318656e1680d683814f494fe;p=ceph.git mds: use mempool for cache objects The purpose of this is to allow us to track memory usage by cached objects so we can limit cache size based on memory available/allocated to the MDS. This commit is a first step: it adds CInode, CDir, and CDentry to the mempool but not all of the containers in these classes (e.g. std::map). However, MDSCacheObject has been changed to allocate its containers through the mempool by converting compact_* containers to the std versions offered through mempool via the new alloc_ptr. (A compact_* class simply wraps a pointer to the std:: version to reduce memory usage of an object when the container is only occasionally used. The alloc_ptr allows us to achieve the same thing explicitly with only a little handholding: when all entries in the wrapped container are deleted, the caller must call alloc_ptr.release().) Signed-off-by: Patrick Donnelly --- diff --git a/src/include/mempool.h b/src/include/mempool.h index 7d75ce0e6d90..2cd61ad2afc3 100644 --- a/src/include/mempool.h +++ b/src/include/mempool.h @@ -160,6 +160,7 @@ namespace mempool { f(osdmap) \ f(osdmap_mapping) \ f(pgmap) \ + f(mds_co) \ f(unittest_1) \ f(unittest_2) diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index 44dc6d52a522..591e8d8ff6e3 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -620,3 +620,5 @@ std::string CDentry::linkage_t::get_remote_d_type_string() const default: ceph_abort(); return ""; } } + +MEMPOOL_DEFINE_OBJECT_FACTORY(CDentry, co_dentry, mds_co); diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 29c5cd63308d..e9416104ad15 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -49,6 +49,7 @@ bool operator<(const CDentry& l, const CDentry& r); // dentry class CDentry : public MDSCacheObject, public LRUObject, public Counter { public: + MEMPOOL_CLASS_HELPERS(); friend class CDir; struct linkage_t { diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 57b1e981f03a..3c4a54281938 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -3293,3 +3293,4 @@ bool CDir::should_split_fast() const return effective_size > fast_limit; } +MEMPOOL_DEFINE_OBJECT_FACTORY(CDir, co_dir, mds_co); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 5b0079a74de4..45b2998b3d24 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -46,6 +46,7 @@ class CDir : public MDSCacheObject, public Counter { friend ostream& operator<<(ostream& out, const class CDir& dir); public: + MEMPOOL_CLASS_HELPERS(); // -- pins -- static const int PIN_DNWAITER = 1; static const int PIN_INOWAITER = 2; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 3b4e76d92a2f..d07da8a7aba5 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -4523,3 +4523,5 @@ bool CInode::is_exportable(mds_rank_t dest) const return true; } } + +MEMPOOL_DEFINE_OBJECT_FACTORY(CInode, co_inode, mds_co); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index d5cf7d3fc15a..8734a7733df3 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -143,6 +143,7 @@ WRITE_CLASS_ENCODER_FEATURES(InodeStoreBare) // cached inode wrapper class CInode : public MDSCacheObject, public InodeStoreBase, public Counter { public: + MEMPOOL_CLASS_HELPERS(); // -- pins -- static const int PIN_DIRFRAG = -1; static const int PIN_CAPS = 2; // client caps diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 9e423bada23d..1eb4cc5a0f6b 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -11820,6 +11820,18 @@ void MDCache::show_cache() show_func(p.second); } +int MDCache::cache_status(Formatter *f) +{ + f->open_object_section("cache"); + + f->open_object_section("pool"); + mempool::get_pool(mempool::mds_co::id).dump(f); + f->close_section(); + + f->close_section(); + return 0; +} + int MDCache::dump_cache(std::string const &file_name) { return dump_cache(file_name.c_str(), NULL); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 4c8d4890cc5f..01c230aab292 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1146,6 +1146,8 @@ public: int dump_cache(Formatter *f); int dump_cache(const std::string& dump_root, int depth, Formatter *f); + int cache_status(Formatter *f); + void dump_resolve_status(Formatter *f) const; void dump_rejoin_status(Formatter *f) const; diff --git a/src/mds/MDSCacheObject.cc b/src/mds/MDSCacheObject.cc index eca6fac148b4..6286bbe5d1cb 100644 --- a/src/mds/MDSCacheObject.cc +++ b/src/mds/MDSCacheObject.cc @@ -8,7 +8,7 @@ uint64_t MDSCacheObject::last_wait_seq = 0; void MDSCacheObject::finish_waiting(uint64_t mask, int result) { - list finished; + std::list finished; take_waiting(mask, finished); finish_contexts(g_ceph_context, finished, result); } diff --git a/src/mds/MDSCacheObject.h b/src/mds/MDSCacheObject.h index 0d5c2aeeed48..845a4a92f032 100644 --- a/src/mds/MDSCacheObject.h +++ b/src/mds/MDSCacheObject.h @@ -1,17 +1,17 @@ #ifndef CEPH_MDSCACHEOBJECT_H #define CEPH_MDSCACHEOBJECT_H -#include -#include #include -using namespace std; - #include "common/config.h" + +#include "include/Context.h" +#include "include/alloc_ptr.h" #include "include/assert.h" +#include "include/mempool.h" #include "include/types.h" #include "include/xlist.h" -#include "include/Context.h" + #include "mdstypes.h" #define MDS_REF_SET // define me for improved debug output, sanity checking @@ -145,7 +145,7 @@ class MDSCacheObject { protected: __s32 ref; // reference count #ifdef MDS_REF_SET - std::map ref_map; + mempool::mds_co::map ref_map; #endif public: @@ -226,7 +226,7 @@ protected: int auth_pins; int nested_auth_pins; #ifdef MDS_AUTHPIN_SET - multiset auth_pin_set; + mempool::mds_co::multiset auth_pin_set; #endif public: @@ -253,43 +253,46 @@ protected: // replication (across mds cluster) protected: unsigned replica_nonce; // [replica] defined on replica - compact_map replica_map; // [auth] mds -> nonce + typedef mempool::mds_co::map replica_map_type; + alloc_ptr replica_map; // [auth] mds -> nonce public: - bool is_replicated() const { return !replica_map.empty(); } - bool is_replica(mds_rank_t mds) const { return replica_map.count(mds); } - int num_replicas() const { return replica_map.size(); } + bool is_replicated() const { return !get_replicas().empty(); } + bool is_replica(mds_rank_t mds) const { return get_replicas().count(mds); } + int num_replicas() const { return get_replicas().size(); } unsigned add_replica(mds_rank_t mds) { - if (replica_map.count(mds)) - return ++replica_map[mds]; // inc nonce - if (replica_map.empty()) + if (get_replicas().count(mds)) + return ++get_replicas()[mds]; // inc nonce + if (get_replicas().empty()) get(PIN_REPLICATED); - return replica_map[mds] = 1; + return get_replicas()[mds] = 1; } void add_replica(mds_rank_t mds, unsigned nonce) { - if (replica_map.empty()) + if (get_replicas().empty()) get(PIN_REPLICATED); - replica_map[mds] = nonce; + get_replicas()[mds] = nonce; } unsigned get_replica_nonce(mds_rank_t mds) { - assert(replica_map.count(mds)); - return replica_map[mds]; + assert(get_replicas().count(mds)); + return get_replicas()[mds]; } void remove_replica(mds_rank_t mds) { - assert(replica_map.count(mds)); - replica_map.erase(mds); - if (replica_map.empty()) + assert(get_replicas().count(mds)); + get_replicas().erase(mds); + if (get_replicas().empty()) { put(PIN_REPLICATED); + replica_map.reset(); + } } void clear_replica_map() { - if (!replica_map.empty()) + if (!get_replicas().empty()) put(PIN_REPLICATED); - replica_map.clear(); + replica_map.reset(); } - compact_map& get_replicas() { return replica_map; } - const compact_map& get_replicas() const { return replica_map; } + replica_map_type& get_replicas() { return *replica_map; } + const replica_map_type& get_replicas() const { return *replica_map; } void list_replicas(std::set& ls) const { - for (const auto &p : replica_map) { + for (const auto &p : get_replicas()) { ls.insert(p.first); } } @@ -301,7 +304,7 @@ protected: // --------------------------------------------- // waiting protected: - compact_multimap > waiting; + alloc_ptr>> waiting; static uint64_t last_wait_seq; public: @@ -311,8 +314,8 @@ protected: while (min & (min-1)) // if more than one bit is set min &= min-1; // clear LSB } - for (auto p = waiting.lower_bound(min); - p != waiting.end(); + for (auto p = waiting->lower_bound(min); + p != waiting->end(); ++p) { if (p->first & mask) return true; if (p->first > mask) return false; @@ -320,7 +323,7 @@ protected: return false; } virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) { - if (waiting.empty()) + if (waiting->empty()) get(PIN_WAITER); uint64_t seq = 0; @@ -328,7 +331,7 @@ protected: seq = ++last_wait_seq; mask &= ~WAIT_ORDERED; } - waiting.insert(pair >( + waiting->insert(pair >( mask, pair(seq, c))); // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) @@ -337,41 +340,40 @@ protected: // << dendl; } - virtual void take_waiting(uint64_t mask, list& ls) { - if (waiting.empty()) return; + virtual void take_waiting(uint64_t mask, std::list& ls) { + if (waiting->empty()) return; // process ordered waiters in the same order that they were added. std::map ordered_waiters; - for (auto it = waiting.begin(); - it != waiting.end(); ) { + for (auto it = waiting->begin(); it != waiting->end(); ) { if (it->first & mask) { - - if (it->second.first > 0) - ordered_waiters.insert(it->second); - else - ls.push_back(it->second.second); + if (it->second.first > 0) { + ordered_waiters.insert(it->second); + } else { + ls.push_back(it->second.second); + } // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) // << "take_waiting mask " << hex << mask << dec << " took " << it->second // << " tag " << hex << it->first << dec // << " on " << *this // << dendl; - waiting.erase(it++); + waiting->erase(it++); } else { // pdout(10,g_conf->debug_mds) << "take_waiting mask " << hex << mask << dec << " SKIPPING " << it->second // << " tag " << hex << it->first << dec // << " on " << *this // << dendl; - ++it; + ++it; } } - for (auto it = ordered_waiters.begin(); - it != ordered_waiters.end(); - ++it) { + for (auto it = ordered_waiters.begin(); it != ordered_waiters.end(); ++it) { ls.push_back(it->second); } - if (waiting.empty()) + if (waiting->empty()) { put(PIN_WAITER); + waiting.release(); + } } void finish_waiting(uint64_t mask, int result = 0); diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index 737940c540d1..4c30b6747136 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -242,6 +242,11 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "dump metadata cache (optionally to a file)"); assert(r == 0); + r = admin_socket->register_command("cache status", + "cache status", + asok_hook, + "show cache status"); + assert(r == 0); r = admin_socket->register_command("dump tree", "dump tree " "name=root,type=CephString,req=true " @@ -316,6 +321,7 @@ void MDSDaemon::clean_up_admin_socket() admin_socket->unregister_command("flush_path"); admin_socket->unregister_command("export dir"); admin_socket->unregister_command("dump cache"); + admin_socket->unregister_command("cache status"); admin_socket->unregister_command("dump tree"); admin_socket->unregister_command("session evict"); admin_socket->unregister_command("osdmap barrier"); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 52e357f0742e..5b0d76d82cf5 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1938,6 +1938,12 @@ bool MDSRankDispatcher::handle_asok_command( if (r != 0) { ss << "Failed to dump cache: " << cpp_strerror(r); } + } else if (command == "cache status") { + Mutex::Locker l(mds_lock); + int r = mdcache->cache_status(f); + if (r != 0) { + ss << "Failed to get cache status: " << cpp_strerror(r); + } } else if (command == "dump tree") { string root; int64_t depth;