From 0bc909e54b0edd8a32a3e7db16877f1d875c3186 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Mon, 10 Aug 2015 04:25:03 -0700 Subject: [PATCH] mon: add a cache layer over MonitorDBStore the cache of leveldb does not perform well under some conditions, so we need a cache in our own stack. * add an option "mon_osd_cache_size" to control the size of the MonitorDBStore cache. Fixes: #12638 Signed-off-by: Kefu Chai (cherry picked from commit 5e99a578050976ca22b549812ac80d494fe7041d) Conflicts: src/mon/OSDMonitor.h minor differences in OSDMonitor ctor --- src/common/config_opts.h | 2 ++ src/mon/OSDMonitor.cc | 29 +++++++++++++++++++++++++++++ src/mon/OSDMonitor.h | 10 +++++++--- src/mon/PaxosService.h | 4 ++-- 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index f2c34fe8a4a..0dce3da2b86 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -175,6 +175,8 @@ OPTION(mon_sync_fs_threshold, OPT_INT, 5) // sync() when writing this many obj OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states +OPTION(mon_osd_cache_size, OPT_INT, 10) // the size of osdmaps cache, not to rely on underlying store's cache + OPTION(mon_tick_interval, OPT_INT, 5) OPTION(mon_subscribe_interval, OPT_DOUBLE, 300) OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0 diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 71ec38573f3..8d310bb8a5e 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -70,6 +70,12 @@ static ostream& _prefix(std::ostream *_dout, Monitor *mon, OSDMap& osdmap) { << ").osd e" << osdmap.get_epoch() << " "; } +OSDMonitor::OSDMonitor(Monitor *mn, Paxos *p, string service_name) + : PaxosService(mn, p,
service_name), + inc_osd_cache(g_conf->mon_osd_cache_size), + full_osd_cache(g_conf->mon_osd_cache_size), + thrash_map(0), thrash_last_up_osd(-1) { } + bool OSDMonitor::_have_pending_crush() { return pending_inc.crush.length(); @@ -2246,6 +2252,29 @@ void OSDMonitor::send_incremental(epoch_t first, MonSession *session, } } +int OSDMonitor::get_version(version_t ver, bufferlist& bl) +{ + if (inc_osd_cache.lookup(ver, &bl)) { + return 0; + } + int ret = PaxosService::get_version(ver, bl); + if (!ret) { + inc_osd_cache.add(ver, bl); + } + return ret; +} + +int OSDMonitor::get_version_full(version_t ver, bufferlist& bl) +{ + if (full_osd_cache.lookup(ver, &bl)) { + return 0; + } + int ret = PaxosService::get_version_full(ver, bl); + if (!ret) { + full_osd_cache.add(ver, bl); + } + return ret; +} diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index afeacde4a52..414bf082410 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -26,6 +26,7 @@ using namespace std; #include "include/types.h" +#include "common/simple_cache.hpp" #include "msg/Messenger.h" #include "osd/OSDMap.h" @@ -139,6 +140,8 @@ private: * optimization to try to avoid sending the same inc maps twice. 
*/ map osd_epoch; + SimpleLRU inc_osd_cache; + SimpleLRU full_osd_cache; void note_osd_has_epoch(int osd, epoch_t epoch); @@ -380,9 +383,7 @@ private: bool prepare_remove_snaps(struct MRemoveSnaps *m); public: - OSDMonitor(Monitor *mn, Paxos *p, string service_name) - : PaxosService(mn, p, service_name), - thrash_map(0), thrash_last_up_osd(-1) { } + OSDMonitor(Monitor *mn, Paxos *p, string service_name); void tick(); // check state, take actions @@ -407,6 +408,9 @@ private: send_incremental(m, start); } + int get_version(version_t ver, bufferlist& bl); + int get_version_full(version_t ver, bufferlist& bl); + epoch_t blacklist(const entity_addr_t& a, utime_t until); void dump_info(Formatter *f); diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index 7c2259218e3..b5c4e5084c4 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -858,7 +858,7 @@ public: * @param bl The bufferlist to be populated * @return 0 on success; <0 otherwise */ - int get_version(version_t ver, bufferlist& bl) { + virtual int get_version(version_t ver, bufferlist& bl) { return mon->store->get(get_service_name(), ver, bl); } /** @@ -868,7 +868,7 @@ public: * @param bl The bufferlist to be populated * @returns 0 on success; <0 otherwise */ - int get_version_full(version_t ver, bufferlist& bl) { + virtual int get_version_full(version_t ver, bufferlist& bl) { string key = mon->store->combine_strings(full_prefix_name, ver); return mon->store->get(get_service_name(), key, bl); } -- 2.47.3