From 15dfa71cf7c8ff761f305638a89438b0551d64cf Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou <pdiazbou@redhat.com>
Date: Wed, 26 May 2021 17:47:06 +0200
Subject: [PATCH] mgr: TTLCache basic implementation

Signed-off-by: Pere Diaz Bou <pdiazbou@redhat.com>
Fixes: https://tracker.ceph.com/issues/48388
---
 qa/mgr_ttl_cache/disable.yaml                 |   5 +
 qa/mgr_ttl_cache/enable.yaml                  |   5 +
 qa/suites/rados/mgr/mgr_ttl_cache/.qa         |   1 +
 .../rados/mgr/mgr_ttl_cache/disable.yaml      |   1 +
 qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml |   1 +
 qa/tasks/ceph_test_case.py                    |   5 +
 qa/tasks/mgr/test_cache.py                    |  83 ++++++++++++
 src/common/Formatter.h                        |   3 +
 src/common/options/global.yaml.in             |   7 +
 src/mgr/ActivePyModules.cc                    | 111 +++++++++-------
 src/mgr/ActivePyModules.h                     |   7 +
 src/mgr/BaseMgrModule.cc                      |   3 +-
 src/mgr/CMakeLists.txt                        |   1 +
 src/mgr/MgrStandby.cc                         |   9 +-
 src/mgr/PyFormatter.cc                        |  13 ++
 src/mgr/PyFormatter.h                         |  18 +++
 src/mgr/TTLCache.cc                           | 100 ++++++++++++++
 src/mgr/TTLCache.h                            | 124 ++++++++++++++++++
 src/mgr/mgr_perf_counters.cc                  |  28 ++++
 src/mgr/mgr_perf_counters.h                   |  20 +++
 src/pybind/mgr/mgr_module.py                  |   6 +-
 src/test/mgr/CMakeLists.txt                   |   7 +-
 src/test/mgr/test_ttlcache.cc                 |  70 ++++++++++
 23 files changed, 572 insertions(+), 56 deletions(-)
 create mode 100644 qa/mgr_ttl_cache/disable.yaml
 create mode 100644 qa/mgr_ttl_cache/enable.yaml
 create mode 120000 qa/suites/rados/mgr/mgr_ttl_cache/.qa
 create mode 120000 qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml
 create mode 120000 qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml
 create mode 100644 qa/tasks/mgr/test_cache.py
 create mode 100644 src/mgr/TTLCache.cc
 create mode 100644 src/mgr/TTLCache.h
 create mode 100644 src/mgr/mgr_perf_counters.cc
 create mode 100644 src/mgr/mgr_perf_counters.h
 create mode 100644 src/test/mgr/test_ttlcache.cc

diff --git a/qa/mgr_ttl_cache/disable.yaml b/qa/mgr_ttl_cache/disable.yaml
new file mode 100644
index 0000000000000..bbd78d53f2f85
--- /dev/null
+++ b/qa/mgr_ttl_cache/disable.yaml
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      mgr:
+        mgr ttl cache expire seconds: 0
diff --git a/qa/mgr_ttl_cache/enable.yaml b/qa/mgr_ttl_cache/enable.yaml
new file mode 100644
index 0000000000000..2c1c0e0533ec2
--- /dev/null
+++ b/qa/mgr_ttl_cache/enable.yaml
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      mgr:
+        mgr ttl cache expire seconds: 5
diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/.qa b/qa/suites/rados/mgr/mgr_ttl_cache/.qa
new file mode 120000
index 0000000000000..fea2489fdf6d9
--- /dev/null
+++ b/qa/suites/rados/mgr/mgr_ttl_cache/.qa
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml b/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml
new file mode 120000
index 0000000000000..d7db486dd9f12
--- /dev/null
+++ b/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml
@@ -0,0 +1 @@
+.qa/mgr_ttl_cache/disable.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml b/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml
new file mode 120000
index 0000000000000..18286a656bafa
--- /dev/null
+++ b/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml
@@ -0,0 +1 @@
+.qa/mgr_ttl_cache/enable.yaml
\ No newline at end of file
diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py
index 8f5b27e28a818..ad4834a6eb4ec 100644
--- a/qa/tasks/ceph_test_case.py
+++ b/qa/tasks/ceph_test_case.py
@@ -86,6 +86,11 @@ class CephTestCase(unittest.TestCase):
        self._mon_configs_set.add((section, key))
        self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value))
 
+    def cluster_cmd(self, command: str):
+        assert self.ceph_cluster is not None
+        return self.ceph_cluster.mon_manager.raw_cluster_cmd(*(command.split(" ")))
+
+
     def assert_cluster_log(self, expected_pattern, invert_match=False,
                            timeout=10, watch_channel=None):
         """
diff --git a/qa/tasks/mgr/test_cache.py b/qa/tasks/mgr/test_cache.py
new file mode 100644
index 0000000000000..71131cbc6afa6
--- /dev/null
+++ b/qa/tasks/mgr/test_cache.py
@@ -0,0 +1,83 @@
+import json
+
+from .mgr_test_case import MgrTestCase
+
+class TestCache(MgrTestCase):
+
+    def setUp(self):
+        super(TestCache, self).setUp()
+        self.setup_mgrs()
+        self._load_module("cli_api")
+        self.ttl = 10
+        self.enable_cache(self.ttl)
+
+    def tearDown(self):
+        self.disable_cache()
+
+    def get_hit_miss_ratio(self):
+        perf_dump_command = f"daemon mgr.{self.mgr_cluster.get_active_id()} perf dump"
+        perf_dump_res = self.cluster_cmd(perf_dump_command)
+        perf_dump = json.loads(perf_dump_res)
+        h = perf_dump["mgr"]["cache_hit"]
+        m = perf_dump["mgr"]["cache_miss"]
+        return int(h), int(m)
+
+    def enable_cache(self, ttl):
+        set_ttl = f"config set mgr mgr_ttl_cache_expire_seconds {ttl}"
+        self.cluster_cmd(set_ttl)
+
+    def disable_cache(self):
+        set_ttl = "config set mgr mgr_ttl_cache_expire_seconds 0"
+        self.cluster_cmd(set_ttl)
+
+
+    def test_init_cache(self):
+        get_ttl = "config get mgr mgr_ttl_cache_expire_seconds"
+        res = self.cluster_cmd(get_ttl)
+        self.assertEquals(int(res), 10)
+
+    def test_health_not_cached(self):
+        get_health = "mgr api get health"
+
+        h_start, m_start = self.get_hit_miss_ratio()
+        self.cluster_cmd(get_health)
+        h, m = self.get_hit_miss_ratio()
+
+        self.assertEquals(h, h_start)
+        self.assertEquals(m, m_start)
+
+    def test_osdmap(self):
+        get_osdmap = "mgr api get osd_map"
+
+        # store in cache
+        self.cluster_cmd(get_osdmap)
+        # get from cache
+        res = self.cluster_cmd(get_osdmap)
+        osd_map = json.loads(res)
+        self.assertIn("osds", osd_map)
+        self.assertGreater(len(osd_map["osds"]), 0)
+        self.assertIn("epoch", osd_map)
+
+
+
+    def test_hit_miss_ratio(self):
+        get_osdmap = "mgr api get osd_map"
+
+        hit_start, miss_start = self.get_hit_miss_ratio()
+
+        def wait_miss():
+            self.cluster_cmd(get_osdmap)
+            _, m = self.get_hit_miss_ratio()
+            return m == miss_start + 1
+
+        # Miss, add osd_map to cache
+        self.wait_until_true(wait_miss, self.ttl + 5)
+        h, m = self.get_hit_miss_ratio()
+        self.assertEquals(h, hit_start)
+        self.assertEquals(m, miss_start+1)
+
+        # Hit, get osd_map from cache
+        self.cluster_cmd(get_osdmap)
+        h, m = self.get_hit_miss_ratio()
+        self.assertEquals(h, hit_start+1)
+        self.assertEquals(m, miss_start+1)
diff --git a/src/common/Formatter.h b/src/common/Formatter.h
index e57ede878d9e5..6751e4709ce0b 100644
--- a/src/common/Formatter.h
+++ b/src/common/Formatter.h
@@ -171,6 +171,8 @@ namespace ceph {
       return false; /* is handling done? */
     }
 
+    int stack_size() { return m_stack.size(); }
+
   private:
 
     struct json_formatter_stack_entry_d {
@@ -309,3 +311,4 @@ namespace ceph {
   std::string fixed_u_to_string(uint64_t num, int scale);
 }
 #endif
+
diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
index 1c81c3a1701cb..565583c319e31 100644
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -6113,3 +6113,10 @@ options:
   - rgw
   - osd
   with_legacy: true
+- name: mgr_ttl_cache_expire_seconds
+  type: uint
+  level: dev
+  desc: Set the time to live in seconds - set to 0 to disable the cache.
+  default: 0
+  services:
+  - mgr
diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc
index 808daaf7f7742..0c08f3a02b523 100644
--- a/src/mgr/ActivePyModules.cc
+++ b/src/mgr/ActivePyModules.cc
@@ -23,6 +23,8 @@
 #include "mon/MonMap.h"
 #include "osd/osd_types.h"
 #include "mgr/MgrContext.h"
+#include "mgr/TTLCache.h"
+#include "mgr/mgr_perf_counters.h"
 
 // For ::mgr_store_prefix
 #include "PyModule.h"
@@ -167,16 +169,47 @@ PyObject *ActivePyModules::get_daemon_status_python(
   return f.get();
 }
 
+void ActivePyModules::update_cache_metrics() {
+    auto hit_miss_ratio = ttl_cache.get_hit_miss_ratio();
+    perfcounter->set(l_mgr_cache_hit, hit_miss_ratio.first);
+    perfcounter->set(l_mgr_cache_miss, hit_miss_ratio.second);
+}
+
+PyObject *ActivePyModules::cacheable_get_python(const std::string &what)
+{
+  uint64_t ttl_seconds = g_conf().get_val<uint64_t>("mgr_ttl_cache_expire_seconds");
+  if(ttl_seconds > 0) {
+    ttl_cache.set_ttl(ttl_seconds);
+    try{
+      PyObject* cached = ttl_cache.get(what);
+      update_cache_metrics();
+      return cached;
+    } catch (std::out_of_range& e) {}
+  }
+
+  PyObject *obj = get_python(what);
+  if(ttl_seconds && ttl_cache.is_cacheable(what)) {
+    ttl_cache.insert(what, obj);
+    Py_INCREF(obj);
+  }
+  update_cache_metrics();
+  return obj;
+}
+
 PyObject *ActivePyModules::get_python(const std::string &what)
 {
-  PyFormatter f;
+  uint64_t ttl_seconds = g_conf().get_val<uint64_t>("mgr_ttl_cache_expire_seconds");
+
+  PyFormatter pf;
+  PyJSONFormatter jf;
+  // Use PyJSONFormatter if TTL cache is enabled.
+  Formatter &f = ttl_seconds ? (Formatter&)jf : (Formatter&)pf;
 
   if (what == "fs_map") {
     without_gil_t no_gil;
-    return cluster_state.with_fsmap([&](const FSMap &fsmap) {
+    cluster_state.with_fsmap([&](const FSMap &fsmap) {
       no_gil.acquire_gil();
       fsmap.dump(&f);
-      return f.get();
     });
   } else if (what == "osdmap_crush_map_text") {
     without_gil_t no_gil;
@@ -189,7 +222,7 @@ PyObject *ActivePyModules::get_python(const std::string &what)
     return PyUnicode_FromString(crush_text.c_str());
   } else if (what.substr(0, 7) == "osd_map") {
     without_gil_t no_gil;
-    return cluster_state.with_osdmap([&](const OSDMap &osd_map){
+    cluster_state.with_osdmap([&](const OSDMap &osd_map){
       no_gil.acquire_gil();
       if (what == "osd_map") {
         osd_map.dump(&f);
@@ -198,7 +231,6 @@ PyObject *ActivePyModules::get_python(const std::string &what)
       } else if (what == "osd_map_crush") {
         osd_map.crush->dump(&f);
       }
-      return f.get();
     });
   } else if (what == "modified_config_options") {
     without_gil_t no_gil;
@@ -216,27 +248,23 @@ PyObject *ActivePyModules::get_python(const std::string &what)
       f.dump_string("name", name);
     }
     f.close_section();
-    return f.get();
   } else if (what.substr(0, 6) == "config") {
     if (what == "config_options") {
       g_conf().config_options(&f);
     } else if (what == "config") {
       g_conf().show_config(&f);
     }
-    return f.get();
   } else if (what == "mon_map") {
     without_gil_t no_gil;
-    return cluster_state.with_monmap([&](const MonMap &monmap) {
+    cluster_state.with_monmap([&](const MonMap &monmap) {
       no_gil.acquire_gil();
       monmap.dump(&f);
-      return f.get();
     });
   } else if (what == "service_map") {
     without_gil_t no_gil;
-    return cluster_state.with_servicemap([&](const ServiceMap &service_map) {
+    cluster_state.with_servicemap([&](const ServiceMap &service_map) {
       no_gil.acquire_gil();
       service_map.dump(&f);
-      return f.get();
     });
   } else if (what == "osd_metadata") {
     without_gil_t no_gil;
@@ -252,7 +280,6 @@ PyObject *ActivePyModules::get_python(const std::string &what)
         f.close_section();
       });
     }
-    return with_gil(no_gil, [&] { return f.get(); });
   } else if (what == "mds_metadata") {
     without_gil_t no_gil;
     auto dmc = daemon_state.get_by_service("mds");
@@ -267,10 +294,9 @@ PyObject *ActivePyModules::get_python(const std::string &what)
         f.close_section();
       });
     }
-    return with_gil(no_gil, [&] { return f.get(); });
   } else if (what == "pg_summary") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap(
+    cluster_state.with_pgmap(
         [&f, &no_gil](const PGMap &pg_map) {
           std::map<std::string, std::map<std::string, uint32_t> > osds;
           std::map<std::string, std::map<std::string, uint32_t> > pools;
@@ -312,25 +338,22 @@ PyObject *ActivePyModules::get_python(const std::string &what)
           f.open_object_section("pg_stats_sum");
           pg_map.pg_sum.dump(&f);
           f.close_section();
-	  return f.get();
         }
     );
   } else if (what == "pg_status") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap(
+    cluster_state.with_pgmap(
         [&](const PGMap &pg_map) {
 	  no_gil.acquire_gil();
 	  pg_map.print_summary(&f, nullptr);
-          return f.get();
         }
     );
   } else if (what == "pg_dump") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap(
+    cluster_state.with_pgmap(
       [&](const PGMap &pg_map) {
 	no_gil.acquire_gil();
 	pg_map.dump(&f, false);
-	return f.get();
       }
     );
   } else if (what == "devices") {
@@ -342,9 +365,8 @@ PyObject *ActivePyModules::get_python(const std::string &what)
       [&](const DeviceState &dev) {
         with_gil(no_gil, [&] { f.dump_object("device", dev); });
       });
-    return with_gil(no_gil, [&] {
+    with_gil(no_gil, [&] {
       f.close_section();
-      return f.get();
     });
   } else if (what.size() > 7 &&
 	     what.substr(0, 7) == "device ") {
@@ -357,70 +379,61 @@ PyObject *ActivePyModules::get_python(const std::string &what)
       })) {
       // device not found
     }
-    return with_gil(no_gil, [&] { return f.get(); });
   } else if (what == "io_rate") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap(
+    cluster_state.with_pgmap(
       [&](const PGMap &pg_map) {
         no_gil.acquire_gil();
         pg_map.dump_delta(&f);
-	return f.get();
       }
     );
   } else if (what == "df") {
     without_gil_t no_gil;
-    return cluster_state.with_osdmap_and_pgmap(
+    cluster_state.with_osdmap_and_pgmap(
       [&](
 	const OSDMap& osd_map,
 	const PGMap &pg_map) {
         no_gil.acquire_gil();
         pg_map.dump_cluster_stats(nullptr, &f, true);
         pg_map.dump_pool_stats_full(osd_map, nullptr, &f, true);
-	return f.get();
       });
   } else if (what == "pg_stats") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap([&](const PGMap &pg_map) {
+    cluster_state.with_pgmap([&](const PGMap &pg_map) {
       no_gil.acquire_gil();
       pg_map.dump_pg_stats(&f, false);
-      return f.get();
     });
   } else if (what == "pool_stats") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap([&](const PGMap &pg_map) {
+    cluster_state.with_pgmap([&](const PGMap &pg_map) {
       no_gil.acquire_gil();
       pg_map.dump_pool_stats(&f);
-      return f.get();
     });
   } else if (what == "pg_ready") {
     server.dump_pg_ready(&f);
-    return f.get();
   } else if (what == "pg_progress") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap([&](const PGMap &pg_map) {
+    cluster_state.with_pgmap([&](const PGMap &pg_map) {
       no_gil.acquire_gil();
       pg_map.dump_pg_progress(&f);
       server.dump_pg_ready(&f);
-      return f.get();
     });
   } else if (what == "osd_stats") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap([&](const PGMap &pg_map) {
+    cluster_state.with_pgmap([&](const PGMap &pg_map) {
       no_gil.acquire_gil();
       pg_map.dump_osd_stats(&f, false);
-      return f.get();
     });
   } else if (what == "osd_ping_times") {
     without_gil_t no_gil;
-    return cluster_state.with_pgmap([&](const PGMap &pg_map) {
+    cluster_state.with_pgmap([&](const PGMap &pg_map) {
       no_gil.acquire_gil();
       pg_map.dump_osd_ping_times(&f);
-      return f.get();
     });
   } else if (what == "osd_pool_stats") {
     without_gil_t no_gil;
     int64_t poolid = -ENOENT;
-    return cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap,
+    cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap,
 					    const PGMap& pg_map) {
       no_gil.acquire_gil();
       f.open_array_section("pool_stats");
@@ -429,29 +442,25 @@ PyObject *ActivePyModules::get_python(const std::string &what)
         pg_map.dump_pool_stats_and_io_rate(poolid, osdmap, &f, nullptr);
       }
       f.close_section();
-      return f.get();
     });
   } else if (what == "health") {
     without_gil_t no_gil;
-    return cluster_state.with_health([&](const ceph::bufferlist &health_json) {
+    cluster_state.with_health([&](const ceph::bufferlist &health_json) {
       no_gil.acquire_gil();
       f.dump_string("json", health_json.to_str());
-      return f.get();
     });
   } else if (what == "mon_status") {
     without_gil_t no_gil;
-    return cluster_state.with_mon_status(
+    cluster_state.with_mon_status(
         [&](const ceph::bufferlist &mon_status_json) {
       no_gil.acquire_gil();
       f.dump_string("json", mon_status_json.to_str());
-      return f.get();
     });
   } else if (what == "mgr_map") {
     without_gil_t no_gil;
-    return cluster_state.with_mgrmap([&](const MgrMap &mgr_map) {
+    cluster_state.with_mgrmap([&](const MgrMap &mgr_map) {
       no_gil.acquire_gil();
       mgr_map.dump(&f);
-      return f.get();
     });
   } else if (what == "mgr_ips") {
     entity_addrvec_t myaddrs = server.get_myaddrs();
@@ -464,10 +473,8 @@ PyObject *ActivePyModules::get_python(const std::string &what)
       }
     }
     f.close_section();
-    return f.get();
   } else if (what == "have_local_config_map") {
     f.dump_bool("have_local_config_map", have_local_config_map);
-    return f.get();
   } else if (what == "active_clean_pgs"){
     without_gil_t no_gil;
     cluster_state.with_pgmap(
@@ -491,11 +498,17 @@ PyObject *ActivePyModules::get_python(const std::string &what)
       const auto num_pg = pg_map.num_pg;
       f.dump_unsigned("total_num_pgs", num_pg);
     });
-    return f.get();
   } else {
     derr << "Python module requested unknown data '" << what << "'" << dendl;
     Py_RETURN_NONE;
   }
+  without_gil_t no_gil;
+  no_gil.acquire_gil();
+  if(ttl_seconds) {
+    return jf.get();
+  } else {
+    return pf.get();
+  }
 }
 
 void ActivePyModules::start_one(PyModuleRef py_module)
@@ -1277,7 +1290,7 @@ void ActivePyModules::config_notify()
     // C++ code, and avoid any potential lock cycles.
     dout(15) << "notify (config) " << name << dendl;
     // workaround for https://bugs.llvm.org/show_bug.cgi?id=35984
-    finisher.queue(new LambdaContext([module=module](int r){ 
+    finisher.queue(new LambdaContext([module=module](int r){
       module->config_notify();
     }));
   }
diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h
index da21bb99fe343..3054fa3a2772f 100644
--- a/src/mgr/ActivePyModules.h
+++ b/src/mgr/ActivePyModules.h
@@ -27,6 +27,7 @@
 #include "mon/MonCommand.h"
 #include "mon/mon_types.h"
 #include "mon/ConfigMap.h"
+#include "mgr/TTLCache.h"
 
 #include "DaemonState.h"
 #include "ClusterState.h"
@@ -55,6 +56,7 @@ class ActivePyModules
   Objecter &objecter;
   Client   &client;
   Finisher &finisher;
+  TTLCache<string, PyObject*> ttl_cache;
 public:
   Finisher cmd_finisher;
 private:
@@ -80,6 +82,7 @@ public:
   MonClient &get_monc() {return monc;}
   Objecter  &get_objecter() {return objecter;}
   Client    &get_client() {return client;}
+  PyObject *cacheable_get_python(const std::string &what);
   PyObject *get_python(const std::string &what);
   PyObject *get_server_python(const std::string &hostname);
   PyObject *list_servers_python();
@@ -217,4 +220,8 @@ public:
 
   void cluster_log(const std::string &channel, clog_type prio,
     const std::string &message);
+
+  bool inject_python_on() const;
+  void update_cache_metrics();
 };
+
diff --git a/src/mgr/BaseMgrModule.cc b/src/mgr/BaseMgrModule.cc
index c32070263d622..bd6475a2bedb0 100644
--- a/src/mgr/BaseMgrModule.cc
+++ b/src/mgr/BaseMgrModule.cc
@@ -375,7 +375,7 @@ ceph_state_get(BaseMgrModule *self, PyObject *args)
     return NULL;
   }
 
-  return self->py_modules->get_python(what);
+  return self->py_modules->cacheable_get_python(what);
 }
 
 
@@ -1592,4 +1592,3 @@ PyTypeObject BaseMgrModuleType = {
   0,                         /* tp_alloc */
   BaseMgrModule_new,     /* tp_new */
 };
-
diff --git a/src/mgr/CMakeLists.txt b/src/mgr/CMakeLists.txt
index 55147af4fc6ba..d688030343edb 100644
--- a/src/mgr/CMakeLists.txt
+++ b/src/mgr/CMakeLists.txt
@@ -17,6 +17,7 @@ if(WITH_MGR)
     DaemonState.cc
     Gil.cc
     Mgr.cc
+    mgr_perf_counters.cc
     MgrStandby.cc
     MetricCollector.cc
     OSDPerfMetricTypes.cc
diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc
index 96c18892d0a3d..7649298432f18 100644
--- a/src/mgr/MgrStandby.cc
+++ b/src/mgr/MgrStandby.cc
@@ -24,6 +24,7 @@
 
 #include "mgr/MgrContext.h"
 #include "mgr/mgr_commands.h"
+#include "mgr/mgr_perf_counters.h"
 
 #include "messages/MMgrBeacon.h"
 #include "messages/MMgrMap.h"
@@ -201,6 +202,8 @@ int MgrStandby::init()
   timer.init();
 
   py_module_registry.init();
+  mgr_perf_start(g_ceph_context);
+
 
   tick();
 
@@ -275,7 +278,7 @@ void MgrStandby::send_beacon()
 
     m->set_services(active_mgr->get_services());
   }
-                                 
+
   monc.send_mon_message(std::move(m));
 }
 
@@ -289,7 +292,7 @@ void MgrStandby::tick()
       new LambdaContext([this](int r){
           tick();
       }
-  )); 
+  ));
 }
 
 void MgrStandby::shutdown()
@@ -324,6 +327,7 @@ void MgrStandby::shutdown()
   // to touch references to the things we're about to tear down
   finisher.wait_for_empty();
   finisher.stop();
+  mgr_perf_stop(g_ceph_context);
 }
 
 void MgrStandby::respawn()
@@ -487,4 +491,3 @@ std::string MgrStandby::state_str()
     return "active (starting)";
   }
 }
-
diff --git a/src/mgr/PyFormatter.cc b/src/mgr/PyFormatter.cc
index 48f1cca290317..8e58f6e9a84ab 100644
--- a/src/mgr/PyFormatter.cc
+++ b/src/mgr/PyFormatter.cc
@@ -16,6 +16,7 @@
 
 
 #include "PyFormatter.h"
+#include <fstream>
 
 #define LARGE_SIZE 1024
 
@@ -125,3 +126,15 @@ void PyFormatter::finish_pending_streams()
   pending_streams.clear();
 }
 
+PyObject* PyJSONFormatter::get()
+{
+  if(json_formatter::stack_size()) {
+    close_section();
+  }
+  ceph_assert(!json_formatter::stack_size());
+  std::ostringstream ss;
+  flush(ss);
+  std::string s = ss.str();
+  PyObject* obj = PyBytes_FromStringAndSize(std::move(s.c_str()), s.size());
+  return obj;
+}
diff --git a/src/mgr/PyFormatter.h b/src/mgr/PyFormatter.h
index cd609eebeca92..5e4c0a679ac34 100644
--- a/src/mgr/PyFormatter.h
+++ b/src/mgr/PyFormatter.h
@@ -141,5 +141,23 @@ private:
 
 };
 
+class PyJSONFormatter : public JSONFormatter {
+public:
+  PyObject *get();
+  PyJSONFormatter (const PyJSONFormatter&) = default;
+  PyJSONFormatter(bool pretty=false, bool is_array=false) : JSONFormatter(pretty) {
+    if(is_array) {
+      open_array_section("");
+    } else {
+      open_object_section("");
+    }
+}
+
+private:
+  using json_formatter = JSONFormatter;
+  template <class T> void add_value(std::string_view name, T val);
+  void add_value(std::string_view name, std::string_view val, bool quoted);
+};
+
 #endif
 
diff --git a/src/mgr/TTLCache.cc b/src/mgr/TTLCache.cc
new file mode 100644
index 0000000000000..05fe95987095c
--- /dev/null
+++ b/src/mgr/TTLCache.cc
@@ -0,0 +1,100 @@
+#include "TTLCache.h"
+
+#include <chrono>
+#include <functional>
+#include <string>
+
+#include "PyUtil.h"
+
+template <class Key, class Value>
+void TTLCacheBase<Key, Value>::insert(Key key, Value value) {
+  auto now = std::chrono::steady_clock::now();
+
+  if (!ttl) return;
+  int16_t random_ttl_offset =
+      ttl * ttl_spread_ratio * (2l * rand() / float(RAND_MAX) - 1);
+  // in order not to have spikes of misses we increase or decrease by 25% of
+  // the ttl
+  int16_t spreaded_ttl = ttl + random_ttl_offset;
+  auto expiration_date = now + std::chrono::seconds(spreaded_ttl);
+  cache::insert(key, {value, expiration_date});
+}
+
+template <class Key, class Value> Value TTLCacheBase<Key, Value>::get(Key key) {
+  if (!exists(key)) {
+    throw_key_not_found(key);
+  }
+  if (expired(key)) {
+    erase(key);
+    throw_key_not_found(key);
+  }
+  Value value = {get_value(key)};
+  return value;
+}
+
+template <class Key> PyObject* TTLCache<Key, PyObject*>::get(Key key) {
+  if (!this->exists(key)) {
+    this->throw_key_not_found(key);
+  }
+  if (this->expired(key)) {
+    this->erase(key);
+    this->throw_key_not_found(key);
+  }
+  PyObject* cached_value = this->get_value(key);
+  Py_INCREF(cached_value);
+  return cached_value;
+}
+
+template <class Key, class Value>
+void TTLCacheBase<Key, Value>::erase(Key key) {
+  cache::erase(key);
+}
+
+template <class Key> void TTLCache<Key, PyObject*>::erase(Key key) {
+  Py_DECREF(this->get_value(key, false));
+  ttl_base::erase(key);
+}
+
+template <class Key, class Value>
+bool TTLCacheBase<Key, Value>::expired(Key key) {
+  ttl_time_point expiration_date = get_value_time_point(key);
+  auto now = std::chrono::steady_clock::now();
+  if (now >= expiration_date) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+template <class Key, class Value> void TTLCacheBase<Key, Value>::clear() {
+  cache::clear();
+}
+
+template <class Key, class Value>
+Value TTLCacheBase<Key, Value>::get_value(Key key, bool count_hit) {
+  value_type stored_value = cache::get(key, count_hit);
+  Value value = std::get<0>(stored_value);
+  return value;
+}
+
+template <class Key, class Value>
+ttl_time_point TTLCacheBase<Key, Value>::get_value_time_point(Key key) {
+  value_type stored_value = cache::get(key, false);
+  ttl_time_point tp = std::get<1>(stored_value);
+  return tp;
+}
+
+template <class Key, class Value>
+void TTLCacheBase<Key, Value>::set_ttl(uint16_t ttl) {
+  this->ttl = ttl;
+}
+
+template <class Key, class Value>
+bool TTLCacheBase<Key, Value>::exists(Key key) {
+  return cache::exists(key);
+}
+
+template <class Key, class Value>
+void TTLCacheBase<Key, Value>::throw_key_not_found(Key key) {
+  cache::throw_key_not_found(key);
+}
diff --git a/src/mgr/TTLCache.h b/src/mgr/TTLCache.h
new file mode 100644
index 0000000000000..a6d5ddf2e2d03
--- /dev/null
+++ b/src/mgr/TTLCache.h
@@ -0,0 +1,124 @@
+#pragma once
+
+#include <atomic>
+#include <chrono>
+#include <functional>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "PyUtil.h"
+
+using namespace std;
+
+template <class Key, class Value> class Cache {
+ private:
+  std::atomic<uint64_t> hits, misses;
+
+ protected:
+  unsigned int capacity;
+  Cache(unsigned int size = UINT16_MAX) : hits{0}, misses{0}, capacity{size} {};
+  std::map<Key, Value> content;
+  std::vector<string> allowed_keys = {"osd_map", "pg_dump", "pg_stats"};
+
+  void mark_miss() {
+    misses++;
+  }
+
+  void mark_hit() {
+    hits++;
+  }
+
+  unsigned int get_misses() { return misses; }
+  unsigned int get_hits() { return hits; }
+  void throw_key_not_found(Key key) {
+    std::stringstream ss;
+    ss << "Key " << key << " couldn't be found\n";
+    throw std::out_of_range(ss.str());
+  }
+
+ public:
+  void insert(Key key, Value value) {
+    mark_miss();
+    if (content.size() < capacity) {
+      content.insert({key, value});
+    }
+  }
+  Value get(Key key, bool count_hit = true) {
+    if (count_hit) {
+      mark_hit();
+    }
+    return content[key];
+  }
+  void erase(Key key) { content.erase(content.find(key)); }
+  void clear() { content.clear(); }
+  bool exists(Key key) { return content.find(key) != content.end(); }
+  std::pair<uint64_t, uint64_t> get_hit_miss_ratio() {
+    return std::make_pair(hits.load(), misses.load());
+  }
+  bool is_cacheable(Key key) {
+    for (auto k : allowed_keys) {
+      if (key == k) return true;
+    }
+    return false;
+  }
+  int size() { return content.size(); }
+
+  ~Cache(){};
+};
+
+using ttl_time_point = std::chrono::time_point<std::chrono::steady_clock>;
+template <class Key, class Value>
+class TTLCacheBase : public Cache<Key, std::pair<Value, ttl_time_point>> {
+ private:
+  uint16_t ttl;
+  float ttl_spread_ratio;
+  using value_type = std::pair<Value, ttl_time_point>;
+  using cache = Cache<Key, value_type>;
+
+ protected:
+  Value get_value(Key key, bool count_hit = true);
+  ttl_time_point get_value_time_point(Key key);
+  bool exists(Key key);
+  bool expired(Key key);
+  void finish_get(Key key);
+  void finish_erase(Key key);
+  void throw_key_not_found(Key key);
+
+ public:
+  TTLCacheBase(uint16_t ttl_ = 0, uint16_t size = UINT16_MAX,
+               float spread = 0.25)
+      : Cache<Key, value_type>(size), ttl{ttl_}, ttl_spread_ratio{spread} {}
+  ~TTLCacheBase(){};
+  void insert(Key key, Value value);
+  Value get(Key key);
+  void erase(Key key);
+  void clear();
+  uint16_t get_ttl() { return ttl; };
+  void set_ttl(uint16_t ttl);
+};
+
+template <class Key, class Value>
+class TTLCache : public TTLCacheBase<Key, Value> {
+ public:
+  TTLCache(uint16_t ttl_ = 0, uint16_t size = UINT16_MAX, float spread = 0.25)
+      : TTLCacheBase<Key, Value>(ttl_, size, spread) {}
+  ~TTLCache(){};
+};
+
+template <class Key>
+class TTLCache<Key, PyObject*> : public TTLCacheBase<Key, PyObject*> {
+ public:
+  TTLCache(uint16_t ttl_ = 0, uint16_t size = UINT16_MAX, float spread = 0.25)
+      : TTLCacheBase<Key, PyObject*>(ttl_, size, spread) {}
+  ~TTLCache(){};
+  PyObject* get(Key key);
+  void erase(Key key);
+
+ private:
+  using ttl_base = TTLCacheBase<Key, PyObject*>;
+};
+
+#include "TTLCache.cc"
+
diff --git a/src/mgr/mgr_perf_counters.cc b/src/mgr/mgr_perf_counters.cc
new file mode 100644
index 0000000000000..1b5585f9ec948
--- /dev/null
+++ b/src/mgr/mgr_perf_counters.cc
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "mgr_perf_counters.h"
+#include "common/perf_counters.h"
+#include "common/ceph_context.h"
+
+PerfCounters *perfcounter = NULL;
+
+int mgr_perf_start(CephContext *cct)
+{
+  PerfCountersBuilder plb(cct, "mgr", l_mgr_first, l_mgr_last);
+  plb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
+
+  plb.add_u64_counter(l_mgr_cache_hit, "cache_hit", "Cache hits");
+  plb.add_u64_counter(l_mgr_cache_miss, "cache_miss", "Cache miss");
+
+  perfcounter = plb.create_perf_counters();
+  cct->get_perfcounters_collection()->add(perfcounter);
+  return 0;
+}
+
+void mgr_perf_stop(CephContext *cct)
+{
+  ceph_assert(perfcounter);
+  cct->get_perfcounters_collection()->remove(perfcounter);
+  delete perfcounter;
+}
diff --git a/src/mgr/mgr_perf_counters.h b/src/mgr/mgr_perf_counters.h
new file mode 100644
index 0000000000000..d695d905fe96c
--- /dev/null
+++ b/src/mgr/mgr_perf_counters.h
@@ -0,0 +1,20 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+#include "include/common_fwd.h"
+
+extern PerfCounters* perfcounter;
+
+extern int mgr_perf_start(CephContext* cct);
+extern void mgr_perf_stop(CephContext* cct);
+
+enum {
+  l_mgr_first,
+
+  l_mgr_cache_hit,
+  l_mgr_cache_miss,
+
+  l_mgr_last,
+};
+
diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py
index 7f45ad0b75f83..3301966627376 100644
--- a/src/pybind/mgr/mgr_module.py
+++ b/src/pybind/mgr/mgr_module.py
@@ -1276,7 +1276,11 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
             All these structures have their own JSON representations: experiment
             or look at the C++ ``dump()`` methods to learn about them.
         """
-        return self._ceph_get(data_name)
+        obj =  self._ceph_get(data_name)
+        if isinstance(obj, bytes):
+            obj = json.loads(obj)
+
+        return obj
 
     def _stattype_to_str(self, stattype: int) -> str:
 
diff --git a/src/test/mgr/CMakeLists.txt b/src/test/mgr/CMakeLists.txt
index 9e6950d799ee8..169243824815c 100644
--- a/src/test/mgr/CMakeLists.txt
+++ b/src/test/mgr/CMakeLists.txt
@@ -5,6 +5,12 @@ add_executable(unittest_mgr_mgrcap
 add_ceph_unittest(unittest_mgr_mgrcap)
 target_link_libraries(unittest_mgr_mgrcap global)
 
+# unittest_mgr_ttlcache
+add_executable(unittest_mgr_ttlcache test_ttlcache.cc)
+add_ceph_unittest(unittest_mgr_ttlcache)
+target_link_libraries(unittest_mgr_ttlcache
+  Python3::Python ${CMAKE_DL_LIBS} ${GSSAPI_LIBRARIES})
+
 #scripts
 if(WITH_MGR_DASHBOARD_FRONTEND)
   if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm|ARM")
@@ -13,4 +19,3 @@ if(WITH_MGR_DASHBOARD_FRONTEND)
 
   add_ceph_test(mgr-dashboard-smoke.sh ${CMAKE_CURRENT_SOURCE_DIR}/mgr-dashboard-smoke.sh)
 endif(WITH_MGR_DASHBOARD_FRONTEND)
-
diff --git a/src/test/mgr/test_ttlcache.cc b/src/test/mgr/test_ttlcache.cc
new file mode 100644
index 0000000000000..a1ee0a8626f42
--- /dev/null
+++ b/src/test/mgr/test_ttlcache.cc
@@ -0,0 +1,70 @@
+#include <iostream>
+
+#include "mgr/TTLCache.h"
+#include "gtest/gtest.h"
+
+using namespace std;
+
+TEST(TTLCache, Get) {
+	TTLCache<string, int> c{100};
+	c.insert("foo", 1);
+	int foo = c.get("foo");
+	ASSERT_EQ(foo, 1);
+}
+
+TEST(TTLCache, Erase) {
+	TTLCache<string, int> c{100};
+	c.insert("foo", 1);
+	int foo = c.get("foo");
+	ASSERT_EQ(foo, 1);
+	c.erase("foo");
+  try{
+    foo = c.get("foo");
+    FAIL();
+  } catch (std::out_of_range& e) {
+    SUCCEED();
+  }
+}
+
+TEST(TTLCache, Clear) {
+	TTLCache<string, int> c{100};
+	c.insert("foo", 1);
+	c.insert("foo2", 2);
+	c.clear();
+	ASSERT_FALSE(c.size());
+}
+
+TEST(TTLCache, NoTTL) {
+	TTLCache<string, int> c{100};
+	c.insert("foo", 1);
+	int foo = c.get("foo");
+	ASSERT_EQ(foo, 1);
+	c.set_ttl(0);
+	c.insert("foo2", 2);
+  try{
+    foo = c.get("foo2");
+    FAIL();
+  } catch (std::out_of_range& e) {
+    SUCCEED();
+  }
+}
+
+TEST(TTLCache, SizeLimit) {
+	TTLCache<string, int> c{100, 2};
+	c.insert("foo", 1);
+	c.insert("foo2", 2);
+	c.insert("foo3", 3);
+	ASSERT_EQ(c.size(), 2);
+}
+
+TEST(TTLCache, HitRatio) {
+	TTLCache<string, int> c{100};
+	c.insert("foo", 1);
+	c.insert("foo2", 2);
+	c.insert("foo3", 3);
+	c.get("foo2");
+	c.get("foo3");
+	std::pair<uint64_t, uint64_t> hit_miss_ratio = c.get_hit_miss_ratio();
+	ASSERT_EQ(std::get<1>(hit_miss_ratio), 3);
+	ASSERT_EQ(std::get<0>(hit_miss_ratio), 2);
+}
-- 
2.39.5