From a968f65d784b3d6c6a172929aa293f09e6917fa6 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 4 Feb 2021 14:14:13 +0800 Subject: [PATCH] mgr: enhance the rados service For some use cases, like the tcmu-runner, there may be hundreds or thousands of LUNs, and for each LUN it will register one service daemon, so the `ceph -s` output will be full of useless info. This will allow classifying the service daemons in one specified format by adding two pairs in metadata: "daemon_type" : "${TYPE}" "daemon_prefix" : "${PREFIX}" TYPE: will be used to replace the default "daemon(s)" shown in `ceph -s`. If absent, "daemon" will be used. PREFIX: if present the active members will be classified by the prefix instead of "daemon_name". For example, for iscsi gateways, it will be something like: "daemon_type" : "portal" "daemon_prefix" : "gw${N}" Then the `ceph -s` output will be: ... services: mon: 3 daemons, quorum a,b,c (age 50m) mgr: x(active, since 49m) mds: a:1 {0=c=up:active} 2 up:standby osd: 3 osds: 3 up (since 49m), 3 in (since 49m) iscsi: 8 portals active (gw0, gw1, gw2, gw3, gw4, gw5, gw6, gw7) ... Fixes: https://tracker.ceph.com/issues/49057 Signed-off-by: Xiubo Li --- src/mgr/ServiceMap.cc | 66 +++++++++++++++--- src/test/librados/service.cc | 127 +++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 11 deletions(-) diff --git a/src/mgr/ServiceMap.cc b/src/mgr/ServiceMap.cc index 812691a8478..4babb81f224 100644 --- a/src/mgr/ServiceMap.cc +++ b/src/mgr/ServiceMap.cc @@ -3,7 +3,9 @@ #include "mgr/ServiceMap.h" +#include <experimental/iterator> #include <fmt/format.h> +#include <sstream> #include "common/Formatter.h" @@ -75,19 +77,61 @@ std::string ServiceMap::Service::get_summary() const if (daemons.empty()) { return "no daemons active"; } + std::ostringstream ss; - ss << daemons.size() << (daemons.size() > 1 ? 
" daemons" : " daemon") - << " active"; - - if (!daemons.empty()) { - ss << " ("; - for (auto p = daemons.begin(); p != daemons.end(); ++p) { - if (p != daemons.begin()) { - ss << ", "; - } - ss << p->first; + + // The format is two pairs in metadata: + // "daemon_type" : "${TYPE}" + // "daemon_prefix" : "${PREFIX}" + // + // TYPE: will be used to replace the default "daemon(s)" + // shown in `ceph -s`. If absent, "daemon" will be used. + // PREFIX: if present the active members will be classified + // by the prefix instead of "daemon_name". + // + // For example, for iscsi gateways, it will be something like: + // "daemon_type" : "portal" + // "daemon_prefix" : "gateway${N}" + // The `ceph -s` output will be something like: + // iscsi: 3 portals active (gateway0, gateway1, gateway2) + + std::map<std::string, std::set<std::string>> prefs; + for (auto& d : daemons) { + // In case the "daemon_type" is absent, use the + // default "daemon" type + std::string type("daemon"); + std::string prefix; + + auto t = d.second.metadata.find("daemon_type"); + if (t != d.second.metadata.end()) { + type = d.second.metadata.at("daemon_type"); + } + auto p = d.second.metadata.find("daemon_prefix"); + if (p != d.second.metadata.end()) { + prefix = d.second.metadata.at("daemon_prefix"); + } else { + // In case the "daemon_prefix" is absent, show + // the daemon_name instead. + prefix = d.first; + } + auto& pref = prefs[type]; + pref.insert(prefix); + } + + for (auto &pr : prefs) { + if (!ss.str().empty()) + ss << ", "; + + ss << pr.second.size() << " " << pr.first + << (pr.second.size() > 1 ? 
"s" : "") + << " active"; + + if (pr.second.size()) { + ss << " ("; + std::copy(std::begin(pr.second), std::end(pr.second), + std::experimental::make_ostream_joiner(ss, ", ")); + ss << ")"; } - ss << ")"; } return ss.str(); diff --git a/src/test/librados/service.cc b/src/test/librados/service.cc index 9bc9cd50691..9611b40eb11 100644 --- a/src/test/librados/service.cc +++ b/src/test/librados/service.cc @@ -4,7 +4,10 @@ #include "common/config_proxy.h" #include "test/librados/test.h" #include "test/librados/TestCase.h" +#include <sys/resource.h> +#include <mutex> +#include <condition_variable> #include <algorithm> #include <thread> #include <errno.h> @@ -45,6 +48,130 @@ TEST(LibRadosService, RegisterLate) { rados_shutdown(cluster); } +static void status_format_func(const int i, std::mutex &lock, + std::condition_variable &cond, + int &threads_started, bool &stopped) +{ + rados_t cluster; + char *metadata_buf = NULL; + + ASSERT_EQ(0, rados_create(&cluster, "admin")); + ASSERT_EQ(0, rados_conf_read_file(cluster, NULL)); + ASSERT_EQ(0, rados_conf_parse_env(cluster, NULL)); + + ASSERT_EQ(0, rados_connect(cluster)); + if (i == 0) { + ASSERT_NE(-1, asprintf(&metadata_buf, "%s%c%s%c", + "foo", '\0', "bar", '\0')); + } else if (i == 1) { + ASSERT_NE(-1, asprintf(&metadata_buf, "%s%c%s%c", + "daemon_type", '\0', "portal", '\0')); + } else if (i == 2) { + ASSERT_NE(-1, asprintf(&metadata_buf, "%s%c%s%c", + "daemon_prefix", '\0', "gateway", '\0')); + } else { + string prefix = string("gw") + stringify(i % 4); + ASSERT_NE(-1, asprintf(&metadata_buf, "%s%c%s%c%s%c%s%c", + "daemon_type", '\0', "portal", '\0', + "daemon_prefix", '\0', prefix.c_str(), '\0')); + } + string name = string("rbd/image") + stringify(i); + ASSERT_EQ(0, rados_service_register(cluster, "iscsi", name.c_str(), + metadata_buf)); + + std::unique_lock l(lock); + threads_started++; + cond.notify_all(); + cond.wait(l, [&stopped] { + return stopped; + }); + + rados_shutdown(cluster); +} + +TEST(LibRadosService, StatusFormat) { + const int nthreads = 16; + std::thread threads[nthreads]; + 
std::mutex lock; + std::condition_variable cond; + bool stopped = false; + int threads_started = 0; + + // Need a bunch of fd's for this test + struct rlimit rold, rnew; + ASSERT_EQ(getrlimit(RLIMIT_NOFILE, &rold), 0); + rnew = rold; + rnew.rlim_cur = rnew.rlim_max; + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &rnew), 0); + + for (int i = 0; i < nthreads; ++i) + threads[i] = std::thread(status_format_func, i, std::ref(lock), + std::ref(cond), std::ref(threads_started), + std::ref(stopped)); + + { + std::unique_lock l(lock); + cond.wait(l, [nthreads, &threads_started] { + return nthreads == threads_started; + }); + } + + int retry = 5; + while (retry) { + rados_t cluster; + + ASSERT_EQ(0, rados_create(&cluster, "admin")); + ASSERT_EQ(0, rados_conf_read_file(cluster, NULL)); + ASSERT_EQ(0, rados_conf_parse_env(cluster, NULL)); + + ASSERT_EQ(0, rados_connect(cluster)); + JSONFormatter cmd_f; + cmd_f.open_object_section("command"); + cmd_f.dump_string("prefix", "status"); + cmd_f.close_section(); + std::ostringstream cmd_stream; + cmd_f.flush(cmd_stream); + const std::string serialized_cmd = cmd_stream.str(); + const char *cmd[2]; + cmd[1] = NULL; + cmd[0] = serialized_cmd.c_str(); + char *outbuf = NULL; + size_t outlen = 0; + ASSERT_EQ(0, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, + &outbuf, &outlen, NULL, NULL)); + std::string out(outbuf, outlen); + bool success = false; + auto r1 = out.find("5 portals active (gw0, gw1, gw2, gw3, rbd/image1)"); + auto r2 = out.find("2 daemons active (gateway, rbd/image0"); + if (std::string::npos != r1 && std::string::npos != r2) { + success = true; + } + rados_buffer_free(outbuf); + rados_shutdown(cluster); + + if (success || !retry) { + break; + } + + // wait for 2 seconds to make sure all the + // services have been successfully updated + // to ceph mon, then retry it. 
+ sleep(2); + retry--; + } + ASSERT_NE(0, retry); + + { + std::scoped_lock l(lock); + stopped = true; + cond.notify_all(); + } + for (int i = 0; i < nthreads; ++i) + threads[i].join(); + + ASSERT_EQ(setrlimit(RLIMIT_NOFILE, &rold), 0); +} + TEST(LibRadosService, Status) { rados_t cluster; ASSERT_EQ(0, rados_create(&cluster, "admin")); -- 2.39.5