From af02d1031d58826464b5defc40026be2d7419aa8 Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Tue, 5 Mar 2019 14:28:59 +0800 Subject: [PATCH] mgr: 'osd df' by specified class or (crush) name For large clusters, we use device classes to isolate storage pools. The existing 'osd df' output turns out to be too noisy, say, if you care about only a single storage pool with osds possibly spanning over all hosts. With this change you are now able to do 'osd df' by class (or by pool, if you simply use classes to separate different pools), or by a specified crush bucket name you are currently interested in, which is much more convenient. Some examples: ``` $ bin/ceph osd df tree ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME -1 0.05878 - 60 GiB 6.4 GiB 23 MiB 0 B 6 GiB 54 GiB 10.60 1.00 - root default -3 0.02939 - 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 1.00 - host ceph11 3 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 56 up osd.3 4 bbb 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 58 up osd.4 5 ccc 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 60 up osd.5 -5 0.02939 - 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 1.00 - host ceph12 0 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 50 up osd.0 1 bbb 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 61 up osd.1 2 ccc 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 51 up osd.2 TOTAL 60 GiB 6.4 GiB 23 MiB 0 B 6 GiB 54 GiB 10.60 MIN/MAX VAR: 1.00/1.00 STDDEV: 0 $ bin/ceph osd df tree class aaa ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME -1 0.05878 - 20 GiB 2.1 GiB 7.8 MiB 0 B 2 GiB 18 GiB 10.60 1.00 - root default -3 0.02939 - 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 - host ceph11 3 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 56 up osd.3 -5 0.02939 - 10 GiB 1.1 GiB 
3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 - host ceph12 0 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 50 up osd.0 TOTAL 20 GiB 2.1 GiB 7.8 MiB 0 B 2 GiB 18 GiB 10.60 MIN/MAX VAR: 1.00/1.00 STDDEV: 0 $ bin/ceph osd df tree name ceph11 ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME -3 0.02939 - 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 1.00 - host ceph11 3 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 56 up osd.3 4 bbb 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 58 up osd.4 5 ccc 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 60 up osd.5 TOTAL 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 MIN/MAX VAR: 1.00/1.00 STDDEV: 0 ``` Signed-off-by: xie xingguo --- qa/workunits/cephtool/test.sh | 9 ++++ src/crush/CrushWrapper.cc | 24 +++++++++ src/crush/CrushWrapper.h | 7 +++ src/mgr/DaemonServer.cc | 31 +++++++++++- src/mgr/MgrCommands.h | 4 +- src/mon/PGMap.h | 12 ++++- src/osd/OSDMap.cc | 94 +++++++++++++++++++++++++++-------- src/osd/OSDMap.h | 10 ++-- 8 files changed, 161 insertions(+), 30 deletions(-) diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index e20d61d34e9c..b304a13adaa6 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -718,6 +718,15 @@ function test_mon_misc() ceph --concise osd dump | grep '^epoch' ceph osd df | grep 'MIN/MAX VAR' + osd_class=$(ceph osd crush get-device-class 0) + ceph osd df tree class $osd_class | grep 'osd.0' + ceph osd crush rm-device-class 0 + # create class first in case old device class may + # have already been automatically destroyed + ceph osd crush class create $osd_class + ceph osd df tree class $osd_class | expect_false grep 'osd.0' + ceph osd crush set-device-class $osd_class 0 + ceph osd df tree name osd.0 | grep 'osd.0' # df ceph df > $TMPFILE diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 
9ec0557dc19f..966ae59d62fa 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -858,6 +858,30 @@ int CrushWrapper::get_children(int id, list *children) const return b->size; } +int CrushWrapper::get_all_children(int id, set *children) const +{ + // leaf? + if (id >= 0) { + return 0; + } + + auto *b = get_bucket(id); + if (IS_ERR(b)) { + return -ENOENT; + } + + int c = 0; + for (unsigned n = 0; n < b->size; n++) { + children->insert(b->items[n]); + c++; + auto r = get_all_children(b->items[n], children); + if (r < 0) + return r; + c += r; + } + return c; +} + void CrushWrapper::get_children_of_type(int id, int type, vector *children, diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index b5973d716341..9e070419b970 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -735,6 +735,13 @@ public: * @return number of items, or error */ int get_children(int id, list *children) const; + /** + * enumerate all children of given node + * + * @param id parent bucket or device id + * @return number of items, or error + */ + int get_all_children(int id, set *children) const; void get_children_of_type(int id, int type, vector *children, diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 534de149ade9..7f5205c8d83d 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -1250,14 +1250,41 @@ bool DaemonServer::_handle_command( } else if (prefix == "osd df") { string method; cmd_getval(g_ceph_context, cmdctx->cmdmap, "output_method", method); + string filter_by; + string filter; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "filter_by", filter_by); + cmd_getval(g_ceph_context, cmdctx->cmdmap, "filter", filter); + if (filter_by.empty() != filter.empty()) { + cmdctx->reply(-EINVAL, "you must specify both 'filter_by' and 'filter'"); + return true; + } + stringstream rs; r = cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pgmap) { + string class_name; + string item_name; + // 
sanity check filter(s) + if (filter_by == "class") { + if (!osdmap.crush->class_exists(filter)) { + rs << "specified class '" << filter << "' does not exist"; + return -EINVAL; + } + class_name = filter; + } + if (filter_by == "name") { + if (!osdmap.crush->name_exists(filter)) { + rs << "specified name '" << filter << "' does not exist"; + return -EINVAL; + } + item_name = filter; + } print_osd_utilization(osdmap, pgmap, ss, - f.get(), method == "tree"); + f.get(), method == "tree", + class_name, item_name); cmdctx->odata.append(ss); return 0; }); - cmdctx->reply(r, ""); + cmdctx->reply(r, rs); return true; } else if (prefix == "osd pool stats") { string pool_name; diff --git a/src/mgr/MgrCommands.h b/src/mgr/MgrCommands.h index b5dcab56555c..4116318b9953 100644 --- a/src/mgr/MgrCommands.h +++ b/src/mgr/MgrCommands.h @@ -68,7 +68,9 @@ COMMAND("osd perf", \ "osd", \ "r") COMMAND("osd df " \ - "name=output_method,type=CephChoices,strings=plain|tree,req=false", \ + "name=output_method,type=CephChoices,strings=plain|tree,req=false " \ + "name=filter_by,type=CephChoices,strings=class|name,req=false " \ + "name=filter,type=CephString,req=false", \ "show OSD utilization", "osd", "r") COMMAND("osd blocked-by", \ "print histogram of which OSDs are blocking their peers", \ diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index d767a35328e5..fdc7cb87f656 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -387,8 +387,16 @@ public: return pool_stat_t(); } - const osd_stat_t& get_osd_sum() const { - return osd_sum; + osd_stat_t get_osd_sum(const set& osds) const { + if (osds.empty()) // all + return osd_sum; + osd_stat_t sum; + for (auto i : osds) { + auto os = get_osd_stat(i); + if (os) + sum.add(*os); + } + return sum; } const osd_stat_t *get_osd_stat(int osd) const { diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 1a148b47a77d..99d8b63e3779 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -4815,19 +4815,51 @@ public: typedef CrushTreeDumper::Dumper Parent; 
OSDUtilizationDumper(const CrushWrapper *crush, const OSDMap *osdmap_, - const PGMap& pgmap_, bool tree_) : + const PGMap& pgmap_, bool tree_, + const string& class_name_, + const string& item_name_) : Parent(crush, osdmap_->get_pool_names()), osdmap(osdmap_), pgmap(pgmap_), tree(tree_), - average_util(average_utilization()), + class_name(class_name_), + item_name(item_name_), min_var(-1), max_var(-1), stddev(0), sum(0) { + if (osdmap->crush->name_exists(item_name)) { + // filter out items we are allowed to dump + auto item_id = osdmap->crush->get_item_id(item_name); + allowed.insert(item_id); + osdmap->crush->get_all_children(item_id, &allowed); + } + average_util = average_utilization(); } protected: + + bool should_dump(int id) const { + if (!allowed.empty() && !allowed.count(id)) // filter by name + return false; + if (id >= 0 && !class_name.empty()) { + const char* item_class_name = osdmap->crush->get_item_class(id); + if (!item_class_name || // not bound to a class yet + item_class_name != class_name) // or already bound to + // a different class + return false; + } + return true; + } + + set get_dumped_osds() { + if (class_name.empty() && item_name.empty()) { + // old way, all + return {}; + } + return dumped_osds; + } + void dump_stray(F *f) { for (int i = 0; i < osdmap->get_max_osd(); i++) { if (osdmap->exists(i) && !this->is_touched(i)) @@ -4838,7 +4870,11 @@ protected: void dump_item(const CrushTreeDumper::Item &qi, F *f) override { if (!tree && qi.is_bucket()) return; + if (!should_dump(qi.id)) + return; + if (!qi.is_bucket()) + dumped_osds.insert(qi.id); float reweight = qi.is_bucket() ? 
-1 : osdmap->get_weightf(qi.id); int64_t kb = 0, kb_used = 0, kb_used_data = 0, kb_used_omap = 0, kb_used_meta = 0, kb_avail = 0; @@ -4891,7 +4927,9 @@ protected: double average_utilization() { int64_t kb = 0, kb_used = 0; for (int i = 0; i < osdmap->get_max_osd(); i++) { - if (!osdmap->exists(i) || osdmap->get_weight(i) == 0) + if (!osdmap->exists(i) || + osdmap->get_weight(i) == 0 || + !should_dump(i)) continue; int64_t kb_i, kb_used_i, kb_used_data_i, kb_used_omap_i, kb_used_meta_i, kb_avail_i; @@ -4927,7 +4965,7 @@ protected: int64_t* kb_used_meta, int64_t* kb_avail) const { if (id >= 0) { - if (osdmap->is_out(id)) { + if (osdmap->is_out(id) || !should_dump(id)) { *kb = 0; *kb_used = 0; *kb_used_data = 0; @@ -4969,11 +5007,15 @@ protected: const OSDMap *osdmap; const PGMap& pgmap; bool tree; + const string class_name; + const string item_name; double average_util; double min_var; double max_var; double stddev; double sum; + set allowed; + set dumped_osds; }; @@ -4982,8 +5024,10 @@ public: typedef OSDUtilizationDumper Parent; OSDUtilizationPlainDumper(const CrushWrapper *crush, const OSDMap *osdmap, - const PGMap& pgmap, bool tree) : - Parent(crush, osdmap, pgmap, tree) {} + const PGMap& pgmap, bool tree, + const string& class_name, + const string& item_name) : + Parent(crush, osdmap, pgmap, tree, class_name, item_name) {} void dump(TextTable *tbl) { tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT); @@ -5007,15 +5051,16 @@ public: dump_stray(tbl); + auto sum = pgmap.get_osd_sum(get_dumped_osds()); *tbl << "" << "" << "" << "TOTAL" - << byte_u_t(pgmap.get_osd_sum().statfs.total) - << byte_u_t(pgmap.get_osd_sum().statfs.get_used_raw()) - << byte_u_t(pgmap.get_osd_sum().statfs.allocated) - << byte_u_t(pgmap.get_osd_sum().statfs.omap_allocated) - << byte_u_t(pgmap.get_osd_sum().statfs.internal_metadata) - << byte_u_t(pgmap.get_osd_sum().statfs.available) + << byte_u_t(sum.statfs.total) + << byte_u_t(sum.statfs.get_used_raw()) + << 
byte_u_t(sum.statfs.allocated) + << byte_u_t(sum.statfs.omap_allocated) + << byte_u_t(sum.statfs.internal_metadata) + << byte_u_t(sum.statfs.available) << lowprecision_t(average_util) << "" << TextTable::endrow; @@ -5116,8 +5161,10 @@ public: typedef OSDUtilizationDumper Parent; OSDUtilizationFormatDumper(const CrushWrapper *crush, const OSDMap *osdmap, - const PGMap& pgmap, bool tree) : - Parent(crush, osdmap, pgmap, tree) {} + const PGMap& pgmap, bool tree, + const string& class_name, + const string& item_name) : + Parent(crush, osdmap, pgmap, tree, class_name, item_name) {} void dump(Formatter *f) { f->open_array_section("nodes"); @@ -5171,7 +5218,8 @@ protected: public: void summary(Formatter *f) { f->open_object_section("summary"); - auto& s = pgmap.get_osd_sum().statfs; + auto sum = pgmap.get_osd_sum(get_dumped_osds()); + auto& s = sum.statfs; f->dump_int("total_kb", s.kb()); f->dump_int("total_kb_used", s.kb_used_raw()); @@ -5188,21 +5236,25 @@ public: }; void print_osd_utilization(const OSDMap& osdmap, - const PGMap& pgmap, - ostream& out, - Formatter *f, - bool tree) + const PGMap& pgmap, + ostream& out, + Formatter *f, + bool tree, + const string& class_name, + const string& item_name) { const CrushWrapper *crush = osdmap.crush.get(); if (f) { f->open_object_section("df"); - OSDUtilizationFormatDumper d(crush, &osdmap, pgmap, tree); + OSDUtilizationFormatDumper d(crush, &osdmap, pgmap, tree, + class_name, item_name); d.dump(f); d.summary(f); f->close_section(); f->flush(out); } else { - OSDUtilizationPlainDumper d(crush, &osdmap, pgmap, tree); + OSDUtilizationPlainDumper d(crush, &osdmap, pgmap, tree, + class_name, item_name); TextTable tbl; d.dump(&tbl); out << tbl << d.summary() << "\n"; diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 321182498397..a477d5827e4e 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -1493,9 +1493,11 @@ inline ostream& operator<<(ostream& out, const OSDMap& m) { class PGMap; void print_osd_utilization(const 
OSDMap& osdmap, - const PGMap& pgmap, - ostream& out, - Formatter *f, - bool tree); + const PGMap& pgmap, + ostream& out, + Formatter *f, + bool tree, + const string& class_name, + const string& item_name); #endif -- 2.47.3