From: xie xingguo Date: Wed, 24 Apr 2019 00:57:17 +0000 (+0800) Subject: osd: add no{up,down,in,out} flags by device class X-Git-Tag: v14.2.2~77^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2a6583ad75355a2e3723c397546517cc49b44ff9;p=ceph.git osd: add no{up,down,in,out} flags by device class This works as a good supplement of https://github.com/ceph/ceph/pull/27563. Signed-off-by: xie xingguo (cherry picked from commit 5d695267ef03e56bf8b28853554d74e4b94b03b7) Conflicts: slight conflict from the "Remove dependence on 'using namespace'" change, see https://github.com/ceph/ceph/pull/27255 --- diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index e38c91d77cd5..abe0f1830f2b 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -1675,6 +1675,44 @@ function test_mon_osd() ceph osd crush rm foo ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep 'foo' + # test device class flags + osd_0_device_class=$(ceph osd crush get-device-class osd.0) + ceph osd set-group noup $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd set-group noup,nodown $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd set-group noup,nodown,noin $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq 
".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + + ceph osd unset-group noup $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown,noin $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep $osd_0_device_class + # make sure mark out preserves weight ceph osd reweight osd.0 .5 ceph osd dump | grep ^osd.0 | grep 'weight 0.5' 
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 5abd0f42d631..a1ad8b584315 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -505,6 +505,12 @@ public: return 0; return get_class_name(p->second); } + int get_item_class_id(int t) const { + auto p = class_map.find(t); + if (p == class_map.end()) + return -ENOENT; + return p->second; + } int set_item_class(int i, const string& name) { if (!is_valid_crush_name(name)) return -EINVAL; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index db0ff48f21a2..34f9fcf93c09 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2460,6 +2460,14 @@ bool OSDMonitor::can_mark_down(int i) return false; } + if (auto class_id = osdmap.crush->get_item_class_id(i); class_id >= 0 && + (osdmap.get_device_class_flags(class_id) & CEPH_OSD_NODOWN)) { + dout(5) << __func__ << " osd." << i + << " is marked as nodown via device class, " + << "will not mark it down" << dendl; + return false; + } + int num_osds = osdmap.get_num_osds(); if (num_osds == 0) { dout(5) << __func__ << " no osds" << dendl; @@ -2497,6 +2505,14 @@ bool OSDMonitor::can_mark_up(int i) return false; } + if (auto class_id = osdmap.crush->get_item_class_id(i); class_id >= 0 && + (osdmap.get_device_class_flags(class_id) & CEPH_OSD_NOUP)) { + dout(5) << __func__ << " osd." << i + << " is marked as noup via device class, " + << "will not mark it up" << dendl; + return false; + } + return true; } @@ -2524,6 +2540,14 @@ bool OSDMonitor::can_mark_out(int i) return false; } + if (auto class_id = osdmap.crush->get_item_class_id(i); class_id >= 0 && + (osdmap.get_device_class_flags(class_id) & CEPH_OSD_NOOUT)) { + dout(5) << __func__ << " osd." 
<< i + << " is marked as noout via device class, " + << "will not mark it out" << dendl; + return false; + } + int num_osds = osdmap.get_num_osds(); if (num_osds == 0) { dout(5) << __func__ << " no osds" << dendl; @@ -2567,6 +2591,14 @@ bool OSDMonitor::can_mark_in(int i) return false; } + if (auto class_id = osdmap.crush->get_item_class_id(i); class_id >= 0 && + (osdmap.get_device_class_flags(class_id) & CEPH_OSD_NOIN)) { + dout(5) << __func__ << " osd." << i + << " is marked as noin via device class, " + << "will not mark it in" << dendl; + return false; + } + return true; } @@ -10570,6 +10602,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } set osds; set crush_nodes; + set device_classes; for (auto& w : who) { if (w == "any" || w == "all" || w == "*") { osdmap.get_all_osds(osds); @@ -10580,11 +10613,14 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, osds.insert(osd); } else if (osdmap.crush->name_exists(w)) { crush_nodes.insert(osdmap.crush->get_item_id(w)); + } else if (osdmap.crush->class_exists(w)) { + device_classes.insert(osdmap.crush->get_class_id(w)); } else { - ss << "unable to parse osd id or crush node:\"" << w << "\". "; + ss << "unable to parse osd id or crush node or device class: " + << "\"" << w << "\". 
"; } } - if (osds.empty() && crush_nodes.empty()) { + if (osds.empty() && crush_nodes.empty() && device_classes.empty()) { // ss has reason for failure err = -EINVAL; goto reply; @@ -10650,6 +10686,17 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } any = true; } + for (auto& id : device_classes) { + auto old_flags = osdmap.get_device_class_flags(id); + auto& pending_flags = pending_inc.new_device_class_flags[id]; + pending_flags |= old_flags; + if (do_set) { + pending_flags |= flags; + } else { + pending_flags &= ~flags; + } + any = true; + } if (any) { getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs, diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index d846231164ff..0fdf0bc464a9 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -581,7 +581,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const } { - uint8_t target_v = 8; + uint8_t target_v = 9; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { target_v = 2; } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { @@ -625,6 +625,9 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const if (target_v >= 8) { encode(new_crush_node_flags, bl); } + if (target_v >= 9) { + encode(new_device_class_flags, bl); + } ENCODE_FINISH(bl); // osd-only data } @@ -834,7 +837,7 @@ void OSDMap::Incremental::decode(bufferlist::const_iterator& bl) } { - DECODE_START(8, bl); // extended, osd-only data + DECODE_START(9, bl); // extended, osd-only data decode(new_hb_back_up, bl); decode(new_up_thru, bl); decode(new_last_clean_interval, bl); @@ -888,6 +891,9 @@ void OSDMap::Incremental::decode(bufferlist::const_iterator& bl) if (struct_v >= 8) { decode(new_crush_node_flags, bl); } + if (struct_v >= 9) { + decode(new_device_class_flags, bl); + } DECODE_FINISH(bl); // osd-only data } @@ -1203,6 +1209,18 @@ void OSDMap::Incremental::dump(Formatter *f) const f->close_section(); } f->close_section(); + 
f->open_array_section("new_device_class_flags"); + for (auto& i : new_device_class_flags) { + f->open_object_section("device_class"); + f->dump_int("id", i.first); + set st; + calc_state_set(i.second, st); + for (auto& j : st) { + f->dump_string("flag", j); + } + f->close_section(); + } + f->close_section(); f->close_section(); } @@ -2113,6 +2131,14 @@ int OSDMap::apply_incremental(const Incremental &inc) } } + for (auto& i : inc.new_device_class_flags) { + if (i.second) { + device_class_flags[i.first] = i.second; + } else { + device_class_flags.erase(i.first); + } + } + // cluster snapshot? if (inc.cluster_snapshot.length()) { cluster_snapshot = inc.cluster_snapshot; @@ -2161,6 +2187,14 @@ int OSDMap::apply_incremental(const Incremental &inc) // it in the canonical version, don't change it. ++crush_version; } + for (auto it = device_class_flags.begin(); + it != device_class_flags.end();) { + const char* class_name = crush->get_class_name(it->first); + if (!class_name) // device class is gone + it = device_class_flags.erase(it); + else + it++; + } } calc_num_osds(); @@ -2811,7 +2845,7 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const { // NOTE: any new encoding dependencies must be reflected by // SIGNIFICANT_FEATURES - uint8_t target_v = 8; + uint8_t target_v = 9; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { target_v = 1; } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { @@ -2864,6 +2898,9 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const if (target_v >= 8) { encode(crush_node_flags, bl); } + if (target_v >= 9) { + encode(device_class_flags, bl); + } ENCODE_FINISH(bl); // osd-only data } @@ -3124,7 +3161,7 @@ void OSDMap::decode(bufferlist::const_iterator& bl) } { - DECODE_START(8, bl); // extended, osd-only data + DECODE_START(9, bl); // extended, osd-only data decode(osd_addrs->hb_back_addrs, bl); decode(osd_info, bl); decode(blacklist, bl); @@ -3185,6 +3222,11 @@ void OSDMap::decode(bufferlist::const_iterator& bl) } else { 
crush_node_flags.clear(); } + if (struct_v >= 9) { + decode(device_class_flags, bl); + } else { + device_class_flags.clear(); + } DECODE_FINISH(bl); // osd-only data } @@ -3442,6 +3484,19 @@ void OSDMap::dump(Formatter *f) const f->close_section(); } f->close_section(); + f->open_object_section("device_class_flags"); + for (auto& i : device_class_flags) { + const char* class_name = crush->get_class_name(i.first); + string s = class_name ? class_name : stringify(i.first); + f->open_array_section(s.c_str()); + set st; + calc_state_set(i.second, st); + for (auto& j : st) { + f->dump_string("flag", j); + } + f->close_section(); + } + f->close_section(); } void OSDMap::generate_test_instances(list& o) @@ -5624,9 +5679,19 @@ void OSDMap::check_health(health_check_map_t *checks) const detail.push_back(ss.str()); } } + for (auto& i : device_class_flags) { + const char* class_name = crush->get_class_name(i.first); + if (i.second && class_name) { + ostringstream ss; + set states; + OSDMap::calc_state_set(i.second, states); + ss << "device class '" << class_name << "' has flags " << states; + detail.push_back(ss.str()); + } + } if (!detail.empty()) { ostringstream ss; - ss << detail.size() << " OSDs or CRUSH nodes have {NOUP,NODOWN,NOIN,NOOUT} flags set"; + ss << detail.size() << " OSDs or CRUSH {nodes, device-classes} have {NOUP,NODOWN,NOIN,NOOUT} flags set"; auto& d = checks->add("OSD_FLAGS", HEALTH_WARN, ss.str()); d.detail.swap(detail); } @@ -5851,3 +5916,12 @@ unsigned OSDMap::get_crush_node_flags(int id) const flags = it->second; return flags; } + +unsigned OSDMap::get_device_class_flags(int id) const +{ + unsigned flags = 0; + auto it = device_class_flags.find(id); + if (it != device_class_flags.end()) + flags = it->second; + return flags; +} diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 1cc3a1479aa1..21051a1cfdaa 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -400,6 +400,7 @@ public: mempool::osdmap::map new_purged_snaps; mempool::osdmap::map 
new_crush_node_flags; + mempool::osdmap::map new_device_class_flags; string cluster_snapshot; @@ -517,6 +518,7 @@ private: vector osd_state; mempool::osdmap::map crush_node_flags; // crush node -> CEPH_OSD_* flags + mempool::osdmap::map device_class_flags; // device class -> CEPH_OSD_* flags utime_t last_up_change, last_in_change; @@ -833,6 +835,7 @@ public: unsigned get_osd_crush_node_flags(int osd) const; unsigned get_crush_node_flags(int id) const; + unsigned get_device_class_flags(int id) const; bool is_noup(int osd) const { return exists(osd) && (osd_state[osd] & CEPH_OSD_NOUP);