From: Samuel Just
Date: Wed, 13 Dec 2023 02:31:46 +0000 (-0800)
Subject: erasure-code: add support for multiple osds in a single failure domain
X-Git-Tag: v19.1.0~434^2~5
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=404e84f9aa8d20ec7232cb5b72f6a1619151d3df;p=ceph.git

erasure-code: add support for multiple osds in a single failure domain

Adds support for crush-osds-per-failure-domain and
crush-num-failure-domains via MSR rules.

Signed-off-by: Samuel Just
---
Review notes (this copy differs from the commit named in X-Git-Url):
 * CrushWrapper.cc: add_multi_osd_per_failure_domain_rule_at() now checks
   the optional err stream before writing the crush_add_rule() failure
   message, like every other error path in that function.
 * CrushWrapper.h: the two new declarations list num_failure_domains
   before osds_per_failure_domain, matching the definitions and the call
   in ErasureCode::create_rule(); both methods gained doc comments.
 * ErasureCode.cc: create_rule() returns -EINVAL for
   crush-num-failure-domains < 1 even when no error stream is supplied.
 * Hunk lengths were updated for the added lines. The post-image blob ids
   on the "index" lines are those of the original commit, and the original
   tab/space indentation was not recoverable, so apply with
   whitespace-insensitive matching (e.g. patch -l).

diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 69936a53d6515..4850e36f9b5cb 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -2261,6 +2261,7 @@ void CrushWrapper::reweight_bucket(
 int CrushWrapper::add_simple_rule_at(
   string name, string root_name,
   string failure_domain_name,
+  int num_failure_domains,
   string device_class,
   string mode, int rule_type,
   int rno,
@@ -2332,17 +2333,19 @@ int CrushWrapper::add_simple_rule_at(
   }
   crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
   if (type)
-    crush_rule_set_step(rule, step++,
-                        mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
-                        CRUSH_RULE_CHOOSELEAF_INDEP,
-                        CRUSH_CHOOSE_N,
-                        type);
+    crush_rule_set_step(
+      rule, step++,
+      mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
+      CRUSH_RULE_CHOOSELEAF_INDEP,
+      num_failure_domains <= 0 ? CRUSH_CHOOSE_N : num_failure_domains,
+      type);
   else
-    crush_rule_set_step(rule, step++,
-                        mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
-                        CRUSH_RULE_CHOOSE_INDEP,
-                        CRUSH_CHOOSE_N,
-                        0);
+    crush_rule_set_step(
+      rule, step++,
+      mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
+      CRUSH_RULE_CHOOSE_INDEP,
+      num_failure_domains <= 0 ? CRUSH_CHOOSE_N : num_failure_domains,
+      0);
   crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
 
   int ret = crush_add_rule(crush, rule, rno);
@@ -2358,13 +2361,127 @@ int CrushWrapper::add_simple_rule_at(
 int CrushWrapper::add_simple_rule(
   string name, string root_name,
   string failure_domain_name,
+  int num_failure_domains,
   string device_class,
   string mode, int rule_type,
   ostream *err)
 {
-  return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
-                            mode,
-                            rule_type, -1, err);
+  return add_simple_rule_at(
+    name, root_name, failure_domain_name, num_failure_domains,
+    device_class,
+    mode,
+    rule_type, -1, err);
+}
+
+int CrushWrapper::add_multi_osd_per_failure_domain_rule_at(
+  string name, string root_name, string failure_domain_name,
+  int num_failure_domains,
+  int osds_per_failure_domain,
+  string device_class,
+  crush_rule_type rule_type,
+  int rno,
+  ostream *err)
+{
+  if (rule_exists(name)) {
+    if (err)
+      *err << "rule " << name << " exists";
+    return -EEXIST;
+  }
+  if (rno >= 0) {
+    if (rule_exists(rno)) {
+      if (err)
+        *err << "rule with ruleno " << rno << " exists";
+      return -EEXIST;
+    }
+  } else {
+    for (rno = 0; rno < get_max_rules(); rno++) {
+      if (!rule_exists(rno))
+        break;
+    }
+  }
+  if (!name_exists(root_name)) {
+    if (err)
+      *err << "root item " << root_name << " does not exist";
+    return -ENOENT;
+  }
+  int root = get_item_id(root_name);
+  int type = 0;
+  if (failure_domain_name.length()) {
+    type = get_type_id(failure_domain_name);
+    if (type < 0) {
+      if (err)
+        *err << "unknown type " << failure_domain_name;
+      return -EINVAL;
+    }
+  }
+  if (device_class.size()) {
+    if (!class_exists(device_class)) {
+      if (err)
+        *err << "device class " << device_class << " does not exist";
+      return -EINVAL;
+    }
+    int c = get_class_id(device_class);
+    if (class_bucket.count(root) == 0 ||
+        class_bucket[root].count(c) == 0) {
+      if (err)
+        *err << "root " << root_name << " has no devices with class "
+             << device_class;
+      return -EINVAL;
+    }
+    root = class_bucket[root][c];
+  }
+  if (rule_type != CRUSH_RULE_TYPE_MSR_INDEP &&
+      rule_type != CRUSH_RULE_TYPE_MSR_FIRSTN) {
+    if (err)
+      *err << "unknown rule_type " << rule_type;
+    return -EINVAL;
+  }
+
+  int steps = 4;
+  crush_rule *rule = crush_make_rule(steps, rule_type);
+  ceph_assert(rule);
+  int step = 0;
+  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
+  crush_rule_set_step(rule, step++,
+                      CRUSH_RULE_CHOOSE_MSR,
+                      num_failure_domains,
+                      type);
+  crush_rule_set_step(rule, step++,
+                      CRUSH_RULE_CHOOSE_MSR,
+                      osds_per_failure_domain,
+                      0);
+  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+
+  int ret = crush_add_rule(crush, rule, rno);
+  if(ret < 0) {
+    // err is optional, so guard it here like the error paths above
+    if (err)
+      *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
+    return ret;
+  }
+  set_rule_name(rno, name);
+  have_rmaps = false;
+  return rno;
+}
+
+
+int CrushWrapper::add_indep_multi_osd_per_failure_domain_rule(
+  string name, string root_name,
+  string failure_domain_name,
+  int num_failure_domains,
+  int osds_per_failure_domain,
+  string device_class,
+  ostream *err)
+{
+  return add_multi_osd_per_failure_domain_rule_at(
+    name, root_name,
+    failure_domain_name,
+    num_failure_domains,
+    osds_per_failure_domain,
+    device_class,
+    CRUSH_RULE_TYPE_MSR_INDEP,
+    -1,
+    err);
 }
 
 float CrushWrapper::_get_take_weight_osd_map(int root,
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 9cb1c487de952..317f4c28bdd6f 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -1232,16 +1232,67 @@ public:
 
   int add_simple_rule(
     std::string name, std::string root_name, std::string failure_domain_type,
+    int num_failure_domains,
     std::string device_class, std::string mode, int rule_type,
     std::ostream *err = 0);
+  int add_simple_rule(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    std::string device_class, std::string mode, int rule_type,
+    std::ostream *err = 0) {
+    return add_simple_rule(
+      name, root_name, failure_domain_type, -1,
+      device_class, mode, rule_type, err);
+  }
+
+  /**
+   * Add an MSR indep rule placing several OSDs in each failure domain.
+   *
+   * @param num_failure_domains count for the failure-domain choose step
+   * @param osds_per_failure_domain count for the per-domain OSD choose step
+   * @return the new rule id, or a negative errno on failure
+   */
+  int add_indep_multi_osd_per_failure_domain_rule(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    int num_failure_domains,
+    int osds_per_failure_domain,
+    std::string device_class,
+    std::ostream *err = 0);
 
   /**
    * @param rno rule[set] id to use, -1 to pick the lowest available
    */
   int add_simple_rule_at(
     std::string name, std::string root_name,
-    std::string failure_domain_type, std::string device_class, std::string mode,
+    std::string failure_domain_type,
+    int num_failure_domains,
+    std::string device_class, std::string mode,
     int rule_type, int rno, std::ostream *err = 0);
+  int add_simple_rule_at(
+    std::string name, std::string root_name,
+    std::string failure_domain_type,
+    std::string device_class, std::string mode,
+    int rule_type, int rno, std::ostream *err = 0) {
+    return add_simple_rule_at(
+      name, root_name, failure_domain_type, -1,
+      device_class, mode, rule_type, rno, err);
+  }
+
+  /**
+   * Add an MSR rule: take root, choose num_failure_domains of the failure
+   * domain type, choose osds_per_failure_domain of type 0, then emit.
+   *
+   * @param rule_type CRUSH_RULE_TYPE_MSR_INDEP or CRUSH_RULE_TYPE_MSR_FIRSTN
+   * @param rno rule id to use, -1 to pick the lowest available
+   * @return the rule id, or a negative errno on failure
+   */
+  int add_multi_osd_per_failure_domain_rule_at(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    int num_failure_domains,
+    int osds_per_failure_domain,
+    std::string device_class,
+    crush_rule_type rule_type,
+    int rno,
+    std::ostream *err = 0);
 
   int remove_rule(int ruleno);
 
diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc
index 5212baee25187..928d05f2adb0a 100644
--- a/src/erasure-code/ErasureCode.cc
+++ b/src/erasure-code/ErasureCode.cc
@@ -52,6 +52,12 @@ int ErasureCode::init(
   err |= to_string("crush-failure-domain", profile,
                    &rule_failure_domain,
                    DEFAULT_RULE_FAILURE_DOMAIN, ss);
+  err |= to_int("crush-osds-per-failure-domain", profile,
+                &rule_osds_per_failure_domain,
+                "0", ss);
+  err |= to_int("crush-num-failure-domains", profile,
+                &rule_num_failure_domains,
+                "0", ss);
   err |= to_string("crush-device-class", profile,
                    &rule_device_class,
                    "", ss);
@@ -66,19 +72,34 @@ int ErasureCode::create_rule(
   CrushWrapper &crush,
   std::ostream *ss) const
 {
-  int ruleid = crush.add_simple_rule(
-    name,
-    rule_root,
-    rule_failure_domain,
-    rule_device_class,
-    "indep",
-    pg_pool_t::TYPE_ERASURE,
-    ss);
-
-  if (ruleid < 0)
-    return ruleid;
-
-  return ruleid;
+  if (rule_osds_per_failure_domain <= 1) {
+    return crush.add_simple_rule(
+      name,
+      rule_root,
+      rule_failure_domain,
+      rule_num_failure_domains,
+      rule_device_class,
+      "indep",
+      pg_pool_t::TYPE_ERASURE,
+      ss);
+  } else {
+    if (rule_num_failure_domains < 1) {
+      if (ss) {
+        *ss << "crush-num-failure-domains " << rule_num_failure_domains
+            << " must be >= 1 if crush-osds-per-failure-domain specified";
+      }
+      // fail even when the caller passed no stream to report into
+      return -EINVAL;
+    }
+    return crush.add_indep_multi_osd_per_failure_domain_rule(
+      name,
+      rule_root,
+      rule_failure_domain,
+      rule_num_failure_domains,
+      rule_osds_per_failure_domain,
+      rule_device_class,
+      ss);
+  }
 }
 
 int ErasureCode::sanity_check_k_m(int k, int m, ostream *ss)
diff --git a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
index c246d5dc6b67d..fd6d1a41f714d 100644
--- a/src/erasure-code/ErasureCode.h
+++ b/src/erasure-code/ErasureCode.h
@@ -37,6 +37,8 @@ namespace ceph {
     std::string rule_root;
     std::string rule_failure_domain;
     std::string rule_device_class;
+    int rule_osds_per_failure_domain = -1;
+    int rule_num_failure_domains = -1;
 
     ~ErasureCode() override {}
 