git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
erasure-code: add support for multiple osds in a single failure domain
author Samuel Just <sjust@redhat.com>
Wed, 13 Dec 2023 02:31:46 +0000 (18:31 -0800)
committer Samuel Just <sjust@redhat.com>
Tue, 9 Jan 2024 03:25:55 +0000 (19:25 -0800)
Adds support for crush-osds-per-failure-domain and
crush-num-failure-domains via MSR rules.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h
src/erasure-code/ErasureCode.cc
src/erasure-code/ErasureCode.h

index 69936a53d65154c8e2e2c5e4e198e8eb6789499e..4850e36f9b5cbec979e1ffe9f0772419061b0825 100644 (file)
@@ -2261,6 +2261,7 @@ void CrushWrapper::reweight_bucket(
 int CrushWrapper::add_simple_rule_at(
   string name, string root_name,
   string failure_domain_name,
+  int num_failure_domains,
   string device_class,
   string mode, int rule_type,
   int rno,
@@ -2332,17 +2333,19 @@ int CrushWrapper::add_simple_rule_at(
   }
   crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
   if (type)
-    crush_rule_set_step(rule, step++,
-                       mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
-                       CRUSH_RULE_CHOOSELEAF_INDEP,
-                       CRUSH_CHOOSE_N,
-                       type);
+    crush_rule_set_step(
+      rule, step++,
+      mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
+      CRUSH_RULE_CHOOSELEAF_INDEP,
+      num_failure_domains <= 0 ? CRUSH_CHOOSE_N : num_failure_domains,
+      type);
   else
-    crush_rule_set_step(rule, step++,
-                       mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
-                       CRUSH_RULE_CHOOSE_INDEP,
-                       CRUSH_CHOOSE_N,
-                       0);
+    crush_rule_set_step(
+      rule, step++,
+      mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
+      CRUSH_RULE_CHOOSE_INDEP,
+      num_failure_domains <= 0 ? CRUSH_CHOOSE_N : num_failure_domains,
+      0);
   crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
 
   int ret = crush_add_rule(crush, rule, rno);
@@ -2358,13 +2361,125 @@ int CrushWrapper::add_simple_rule_at(
 int CrushWrapper::add_simple_rule(
   string name, string root_name,
   string failure_domain_name,
+  int num_failure_domains,
   string device_class,
   string mode, int rule_type,
   ostream *err)
 {
-  return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
-                           mode,
-                           rule_type, -1, err);
+  return add_simple_rule_at(
+    name, root_name, failure_domain_name, num_failure_domains,
+    device_class,
+    mode,
+    rule_type, -1, err);  // rno -1: pick the lowest available rule id
+}
+
+/**
+ * Add an MSR rule that picks num_failure_domains buckets of type
+ * failure_domain_name below root_name and then osds_per_failure_domain
+ * items of type 0 (OSDs) within each of them.
+ * @param rule_type CRUSH_RULE_TYPE_MSR_INDEP or CRUSH_RULE_TYPE_MSR_FIRSTN
+ * @param rno rule id to use; a negative value picks the lowest unused id
+ * @param err optional stream receiving a description of any failure
+ * @return the new rule id on success, a negative errno value otherwise
+ */
+int CrushWrapper::add_multi_osd_per_failure_domain_rule_at(
+  string name, string root_name, string failure_domain_name,
+  int num_failure_domains, int osds_per_failure_domain,
+  string device_class, crush_rule_type rule_type, int rno,
+  ostream *err)
+{
+  if (rule_exists(name)) {
+    if (err)
+      *err << "rule " << name << " exists";
+    return -EEXIST;
+  }
+  if (rno < 0) {
+    for (rno = 0; rno < get_max_rules(); rno++) {
+      if (!rule_exists(rno))
+        break;
+    }
+  } else if (rule_exists(rno)) {
+    if (err)
+      *err << "rule with ruleno " << rno << " exists";
+    return -EEXIST;
+  }
+  if (!name_exists(root_name)) {
+    if (err)
+      *err << "root item " << root_name << " does not exist";
+    return -ENOENT;
+  }
+  int root = get_item_id(root_name);
+  int type = 0;
+  if (failure_domain_name.length()) {
+    type = get_type_id(failure_domain_name);
+    if (type < 0) {
+      if (err)
+        *err << "unknown type " << failure_domain_name;
+      return -EINVAL;
+    }
+  }
+  if (device_class.size()) {
+    if (!class_exists(device_class)) {
+      if (err)
+        *err << "device class " << device_class << " does not exist";
+      return -EINVAL;
+    }
+    int c = get_class_id(device_class);
+    if (class_bucket.count(root) == 0 || class_bucket[root].count(c) == 0) {
+      if (err)
+        *err << "root " << root_name << " has no devices with class "
+             << device_class;
+      return -EINVAL;
+    }
+    root = class_bucket[root][c];
+  }
+  if (rule_type != CRUSH_RULE_TYPE_MSR_INDEP &&
+      rule_type != CRUSH_RULE_TYPE_MSR_FIRSTN) {
+    if (err)
+      *err << "unknown rule_type " << rule_type;
+    return -EINVAL;
+  }
+
+  // Four steps: TAKE, two CHOOSE_MSR levels, EMIT.
+  crush_rule *rule = crush_make_rule(4, rule_type);
+  ceph_assert(rule);
+  int step = 0;
+  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
+  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_MSR,
+                      num_failure_domains, type);
+  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_MSR,
+                      osds_per_failure_domain, 0);
+  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+  int ret = crush_add_rule(crush, rule, rno);
+  if (ret < 0) {
+    // err is optional on every other path, so it may be null here as well.
+    if (err)
+      *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
+    return ret;
+  }
+  set_rule_name(rno, name);
+  have_rmaps = false;
+  return rno;
+}
+
+
+/**
+ * Add an MSR indep rule with several OSDs per failure domain, using the
+ * lowest unused rule id.
+ */
+int CrushWrapper::add_indep_multi_osd_per_failure_domain_rule(
+  string name, string root_name, string failure_domain_name,
+  int num_failure_domains, int osds_per_failure_domain,
+  string device_class,
+  ostream *err)
+{
+  // A negative rule id makes the _at variant search for a free one.
+  const int pick_rule_id = -1;
+  return add_multi_osd_per_failure_domain_rule_at(
+    name, root_name, failure_domain_name,
+    num_failure_domains, osds_per_failure_domain,
+    device_class, CRUSH_RULE_TYPE_MSR_INDEP,
+    pick_rule_id, err);
 }
 
 float CrushWrapper::_get_take_weight_osd_map(int root,
index 9cb1c487de9528ca802226a6a6caaa29ba222d5d..317f4c28bdd6f101cad19350461d94c52d95fa55 100644 (file)
@@ -1232,16 +1232,52 @@ public:
 
   int add_simple_rule(
     std::string name, std::string root_name, std::string failure_domain_type,
+    int num_failure_domains,  // <= 0 selects CRUSH_CHOOSE_N
     std::string device_class, std::string mode, int rule_type,
     std::ostream *err = 0);
+  /// Overload without num_failure_domains: forwards -1 in its place.
+  int add_simple_rule(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    std::string device_class, std::string mode, int rule_type,
+    std::ostream *err = 0) {
+    return add_simple_rule(name, root_name, failure_domain_type,
+      /*num_failure_domains=*/-1, device_class, mode, rule_type, err);
+  }
+
+  /// Add an MSR indep rule: num_failure_domains buckets of
+  /// failure_domain_type, osds_per_failure_domain OSDs within each.
+  int add_indep_multi_osd_per_failure_domain_rule(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    int num_failure_domains, int osds_per_failure_domain,
+    std::string device_class, std::ostream *err = 0);
 
   /**
    * @param rno rule[set] id to use, -1 to pick the lowest available
    */
   int add_simple_rule_at(
     std::string name, std::string root_name,
-    std::string failure_domain_type, std::string device_class, std::string mode,
+    std::string failure_domain_type,
+    int num_failure_domains,  // <= 0 selects CRUSH_CHOOSE_N
+    std::string device_class, std::string mode,
     int rule_type, int rno, std::ostream *err = 0);
+  /// Overload without num_failure_domains: forwards -1 in its place.
+  int add_simple_rule_at(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    std::string device_class, std::string mode,
+    int rule_type, int rno, std::ostream *err = 0) {
+    const int unset_num_failure_domains = -1;
+    return add_simple_rule_at(name, root_name, failure_domain_type,
+      unset_num_failure_domains, device_class, mode, rule_type, rno, err);
+  }
+
+  /// Add an MSR rule: num_failure_domains buckets of failure_domain_type,
+  /// osds_per_failure_domain OSDs in each. Negative rno picks a free rule id.
+  int add_multi_osd_per_failure_domain_rule_at(
+    std::string name, std::string root_name, std::string failure_domain_type,
+    int num_failure_domains, int osds_per_failure_domain,
+    std::string device_class,
+    crush_rule_type rule_type, int rno,
+    std::ostream *err = 0);
 
   int remove_rule(int ruleno);
 
index 5212baee25187072512da7bd137ae640cf92bf96..928d05f2adb0aada951d72829ad59b9f61c72c3f 100644 (file)
@@ -52,6 +52,12 @@ int ErasureCode::init(
   err |= to_string("crush-failure-domain", profile,
                   &rule_failure_domain,
                   DEFAULT_RULE_FAILURE_DOMAIN, ss);
+  err |= to_int("crush-osds-per-failure-domain", profile,
+               &rule_osds_per_failure_domain,
+               "0", ss);
+  err |= to_int("crush-num-failure-domains", profile,
+               &rule_num_failure_domains,
+               "0", ss);
   err |= to_string("crush-device-class", profile,
                   &rule_device_class,
                   "", ss);
@@ -66,19 +72,33 @@ int ErasureCode::create_rule(
   CrushWrapper &crush,
   std::ostream *ss) const
 {
-  int ruleid = crush.add_simple_rule(
-    name,
-    rule_root,
-    rule_failure_domain,
-    rule_device_class,
-    "indep",
-    pg_pool_t::TYPE_ERASURE,
-    ss);
-
-  if (ruleid < 0)
-    return ruleid;
-
-  return ruleid;
+  // At most one OSD per failure domain: a plain indep rule is sufficient.
+  if (rule_osds_per_failure_domain <= 1) {
+    return crush.add_simple_rule(
+      name,
+      rule_root,
+      rule_failure_domain,
+      rule_num_failure_domains,
+      rule_device_class,
+      "indep",
+      pg_pool_t::TYPE_ERASURE,
+      ss);
+  }
+
+  // Several OSDs per failure domain need an MSR rule, which requires an
+  // explicit failure domain count.
+  if (rule_num_failure_domains < 1) {
+    // Fail whether or not the caller supplied a stream for the message.
+    if (ss) {
+      *ss << "crush-num-failure-domains " << rule_num_failure_domains
+          << " must be >= 1 if crush-osds-per-failure-domain specified";
+    }
+    return -EINVAL;
+  }
+  return crush.add_indep_multi_osd_per_failure_domain_rule(
+    name, rule_root, rule_failure_domain,
+    rule_num_failure_domains, rule_osds_per_failure_domain,
+    rule_device_class, ss);
 }
 
 int ErasureCode::sanity_check_k_m(int k, int m, ostream *ss)
index c246d5dc6b67d9a2d865f6876be53ed2d2ad0390..fd6d1a41f714dd131ff838e440b5cc02cc5fd6e2 100644 (file)
@@ -37,6 +37,8 @@ namespace ceph {
     std::string rule_root;
     std::string rule_failure_domain;
     std::string rule_device_class;
+    int rule_osds_per_failure_domain = -1;
+    int rule_num_failure_domains = -1;
 
     ~ErasureCode() override {}